# Compare commits

**120 commits** — `feature/se`...`ci/support`
Commit SHA1s in this compare (author and date were not captured):

```
cc93d2d483 7642c17ec0 cb60dcf352 5ffe05d519 8e2f07c941 0b6e615075
be251c6fb3 efb1e89e33 529c447413 1eaf95c06b 138ed17d8b a880c41d89
2a9ae61dce 1f21911fa1 6f0a58f5d2 8206dce821 ced1afaa8a d6c602c567
a252a7fefd 83b06c21cc f5214da54c e3d4dd0127 d0ee0d72f5 521f0550cd
8a09691e91 459ad7d9c9 d102d27731 01810c40a1 b7d33e1cbf 5b34b5a78c
c091d2316b e8862b8a8b 1b46ab699d ac1995f63f de93669652 dffc124920
932ceb0287 824d48fd85 47fdab0382 ed7ddc6375 cf06f4a8c0 a2fa21f65c
61e915968f 4949b22457 1fb0eb94c2 9aefb554bc a4338669a9 1fa9ea496c
31756a2233 166583621b ca952c4674 4054778b6c 56a5f00015 a96d50c481
4806212f46 2486f3c6b2 f25bebf6ee 22dad6d0fc 03eab66d35 97b1ab23d8
9fff0ba430 7d3e91b2e6 74957a9ec5 2d035c46cf 53445fe72a 37cc8956c5
197c82f921 2c52493a9c 2ee2ba6b8c bafcf1694a 95792aab15 38ae2c3a3e
249d3c1b7f 9647f94f89 afc288d2cf df01ce6aad aea93bc96b 4e84f30f8b
b20a0a4fa5 6eb1babc86 9a9c2f76a2 56cc171287 0295637ed6 9c6dd37316
524d13209a 9199db3927 a0652c7c73 89c262ee20 7f9cf559cf bbe039c868
4e5c09a2a5 7f65598332 75315ed91e 7fe7d17b43 7e517b5801 38ba9021d1
ddebad48d3 1cebf2e296 1d6e67d837 cfb4b6e4ce f418c403d6 be4221af46
ca07606b05 baf1bf2eb7 4ef3a8d72b 09dd756eff ec8ef6210c a9b7a4d7a9
5119d5ccf9 91efd1d03d aa776226b0 e9435150e9 d399b966e6 f5f0e25384
04de33e5f7 37dfea25e1 e2166bc25f b5e8f039bf 67bfdf47a5 9f898f68db
```
```diff
@@ -1,12 +1,9 @@
-when:
-  - event: [push, pull_request]
-
 steps:
   # ===========================================
   # PR VALIDATION: Parallel type checks (PRs only)
   # ===========================================
   typecheck-backend:
-    image: node:20
+    image: code.cannabrands.app/creationshop/node:20
     commands:
       - cd backend
      - npm ci --prefer-offline
@@ -16,7 +13,7 @@ steps:
       event: pull_request
 
   typecheck-cannaiq:
-    image: node:20
+    image: code.cannabrands.app/creationshop/node:20
     commands:
       - cd cannaiq
       - npm ci --prefer-offline
@@ -26,7 +23,7 @@ steps:
       event: pull_request
 
   typecheck-findadispo:
-    image: node:20
+    image: code.cannabrands.app/creationshop/node:20
     commands:
       - cd findadispo/frontend
       - npm ci --prefer-offline
@@ -36,7 +33,7 @@ steps:
       event: pull_request
 
   typecheck-findagram:
-    image: node:20
+    image: code.cannabrands.app/creationshop/node:20
     commands:
       - cd findagram/frontend
       - npm ci --prefer-offline
@@ -45,6 +42,31 @@ steps:
     when:
       event: pull_request
 
+  # ===========================================
+  # AUTO-MERGE: Merge PR after all checks pass
+  # ===========================================
+  auto-merge:
+    image: alpine:latest
+    environment:
+      GITEA_TOKEN:
+        from_secret: gitea_token
+    commands:
+      - apk add --no-cache curl
+      - |
+        echo "Merging PR #${CI_COMMIT_PULL_REQUEST}..."
+        curl -s -X POST \
+          -H "Authorization: token $GITEA_TOKEN" \
+          -H "Content-Type: application/json" \
+          -d '{"Do":"merge"}' \
+          "https://code.cannabrands.app/api/v1/repos/Creationshop/dispensary-scraper/pulls/${CI_COMMIT_PULL_REQUEST}/merge"
+    depends_on:
+      - typecheck-backend
+      - typecheck-cannaiq
+      - typecheck-findadispo
+      - typecheck-findagram
+    when:
+      event: pull_request
+
   # ===========================================
   # MASTER DEPLOY: Parallel Docker builds
   # ===========================================
@@ -65,14 +87,14 @@ steps:
       platforms: linux/amd64
       provenance: false
       build_args:
-        - APP_BUILD_VERSION=${CI_COMMIT_SHA:0:8}
-        - APP_GIT_SHA=${CI_COMMIT_SHA}
-        - APP_BUILD_TIME=${CI_PIPELINE_CREATED}
-        - CONTAINER_IMAGE_TAG=${CI_COMMIT_SHA:0:8}
+        APP_BUILD_VERSION: ${CI_COMMIT_SHA:0:8}
+        APP_GIT_SHA: ${CI_COMMIT_SHA}
+        APP_BUILD_TIME: ${CI_PIPELINE_CREATED}
+        CONTAINER_IMAGE_TAG: ${CI_COMMIT_SHA:0:8}
     depends_on: []
     when:
       branch: master
-      event: push
+      event: [push, manual]
 
   docker-cannaiq:
     image: woodpeckerci/plugin-docker-buildx
@@ -93,7 +115,7 @@ steps:
     depends_on: []
     when:
       branch: master
-      event: push
+      event: [push, manual]
 
   docker-findadispo:
     image: woodpeckerci/plugin-docker-buildx
@@ -114,7 +136,7 @@ steps:
     depends_on: []
     when:
       branch: master
-      event: push
+      event: [push, manual]
 
   docker-findagram:
     image: woodpeckerci/plugin-docker-buildx
@@ -135,10 +157,35 @@ steps:
     depends_on: []
     when:
       branch: master
-      event: push
+      event: [push, manual]
 
   # ===========================================
-  # STAGE 3: Deploy (after all Docker builds)
+  # STAGE 3: Run Database Migrations (before deploy)
+  # ===========================================
+  migrate:
+    image: code.cannabrands.app/creationshop/dispensary-scraper:${CI_COMMIT_SHA:0:8}
+    environment:
+      CANNAIQ_DB_HOST:
+        from_secret: db_host
+      CANNAIQ_DB_PORT:
+        from_secret: db_port
+      CANNAIQ_DB_NAME:
+        from_secret: db_name
+      CANNAIQ_DB_USER:
+        from_secret: db_user
+      CANNAIQ_DB_PASS:
+        from_secret: db_pass
+    commands:
+      - cd /app
+      - node dist/db/migrate.js
+    depends_on:
+      - docker-backend
+    when:
+      branch: master
+      event: [push, manual]
+
+  # ===========================================
+  # STAGE 4: Deploy (after migrations)
   # ===========================================
   deploy:
     image: bitnami/kubectl:latest
@@ -150,17 +197,17 @@ steps:
       - echo "$KUBECONFIG_CONTENT" | tr -d '[:space:]' | base64 -d > ~/.kube/config
       - chmod 600 ~/.kube/config
       - kubectl set image deployment/scraper scraper=code.cannabrands.app/creationshop/dispensary-scraper:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
-      - kubectl set image deployment/scraper-worker scraper-worker=code.cannabrands.app/creationshop/dispensary-scraper:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
+      - kubectl set image deployment/scraper-worker worker=code.cannabrands.app/creationshop/dispensary-scraper:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
       - kubectl set image deployment/cannaiq-frontend cannaiq-frontend=code.cannabrands.app/creationshop/cannaiq-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
       - kubectl set image deployment/findadispo-frontend findadispo-frontend=code.cannabrands.app/creationshop/findadispo-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
       - kubectl set image deployment/findagram-frontend findagram-frontend=code.cannabrands.app/creationshop/findagram-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
       - kubectl rollout status deployment/scraper -n dispensary-scraper --timeout=300s
       - kubectl rollout status deployment/cannaiq-frontend -n dispensary-scraper --timeout=120s
     depends_on:
-      - docker-backend
+      - migrate
       - docker-cannaiq
       - docker-findadispo
       - docker-findagram
     when:
       branch: master
-      event: push
+      event: [push, manual]
```
**`.woodpecker/ci.yml`** (new file, 213 lines)
```yaml
steps:
  # ===========================================
  # PR VALIDATION: Parallel type checks (PRs only)
  # ===========================================
  typecheck-backend:
    image: code.cannabrands.app/creationshop/node:20
    commands:
      - cd backend
      - npm ci --prefer-offline
      - npx tsc --noEmit
    depends_on: []
    when:
      event: pull_request

  typecheck-cannaiq:
    image: code.cannabrands.app/creationshop/node:20
    commands:
      - cd cannaiq
      - npm ci --prefer-offline
      - npx tsc --noEmit
    depends_on: []
    when:
      event: pull_request

  typecheck-findadispo:
    image: code.cannabrands.app/creationshop/node:20
    commands:
      - cd findadispo/frontend
      - npm ci --prefer-offline
      - npx tsc --noEmit 2>/dev/null || true
    depends_on: []
    when:
      event: pull_request

  typecheck-findagram:
    image: code.cannabrands.app/creationshop/node:20
    commands:
      - cd findagram/frontend
      - npm ci --prefer-offline
      - npx tsc --noEmit 2>/dev/null || true
    depends_on: []
    when:
      event: pull_request

  # ===========================================
  # AUTO-MERGE: Merge PR after all checks pass
  # ===========================================
  auto-merge:
    image: alpine:latest
    environment:
      GITEA_TOKEN:
        from_secret: gitea_token
    commands:
      - apk add --no-cache curl
      - |
        echo "Merging PR #${CI_COMMIT_PULL_REQUEST}..."
        curl -s -X POST \
          -H "Authorization: token $GITEA_TOKEN" \
          -H "Content-Type: application/json" \
          -d '{"Do":"merge"}' \
          "https://code.cannabrands.app/api/v1/repos/Creationshop/dispensary-scraper/pulls/${CI_COMMIT_PULL_REQUEST}/merge"
    depends_on:
      - typecheck-backend
      - typecheck-cannaiq
      - typecheck-findadispo
      - typecheck-findagram
    when:
      event: pull_request

  # ===========================================
  # MASTER DEPLOY: Parallel Docker builds
  # ===========================================
  docker-backend:
    image: woodpeckerci/plugin-docker-buildx
    settings:
      registry: code.cannabrands.app
      repo: code.cannabrands.app/creationshop/dispensary-scraper
      tags:
        - latest
        - ${CI_COMMIT_SHA:0:8}
      dockerfile: backend/Dockerfile
      context: backend
      username:
        from_secret: registry_username
      password:
        from_secret: registry_password
      platforms: linux/amd64
      provenance: false
      build_args:
        APP_BUILD_VERSION: ${CI_COMMIT_SHA:0:8}
        APP_GIT_SHA: ${CI_COMMIT_SHA}
        APP_BUILD_TIME: ${CI_PIPELINE_CREATED}
        CONTAINER_IMAGE_TAG: ${CI_COMMIT_SHA:0:8}
    depends_on: []
    when:
      branch: master
      event: [push, manual]

  docker-cannaiq:
    image: woodpeckerci/plugin-docker-buildx
    settings:
      registry: code.cannabrands.app
      repo: code.cannabrands.app/creationshop/cannaiq-frontend
      tags:
        - latest
        - ${CI_COMMIT_SHA:0:8}
      dockerfile: cannaiq/Dockerfile
      context: cannaiq
      username:
        from_secret: registry_username
      password:
        from_secret: registry_password
      platforms: linux/amd64
      provenance: false
    depends_on: []
    when:
      branch: master
      event: [push, manual]

  docker-findadispo:
    image: woodpeckerci/plugin-docker-buildx
    settings:
      registry: code.cannabrands.app
      repo: code.cannabrands.app/creationshop/findadispo-frontend
      tags:
        - latest
        - ${CI_COMMIT_SHA:0:8}
      dockerfile: findadispo/frontend/Dockerfile
      context: findadispo/frontend
      username:
        from_secret: registry_username
      password:
        from_secret: registry_password
      platforms: linux/amd64
      provenance: false
    depends_on: []
    when:
      branch: master
      event: [push, manual]

  docker-findagram:
    image: woodpeckerci/plugin-docker-buildx
    settings:
      registry: code.cannabrands.app
      repo: code.cannabrands.app/creationshop/findagram-frontend
      tags:
        - latest
        - ${CI_COMMIT_SHA:0:8}
      dockerfile: findagram/frontend/Dockerfile
      context: findagram/frontend
      username:
        from_secret: registry_username
      password:
        from_secret: registry_password
      platforms: linux/amd64
      provenance: false
    depends_on: []
    when:
      branch: master
      event: [push, manual]

  # ===========================================
  # STAGE 3: Run Database Migrations (before deploy)
  # ===========================================
  migrate:
    image: code.cannabrands.app/creationshop/dispensary-scraper:${CI_COMMIT_SHA:0:8}
    environment:
      CANNAIQ_DB_HOST:
        from_secret: db_host
      CANNAIQ_DB_PORT:
        from_secret: db_port
      CANNAIQ_DB_NAME:
        from_secret: db_name
      CANNAIQ_DB_USER:
        from_secret: db_user
      CANNAIQ_DB_PASS:
        from_secret: db_pass
    commands:
      - cd /app
      - node dist/db/migrate.js
    depends_on:
      - docker-backend
    when:
      branch: master
      event: [push, manual]

  # ===========================================
  # STAGE 4: Deploy (after migrations)
  # ===========================================
  deploy:
    image: bitnami/kubectl:latest
    environment:
      KUBECONFIG_CONTENT:
        from_secret: kubeconfig_data
    commands:
      - mkdir -p ~/.kube
      - echo "$KUBECONFIG_CONTENT" | tr -d '[:space:]' | base64 -d > ~/.kube/config
      - chmod 600 ~/.kube/config
      - kubectl set image deployment/scraper scraper=code.cannabrands.app/creationshop/dispensary-scraper:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
      - kubectl set image deployment/scraper-worker worker=code.cannabrands.app/creationshop/dispensary-scraper:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
      - kubectl set image deployment/cannaiq-frontend cannaiq-frontend=code.cannabrands.app/creationshop/cannaiq-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
      - kubectl set image deployment/findadispo-frontend findadispo-frontend=code.cannabrands.app/creationshop/findadispo-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
      - kubectl set image deployment/findagram-frontend findagram-frontend=code.cannabrands.app/creationshop/findagram-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
      - kubectl rollout status deployment/scraper -n dispensary-scraper --timeout=300s
      - kubectl rollout status deployment/cannaiq-frontend -n dispensary-scraper --timeout=120s
    depends_on:
      - migrate
      - docker-cannaiq
      - docker-findadispo
      - docker-findagram
    when:
      branch: master
      event: [push, manual]
```
**`CLAUDE.md`** (156 lines changed)
````diff
@@ -119,7 +119,42 @@ npx tsx src/db/migrate.ts
 - Importing it at runtime causes startup crashes if env vars aren't perfect
 - `pool.ts` uses lazy initialization - only validates when first query is made
 
-### 6. LOCAL DEVELOPMENT BY DEFAULT
+### 6. ALL API ROUTES REQUIRE AUTHENTICATION — NO EXCEPTIONS
+
+**Every API router MUST apply `authMiddleware` at the router level.**
+
+```typescript
+import { authMiddleware } from '../auth/middleware';
+
+const router = Router();
+router.use(authMiddleware); // REQUIRED - first line after router creation
+```
+
+**Authentication flow (see `src/auth/middleware.ts`):**
+1. Check Bearer token (JWT or API token) → grant access if valid
+2. Check trusted origins (cannaiq.co, findadispo.com, localhost, etc.) → grant access
+3. Check trusted IPs (127.0.0.1, ::1, internal pod IPs) → grant access
+4. **Return 401 Unauthorized** if none of the above
+
+**NEVER create API routes without auth middleware:**
+- No "public" endpoints that bypass authentication
+- No "read-only" exceptions
+- No "analytics-only" exceptions
+- If an endpoint exists under `/api/*`, it MUST be protected
+
+**When creating new route files:**
+1. Import `authMiddleware` from `../auth/middleware`
+2. Add `router.use(authMiddleware)` immediately after creating the router
+3. Document security requirements in file header comments
+
+**Trusted origins (defined in middleware):**
+- `https://cannaiq.co`
+- `https://findadispo.com`
+- `https://findagram.co`
+- `*.cannabrands.app` domains
+- `localhost:*` for development
+
+### 7. LOCAL DEVELOPMENT BY DEFAULT
 
 **Quick Start:**
 ```bash
````
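For illustration, a minimal sketch of the four-step flow this hunk documents, assuming Express-style middleware; the helper names (`verifyToken`, `isTrustedOrigin`, `isTrustedIp`) are hypothetical stand-ins for whatever `src/auth/middleware.ts` actually uses:

```typescript
import { Request, Response, NextFunction } from 'express';

// Hypothetical helpers standing in for the real checks in src/auth/middleware.ts.
declare function verifyToken(token: string): boolean;       // JWT or API token
declare function isTrustedOrigin(origin: string): boolean;  // cannaiq.co, findadispo.com, ...
declare function isTrustedIp(ip: string): boolean;          // 127.0.0.1, ::1, pod IPs

export function authMiddleware(req: Request, res: Response, next: NextFunction) {
  // 1. Bearer token (JWT or API token)
  const auth = req.headers.authorization;
  if (auth?.startsWith('Bearer ') && verifyToken(auth.slice(7))) return next();

  // 2. Trusted origins
  const origin = req.headers.origin;
  if (origin && isTrustedOrigin(origin)) return next();

  // 3. Trusted IPs
  if (isTrustedIp(req.ip ?? '')) return next();

  // 4. Otherwise reject
  res.status(401).json({ error: 'Unauthorized' });
}
```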
```diff
@@ -213,22 +248,23 @@ CannaiQ has **TWO databases** with distinct purposes:
 | Table | Purpose | Row Count |
 |-------|---------|-----------|
 | `dispensaries` | Store/dispensary records | ~188+ rows |
-| `dutchie_products` | Product catalog | ~37,000+ rows |
-| `dutchie_product_snapshots` | Price/stock history | ~millions |
-| `store_products` | Canonical product schema | ~37,000+ rows |
-| `store_product_snapshots` | Canonical snapshot schema | growing |
+| `store_products` | Product catalog | ~37,000+ rows |
+| `store_product_snapshots` | Price/stock history | ~millions |
 
 **LEGACY TABLES (EMPTY - DO NOT USE):**
 
 | Table | Status | Action |
 |-------|--------|--------|
 | `stores` | EMPTY (0 rows) | Use `dispensaries` instead |
-| `products` | EMPTY (0 rows) | Use `dutchie_products` or `store_products` |
+| `products` | EMPTY (0 rows) | Use `store_products` instead |
+| `dutchie_products` | LEGACY (0 rows) | Use `store_products` instead |
+| `dutchie_product_snapshots` | LEGACY (0 rows) | Use `store_product_snapshots` instead |
 | `categories` | EMPTY (0 rows) | Categories stored in product records |
 
 **Code must NEVER:**
 - Query the `stores` table (use `dispensaries`)
-- Query the `products` table (use `dutchie_products` or `store_products`)
+- Query the `products` table (use `store_products`)
+- Query the `dutchie_products` table (use `store_products`)
 - Query the `categories` table (categories are in product records)
 
 **CRITICAL RULES:**
```
```diff
@@ -343,23 +379,23 @@ npx tsx src/scripts/etl/042_legacy_import.ts
 - SCHEMA ONLY - no data inserts from legacy tables
 
 **ETL Script 042** (`backend/src/scripts/etl/042_legacy_import.ts`):
-- Copies data from `dutchie_products` → `store_products`
-- Copies data from `dutchie_product_snapshots` → `store_product_snapshots`
+- Copies data from legacy `dutchie_legacy.dutchie_products` → `store_products`
+- Copies data from legacy `dutchie_legacy.dutchie_product_snapshots` → `store_product_snapshots`
 - Extracts brands from product data into `brands` table
 - Links dispensaries to chains and states
 - INSERT-ONLY and IDEMPOTENT (uses ON CONFLICT DO NOTHING)
 - Run manually: `cd backend && npx tsx src/scripts/etl/042_legacy_import.ts`
 
 **Tables touched by ETL:**
-| Source Table | Target Table |
-|--------------|--------------|
+| Source Table (dutchie_legacy) | Target Table (dutchie_menus) |
+|-------------------------------|------------------------------|
 | `dutchie_products` | `store_products` |
 | `dutchie_product_snapshots` | `store_product_snapshots` |
 | (brand names extracted) | `brands` |
 | (state codes mapped) | `dispensaries.state_id` |
 | (chain names matched) | `dispensaries.chain_id` |
 
-**Legacy tables remain intact** - `dutchie_products` and `dutchie_product_snapshots` are not modified.
+**Note:** The legacy `dutchie_products` and `dutchie_product_snapshots` tables in `dutchie_legacy` are read-only sources. All new crawl data goes directly to `store_products` and `store_product_snapshots`.
 
 **Migration 045** (`backend/migrations/045_add_image_columns.sql`):
 - Adds `thumbnail_url` to `store_products` and `store_product_snapshots`
```
```diff
@@ -451,6 +487,7 @@ const result = await pool.query(`
 16. **Running `lsof -ti:PORT | xargs kill`** or similar process-killing commands
 17. **Using hardcoded database names** in code or comments
 18. **Creating or connecting to a second database**
+19. **Creating API routes without authMiddleware** (all `/api/*` routes MUST be protected)
 
 ---
 
```
````diff
@@ -459,15 +496,66 @@ const result = await pool.query(`
 ### Local Storage Structure
 
 ```
-/storage/products/{brand}/{state}/{product_id}/
+/storage/images/products/{state}/{store}/{brand}/{product}/
   image-{hash}.webp
-  image-{hash}-medium.webp
-  image-{hash}-thumb.webp
 
-/storage/brands/{brand}/
+/storage/images/brands/{brand}/
   logo-{hash}.webp
 ```
+
+### Image Proxy API (On-Demand Resizing)
+
+Images are stored at full resolution and resized on-demand via the `/img` endpoint.
+
+**Endpoint:** `GET /img/<path>?<params>`
+
+**Parameters:**
+| Param | Description | Example |
+|-------|-------------|---------|
+| `w` | Width in pixels (max 4000) | `?w=200` |
+| `h` | Height in pixels (max 4000) | `?h=200` |
+| `q` | Quality 1-100 (default 80) | `?q=70` |
+| `fit` | Resize mode: cover, contain, fill, inside, outside | `?fit=cover` |
+| `blur` | Blur sigma 0.3-1000 | `?blur=5` |
+| `gray` | Grayscale (1 = enabled) | `?gray=1` |
+| `format` | Output: webp, jpeg, png, avif (default webp) | `?format=jpeg` |
+
+**Examples:**
+```bash
+# Thumbnail (50px)
+GET /img/products/az/store/brand/product/image-abc123.webp?w=50
+
+# Card image (200px, cover fit)
+GET /img/products/az/store/brand/product/image-abc123.webp?w=200&h=200&fit=cover
+
+# JPEG at 70% quality
+GET /img/products/az/store/brand/product/image-abc123.webp?w=400&format=jpeg&q=70
+
+# Grayscale blur
+GET /img/products/az/store/brand/product/image-abc123.webp?w=200&gray=1&blur=3
+```
+
+**Frontend Usage:**
+```typescript
+import { getImageUrl, ImageSizes } from '../lib/images';
+
+// Returns /img/products/.../image.webp?w=50 for local images
+// Returns original URL for remote images (CDN, etc.)
+const thumbUrl = getImageUrl(product.image_url, ImageSizes.thumb);
+const cardUrl = getImageUrl(product.image_url, ImageSizes.medium);
+const detailUrl = getImageUrl(product.image_url, ImageSizes.detail);
+```
+
+**Size Presets:**
+| Preset | Width | Use Case |
+|--------|-------|----------|
+| `thumb` | 50px | Table thumbnails |
+| `small` | 100px | Small cards |
+| `medium` | 200px | Grid cards |
+| `large` | 400px | Large cards |
+| `detail` | 600px | Product detail |
+| `full` | - | No resize |
 
 ### Storage Adapter
 
 ```typescript
````
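As a rough illustration of the helper contract described in that hunk (not the actual `cannaiq/src/lib/images.ts` source), a sketch that applies a width preset to local images via `/img` and passes remote URLs through unchanged:

```typescript
// Hypothetical sketch of the getImageUrl contract documented above;
// the real implementation lives in cannaiq/src/lib/images.ts.
export const ImageSizes = {
  thumb: 50,
  small: 100,
  medium: 200,
  large: 400,
  detail: 600,
  full: 0, // 0 = no resize
} as const;

export function getImageUrl(url: string, width: number): string {
  // Remote images (CDN, etc.) are returned untouched.
  if (/^https?:\/\//.test(url)) return url;

  // Local images are routed through the /img resize endpoint.
  const path = url.startsWith('/img/') ? url : `/img/${url.replace(/^\/+/, '')}`;
  return width > 0 ? `${path}?w=${width}` : path;
}
```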
```diff
@@ -480,8 +568,9 @@ import { saveImage, getImageUrl } from '../utils/storage-adapter';
 
 | File | Purpose |
 |------|---------|
-| `backend/src/utils/local-storage.ts` | Local filesystem adapter |
-| `backend/src/utils/storage-adapter.ts` | Unified storage abstraction |
+| `backend/src/utils/image-storage.ts` | Image download and storage |
+| `backend/src/routes/image-proxy.ts` | On-demand image resizing endpoint |
+| `cannaiq/src/lib/images.ts` | Frontend image URL helper |
 | `docker-compose.local.yml` | Local stack without MinIO |
 | `start-local.sh` | Convenience startup script |
 
```
```diff
@@ -829,7 +918,7 @@ export default defineConfig({
 
 18) **Dashboard Architecture**
 - **Frontend**: Rebuild the frontend with `VITE_API_URL` pointing to the correct backend and redeploy.
-- **Backend**: `/api/dashboard/stats` MUST use the canonical DB pool. Use the correct tables: `dutchie_products`, `dispensaries`, and views like `v_dashboard_stats`, `v_latest_snapshots`.
+- **Backend**: `/api/dashboard/stats` MUST use the canonical DB pool. Use the correct tables: `store_products`, `dispensaries`, and views like `v_dashboard_stats`, `v_latest_snapshots`.
 
 19) **Deployment (Gitea + Kubernetes)**
 - **Registry**: Gitea at `code.cannabrands.app/creationshop/dispensary-scraper`
```
```diff
@@ -1195,3 +1284,32 @@ Every analytics v2 endpoint must:
 ---
 
 # END Analytics V2 spec extension
+
+---
+
+## WordPress Plugin Versioning
+
+The WordPress plugin version is tracked in `wordpress-plugin/VERSION`.
+
+**Current version:** Check `wordpress-plugin/VERSION` for the latest version.
+
+**Versioning rules:**
+- **Minor bumps (x.x.N)**: Bug fixes, small improvements - default for most changes
+- **Middle bumps (x.N.0)**: New features, significant improvements
+- **Major bumps (N.0.0)**: Breaking changes, major rewrites - only when user explicitly requests
+
+**When making WP plugin changes:**
+1. Read `wordpress-plugin/VERSION` to get current version
+2. Bump the version number (minor by default)
+3. Update both files:
+   - `wordpress-plugin/VERSION`
+   - Plugin header `Version:` in `cannaiq-menus.php` and/or `crawlsy-menus.php`
+   - The `define('..._VERSION', '...')` constant in each plugin file
+
+**Plugin files:**
+| File | Brand | API URL |
+|------|-------|---------|
+| `cannaiq-menus.php` | CannaIQ | `https://cannaiq.co/api/v1` |
+| `crawlsy-menus.php` | Crawlsy (legacy) | `https://cannaiq.co/api/v1` |
+
+Both plugins use the same API endpoint. The Crawlsy version exists for backward compatibility with existing installations.
```
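As an illustration of the bump procedure in that hunk (not a script that exists in the repo), a minimal Node/TypeScript sketch that reads `wordpress-plugin/VERSION` and bumps the minor (x.x.N) component:

```typescript
import { readFileSync, writeFileSync } from 'node:fs';

// Hypothetical helper illustrating the versioning rules above;
// the repo tracks the version manually in wordpress-plugin/VERSION.
const versionFile = 'wordpress-plugin/VERSION';

const current = readFileSync(versionFile, 'utf8').trim(); // e.g. "1.4.7"
const [major, middle, minor] = current.split('.').map(Number);
const next = `${major}.${middle}.${minor + 1}`;            // default: minor bump

writeFileSync(versionFile, next + '\n');
console.log(`Bumped WP plugin version: ${current} -> ${next}`);
// Remember to also update the `Version:` header and the
// define('..._VERSION', '...') constant in cannaiq-menus.php / crawlsy-menus.php.
```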
**`backend/.gitignore`** (new file, 3 lines)

```gitignore
# IP2Location database (downloaded separately)
data/ip2location/
```
**`backend/Dockerfile`**

```diff
@@ -1,17 +1,17 @@
 # Build stage
 # Image: code.cannabrands.app/creationshop/dispensary-scraper
-FROM node:20-slim AS builder
+FROM code.cannabrands.app/creationshop/node:20-slim AS builder
 
 WORKDIR /app
 
 COPY package*.json ./
-RUN npm ci
+RUN npm install
 
 COPY . .
 RUN npm run build
 
 # Production stage
-FROM node:20-slim
+FROM code.cannabrands.app/creationshop/node:20-slim
 
 # Build arguments for version info
 ARG APP_BUILD_VERSION=dev
@@ -25,8 +25,9 @@ ENV APP_GIT_SHA=${APP_GIT_SHA}
 ENV APP_BUILD_TIME=${APP_BUILD_TIME}
 ENV CONTAINER_IMAGE_TAG=${CONTAINER_IMAGE_TAG}
 
-# Install Chromium dependencies
+# Install Chromium dependencies and curl for HTTP requests
 RUN apt-get update && apt-get install -y \
+    curl \
     chromium \
     fonts-liberation \
     libnss3 \
@@ -43,10 +44,13 @@ ENV PUPPETEER_EXECUTABLE_PATH=/usr/bin/chromium
 WORKDIR /app
 
 COPY package*.json ./
-RUN npm ci --omit=dev
+RUN npm install --omit=dev
 
 COPY --from=builder /app/dist ./dist
 
+# Copy migrations for auto-migrate on startup
+COPY migrations ./migrations
+
 # Create local images directory for when MinIO is not configured
 RUN mkdir -p /app/public/images/products
 
```
**`backend/docs/BRAND_INTELLIGENCE_API.md`** (new file, 394 lines)
# Brand Intelligence API

## Endpoint

```
GET /api/analytics/v2/brand/:name/intelligence
```

## Query Parameters

| Param | Type | Default | Description |
|-------|------|---------|-------------|
| `window` | `7d\|30d\|90d` | `30d` | Time window for trend calculations |
| `state` | string | - | Filter by state code (e.g., `AZ`) |
| `category` | string | - | Filter by category (e.g., `Flower`) |

## Response Payload Schema

```typescript
interface BrandIntelligenceResult {
  brand_name: string;
  window: '7d' | '30d' | '90d';
  generated_at: string; // ISO timestamp when data was computed

  performance_snapshot: PerformanceSnapshot;
  alerts: Alerts;
  sku_performance: SkuPerformance[];
  retail_footprint: RetailFootprint;
  competitive_landscape: CompetitiveLandscape;
  inventory_health: InventoryHealth;
  promo_performance: PromoPerformance;
}
```

---

## Section 1: Performance Snapshot

Summary cards with key brand metrics.

```typescript
interface PerformanceSnapshot {
  active_skus: number;              // Total products in catalog
  total_revenue_30d: number | null; // Estimated from qty × price
  total_stores: number;             // Active retail partners
  new_stores_30d: number;           // New distribution in window
  market_share: number | null;      // % of category SKUs
  avg_wholesale_price: number | null;
  price_position: 'premium' | 'value' | 'competitive';
}
```

**UI Label Mapping:**
| Field | User-Facing Label | Helper Text |
|-------|-------------------|-------------|
| `active_skus` | Active Products | X total in catalog |
| `total_revenue_30d` | Monthly Revenue | Estimated from sales |
| `total_stores` | Retail Distribution | Active retail partners |
| `new_stores_30d` | New Opportunities | X new in last 30 days |
| `market_share` | Category Position | % of category |
| `avg_wholesale_price` | Avg Wholesale | Per unit |
| `price_position` | Pricing Tier | Premium/Value/Market Rate |

---

## Section 2: Alerts

Issues requiring attention.

```typescript
interface Alerts {
  lost_stores_30d_count: number;
  lost_skus_30d_count: number;
  competitor_takeover_count: number;
  avg_oos_duration_days: number | null;
  avg_reorder_lag_days: number | null;
  items: AlertItem[];
}

interface AlertItem {
  type: 'lost_store' | 'delisted_sku' | 'shelf_loss' | 'extended_oos';
  severity: 'critical' | 'warning';
  store_name?: string;
  product_name?: string;
  competitor_brand?: string;
  days_since?: number;
  state_code?: string;
}
```

**UI Label Mapping:**
| Field | User-Facing Label |
|-------|-------------------|
| `lost_stores_30d_count` | Accounts at Risk |
| `lost_skus_30d_count` | Delisted SKUs |
| `competitor_takeover_count` | Shelf Losses |
| `avg_oos_duration_days` | Avg Stockout Length |
| `avg_reorder_lag_days` | Avg Restock Time |
| `severity: critical` | Urgent |
| `severity: warning` | Watch |

---

## Section 3: SKU Performance (Product Velocity)

How fast each SKU sells.

```typescript
interface SkuPerformance {
  store_product_id: number;
  product_name: string;
  category: string | null;
  daily_velocity: number; // Units/day estimate
  velocity_status: 'hot' | 'steady' | 'slow' | 'stale';
  retail_price: number | null;
  on_sale: boolean;
  stores_carrying: number;
  stock_status: 'in_stock' | 'low_stock' | 'out_of_stock';
}
```

**UI Label Mapping:**
| Field | User-Facing Label |
|-------|-------------------|
| `daily_velocity` | Daily Rate |
| `velocity_status` | Momentum |
| `velocity_status: hot` | Hot |
| `velocity_status: steady` | Steady |
| `velocity_status: slow` | Slow |
| `velocity_status: stale` | Stale |
| `retail_price` | Retail Price |
| `on_sale` | Promo (badge) |

**Velocity Thresholds** (see the classifier sketch below):
- `hot`: >= 5 units/day
- `steady`: >= 1 unit/day
- `slow`: >= 0.1 units/day
- `stale`: < 0.1 units/day

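A direct transcription of those thresholds into code, for illustration only (the backend computes `velocity_status` server-side, so a client would only need this to re-derive it):

```typescript
type VelocityStatus = 'hot' | 'steady' | 'slow' | 'stale';

// Transcribes the documented thresholds above.
function classifyVelocity(unitsPerDay: number): VelocityStatus {
  if (unitsPerDay >= 5) return 'hot';
  if (unitsPerDay >= 1) return 'steady';
  if (unitsPerDay >= 0.1) return 'slow';
  return 'stale';
}
```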
---

## Section 4: Retail Footprint

Store placement and coverage.

```typescript
interface RetailFootprint {
  total_stores: number;
  in_stock_count: number;
  out_of_stock_count: number;
  penetration_by_region: RegionPenetration[];
  whitespace_stores: WhitespaceStore[];
}

interface RegionPenetration {
  state_code: string;
  store_count: number;
  percent_reached: number; // % of state's dispensaries
  in_stock: number;
  out_of_stock: number;
}

interface WhitespaceStore {
  store_id: number;
  store_name: string;
  state_code: string;
  city: string | null;
  category_fit: number; // How many competing brands they carry
  competitor_brands: string[];
}
```

**UI Label Mapping:**
| Field | User-Facing Label |
|-------|-------------------|
| `penetration_by_region` | Market Coverage by Region |
| `percent_reached` | X% reached |
| `in_stock` | X stocked |
| `out_of_stock` | X out |
| `whitespace_stores` | Expansion Opportunities |
| `category_fit` | X fit |

---

## Section 5: Competitive Landscape

Market positioning vs competitors.

```typescript
interface CompetitiveLandscape {
  brand_price_position: 'premium' | 'value' | 'competitive';
  market_share_trend: MarketSharePoint[];
  competitors: Competitor[];
  head_to_head_skus: HeadToHead[];
}

interface MarketSharePoint {
  date: string;
  share_percent: number;
}

interface Competitor {
  brand_name: string;
  store_overlap_percent: number;
  price_position: 'premium' | 'value' | 'competitive';
  avg_price: number | null;
  sku_count: number;
}

interface HeadToHead {
  product_name: string;
  brand_price: number;
  competitor_brand: string;
  competitor_price: number;
  price_diff_percent: number;
}
```

**UI Label Mapping:**
| Field | User-Facing Label |
|-------|-------------------|
| `price_position: premium` | Premium Tier |
| `price_position: value` | Value Leader |
| `price_position: competitive` | Market Rate |
| `market_share_trend` | Share of Shelf Trend |
| `head_to_head_skus` | Price Comparison |
| `store_overlap_percent` | X% store overlap |

---

## Section 6: Inventory Health

Stock projections and risk levels.

```typescript
interface InventoryHealth {
  critical_count: number;    // <7 days stock
  warning_count: number;     // 7-14 days stock
  healthy_count: number;     // 14-90 days stock
  overstocked_count: number; // >90 days stock
  skus: InventorySku[];
  overstock_alert: OverstockItem[];
}

interface InventorySku {
  store_product_id: number;
  product_name: string;
  store_name: string;
  days_of_stock: number | null;
  risk_level: 'critical' | 'elevated' | 'moderate' | 'healthy';
  current_quantity: number | null;
  daily_sell_rate: number | null;
}

interface OverstockItem {
  product_name: string;
  store_name: string;
  excess_units: number;
  days_of_stock: number;
}
```

**UI Label Mapping:**
| Field | User-Facing Label |
|-------|-------------------|
| `risk_level: critical` | Reorder Now |
| `risk_level: elevated` | Low Stock |
| `risk_level: moderate` | Monitor |
| `risk_level: healthy` | Healthy |
| `critical_count` | Urgent (<7 days) |
| `warning_count` | Low (7-14 days) |
| `overstocked_count` | Excess (>90 days) |
| `days_of_stock` | X days remaining |
| `overstock_alert` | Overstock Alert |
| `excess_units` | X excess units |

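For illustration, a sketch of how the stock buckets fit together. The bucket boundaries come from the comments on `InventoryHealth` above; deriving `days_of_stock` as quantity divided by sell rate is an assumption consistent with the payload fields, not something the doc states:

```typescript
// Illustrative only: the API returns days_of_stock precomputed.
function daysOfStock(currentQuantity: number, dailySellRate: number): number | null {
  // Assumed relation: on-hand units divided by daily sell-through.
  return dailySellRate > 0 ? currentQuantity / dailySellRate : null;
}

// Buckets per the threshold comments: <7, 7-14, 14-90, >90 days.
function bucketDaysOfStock(days: number): 'critical' | 'warning' | 'healthy' | 'overstocked' {
  if (days < 7) return 'critical';
  if (days < 14) return 'warning';
  if (days <= 90) return 'healthy';
  return 'overstocked';
}
```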
---

## Section 7: Promotion Effectiveness

How promotions impact sales.

```typescript
interface PromoPerformance {
  avg_baseline_velocity: number | null;
  avg_promo_velocity: number | null;
  avg_velocity_lift: number | null;    // % increase during promo
  avg_efficiency_score: number | null; // ROI proxy
  promotions: Promotion[];
}

interface Promotion {
  product_name: string;
  store_name: string;
  status: 'active' | 'scheduled' | 'ended';
  start_date: string;
  end_date: string | null;
  regular_price: number;
  promo_price: number;
  discount_percent: number;
  baseline_velocity: number | null;
  promo_velocity: number | null;
  velocity_lift: number | null;
  efficiency_score: number | null;
}
```

**UI Label Mapping:**
| Field | User-Facing Label |
|-------|-------------------|
| `avg_baseline_velocity` | Normal Rate |
| `avg_promo_velocity` | During Promos |
| `avg_velocity_lift` | Avg Sales Lift |
| `avg_efficiency_score` | ROI Score |
| `velocity_lift` | Sales Lift |
| `efficiency_score` | ROI Score |
| `status: active` | Live |
| `status: scheduled` | Scheduled |
| `status: ended` | Ended |

---

## Example Queries

### Get full payload
```javascript
const response = await fetch('/api/analytics/v2/brand/Wyld/intelligence?window=30d');
const data = await response.json();
```

### Extract summary cards (flattened)
```javascript
const { performance_snapshot: ps, alerts } = data;

const summaryCards = {
  activeProducts: ps.active_skus,
  monthlyRevenue: ps.total_revenue_30d,
  retailDistribution: ps.total_stores,
  newOpportunities: ps.new_stores_30d,
  categoryPosition: ps.market_share,
  avgWholesale: ps.avg_wholesale_price,
  pricingTier: ps.price_position,
  accountsAtRisk: alerts.lost_stores_30d_count,
  delistedSkus: alerts.lost_skus_30d_count,
  shelfLosses: alerts.competitor_takeover_count,
};
```

### Get top 10 fastest selling SKUs
```javascript
const topSkus = data.sku_performance
  .filter(sku => sku.velocity_status === 'hot' || sku.velocity_status === 'steady')
  .sort((a, b) => b.daily_velocity - a.daily_velocity)
  .slice(0, 10);
```

### Get critical inventory alerts only
```javascript
const criticalInventory = data.inventory_health.skus
  .filter(sku => sku.risk_level === 'critical');
```

### Get states with <50% penetration
```javascript
const underPenetrated = data.retail_footprint.penetration_by_region
  .filter(region => region.percent_reached < 50)
  .sort((a, b) => a.percent_reached - b.percent_reached);
```

### Get active promotions with positive lift
```javascript
const effectivePromos = data.promo_performance.promotions
  .filter(p => p.status === 'active' && p.velocity_lift > 0)
  .sort((a, b) => b.velocity_lift - a.velocity_lift);
```

### Build chart data for market share trend
```javascript
const chartData = data.competitive_landscape.market_share_trend.map(point => ({
  x: new Date(point.date),
  y: point.share_percent,
}));
```

---

## Notes for Frontend Implementation

1. **All fields are snake_case** - transform to camelCase if needed
2. **Null values are possible** - handle gracefully in UI
3. **Arrays may be empty** - show appropriate empty states
4. **Timestamps are ISO format** - parse with `new Date()`
5. **Percentages are already computed** - no need to multiply by 100
6. **The `window` parameter affects trend calculations** - 7d/30d/90d
**`backend/docs/CRAWL_PIPELINE.md`** (new file, 539 lines)
# Crawl Pipeline Documentation

## Overview

The crawl pipeline fetches product data from Dutchie dispensary menus and stores it in the canonical database. This document covers the complete flow from task scheduling to data storage.

---

## Pipeline Stages

```
┌──────────────────────┐
│ store_discovery      │  Find new dispensaries
└─────────┬────────────┘
          │
          ▼
┌──────────────────────┐
│ entry_point_discovery│  Resolve slug → platform_dispensary_id
└─────────┬────────────┘
          │
          ▼
┌──────────────────────┐
│ product_discovery    │  Initial product crawl
└─────────┬────────────┘
          │
          ▼
┌──────────────────────┐
│ product_resync       │  Recurring crawl (every 4 hours)
└──────────────────────┘
```

---

## Stage Details

### 1. Store Discovery
**Purpose:** Find new dispensaries to crawl

**Handler:** `src/tasks/handlers/store-discovery.ts`

**Flow:**
1. Query Dutchie `ConsumerDispensaries` GraphQL for cities/states
2. Extract dispensary info (name, address, menu_url)
3. Insert into `dutchie_discovery_locations`
4. Queue `entry_point_discovery` for each new location

---

### 2. Entry Point Discovery
**Purpose:** Resolve menu URL slug to platform_dispensary_id (MongoDB ObjectId)

**Handler:** `src/tasks/handlers/entry-point-discovery.ts`

**Flow:**
1. Load dispensary from database
2. Extract slug from `menu_url` (see the sketch below):
   - `/embedded-menu/<slug>` or `/dispensary/<slug>`
3. Start stealth session (fingerprint + proxy)
4. Query `resolveDispensaryIdWithDetails(slug)` via GraphQL
5. Update dispensary with `platform_dispensary_id`
6. Queue `product_discovery` task

**Example:**
```
menu_url: https://dutchie.com/embedded-menu/deeply-rooted
slug: deeply-rooted
platform_dispensary_id: 6405ef617056e8014d79101b
```

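For illustration, one way to implement step 2's slug extraction; this is a sketch, and the actual logic in `src/tasks/handlers/entry-point-discovery.ts` may differ:

```typescript
// Sketch of the slug extraction described in step 2 above.
function extractSlug(menuUrl: string): string | null {
  const match = new URL(menuUrl).pathname.match(
    /^\/(?:embedded-menu|dispensary)\/([^/]+)/
  );
  return match ? match[1] : null;
}

// extractSlug('https://dutchie.com/embedded-menu/deeply-rooted') === 'deeply-rooted'
```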
---

### 3. Product Discovery
**Purpose:** Initial crawl of a new dispensary

**Handler:** `src/tasks/handlers/product-discovery.ts`

Same as product_resync but for first-time crawls.

---

### 4. Product Resync
**Purpose:** Recurring crawl to capture price/stock changes

**Handler:** `src/tasks/handlers/product-resync.ts`

**Flow:**

#### Step 1: Load Dispensary Info
```sql
SELECT id, name, platform_dispensary_id, menu_url, state
FROM dispensaries
WHERE id = $1 AND crawl_enabled = true
```

#### Step 2: Start Stealth Session
- Generate random browser fingerprint
- Set locale/timezone matching state
- Optional proxy rotation

#### Step 3: Fetch Products via GraphQL
**Endpoint:** `https://dutchie.com/api-3/graphql`

**Variables:**
```javascript
{
  includeEnterpriseSpecials: false,
  productsFilter: {
    dispensaryId: "<platform_dispensary_id>",
    pricingType: "rec",
    Status: "All",
    types: [],
    useCache: false,
    isDefaultSort: true,
    sortBy: "popularSortIdx",
    sortDirection: 1,
    bypassOnlineThresholds: true,
    isKioskMenu: false,
    removeProductsBelowOptionThresholds: false
  },
  page: 0,
  perPage: 100
}
```

**Key Notes:**
- `Status: "All"` returns all products (Active returns same count)
- `Status: null` returns 0 products (broken)
- `pricingType: "rec"` returns BOTH rec and med prices
- Paginate until `products.length < perPage` or `allProducts.length >= totalCount` (see the loop sketch below)

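A sketch of that pagination rule; `fetchPage` is a hypothetical wrapper around the GraphQL request above, not a function from the codebase:

```typescript
// Hypothetical pagination loop implementing the stopping rule above.
// fetchPage is assumed to POST the Variables payload with the given page.
async function fetchAllProducts(
  fetchPage: (page: number) => Promise<{ products: unknown[]; totalCount: number }>,
  perPage = 100,
): Promise<unknown[]> {
  const allProducts: unknown[] = [];
  for (let page = 0; ; page++) {
    const { products, totalCount } = await fetchPage(page);
    allProducts.push(...products);
    // Stop on a short page, or once we have everything the API reports.
    if (products.length < perPage || allProducts.length >= totalCount) break;
  }
  return allProducts;
}
```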
|
#### Step 4: Normalize Data
|
||||||
|
Transform raw Dutchie payload to canonical format via `DutchieNormalizer`.
|
||||||
|
|
||||||
|
#### Step 5: Upsert Products
|
||||||
|
Insert/update `store_products` table with normalized data.
|
||||||
|
|
||||||
|
#### Step 6: Create Snapshots
|
||||||
|
Insert point-in-time record to `store_product_snapshots`.
|
||||||
|
|
||||||
|
#### Step 7: Track Missing Products (OOS Detection)
|
||||||
|
```sql
|
||||||
|
-- Reset consecutive_misses for products IN the feed
|
||||||
|
UPDATE store_products
|
||||||
|
SET consecutive_misses = 0, last_seen_at = NOW()
|
||||||
|
WHERE dispensary_id = $1
|
||||||
|
AND provider = 'dutchie'
|
||||||
|
AND provider_product_id = ANY($2)
|
||||||
|
|
||||||
|
-- Increment for products NOT in feed
|
||||||
|
UPDATE store_products
|
||||||
|
SET consecutive_misses = consecutive_misses + 1
|
||||||
|
WHERE dispensary_id = $1
|
||||||
|
AND provider = 'dutchie'
|
||||||
|
AND provider_product_id NOT IN (...)
|
||||||
|
AND consecutive_misses < 3
|
||||||
|
|
||||||
|
-- Mark OOS at 3 consecutive misses
|
||||||
|
UPDATE store_products
|
||||||
|
SET stock_status = 'oos', is_in_stock = false
|
||||||
|
WHERE dispensary_id = $1
|
||||||
|
AND consecutive_misses >= 3
|
||||||
|
AND stock_status != 'oos'
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Step 8: Download Images
|
||||||
|
For new products, download and store images locally.
|
||||||
|
|
||||||
|
#### Step 9: Update Dispensary
|
||||||
|
```sql
|
||||||
|
UPDATE dispensaries SET last_crawl_at = NOW() WHERE id = $1
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## GraphQL Payload Structure

### Product Fields (from filteredProducts.products[])

| Field | Type | Description |
|-------|------|-------------|
| `_id` / `id` | string | MongoDB ObjectId (24 hex chars) |
| `Name` | string | Product display name |
| `brandName` | string | Brand name |
| `brand.name` | string | Brand name (nested) |
| `brand.description` | string | Brand description |
| `type` | string | Category (Flower, Edible, Concentrate, etc.) |
| `subcategory` | string | Subcategory |
| `strainType` | string | Hybrid, Indica, Sativa, N/A |
| `Status` | string | Always "Active" in feed |
| `Image` | string | Primary image URL |
| `images[]` | array | All product images |

### Pricing Fields

| Field | Type | Description |
|-------|------|-------------|
| `Prices[]` | number[] | Rec prices per option |
| `recPrices[]` | number[] | Rec prices |
| `medicalPrices[]` | number[] | Medical prices |
| `recSpecialPrices[]` | number[] | Rec sale prices |
| `medicalSpecialPrices[]` | number[] | Medical sale prices |
| `Options[]` | string[] | Size options ("1/8oz", "1g", etc.) |
| `rawOptions[]` | string[] | Raw weight options ("3.5g") |

### Inventory Fields (POSMetaData.children[])

| Field | Type | Description |
|-------|------|-------------|
| `quantity` | number | Total inventory count |
| `quantityAvailable` | number | Available for online orders |
| `kioskQuantityAvailable` | number | Available for kiosk orders |
| `option` | string | Which size option this is for |

### Potency Fields

| Field | Type | Description |
|-------|------|-------------|
| `THCContent.range[]` | number[] | THC percentage |
| `CBDContent.range[]` | number[] | CBD percentage |
| `cannabinoidsV2[]` | array | Detailed cannabinoid breakdown |

### Specials (specialData.bogoSpecials[])

| Field | Type | Description |
|-------|------|-------------|
| `specialName` | string | Deal name |
| `specialType` | string | "bogo", "sale", etc. |
| `itemsForAPrice.value` | string | Bundle price |
| `bogoRewards[].totalQuantity.quantity` | number | Required quantity |

---
## OOS Detection Logic

Products disappear from the Dutchie feed when they go out of stock. We track this via `consecutive_misses`:

| Scenario | Action |
|----------|--------|
| Product in feed | `consecutive_misses = 0` |
| Product missing 1st time | `consecutive_misses = 1` |
| Product missing 2nd time | `consecutive_misses = 2` |
| Product missing 3rd time | `consecutive_misses = 3`, mark `stock_status = 'oos'` |
| Product returns to feed | `consecutive_misses = 0`, update `stock_status` |

**Why 3 misses?**
- Protects against false positives from crawl failures
- A single bad crawl doesn't trigger mass OOS alerts
- Balances detection speed vs. accuracy

---
## Database Tables

### store_products
Current state of each product:
- `provider_product_id` - Dutchie's MongoDB ObjectId
- `name_raw`, `brand_name_raw` - Raw values from feed
- `price_rec`, `price_med` - Current prices
- `is_in_stock`, `stock_status` - Availability
- `consecutive_misses` - OOS detection counter
- `last_seen_at` - Last time product was in feed

### store_product_snapshots
Point-in-time records for historical analysis:
- One row per product per crawl
- Captures price, stock, potency at that moment
- Used for price history, analytics

### dispensaries
Store metadata:
- `platform_dispensary_id` - MongoDB ObjectId for GraphQL
- `menu_url` - Source URL
- `last_crawl_at` - Last successful crawl
- `crawl_enabled` - Whether to crawl

---
## Worker Roles

Workers pull tasks from the `worker_tasks` queue based on their assigned role.

| Role | Name | Description | Handler |
|------|------|-------------|---------|
| `product_resync` | Product Resync | Re-crawl dispensary products for price/stock changes | `handleProductResync` |
| `product_discovery` | Product Discovery | Initial product discovery for new dispensaries | `handleProductDiscovery` |
| `store_discovery` | Store Discovery | Discover new dispensary locations | `handleStoreDiscovery` |
| `entry_point_discovery` | Entry Point Discovery | Resolve platform IDs from menu URLs | `handleEntryPointDiscovery` |
| `analytics_refresh` | Analytics Refresh | Refresh materialized views and analytics | `handleAnalyticsRefresh` |

**API Endpoint:** `GET /api/worker-registry/roles`

---
## Scheduling

Crawls are scheduled via the `worker_tasks` table:

| Role | Frequency | Description |
|------|-----------|-------------|
| `product_resync` | Every 4 hours | Regular product refresh |
| `product_discovery` | On-demand | First crawl for new stores |
| `entry_point_discovery` | On-demand | New store setup |
| `store_discovery` | Daily | Find new stores |
| `analytics_refresh` | Daily | Refresh analytics materialized views |

---
## Priority & On-Demand Tasks

Tasks are claimed by workers in order of **priority DESC, created_at ASC**.

### Priority Levels

| Priority | Use Case | Example |
|----------|----------|---------|
| 0 | Scheduled/batch tasks | Daily product_resync generation |
| 10 | On-demand/chained tasks | entry_point → product_discovery |
| Higher | Urgent/manual triggers | Admin-triggered immediate crawl |

### Task Chaining

When a task completes, the system automatically creates follow-up tasks:

```
store_discovery (completed)
  └─► entry_point_discovery (priority: 10) for each new store

entry_point_discovery (completed, success)
  └─► product_discovery (priority: 10) for that store

product_discovery (completed)
  └─► [no chain] Store enters regular resync schedule
```

### On-Demand Task Creation

Use the task service to create high-priority tasks:

```typescript
// Create immediate product resync for a store
await taskService.createTask({
  role: 'product_resync',
  dispensary_id: 123,
  platform: 'dutchie',
  priority: 20, // Higher than batch tasks
});

// Convenience methods with default high priority (10)
await taskService.createEntryPointTask(dispensaryId, 'dutchie');
await taskService.createProductDiscoveryTask(dispensaryId, 'dutchie');
await taskService.createStoreDiscoveryTask('dutchie', 'AZ');
```

### Claim Function

The `claim_task()` SQL function atomically claims tasks (a call sketch follows the list):
- Respects priority ordering (higher = first)
- Uses `FOR UPDATE SKIP LOCKED` for concurrency
- Prevents multiple active tasks per store
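A minimal sketch of invoking it from a worker with `node-postgres`; the two-argument signature is an assumption for illustration (see `migrations/074_worker_task_queue.sql` for the real definition):

```typescript
import { Pool } from 'pg';

const pool = new Pool();

// Assumed signature: claim_task(worker_id TEXT, role task_role) RETURNS SETOF worker_tasks.
export async function claimNextTask(workerId: string, role: string) {
  const { rows } = await pool.query(
    'SELECT * FROM claim_task($1, $2::task_role)',
    [workerId, role]
  );
  // With SKIP LOCKED, an empty result just means there was nothing to claim.
  return rows[0] ?? null;
}
```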
---

## Image Storage

Images are downloaded from Dutchie's AWS S3 and stored locally with on-demand resizing.

### Storage Path

```
/storage/images/products/<state>/<store>/<brand>/<product_id>/image-<hash>.webp
/storage/images/brands/<brand>/logo-<hash>.webp
```

**Example:**
```
/storage/images/products/az/az-deeply-rooted/bud-bros/6913e3cd444eac3935e928b9/image-ae38b1f9.webp
```

### Image Proxy API

Served via `/img/*` with on-demand resizing using **sharp**:

```
GET /img/products/az/az-deeply-rooted/bud-bros/6913e3cd444eac3935e928b9/image-ae38b1f9.webp?w=200
```

| Param | Description |
|-------|-------------|
| `w` | Width in pixels (max 4000) |
| `h` | Height in pixels (max 4000) |
| `q` | Quality 1-100 (default 80) |
| `fit` | cover, contain, fill, inside, outside |
| `blur` | Blur sigma (0.3-1000) |
| `gray` | Grayscale (1 = enabled) |
| `format` | webp, jpeg, png, avif (default webp) |
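A minimal sketch of such a proxy handler, assuming an Express route over local files under `/storage/images`; parameter handling is simplified and the real `src/routes/image-proxy.ts` may differ:

```typescript
import express from 'express';
import path from 'path';
import sharp from 'sharp';

const IMAGE_ROOT = '/storage/images'; // assumption: matches the STORAGE_DRIVER=local layout

const app = express();

app.get('/img/*', async (req, res) => {
  // Resolve the requested file inside the image root (strip any path traversal).
  const rel = path.normalize(req.params[0]).replace(/^(\.\.[/\\])+/, '');
  const file = path.join(IMAGE_ROOT, rel);

  const width = req.query.w ? Math.min(Number(req.query.w), 4000) : undefined;
  const height = req.query.h ? Math.min(Number(req.query.h), 4000) : undefined;
  const quality = req.query.q ? Number(req.query.q) : 80;

  try {
    let pipeline = sharp(file);
    if (width || height) {
      pipeline = pipeline.resize(width, height, { fit: 'cover' });
    }
    // Full-resolution originals are stored on disk; resizing happens per request.
    const buf = await pipeline.webp({ quality }).toBuffer();
    res.type('image/webp').send(buf);
  } catch {
    res.sendStatus(404);
  }
});
```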
### Key Files

| File | Purpose |
|------|---------|
| `src/utils/image-storage.ts` | Download & save images to local filesystem |
| `src/routes/image-proxy.ts` | On-demand resize/transform at `/img/*` |

### Download Rules

| Scenario | Image Action |
|----------|--------------|
| **New product (first crawl)** | Download if `primaryImageUrl` exists |
| **Existing product (refresh)** | Download only if `local_image_path` is NULL (backfill) |
| **Product already has local image** | Skip download entirely |

**Logic:**
- Images are downloaded **once** and never re-downloaded on subsequent crawls
- `skipIfExists: true` - filesystem check prevents re-download even if queued
- First crawl: all products get images
- Refresh crawl: only new products or products missing local images

### Storage Rules
- **NO MinIO** - local filesystem only (`STORAGE_DRIVER=local`)
- Store full resolution, resize on-demand via the `/img` proxy
- Convert to webp for consistency using **sharp**
- Preserve original Dutchie URL as fallback in `image_url` column
- Local path stored in `local_image_path` column

---
## Stealth & Anti-Detection

**PROXIES ARE REQUIRED** - workers will fail to start if no active proxies are available in the database. All HTTP requests to Dutchie go through a proxy.

Workers automatically initialize anti-detection systems on startup.

### Components

| Component | Purpose | Source |
|-----------|---------|--------|
| **CrawlRotator** | Coordinates proxy + UA rotation | `src/services/crawl-rotator.ts` |
| **ProxyRotator** | Round-robin proxy selection, health tracking | `src/services/crawl-rotator.ts` |
| **UserAgentRotator** | Cycles through realistic browser fingerprints | `src/services/crawl-rotator.ts` |
| **Dutchie Client** | Curl-based HTTP with auto-retry on 403 | `src/platforms/dutchie/client.ts` |

### Initialization Flow

```
Worker Start
  │
  ├─► initializeStealth()
  │     │
  │     ├─► CrawlRotator.initialize()
  │     │     └─► Load proxies from `proxies` table
  │     │
  │     └─► setCrawlRotator(rotator)
  │           └─► Wire to Dutchie client
  │
  └─► Process tasks...
```

### Stealth Session (per task)

Each crawl task starts a stealth session:

```typescript
// In product-refresh.ts, entry-point-discovery.ts
const session = startSession(dispensary.state || 'AZ', 'America/Phoenix');
```

This creates a new identity with:
- **Random fingerprint:** Chrome/Firefox/Safari/Edge on Win/Mac/Linux
- **Accept-Language:** Matches timezone (e.g., `America/Phoenix` → `en-US,en;q=0.9`)
- **sec-ch-ua headers:** Proper Client Hints for the browser profile

### On 403 Block

When Dutchie returns 403, the client automatically:

1. Records a failure on the current proxy (increments `failure_count`)
2. Deactivates the proxy once it reaches 5+ failures
3. Rotates to the next healthy proxy
4. Rotates the fingerprint
5. Retries the request (see the sketch below)
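A minimal sketch of that retry-on-403 behavior, assuming hypothetical `rotator` and `doRequest` helpers (the real logic lives in `src/platforms/dutchie/client.ts` and the rotator interfaces may differ):

```typescript
// Hypothetical interfaces, for illustration only.
interface Rotator {
  currentProxy(): { id: number; url: string };
  recordFailure(proxyId: number): Promise<void>; // assumed to deactivate at 5+ failures
  rotateProxy(): void;
  rotateFingerprint(): void;
}

declare const rotator: Rotator;
declare function doRequest(url: string, proxyUrl: string): Promise<{ status: number; body: string }>;

async function requestWithRotation(url: string, maxRetries = 3): Promise<string> {
  for (let attempt = 0; attempt <= maxRetries; attempt++) {
    const proxy = rotator.currentProxy();
    const res = await doRequest(url, proxy.url);

    if (res.status !== 403) return res.body;

    // Blocked: mark the proxy, move to a healthy one, change identity, retry.
    await rotator.recordFailure(proxy.id);
    rotator.rotateProxy();
    rotator.rotateFingerprint();
  }
  throw new Error(`Still blocked after ${maxRetries} retries: ${url}`);
}
```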
### Proxy Table Schema

```sql
CREATE TABLE proxies (
  id SERIAL PRIMARY KEY,
  host VARCHAR(255) NOT NULL,
  port INTEGER NOT NULL,
  username VARCHAR(100),
  password VARCHAR(100),
  protocol VARCHAR(10) DEFAULT 'http',  -- http, https, socks5
  is_active BOOLEAN DEFAULT true,
  last_used_at TIMESTAMPTZ,
  failure_count INTEGER DEFAULT 0,
  success_count INTEGER DEFAULT 0,
  avg_response_time_ms INTEGER,
  last_failure_at TIMESTAMPTZ,
  last_error TEXT
);
```

### Configuration

Proxies are mandatory. There is no environment variable to disable them; workers will refuse to start without active proxies in the database.

### User-Agent Generation

See `workflow-12102025.md` for the full specification.

**Summary:**
- Uses the `intoli/user-agents` library (daily-updated market share data)
- Device distribution: Mobile 62%, Desktop 36%, Tablet 2%
- Browser whitelist: Chrome, Safari, Edge, Firefox only
- UA sticks until the IP rotates (403 or manual rotation)
- Failure = alert admin + stop crawl (no fallback)

Each fingerprint includes proper `sec-ch-ua`, `sec-ch-ua-platform`, and `sec-ch-ua-mobile` headers.
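A minimal sketch of fingerprint sampling with the `user-agents` npm package; the device weights mirror the 62/36/2 split above, while the whitelist filter is an assumption about how the rotator applies it:

```typescript
import UserAgent from 'user-agents';

// Browser whitelist from the summary above ("Edg" is Chromium Edge's UA token).
const ALLOWED = /Chrome|Safari|Edg|Firefox/;

// Pick a device category using the documented 62/36/2 split.
function pickDeviceCategory(): 'mobile' | 'desktop' | 'tablet' {
  const r = Math.random();
  if (r < 0.62) return 'mobile';
  if (r < 0.98) return 'desktop';
  return 'tablet';
}

export function randomFingerprint(): string {
  // An array of filters must all match; sampling follows market-share weights.
  const ua = new UserAgent([ALLOWED, { deviceCategory: pickDeviceCategory() }]);
  return ua.toString();
}
```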
---

## Error Handling

- **GraphQL errors:** Logged, task marked failed, retried later
- **Normalization errors:** Logged as warnings, continue with valid products
- **Image download errors:** Non-fatal, logged, continue
- **Database errors:** Task fails, will be retried
- **403 blocks:** Auto-rotate proxy + fingerprint, retry (up to 3 retries)

---

## Files

| File | Purpose |
|------|---------|
| `src/tasks/handlers/product-resync.ts` | Main crawl handler |
| `src/tasks/handlers/entry-point-discovery.ts` | Slug → ID resolution |
| `src/platforms/dutchie/index.ts` | GraphQL client, session management |
| `src/hydration/normalizers/dutchie.ts` | Payload normalization |
| `src/hydration/canonical-upsert.ts` | Database upsert logic |
| `src/utils/image-storage.ts` | Image download and local storage |
| `src/routes/image-proxy.ts` | On-demand image resizing |
| `migrations/075_consecutive_misses.sql` | OOS tracking column |
backend/docs/TASK_WORKFLOW_2024-12-10.md (new file, 584 lines)

@@ -0,0 +1,584 @@
# Task Workflow Documentation

**Date: 2024-12-10**

This document describes the complete task/job processing architecture after the 2024-12-10 rewrite.

---

## Complete Architecture

```
┌──────────────────────────────────────────────────────────────────────────────┐
│                              KUBERNETES CLUSTER                              │
├──────────────────────────────────────────────────────────────────────────────┤
│                                                                              │
│  ┌────────────────────────────────────────────────────────────────────────┐ │
│  │                        API SERVER POD (scraper)                        │ │
│  │                                                                        │ │
│  │  ┌──────────────────┐      ┌────────────────────────────────────────┐ │ │
│  │  │   Express API    │      │             TaskScheduler              │ │ │
│  │  │                  │      │   (src/services/task-scheduler.ts)     │ │ │
│  │  │  /api/job-queue  │      │                                        │ │ │
│  │  │  /api/tasks      │      │  • Polls every 60s                     │ │ │
│  │  │  /api/schedules  │      │  • Checks task_schedules table         │ │ │
│  │  └────────┬─────────┘      │  • SELECT FOR UPDATE SKIP LOCKED       │ │ │
│  │           │                │  • Generates tasks when due            │ │ │
│  │           │                └──────────────────┬─────────────────────┘ │ │
│  │           │                                   │                       │ │
│  └───────────┼───────────────────────────────────┼───────────────────────┘ │
│              │                                   │                          │
│              │          ┌────────────────────────┘                          │
│              │          │                                                   │
│              ▼          ▼                                                   │
│  ┌────────────────────────────────────────────────────────────────────────┐ │
│  │                          POSTGRESQL DATABASE                           │ │
│  │                                                                        │ │
│  │   ┌─────────────────────┐          ┌─────────────────────┐            │ │
│  │   │   task_schedules    │          │    worker_tasks     │            │ │
│  │   │                     │          │                     │            │ │
│  │   │ • product_refresh   │─────────►│ • pending tasks     │            │ │
│  │   │ • store_discovery   │  create  │ • claimed tasks     │            │ │
│  │   │ • analytics_refresh │  tasks   │ • running tasks     │            │ │
│  │   │                     │          │ • completed tasks   │            │ │
│  │   │ next_run_at         │          │                     │            │ │
│  │   │ last_run_at         │          │ role, dispensary_id │            │ │
│  │   │ interval_hours      │          │ priority, status    │            │ │
│  │   └─────────────────────┘          └──────────┬──────────┘            │ │
│  │                                               │                       │ │
│  └───────────────────────────────────────────────┼───────────────────────┘ │
│                                                  │                          │
│                           ┌──────────────────────┘                          │
│                           │  Workers poll for tasks                         │
│                           │  (SELECT FOR UPDATE SKIP LOCKED)                │
│                           ▼                                                 │
│  ┌────────────────────────────────────────────────────────────────────────┐ │
│  │               WORKER PODS (StatefulSet: scraper-worker)                │ │
│  │                                                                        │ │
│  │  ┌─────────────┐  ┌─────────────┐  ┌─────────────┐  ┌─────────────┐   │ │
│  │  │  Worker 0   │  │  Worker 1   │  │  Worker 2   │  │  Worker N   │   │ │
│  │  │             │  │             │  │             │  │             │   │ │
│  │  │ task-worker │  │ task-worker │  │ task-worker │  │ task-worker │   │ │
│  │  │     .ts     │  │     .ts     │  │     .ts     │  │     .ts     │   │ │
│  │  └─────────────┘  └─────────────┘  └─────────────┘  └─────────────┘   │ │
│  │                                                                        │ │
│  └────────────────────────────────────────────────────────────────────────┘ │
│                                                                              │
└──────────────────────────────────────────────────────────────────────────────┘
```

---
## Startup Sequence

```
┌─────────────────────────────────────────────────────────────────────────────┐
│                             API SERVER STARTUP                              │
├─────────────────────────────────────────────────────────────────────────────┤
│                                                                             │
│  1. Express app initializes                                                 │
│        │                                                                    │
│        ▼                                                                    │
│  2. runAutoMigrations()                                                     │
│     • Runs pending migrations (including 079_task_schedules.sql)            │
│        │                                                                    │
│        ▼                                                                    │
│  3. initializeMinio() / initializeImageStorage()                            │
│        │                                                                    │
│        ▼                                                                    │
│  4. cleanupOrphanedJobs()                                                   │
│        │                                                                    │
│        ▼                                                                    │
│  5. taskScheduler.start()  ◄─── NEW (per TASK_WORKFLOW_2024-12-10.md)       │
│        │                                                                    │
│        ├── Recover stale tasks (workers that died)                          │
│        ├── Ensure default schedules exist in task_schedules                 │
│        ├── Check and run any due schedules immediately                      │
│        └── Start 60-second poll interval                                    │
│        │                                                                    │
│        ▼                                                                    │
│  6. app.listen(PORT)                                                        │
│                                                                             │
└─────────────────────────────────────────────────────────────────────────────┘

┌─────────────────────────────────────────────────────────────────────────────┐
│                             WORKER POD STARTUP                              │
├─────────────────────────────────────────────────────────────────────────────┤
│                                                                             │
│  1. K8s starts pod from StatefulSet                                         │
│        │                                                                    │
│        ▼                                                                    │
│  2. TaskWorker.constructor()                                                │
│     • Create DB pool                                                        │
│     • Create CrawlRotator                                                   │
│        │                                                                    │
│        ▼                                                                    │
│  3. initializeStealth()                                                     │
│     • Load proxies from DB (REQUIRED - fails if none)                       │
│     • Wire rotator to Dutchie client                                        │
│        │                                                                    │
│        ▼                                                                    │
│  4. register() with API                                                     │
│     • Optional - continues if it fails                                      │
│        │                                                                    │
│        ▼                                                                    │
│  5. startRegistryHeartbeat() every 30s                                      │
│        │                                                                    │
│        ▼                                                                    │
│  6. processNextTask() loop                                                  │
│        │                                                                    │
│        ├── Poll for pending task (FOR UPDATE SKIP LOCKED)                   │
│        ├── Claim task atomically                                            │
│        ├── Execute handler (product_refresh, store_discovery, etc.)         │
│        ├── Mark complete/failed                                             │
│        ├── Chain next task if applicable                                    │
│        └── Loop                                                             │
│                                                                             │
└─────────────────────────────────────────────────────────────────────────────┘
```

---
## Schedule Flow

```
┌─────────────────────────────────────────────────────────────────────────────┐
│                       SCHEDULER POLL (every 60 seconds)                     │
├─────────────────────────────────────────────────────────────────────────────┤
│                                                                             │
│  BEGIN TRANSACTION                                                          │
│        │                                                                    │
│        ▼                                                                    │
│  SELECT * FROM task_schedules                                               │
│  WHERE enabled = true AND next_run_at <= NOW()                              │
│  FOR UPDATE SKIP LOCKED  ◄─── Prevents duplicate execution across replicas  │
│        │                                                                    │
│        ▼                                                                    │
│  For each due schedule:                                                     │
│        │                                                                    │
│        ├── product_refresh_all                                              │
│        │     └─► Query dispensaries needing crawl                           │
│        │         └─► Create product_refresh tasks in worker_tasks           │
│        │                                                                    │
│        ├── store_discovery_dutchie                                          │
│        │     └─► Create single store_discovery task                         │
│        │                                                                    │
│        └── analytics_refresh                                                │
│              └─► Create single analytics_refresh task                       │
│        │                                                                    │
│        ▼                                                                    │
│  UPDATE task_schedules SET                                                  │
│    last_run_at = NOW(),                                                     │
│    next_run_at = NOW() + interval_hours                                     │
│        │                                                                    │
│        ▼                                                                    │
│  COMMIT                                                                     │
│                                                                             │
└─────────────────────────────────────────────────────────────────────────────┘
```
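A minimal sketch of that poll with `node-postgres`, assuming a hypothetical `createTasksForSchedule` helper for the role-specific fan-out (the real logic lives in `src/services/task-scheduler.ts`):

```typescript
import { Pool, PoolClient } from 'pg';

// Hypothetical fan-out helper: creates the worker_tasks rows for one schedule.
declare function createTasksForSchedule(client: PoolClient, schedule: any): Promise<number>;

const pool = new Pool();

export async function pollSchedules(): Promise<void> {
  const client = await pool.connect();
  try {
    await client.query('BEGIN');

    // Only one replica can hold each due row; the others skip it.
    const { rows: due } = await client.query(
      `SELECT * FROM task_schedules
       WHERE enabled = true AND next_run_at <= NOW()
       FOR UPDATE SKIP LOCKED`
    );

    for (const schedule of due) {
      const count = await createTasksForSchedule(client, schedule);
      await client.query(
        `UPDATE task_schedules
         SET last_run_at = NOW(),
             next_run_at = NOW() + make_interval(hours => interval_hours),
             last_task_count = $2
         WHERE id = $1`,
        [schedule.id, count]
      );
    }

    await client.query('COMMIT');
  } catch (err) {
    await client.query('ROLLBACK');
    throw err;
  } finally {
    client.release();
  }
}
```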
---

## Task Lifecycle

```
                      ┌──────────┐
                      │ SCHEDULE │
                      │   DUE    │
                      └────┬─────┘
                           │
                           ▼
┌──────────────┐  claim   ┌──────────────┐  start   ┌──────────────┐
│   PENDING    │─────────►│   CLAIMED    │─────────►│   RUNNING    │
└──────────────┘          └──────────────┘          └──────┬───────┘
       ▲                                                   │
       │                            ┌──────────────┬──────┴───────┐
       │ retry                      │              │              │
       │ (if retries < max)         ▼              ▼              ▼
       │                      ┌──────────┐  ┌──────────┐  ┌──────────┐
       └──────────────────────│  FAILED  │  │ COMPLETED│  │  STALE   │
                              └──────────┘  └──────────┘  └────┬─────┘
                                                               │
                                                  recover_stale_tasks()
                                                               │
                                                               ▼
                                                         ┌──────────┐
                                                         │ PENDING  │
                                                         └──────────┘
```

---
## Database Tables

### task_schedules (NEW - migration 079)

Stores schedule definitions. Survives restarts.

```sql
CREATE TABLE task_schedules (
  id SERIAL PRIMARY KEY,
  name VARCHAR(100) NOT NULL UNIQUE,
  role VARCHAR(50) NOT NULL,        -- product_refresh, store_discovery, etc.
  enabled BOOLEAN DEFAULT TRUE,
  interval_hours INTEGER NOT NULL,  -- How often to run
  priority INTEGER DEFAULT 0,       -- Task priority when created
  state_code VARCHAR(2),            -- Optional filter
  last_run_at TIMESTAMPTZ,          -- When it last ran
  next_run_at TIMESTAMPTZ,          -- When it's due next
  last_task_count INTEGER,          -- Tasks created last run
  last_error TEXT                   -- Error message if failed
);
```

### worker_tasks (migration 074)

The task queue. Workers pull from here.

```sql
CREATE TABLE worker_tasks (
  id SERIAL PRIMARY KEY,
  role task_role NOT NULL,          -- What type of work
  dispensary_id INTEGER,            -- Which store (if applicable)
  platform VARCHAR(50),             -- Which platform
  status task_status DEFAULT 'pending',
  priority INTEGER DEFAULT 0,       -- Higher = process first
  scheduled_for TIMESTAMP,          -- Don't process before this time
  worker_id VARCHAR(100),           -- Which worker claimed it
  claimed_at TIMESTAMP,
  started_at TIMESTAMP,
  completed_at TIMESTAMP,
  last_heartbeat_at TIMESTAMP,      -- For stale detection
  result JSONB,
  error_message TEXT,
  retry_count INTEGER DEFAULT 0,
  max_retries INTEGER DEFAULT 3
);
```

---
## Default Schedules

| Name | Role | Interval | Priority | Description |
|------|------|----------|----------|-------------|
| `payload_fetch_all` | payload_fetch | 4 hours | 0 | Fetch payloads from Dutchie API (chains to product_refresh) |
| `store_discovery_dutchie` | store_discovery | 24 hours | 5 | Find new Dutchie stores |
| `analytics_refresh` | analytics_refresh | 6 hours | 0 | Refresh MVs |

---

## Task Roles

| Role | Description | Creates Tasks For |
|------|-------------|-------------------|
| `payload_fetch` | **NEW** - Fetch from Dutchie API, save to disk | Each dispensary needing crawl |
| `product_refresh` | **CHANGED** - Read local payload, normalize, upsert to DB | Chained from payload_fetch |
| `store_discovery` | Find new dispensaries, returns newStoreIds[] | Single task per platform |
| `entry_point_discovery` | **DEPRECATED** - Resolve platform IDs | No longer used |
| `product_discovery` | Initial product fetch for new stores | Chained from store_discovery |
| `analytics_refresh` | Refresh MVs | Single global task |

### Payload/Refresh Separation (2024-12-10)

The crawl workflow is now split into two phases:

```
payload_fetch (scheduled every 4h)
  └─► Hit Dutchie GraphQL API
  └─► Save raw JSON to /storage/payloads/{year}/{month}/{day}/store_{id}_{ts}.json.gz
  └─► Record metadata in raw_crawl_payloads table
  └─► Queue product_refresh task with payload_id

product_refresh (chained from payload_fetch)
  └─► Load payload from filesystem (NOT from API)
  └─► Normalize via DutchieNormalizer
  └─► Upsert to store_products
  └─► Create snapshots
  └─► Track missing products
  └─► Download images
```

**Benefits:**
- **Retry-friendly**: if normalize fails, re-run product_refresh without re-crawling
- **Replay-able**: run product_refresh against any historical payload
- **Faster refreshes**: local file read vs. network call
- **Historical diffs**: compare payloads to see what changed between crawls
- **Less API pressure**: only payload_fetch hits Dutchie (see the save/load sketch below)
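A minimal sketch of the two halves, assuming gzip-compressed JSON under the path layout shown above; the function names are illustrative and the real utilities live in `src/utils/payload-storage.ts`:

```typescript
import { promises as fs } from 'fs';
import path from 'path';
import { gzip, gunzip } from 'zlib';
import { promisify } from 'util';

const gzipAsync = promisify(gzip);
const gunzipAsync = promisify(gunzip);

const PAYLOAD_ROOT = '/storage/payloads';

// payload_fetch side: compress the raw API response and write it to disk.
export async function savePayload(dispensaryId: number, payload: unknown): Promise<string> {
  const now = new Date();
  const dir = path.join(
    PAYLOAD_ROOT,
    String(now.getUTCFullYear()),
    String(now.getUTCMonth() + 1).padStart(2, '0'),
    String(now.getUTCDate()).padStart(2, '0')
  );
  await fs.mkdir(dir, { recursive: true });

  const file = path.join(dir, `store_${dispensaryId}_${now.getTime()}.json.gz`);
  await fs.writeFile(file, await gzipAsync(JSON.stringify(payload)));
  return file; // recorded in raw_crawl_payloads alongside the metadata
}

// product_refresh side: read from disk instead of hitting the API.
export async function loadPayload<T>(file: string): Promise<T> {
  const buf = await gunzipAsync(await fs.readFile(file));
  return JSON.parse(buf.toString('utf8')) as T;
}
```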
---

## Task Chaining

Tasks automatically queue follow-up tasks upon successful completion. This creates two main flows:

### Discovery Flow (New Stores)

When `store_discovery` finds new dispensaries, they automatically get their initial product data:

```
store_discovery
  └─► Discovers new locations via Dutchie GraphQL
  └─► Auto-promotes valid locations to dispensaries table
  └─► Collects newDispensaryIds[] from promotions
  └─► Returns { newStoreIds: [...] } in result

chainNextTask() detects newStoreIds
  └─► Creates product_discovery task for each new store

product_discovery
  └─► Calls handlePayloadFetch() internally
  └─► payload_fetch hits Dutchie API
  └─► Saves raw JSON to /storage/payloads/
  └─► Queues product_refresh task with payload_id

product_refresh
  └─► Loads payload from filesystem
  └─► Normalizes and upserts to store_products
  └─► Creates snapshots, downloads images
```

**Complete Discovery Chain:**
```
store_discovery → product_discovery → payload_fetch → product_refresh
                      (internal call)   (queues next)
```

### Scheduled Flow (Existing Stores)

For existing stores, the `payload_fetch_all` schedule runs every 4 hours:

```
TaskScheduler (every 60s)
  └─► Checks task_schedules for due schedules
  └─► payload_fetch_all is due
  └─► Generates payload_fetch task for each dispensary

payload_fetch
  └─► Hits Dutchie GraphQL API
  └─► Saves raw JSON to /storage/payloads/
  └─► Queues product_refresh task with payload_id

product_refresh
  └─► Loads payload from filesystem (NOT API)
  └─► Normalizes via DutchieNormalizer
  └─► Upserts to store_products
  └─► Creates snapshots
```

**Complete Scheduled Chain:**
```
payload_fetch → product_refresh
   (queues)       (reads local)
```

### Chaining Implementation

Task chaining is handled in three places:

1. **Internal chaining (handler calls handler):**
   - `product_discovery` calls `handlePayloadFetch()` directly

2. **External chaining (chainNextTask() in task-service.ts):**
   - Called after task completion
   - `store_discovery` → queues `product_discovery` for each newStoreId

3. **Queue-based chaining (taskService.createTask):**
   - `payload_fetch` queues `product_refresh` with `payload: { payload_id }` (see the sketch below)
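A minimal sketch of the external chaining step, assuming the completed task's `result` carries `newStoreIds` as described above (the shapes and service API are illustrative; the real method lives in `src/tasks/task-service.ts`):

```typescript
// Hypothetical shapes, for illustration only.
interface CompletedTask {
  role: string;
  result?: { newStoreIds?: number[] };
}

interface TaskService {
  createTask(input: {
    role: string;
    dispensary_id?: number;
    platform?: string;
    priority?: number;
  }): Promise<void>;
}

export async function chainNextTask(taskService: TaskService, task: CompletedTask): Promise<void> {
  // store_discovery fans out one product_discovery task per new store.
  if (task.role === 'store_discovery' && task.result?.newStoreIds?.length) {
    for (const dispensaryId of task.result.newStoreIds) {
      await taskService.createTask({
        role: 'product_discovery',
        dispensary_id: dispensaryId,
        platform: 'dutchie',
        priority: 10, // on-demand/chained priority
      });
    }
  }
}
```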
---

## Payload API Endpoints

Raw crawl payloads can be accessed via the Payloads API:

| Endpoint | Method | Description |
|----------|--------|-------------|
| `GET /api/payloads` | GET | List payload metadata (paginated) |
| `GET /api/payloads/:id` | GET | Get payload metadata by ID |
| `GET /api/payloads/:id/data` | GET | Get full payload JSON (decompressed) |
| `GET /api/payloads/store/:dispensaryId` | GET | List payloads for a store |
| `GET /api/payloads/store/:dispensaryId/latest` | GET | Get latest payload for a store |
| `GET /api/payloads/store/:dispensaryId/diff` | GET | Diff two payloads for changes |

### Payload Diff Response

The diff endpoint returns:
```json
{
  "success": true,
  "from": { "id": 123, "fetchedAt": "...", "productCount": 100 },
  "to": { "id": 456, "fetchedAt": "...", "productCount": 105 },
  "diff": {
    "added": 10,
    "removed": 5,
    "priceChanges": 8,
    "stockChanges": 12
  },
  "details": {
    "added": [...],
    "removed": [...],
    "priceChanges": [...],
    "stockChanges": [...]
  }
}
```

---
## API Endpoints

### Schedules (NEW)

| Endpoint | Method | Description |
|----------|--------|-------------|
| `GET /api/schedules` | GET | List all schedules |
| `PUT /api/schedules/:id` | PUT | Update schedule |
| `POST /api/schedules/:id/trigger` | POST | Run schedule immediately |

### Task Creation (rewired 2024-12-10)

| Endpoint | Method | Description |
|----------|--------|-------------|
| `POST /api/job-queue/enqueue` | POST | Create single task |
| `POST /api/job-queue/enqueue-batch` | POST | Create batch tasks |
| `POST /api/job-queue/enqueue-state` | POST | Create tasks for a state |
| `POST /api/tasks` | POST | Direct task creation |

### Task Management

| Endpoint | Method | Description |
|----------|--------|-------------|
| `GET /api/tasks` | GET | List tasks |
| `GET /api/tasks/:id` | GET | Get single task |
| `GET /api/tasks/counts` | GET | Task counts by status |
| `POST /api/tasks/recover-stale` | POST | Recover stale tasks |

---

## Key Files

| File | Purpose |
|------|---------|
| `src/services/task-scheduler.ts` | **NEW** - DB-driven scheduler |
| `src/tasks/task-worker.ts` | Worker that processes tasks |
| `src/tasks/task-service.ts` | Task CRUD operations |
| `src/tasks/handlers/payload-fetch.ts` | **NEW** - Fetches from API, saves to disk |
| `src/tasks/handlers/product-refresh.ts` | **CHANGED** - Reads from disk, processes to DB |
| `src/utils/payload-storage.ts` | **NEW** - Payload save/load utilities |
| `src/routes/tasks.ts` | Task API endpoints |
| `src/routes/job-queue.ts` | Job Queue UI endpoints (rewired) |
| `migrations/079_task_schedules.sql` | Schedule table |
| `migrations/080_raw_crawl_payloads.sql` | Payload metadata table |
| `migrations/081_payload_fetch_columns.sql` | payload, last_fetch_at columns |
| `migrations/074_worker_task_queue.sql` | Task queue table |

---
## Legacy Code (DEPRECATED)

| File | Status | Replacement |
|------|--------|-------------|
| `src/services/scheduler.ts` | DEPRECATED | `task-scheduler.ts` |
| `dispensary_crawl_jobs` table | ORPHANED | `worker_tasks` |
| `job_schedules` table | LEGACY | `task_schedules` |

---

## Dashboard Integration

Both pages remain wired to the dashboard:

| Page | Data Source | Actions |
|------|-------------|---------|
| **Job Queue** | `worker_tasks`, `task_schedules` | Create tasks, view schedules |
| **Task Queue** | `worker_tasks` | View tasks, recover stale |

---

## Multi-Replica Safety

The scheduler uses `SELECT FOR UPDATE SKIP LOCKED` to ensure:

1. **Only one replica** executes a schedule at a time
2. **No duplicate tasks** are created
3. **Survives pod restarts** - state lives in the DB, not in memory
4. **Self-healing** - stale tasks are recovered on startup

```sql
-- This query is atomic across all API server replicas
SELECT * FROM task_schedules
WHERE enabled = true AND next_run_at <= NOW()
FOR UPDATE SKIP LOCKED
```

---
## Worker Scaling (K8s)

Workers run as a StatefulSet in Kubernetes. You can scale from the admin UI or the CLI.

### From Admin UI

The Workers page (`/admin/workers`) provides:
- Current replica count display
- Scale up/down buttons
- Target replica input

### API Endpoints

| Endpoint | Method | Description |
|----------|--------|-------------|
| `GET /api/workers/k8s/replicas` | GET | Get current/desired replica counts |
| `POST /api/workers/k8s/scale` | POST | Scale to N replicas (body: `{ replicas: N }`) |

### From CLI

```bash
# View current replicas
kubectl get statefulset scraper-worker -n dispensary-scraper

# Scale to 10 workers
kubectl scale statefulset scraper-worker -n dispensary-scraper --replicas=10

# Scale down to 3 workers
kubectl scale statefulset scraper-worker -n dispensary-scraper --replicas=3
```

### Configuration

Environment variables for the API server:

| Variable | Default | Description |
|----------|---------|-------------|
| `K8S_NAMESPACE` | `dispensary-scraper` | Kubernetes namespace |
| `K8S_WORKER_STATEFULSET` | `scraper-worker` | StatefulSet name |

### RBAC Requirements

The API server pod needs these K8s permissions:

```yaml
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: worker-scaler
  namespace: dispensary-scraper
rules:
  - apiGroups: ["apps"]
    resources: ["statefulsets"]
    verbs: ["get", "patch"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: scraper-worker-scaler
  namespace: dispensary-scraper
subjects:
  - kind: ServiceAccount
    name: default
    namespace: dispensary-scraper
roleRef:
  kind: Role
  name: worker-scaler
  apiGroup: rbac.authorization.k8s.io
```
backend/docs/WORKER_TASK_ARCHITECTURE.md (new file, 542 lines)

@@ -0,0 +1,542 @@
# Worker Task Architecture

This document describes the unified task-based worker system that replaces the legacy fragmented job systems.

## Overview

The task worker architecture provides a single, unified system for managing all background work in CannaiQ:

- **Store discovery** - Find new dispensaries on platforms
- **Entry point discovery** - Resolve platform IDs from menu URLs
- **Product discovery** - Initial product fetch for new stores
- **Product resync** - Regular price/stock updates for existing stores
- **Analytics refresh** - Refresh materialized views and analytics

## Architecture

### Database Tables

**`worker_tasks`** - Central task queue
```sql
CREATE TABLE worker_tasks (
  id SERIAL PRIMARY KEY,
  role task_role NOT NULL,          -- What type of work
  dispensary_id INTEGER,            -- Which store (if applicable)
  platform VARCHAR(50),             -- Which platform (dutchie, etc.)
  status task_status DEFAULT 'pending',
  priority INTEGER DEFAULT 0,       -- Higher = process first
  scheduled_for TIMESTAMP,          -- Don't process before this time
  worker_id VARCHAR(100),           -- Which worker claimed it
  claimed_at TIMESTAMP,
  started_at TIMESTAMP,
  completed_at TIMESTAMP,
  last_heartbeat_at TIMESTAMP,      -- For stale detection
  result JSONB,                     -- Output from handler
  error_message TEXT,
  retry_count INTEGER DEFAULT 0,
  max_retries INTEGER DEFAULT 3,
  created_at TIMESTAMP DEFAULT NOW(),
  updated_at TIMESTAMP DEFAULT NOW()
);
```

**Key indexes:**
- `idx_worker_tasks_pending_priority` - For efficient task claiming
- `idx_worker_tasks_active_dispensary` - Prevents concurrent tasks per store (partial unique index)

### Task Roles

| Role | Purpose | Per-Store | Scheduled |
|------|---------|-----------|-----------|
| `store_discovery` | Find new stores on a platform | No | Daily |
| `entry_point_discovery` | Resolve platform IDs | Yes | On-demand |
| `product_discovery` | Initial product fetch | Yes | After entry_point |
| `product_resync` | Price/stock updates | Yes | Every 4 hours |
| `analytics_refresh` | Refresh MVs | No | Daily |

### Task Lifecycle

```
pending → claimed → running → completed
                        ↓
                     failed
```

1. **pending** - Task is waiting to be picked up
2. **claimed** - Worker has claimed it (atomic via SELECT FOR UPDATE SKIP LOCKED; see the sketch below)
3. **running** - Worker is actively processing
4. **completed** - Task finished successfully
5. **failed** - Task encountered an error
6. **stale** - Task lost its worker (recovered automatically)
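A minimal sketch of the atomic claim with an inline query; the production path goes through the `claim_task()` SQL function, so this is just an illustration of the same SKIP LOCKED pattern:

```typescript
import { Pool } from 'pg';

const pool = new Pool();

// Claim the highest-priority pending task for a role, atomically.
export async function claimTask(workerId: string, role: string) {
  const { rows } = await pool.query(
    `UPDATE worker_tasks
     SET status = 'claimed', worker_id = $1, claimed_at = NOW()
     WHERE id = (
       SELECT id FROM worker_tasks
       WHERE role = $2
         AND status = 'pending'
         AND (scheduled_for IS NULL OR scheduled_for <= NOW())
       ORDER BY priority DESC, created_at ASC
       FOR UPDATE SKIP LOCKED
       LIMIT 1
     )
     RETURNING *`,
    [workerId, role]
  );
  return rows[0] ?? null; // null when the queue is empty
}
```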
## Files

### Core Files

| File | Purpose |
|------|---------|
| `src/tasks/task-service.ts` | TaskService - CRUD, claiming, capacity metrics |
| `src/tasks/task-worker.ts` | TaskWorker - Main worker loop |
| `src/tasks/index.ts` | Module exports |
| `src/routes/tasks.ts` | API endpoints |
| `migrations/074_worker_task_queue.sql` | Database schema |

### Task Handlers

| File | Role |
|------|------|
| `src/tasks/handlers/store-discovery.ts` | `store_discovery` |
| `src/tasks/handlers/entry-point-discovery.ts` | `entry_point_discovery` |
| `src/tasks/handlers/product-discovery.ts` | `product_discovery` |
| `src/tasks/handlers/product-resync.ts` | `product_resync` |
| `src/tasks/handlers/analytics-refresh.ts` | `analytics_refresh` |

## Running Workers

### Environment Variables

| Variable | Default | Description |
|----------|---------|-------------|
| `WORKER_ROLE` | (required) | Which task role to process |
| `WORKER_ID` | auto-generated | Custom worker identifier |
| `POLL_INTERVAL_MS` | 5000 | How often to check for tasks |
| `HEARTBEAT_INTERVAL_MS` | 30000 | How often to update the heartbeat |

### Starting a Worker

```bash
# Start a product resync worker
WORKER_ROLE=product_resync npx tsx src/tasks/task-worker.ts

# Start with a custom ID
WORKER_ROLE=product_resync WORKER_ID=resync-1 npx tsx src/tasks/task-worker.ts

# Start multiple workers for different roles
WORKER_ROLE=store_discovery npx tsx src/tasks/task-worker.ts &
WORKER_ROLE=product_resync npx tsx src/tasks/task-worker.ts &
```

### Kubernetes Deployment

```yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: task-worker-resync
spec:
  replicas: 3
  template:
    spec:
      containers:
        - name: worker
          image: code.cannabrands.app/creationshop/dispensary-scraper:latest
          command: ["npx", "tsx", "src/tasks/task-worker.ts"]
          env:
            - name: WORKER_ROLE
              value: "product_resync"
```
## API Endpoints

### Task Management

| Endpoint | Method | Description |
|----------|--------|-------------|
| `/api/tasks` | GET | List tasks with filters |
| `/api/tasks` | POST | Create a new task |
| `/api/tasks/:id` | GET | Get task by ID |
| `/api/tasks/counts` | GET | Get counts by status |
| `/api/tasks/capacity` | GET | Get capacity metrics |
| `/api/tasks/capacity/:role` | GET | Get role-specific capacity |
| `/api/tasks/recover-stale` | POST | Recover tasks from dead workers |

### Task Generation

| Endpoint | Method | Description |
|----------|--------|-------------|
| `/api/tasks/generate/resync` | POST | Generate daily resync tasks |
| `/api/tasks/generate/discovery` | POST | Create store discovery task |

### Migration (from legacy systems)

| Endpoint | Method | Description |
|----------|--------|-------------|
| `/api/tasks/migration/status` | GET | Compare old vs. new systems |
| `/api/tasks/migration/disable-old-schedules` | POST | Disable job_schedules |
| `/api/tasks/migration/cancel-pending-crawl-jobs` | POST | Cancel old crawl jobs |
| `/api/tasks/migration/create-resync-tasks` | POST | Create tasks for all stores |
| `/api/tasks/migration/full-migrate` | POST | One-click migration |

### Role-Specific Endpoints

| Endpoint | Method | Description |
|----------|--------|-------------|
| `/api/tasks/role/:role/last-completion` | GET | Last completion time |
| `/api/tasks/role/:role/recent` | GET | Recent completions |
| `/api/tasks/store/:id/active` | GET | Check if store has an active task |
## Capacity Planning

The `v_worker_capacity` view provides real-time metrics:

```sql
SELECT * FROM v_worker_capacity;
```

Returns:
- `pending_tasks` - Tasks waiting to be claimed
- `ready_tasks` - Tasks ready now (scheduled_for is null or past)
- `claimed_tasks` - Tasks claimed but not started
- `running_tasks` - Tasks actively processing
- `completed_last_hour` - Recent completions
- `failed_last_hour` - Recent failures
- `active_workers` - Workers with recent heartbeats
- `avg_duration_sec` - Average task duration
- `tasks_per_worker_hour` - Throughput estimate
- `estimated_hours_to_drain` - Time to clear the queue

### Scaling Recommendations

```javascript
// API: GET /api/tasks/capacity/:role
{
  "role": "product_resync",
  "pending_tasks": 500,
  "active_workers": 3,
  "workers_needed": {
    "for_1_hour": 10,
    "for_4_hours": 3,
    "for_8_hours": 2
  }
}
```
## Task Chaining

Tasks can automatically create follow-up tasks:

```
store_discovery → entry_point_discovery → product_discovery
                                                 ↓
                              (store has platform_dispensary_id)
                                                 ↓
                                        Daily resync tasks
```

The `chainNextTask()` method handles this automatically.

## Stale Task Recovery

Tasks are considered stale if `last_heartbeat_at` is older than the threshold (default 10 minutes).

```sql
SELECT recover_stale_tasks(10); -- 10 minute threshold
```

Or via the API:
```bash
curl -X POST /api/tasks/recover-stale \
  -H 'Content-Type: application/json' \
  -d '{"threshold_minutes": 10}'
```
## Migration from Legacy Systems

### Legacy Systems Replaced

1. **job_schedules + job_run_logs** - Scheduled job definitions
2. **dispensary_crawl_jobs** - Per-dispensary crawl queue
3. **SyncOrchestrator + HydrationWorker** - Raw payload processing

### Migration Steps

**Option 1: One-Click Migration**
```bash
curl -X POST /api/tasks/migration/full-migrate
```

This will:
1. Disable all job_schedules
2. Cancel pending dispensary_crawl_jobs
3. Generate resync tasks for all stores
4. Create discovery and analytics tasks

**Option 2: Manual Migration**
```bash
# 1. Check current status
curl /api/tasks/migration/status

# 2. Disable old schedules
curl -X POST /api/tasks/migration/disable-old-schedules

# 3. Cancel pending crawl jobs
curl -X POST /api/tasks/migration/cancel-pending-crawl-jobs

# 4. Create resync tasks
curl -X POST /api/tasks/migration/create-resync-tasks \
  -H 'Content-Type: application/json' \
  -d '{"state_code": "AZ"}'

# 5. Generate daily resync schedule
curl -X POST /api/tasks/generate/resync \
  -H 'Content-Type: application/json' \
  -d '{"batches_per_day": 6}'
```
## Per-Store Locking

The system prevents concurrent tasks for the same store using a partial unique index:

```sql
CREATE UNIQUE INDEX idx_worker_tasks_active_dispensary
ON worker_tasks (dispensary_id)
WHERE dispensary_id IS NOT NULL
  AND status IN ('claimed', 'running');
```

This ensures only one task can be active per store at any time.

## Task Priority

Tasks are claimed in priority order (higher first), then by creation time:

```sql
ORDER BY priority DESC, created_at ASC
```

Default priorities:
- `store_discovery`: 0
- `entry_point_discovery`: 10 (high - new stores)
- `product_discovery`: 10 (high - new stores)
- `product_resync`: 0
- `analytics_refresh`: 0

## Scheduled Tasks

Tasks can be scheduled for future execution:

```javascript
await taskService.createTask({
  role: 'product_resync',
  dispensary_id: 123,
  scheduled_for: new Date('2025-01-10T06:00:00Z'),
});
```

The `generate_resync_tasks()` function creates staggered tasks throughout the day:

```sql
SELECT generate_resync_tasks(6, '2025-01-10'); -- 6 batches = every 4 hours
```
## Dashboard Integration

The admin dashboard shows task queue status in the main overview:

```
Task Queue Summary
------------------
Pending:      45
Running:       3
Completed: 1,234
Failed:       12
```

Full task management is available at `/admin/tasks`.

## Error Handling

Failed tasks include the error message in `error_message` and can be retried:

```sql
-- View failed tasks
SELECT id, role, dispensary_id, error_message, retry_count
FROM worker_tasks
WHERE status = 'failed'
ORDER BY completed_at DESC
LIMIT 20;

-- Retry failed tasks
UPDATE worker_tasks
SET status = 'pending', retry_count = retry_count + 1
WHERE status = 'failed' AND retry_count < max_retries;
```
## Concurrent Task Processing (Added 2024-12)

Workers can now process multiple tasks concurrently within a single worker instance. This improves throughput by using async I/O efficiently.

### Architecture

```
┌─────────────────────────────────────────────────────────────┐
│                         Pod (K8s)                           │
│                                                             │
│   ┌─────────────────────────────────────────────────────┐   │
│   │                     TaskWorker                      │   │
│   │                                                     │   │
│   │   ┌─────────┐  ┌─────────┐  ┌─────────┐             │   │
│   │   │ Task 1  │  │ Task 2  │  │ Task 3  │ (concurrent)│   │
│   │   └─────────┘  └─────────┘  └─────────┘             │   │
│   │                                                     │   │
│   │   Resource Monitor                                  │   │
│   │     ├── Memory: 65% (threshold: 85%)                │   │
│   │     ├── CPU: 45% (threshold: 90%)                   │   │
│   │     └── Status: Normal                              │   │
│   └─────────────────────────────────────────────────────┘   │
└─────────────────────────────────────────────────────────────┘
```

### Environment Variables

| Variable | Default | Description |
|----------|---------|-------------|
| `MAX_CONCURRENT_TASKS` | 3 | Maximum tasks a worker will run concurrently |
| `MEMORY_BACKOFF_THRESHOLD` | 0.85 | Back off when heap memory exceeds 85% |
| `CPU_BACKOFF_THRESHOLD` | 0.90 | Back off when CPU exceeds 90% |
| `BACKOFF_DURATION_MS` | 10000 | How long to wait when backing off (10s) |

### How It Works

1. **Main loop**: the worker continuously tries to fill up to `MAX_CONCURRENT_TASKS`
2. **Resource monitoring**: before claiming a new task, the worker checks memory and CPU
3. **Backoff**: if resources exceed the thresholds, the worker pauses and stops claiming new tasks
4. **Concurrent execution**: tasks run in parallel as promises; they don't block each other
5. **Graceful shutdown**: on SIGTERM/decommission, the worker stops claiming but waits for active tasks (see the sketch below)
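A minimal sketch of that main loop under the assumptions above; `claimNextTask`, `runTask`, and `shouldBackOff` are stand-ins for the real TaskWorker methods in `src/tasks/task-worker.ts`:

```typescript
// Stand-ins for the real TaskWorker internals.
declare function claimNextTask(): Promise<{ id: number } | null>;
declare function runTask(task: { id: number }): Promise<void>;
declare function shouldBackOff(): boolean;

const MAX_CONCURRENT_TASKS = Number(process.env.MAX_CONCURRENT_TASKS ?? 3);
const BACKOFF_DURATION_MS = Number(process.env.BACKOFF_DURATION_MS ?? 10_000);
const POLL_INTERVAL_MS = 5_000;

const sleep = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms));
const active = new Set<Promise<void>>();
let shuttingDown = false;

process.on('SIGTERM', () => { shuttingDown = true; });

export async function mainLoop(): Promise<void> {
  while (!shuttingDown) {
    // Backoff: keep running existing tasks, just stop claiming new ones.
    if (shouldBackOff() || active.size >= MAX_CONCURRENT_TASKS) {
      await sleep(shouldBackOff() ? BACKOFF_DURATION_MS : POLL_INTERVAL_MS);
      continue;
    }

    const task = await claimNextTask();
    if (!task) {
      await sleep(POLL_INTERVAL_MS);
      continue;
    }

    // Tasks run concurrently; each promise removes itself from the set when it settles.
    const p = runTask(task).catch(() => {}).finally(() => active.delete(p));
    active.add(p);
  }

  // Graceful shutdown: wait for in-flight tasks before exiting.
  await Promise.all(active);
}
```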
### Resource Monitoring

```typescript
// ResourceStats interface
interface ResourceStats {
  memoryPercent: number;   // Current heap usage as decimal (0.0-1.0)
  memoryMb: number;        // Current heap used in MB
  memoryTotalMb: number;   // Total heap available in MB
  cpuPercent: number;      // CPU usage as percentage (0-100)
  isBackingOff: boolean;   // True if worker is in backoff state
  backoffReason: string;   // Why the worker is backing off
}
```
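A minimal sketch of deriving those stats from Node's `process` APIs; the memory fields follow the interface above, and the CPU percentage assumes deltas are taken against the previous sample:

```typescript
let lastCpu = process.cpuUsage();
let lastSample = Date.now();

function sampleResources() {
  const mem = process.memoryUsage();
  const cpu = process.cpuUsage(lastCpu); // microseconds used since the last sample
  const elapsedMs = Date.now() - lastSample;

  lastCpu = process.cpuUsage();
  lastSample = Date.now();

  return {
    memoryPercent: mem.heapUsed / mem.heapTotal,
    memoryMb: Math.round(mem.heapUsed / 1024 / 1024),
    memoryTotalMb: Math.round(mem.heapTotal / 1024 / 1024),
    // user+system CPU time over wall-clock time, as a percentage.
    cpuPercent: Math.round(((cpu.user + cpu.system) / 1000 / Math.max(elapsedMs, 1)) * 100),
  };
}
```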
### Heartbeat Data

Workers report the following in their heartbeat:

```json
{
  "worker_id": "worker-abc123",
  "current_task_id": 456,
  "current_task_ids": [456, 457, 458],
  "active_task_count": 3,
  "max_concurrent_tasks": 3,
  "status": "active",
  "resources": {
    "memory_mb": 256,
    "memory_total_mb": 512,
    "memory_rss_mb": 320,
    "memory_percent": 50,
    "cpu_user_ms": 12500,
    "cpu_system_ms": 3200,
    "cpu_percent": 45,
    "is_backing_off": false,
    "backoff_reason": null
  }
}
```

### Backoff Behavior

When resources exceed the thresholds:

1. The worker logs the backoff reason:
```
[TaskWorker] MyWorker backing off: Memory at 87.3% (threshold: 85%)
```

2. The worker stops claiming new tasks but continues existing tasks

3. After `BACKOFF_DURATION_MS`, the worker rechecks resources

4. When resources return to normal:
```
[TaskWorker] MyWorker resuming normal operation
```

### UI Display

The Workers Dashboard shows:

- **Tasks column**: `2/3 tasks` (active/max concurrent)
- **Resources column**: Memory % and CPU % with color coding
  - Green: < 50%
  - Yellow: 50-74%
  - Amber: 75-89%
  - Red: 90%+
- **Backing Off**: orange warning badge when the worker is in backoff state

### Task Count Badge Details

```
┌─────────────────────────────────────────────┐
│ Worker: "MyWorker"                          │
│ Tasks: 2/3 tasks   #456, #457               │
│ Resources: 🧠 65%  💻 45%                   │
│ Status: ● Active                            │
└─────────────────────────────────────────────┘
```

### Best Practices

1. **Start conservative**: use `MAX_CONCURRENT_TASKS=3` initially
2. **Monitor resources**: watch for frequent backoffs in the logs
3. **Tune per workload**: I/O-bound tasks benefit from higher concurrency
4. **Scale horizontally**: add more pods rather than cranking concurrency too high

### Code References

| File | Purpose |
|------|---------|
| `src/tasks/task-worker.ts:68-71` | Concurrency environment variables |
| `src/tasks/task-worker.ts:104-111` | ResourceStats interface |
| `src/tasks/task-worker.ts:149-179` | getResourceStats() method |
| `src/tasks/task-worker.ts:184-196` | shouldBackOff() method |
| `src/tasks/task-worker.ts:462-516` | mainLoop() with concurrent claiming |
| `src/routes/worker-registry.ts:148-195` | Heartbeat endpoint handling |
| `cannaiq/src/pages/WorkersDashboard.tsx:233-305` | UI components for resources |

## Monitoring

### Logs

Workers log to stdout:
```
[TaskWorker] Starting worker worker-product_resync-a1b2c3d4 for role: product_resync
[TaskWorker] Claimed task 123 (product_resync) for dispensary 456
[TaskWorker] Task 123 completed successfully
```

### Health Check

Check if workers are active:
```sql
SELECT worker_id, role, COUNT(*), MAX(last_heartbeat_at)
FROM worker_tasks
WHERE last_heartbeat_at > NOW() - INTERVAL '5 minutes'
GROUP BY worker_id, role;
```

### Metrics

```sql
-- Tasks by status
SELECT status, COUNT(*) FROM worker_tasks GROUP BY status;

-- Tasks by role
SELECT role, status, COUNT(*) FROM worker_tasks GROUP BY role, status;

-- Average duration by role
SELECT role, AVG(EXTRACT(EPOCH FROM (completed_at - started_at))) as avg_seconds
FROM worker_tasks
WHERE status = 'completed' AND completed_at > NOW() - INTERVAL '24 hours'
GROUP BY role;
```
69 backend/k8s/cronjob-ip2location.yaml Normal file
@@ -0,0 +1,69 @@
apiVersion: batch/v1
kind: CronJob
metadata:
  name: ip2location-update
  namespace: default
spec:
  # Run on the 1st of every month at 3am UTC
  schedule: "0 3 1 * *"
  concurrencyPolicy: Forbid
  successfulJobsHistoryLimit: 3
  failedJobsHistoryLimit: 3
  jobTemplate:
    spec:
      template:
        spec:
          containers:
            - name: ip2location-updater
              image: curlimages/curl:latest
              command:
                - /bin/sh
                - -c
                - |
                  set -e
                  echo "Downloading IP2Location LITE DB5..."

                  # Download to temp
                  cd /tmp
                  curl -L -o ip2location.zip "https://www.ip2location.com/download/?token=${IP2LOCATION_TOKEN}&file=DB5LITEBIN"

                  # Extract
                  unzip -o ip2location.zip

                  # Find and copy the BIN file
                  BIN_FILE=$(ls *.BIN 2>/dev/null | head -1)
                  if [ -z "$BIN_FILE" ]; then
                    echo "ERROR: No BIN file found"
                    exit 1
                  fi

                  # Copy to shared volume
                  cp "$BIN_FILE" /data/IP2LOCATION-LITE-DB5.BIN

                  echo "Done! Database updated: /data/IP2LOCATION-LITE-DB5.BIN"
              env:
                - name: IP2LOCATION_TOKEN
                  valueFrom:
                    secretKeyRef:
                      name: dutchie-backend-secret
                      key: IP2LOCATION_TOKEN
              volumeMounts:
                - name: ip2location-data
                  mountPath: /data
          restartPolicy: OnFailure
          volumes:
            - name: ip2location-data
              persistentVolumeClaim:
                claimName: ip2location-pvc
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: ip2location-pvc
  namespace: default
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 100Mi
@@ -26,6 +26,12 @@ spec:
              name: dutchie-backend-config
          - secretRef:
              name: dutchie-backend-secret
          env:
            - name: IP2LOCATION_DB_PATH
              value: /data/ip2location/IP2LOCATION-LITE-DB5.BIN
          volumeMounts:
            - name: ip2location-data
              mountPath: /data/ip2location
          resources:
            requests:
              memory: "256Mi"
@@ -45,3 +51,7 @@ spec:
              port: 3010
            initialDelaySeconds: 5
            periodSeconds: 5
      volumes:
        - name: ip2location-data
          persistentVolumeClaim:
            claimName: ip2location-pvc
12 backend/migrations/073_proxy_timezone.sql Normal file
@@ -0,0 +1,12 @@
-- Add timezone column to proxies table for geo-consistent fingerprinting
-- This allows matching Accept-Language and other headers to proxy location

ALTER TABLE proxies
ADD COLUMN IF NOT EXISTS timezone VARCHAR(50);

-- Add timezone to failed_proxies as well
ALTER TABLE failed_proxies
ADD COLUMN IF NOT EXISTS timezone VARCHAR(50);

-- Comment explaining usage
COMMENT ON COLUMN proxies.timezone IS 'IANA timezone (e.g., America/Phoenix) for geo-consistent fingerprinting';
27 backend/migrations/074_worker_commands.sql Normal file
@@ -0,0 +1,27 @@
-- Migration: Worker Commands Table
-- Purpose: Store commands for workers (decommission, etc.)
-- Workers poll this table after each task to check for commands

CREATE TABLE IF NOT EXISTS worker_commands (
  id SERIAL PRIMARY KEY,
  worker_id TEXT NOT NULL,
  command TEXT NOT NULL,        -- 'decommission', 'pause', 'resume'
  reason TEXT,
  issued_by TEXT,
  issued_at TIMESTAMPTZ DEFAULT NOW(),
  acknowledged_at TIMESTAMPTZ,
  executed_at TIMESTAMPTZ,
  status TEXT DEFAULT 'pending' -- 'pending', 'acknowledged', 'executed', 'cancelled'
);

-- Index for worker lookups
CREATE INDEX IF NOT EXISTS idx_worker_commands_worker_id ON worker_commands(worker_id);
CREATE INDEX IF NOT EXISTS idx_worker_commands_pending ON worker_commands(worker_id, status) WHERE status = 'pending';

-- Add decommission_requested column to worker_registry for quick checks
ALTER TABLE worker_registry ADD COLUMN IF NOT EXISTS decommission_requested BOOLEAN DEFAULT FALSE;
ALTER TABLE worker_registry ADD COLUMN IF NOT EXISTS decommission_reason TEXT;
ALTER TABLE worker_registry ADD COLUMN IF NOT EXISTS decommission_requested_at TIMESTAMPTZ;

-- Comment
COMMENT ON TABLE worker_commands IS 'Commands issued to workers (decommission after task, pause, etc.)';
322 backend/migrations/074_worker_task_queue.sql Normal file
@@ -0,0 +1,322 @@
-- Migration 074: Worker Task Queue System
-- Implements role-based task queue with per-store locking and capacity tracking

-- Task queue table
CREATE TABLE IF NOT EXISTS worker_tasks (
  id SERIAL PRIMARY KEY,

  -- Task identification
  role VARCHAR(50) NOT NULL, -- store_discovery, entry_point_discovery, product_discovery, product_resync, analytics_refresh
  dispensary_id INTEGER REFERENCES dispensaries(id) ON DELETE CASCADE,
  platform VARCHAR(20), -- dutchie, jane, treez, etc.

  -- Task state
  status VARCHAR(20) NOT NULL DEFAULT 'pending',
  priority INTEGER DEFAULT 0, -- Higher = more urgent

  -- Scheduling
  scheduled_for TIMESTAMPTZ, -- For batch scheduling (e.g., every 4 hours)

  -- Ownership
  worker_id VARCHAR(100), -- Pod name or worker ID
  claimed_at TIMESTAMPTZ,
  started_at TIMESTAMPTZ,
  completed_at TIMESTAMPTZ,
  last_heartbeat_at TIMESTAMPTZ,

  -- Results
  result JSONB, -- Task output data
  error_message TEXT,
  retry_count INTEGER DEFAULT 0,
  max_retries INTEGER DEFAULT 3,

  -- Metadata
  created_at TIMESTAMPTZ DEFAULT NOW(),
  updated_at TIMESTAMPTZ DEFAULT NOW(),

  -- Constraints
  CONSTRAINT valid_status CHECK (status IN ('pending', 'claimed', 'running', 'completed', 'failed', 'stale'))
);

-- Indexes for efficient task claiming
CREATE INDEX IF NOT EXISTS idx_worker_tasks_pending
ON worker_tasks(role, priority DESC, created_at ASC)
WHERE status = 'pending';

CREATE INDEX IF NOT EXISTS idx_worker_tasks_claimed
ON worker_tasks(worker_id, claimed_at)
WHERE status = 'claimed';

CREATE INDEX IF NOT EXISTS idx_worker_tasks_running
ON worker_tasks(worker_id, last_heartbeat_at)
WHERE status = 'running';

CREATE INDEX IF NOT EXISTS idx_worker_tasks_dispensary
ON worker_tasks(dispensary_id)
WHERE dispensary_id IS NOT NULL;

CREATE INDEX IF NOT EXISTS idx_worker_tasks_scheduled
ON worker_tasks(scheduled_for)
WHERE status = 'pending' AND scheduled_for IS NOT NULL;

CREATE INDEX IF NOT EXISTS idx_worker_tasks_history
ON worker_tasks(role, completed_at DESC)
WHERE status IN ('completed', 'failed');

-- Partial unique index to prevent duplicate active tasks per store
-- Only one task can be claimed/running for a given dispensary at a time
CREATE UNIQUE INDEX IF NOT EXISTS idx_worker_tasks_unique_active_store
ON worker_tasks(dispensary_id)
WHERE status IN ('claimed', 'running') AND dispensary_id IS NOT NULL;

-- Worker registration table (tracks active workers)
CREATE TABLE IF NOT EXISTS worker_registry (
  id SERIAL PRIMARY KEY,
  worker_id VARCHAR(100) UNIQUE NOT NULL,
  role VARCHAR(50) NOT NULL,
  pod_name VARCHAR(100),
  hostname VARCHAR(100),
  started_at TIMESTAMPTZ DEFAULT NOW(),
  last_heartbeat_at TIMESTAMPTZ DEFAULT NOW(),
  tasks_completed INTEGER DEFAULT 0,
  tasks_failed INTEGER DEFAULT 0,
  status VARCHAR(20) DEFAULT 'active',

  CONSTRAINT valid_worker_status CHECK (status IN ('active', 'idle', 'offline'))
);

CREATE INDEX IF NOT EXISTS idx_worker_registry_role
ON worker_registry(role, status);

CREATE INDEX IF NOT EXISTS idx_worker_registry_heartbeat
ON worker_registry(last_heartbeat_at)
WHERE status = 'active';

-- Task completion tracking (summarized history)
CREATE TABLE IF NOT EXISTS task_completion_log (
  id SERIAL PRIMARY KEY,
  role VARCHAR(50) NOT NULL,
  date DATE NOT NULL DEFAULT CURRENT_DATE,
  hour INTEGER NOT NULL DEFAULT EXTRACT(HOUR FROM NOW()),

  tasks_created INTEGER DEFAULT 0,
  tasks_completed INTEGER DEFAULT 0,
  tasks_failed INTEGER DEFAULT 0,

  avg_duration_sec NUMERIC(10,2),
  min_duration_sec NUMERIC(10,2),
  max_duration_sec NUMERIC(10,2),

  updated_at TIMESTAMPTZ DEFAULT NOW(),

  UNIQUE(role, date, hour)
);

-- Capacity planning view
CREATE OR REPLACE VIEW v_worker_capacity AS
SELECT
  role,
  COUNT(*) FILTER (WHERE status = 'pending') as pending_tasks,
  COUNT(*) FILTER (WHERE status = 'pending' AND (scheduled_for IS NULL OR scheduled_for <= NOW())) as ready_tasks,
  COUNT(*) FILTER (WHERE status = 'claimed') as claimed_tasks,
  COUNT(*) FILTER (WHERE status = 'running') as running_tasks,
  COUNT(*) FILTER (WHERE status = 'completed' AND completed_at > NOW() - INTERVAL '1 hour') as completed_last_hour,
  COUNT(*) FILTER (WHERE status = 'failed' AND completed_at > NOW() - INTERVAL '1 hour') as failed_last_hour,
  COUNT(DISTINCT worker_id) FILTER (WHERE status IN ('claimed', 'running')) as active_workers,
  AVG(EXTRACT(EPOCH FROM (completed_at - started_at)))
    FILTER (WHERE status = 'completed' AND completed_at > NOW() - INTERVAL '1 hour') as avg_duration_sec,
  -- Capacity planning metrics
  CASE
    WHEN COUNT(*) FILTER (WHERE status = 'completed' AND completed_at > NOW() - INTERVAL '1 hour') > 0
    THEN 3600.0 / NULLIF(AVG(EXTRACT(EPOCH FROM (completed_at - started_at)))
      FILTER (WHERE status = 'completed' AND completed_at > NOW() - INTERVAL '1 hour'), 0)
    ELSE NULL
  END as tasks_per_worker_hour,
  -- Estimated time to drain queue
  CASE
    WHEN COUNT(DISTINCT worker_id) FILTER (WHERE status IN ('claimed', 'running')) > 0
      AND COUNT(*) FILTER (WHERE status = 'completed' AND completed_at > NOW() - INTERVAL '1 hour') > 0
    THEN COUNT(*) FILTER (WHERE status = 'pending') / NULLIF(
      COUNT(DISTINCT worker_id) FILTER (WHERE status IN ('claimed', 'running')) *
      (3600.0 / NULLIF(AVG(EXTRACT(EPOCH FROM (completed_at - started_at)))
        FILTER (WHERE status = 'completed' AND completed_at > NOW() - INTERVAL '1 hour'), 0)),
      0
    )
    ELSE NULL
  END as estimated_hours_to_drain
FROM worker_tasks
GROUP BY role;

-- Task history view (for UI)
CREATE OR REPLACE VIEW v_task_history AS
SELECT
  t.id,
  t.role,
  t.dispensary_id,
  d.name as dispensary_name,
  t.platform,
  t.status,
  t.priority,
  t.worker_id,
  t.scheduled_for,
  t.claimed_at,
  t.started_at,
  t.completed_at,
  t.error_message,
  t.retry_count,
  t.created_at,
  EXTRACT(EPOCH FROM (t.completed_at - t.started_at)) as duration_sec
FROM worker_tasks t
LEFT JOIN dispensaries d ON d.id = t.dispensary_id
ORDER BY t.created_at DESC;

-- Function to claim a task atomically
CREATE OR REPLACE FUNCTION claim_task(
  p_role VARCHAR(50),
  p_worker_id VARCHAR(100)
) RETURNS worker_tasks AS $$
DECLARE
  claimed_task worker_tasks;
BEGIN
  UPDATE worker_tasks
  SET
    status = 'claimed',
    worker_id = p_worker_id,
    claimed_at = NOW(),
    updated_at = NOW()
  WHERE id = (
    SELECT id FROM worker_tasks
    WHERE role = p_role
      AND status = 'pending'
      AND (scheduled_for IS NULL OR scheduled_for <= NOW())
      -- Exclude stores that already have an active task
      AND (dispensary_id IS NULL OR dispensary_id NOT IN (
        SELECT dispensary_id FROM worker_tasks
        WHERE status IN ('claimed', 'running')
          AND dispensary_id IS NOT NULL
      ))
    ORDER BY priority DESC, created_at ASC
    LIMIT 1
    FOR UPDATE SKIP LOCKED
  )
  RETURNING * INTO claimed_task;

  RETURN claimed_task;
END;
$$ LANGUAGE plpgsql;

-- Function to mark stale tasks (workers that died)
CREATE OR REPLACE FUNCTION recover_stale_tasks(
  stale_threshold_minutes INTEGER DEFAULT 10
) RETURNS INTEGER AS $$
DECLARE
  recovered_count INTEGER;
BEGIN
  WITH stale AS (
    UPDATE worker_tasks
    SET
      status = 'pending',
      worker_id = NULL,
      claimed_at = NULL,
      started_at = NULL,
      retry_count = retry_count + 1,
      updated_at = NOW()
    WHERE status IN ('claimed', 'running')
      AND last_heartbeat_at < NOW() - (stale_threshold_minutes || ' minutes')::INTERVAL
      AND retry_count < max_retries
    RETURNING id
  )
  SELECT COUNT(*) INTO recovered_count FROM stale;

  -- Mark tasks that exceeded retries as failed
  UPDATE worker_tasks
  SET
    status = 'failed',
    error_message = 'Exceeded max retries after worker failures',
    completed_at = NOW(),
    updated_at = NOW()
  WHERE status IN ('claimed', 'running')
    AND last_heartbeat_at < NOW() - (stale_threshold_minutes || ' minutes')::INTERVAL
    AND retry_count >= max_retries;

  RETURN recovered_count;
END;
$$ LANGUAGE plpgsql;

-- Function to generate daily resync tasks
CREATE OR REPLACE FUNCTION generate_resync_tasks(
  p_batches_per_day INTEGER DEFAULT 6, -- Every 4 hours
  p_date DATE DEFAULT CURRENT_DATE
) RETURNS INTEGER AS $$
DECLARE
  store_count INTEGER;
  stores_per_batch INTEGER;
  batch_num INTEGER;
  batch_count INTEGER;
  scheduled_time TIMESTAMPTZ;
  created_count INTEGER := 0;
BEGIN
  -- Count active stores that need resync
  SELECT COUNT(*) INTO store_count
  FROM dispensaries
  WHERE crawl_enabled = true
    AND menu_type = 'dutchie'
    AND platform_dispensary_id IS NOT NULL;

  IF store_count = 0 THEN
    RETURN 0;
  END IF;

  stores_per_batch := CEIL(store_count::NUMERIC / p_batches_per_day);

  FOR batch_num IN 0..(p_batches_per_day - 1) LOOP
    scheduled_time := p_date + (batch_num * 4 || ' hours')::INTERVAL;

    INSERT INTO worker_tasks (role, dispensary_id, platform, scheduled_for, priority)
    SELECT
      'product_resync',
      d.id,
      'dutchie',
      scheduled_time,
      0
    FROM (
      SELECT id, ROW_NUMBER() OVER (ORDER BY id) as rn
      FROM dispensaries
      WHERE crawl_enabled = true
        AND menu_type = 'dutchie'
        AND platform_dispensary_id IS NOT NULL
    ) d
    WHERE d.rn > (batch_num * stores_per_batch)
      AND d.rn <= ((batch_num + 1) * stores_per_batch)
    ON CONFLICT DO NOTHING;

    GET DIAGNOSTICS batch_count = ROW_COUNT; -- rows inserted for this batch
    created_count := created_count + batch_count;
  END LOOP;

  RETURN created_count;
END;
$$ LANGUAGE plpgsql;

-- Trigger to update timestamp
CREATE OR REPLACE FUNCTION update_worker_tasks_timestamp()
RETURNS TRIGGER AS $$
BEGIN
  NEW.updated_at = NOW();
  RETURN NEW;
END;
$$ LANGUAGE plpgsql;

DROP TRIGGER IF EXISTS worker_tasks_updated_at ON worker_tasks;
CREATE TRIGGER worker_tasks_updated_at
BEFORE UPDATE ON worker_tasks
FOR EACH ROW
EXECUTE FUNCTION update_worker_tasks_timestamp();

-- Comments
COMMENT ON TABLE worker_tasks IS 'Central task queue for all worker roles';
COMMENT ON TABLE worker_registry IS 'Registry of active workers and their stats';
COMMENT ON TABLE task_completion_log IS 'Hourly aggregated task completion metrics';
COMMENT ON VIEW v_worker_capacity IS 'Real-time capacity planning metrics per role';
COMMENT ON VIEW v_task_history IS 'Task history with dispensary details for UI';
COMMENT ON FUNCTION claim_task IS 'Atomically claim a task for a worker, respecting per-store locking';
COMMENT ON FUNCTION recover_stale_tasks IS 'Release tasks from dead workers back to pending';
COMMENT ON FUNCTION generate_resync_tasks IS 'Generate daily product resync tasks in batches';
13 backend/migrations/075_consecutive_misses.sql Normal file
@@ -0,0 +1,13 @@
-- Migration 075: Add consecutive_misses column to store_products
-- Used to track how many consecutive crawls a product has been missing from the feed
-- After 3 consecutive misses, product is marked as OOS

ALTER TABLE store_products
ADD COLUMN IF NOT EXISTS consecutive_misses INTEGER NOT NULL DEFAULT 0;

-- Index for finding products that need OOS check
CREATE INDEX IF NOT EXISTS idx_store_products_consecutive_misses
ON store_products (dispensary_id, consecutive_misses)
WHERE consecutive_misses > 0;

COMMENT ON COLUMN store_products.consecutive_misses IS 'Number of consecutive crawls where product was not in feed. Reset to 0 when seen. At 3, mark OOS.';
71 backend/migrations/076_visitor_analytics.sql Normal file
@@ -0,0 +1,71 @@
-- Visitor location analytics for Findagram
-- Tracks visitor locations to understand popular areas

CREATE TABLE IF NOT EXISTS visitor_locations (
  id SERIAL PRIMARY KEY,

  -- Location data (from IP lookup)
  ip_hash VARCHAR(64), -- Hashed IP for privacy (SHA256)
  city VARCHAR(100),
  state VARCHAR(100),
  state_code VARCHAR(10),
  country VARCHAR(100),
  country_code VARCHAR(10),
  latitude DECIMAL(10, 7),
  longitude DECIMAL(10, 7),

  -- Visit metadata
  domain VARCHAR(50) NOT NULL, -- 'findagram.co', 'findadispo.com', etc.
  page_path VARCHAR(255),      -- '/products', '/dispensaries/123', etc.
  referrer VARCHAR(500),
  user_agent VARCHAR(500),

  -- Session tracking
  session_id VARCHAR(64), -- For grouping page views in a session

  -- Timestamps
  created_at TIMESTAMPTZ DEFAULT NOW()
);

-- Indexes for analytics queries
CREATE INDEX IF NOT EXISTS idx_visitor_locations_domain ON visitor_locations(domain);
CREATE INDEX IF NOT EXISTS idx_visitor_locations_city_state ON visitor_locations(city, state_code);
CREATE INDEX IF NOT EXISTS idx_visitor_locations_created_at ON visitor_locations(created_at);
CREATE INDEX IF NOT EXISTS idx_visitor_locations_session ON visitor_locations(session_id);

-- Aggregated daily stats (materialized for performance)
CREATE TABLE IF NOT EXISTS visitor_location_stats (
  id SERIAL PRIMARY KEY,
  date DATE NOT NULL,
  domain VARCHAR(50) NOT NULL,
  city VARCHAR(100),
  state VARCHAR(100),
  state_code VARCHAR(10),
  country_code VARCHAR(10),

  -- Metrics
  visit_count INTEGER DEFAULT 0,
  unique_sessions INTEGER DEFAULT 0,

  UNIQUE(date, domain, city, state_code, country_code)
);

CREATE INDEX IF NOT EXISTS idx_visitor_stats_date ON visitor_location_stats(date);
CREATE INDEX IF NOT EXISTS idx_visitor_stats_domain ON visitor_location_stats(domain);
CREATE INDEX IF NOT EXISTS idx_visitor_stats_state ON visitor_location_stats(state_code);

-- View for easy querying of top locations
CREATE OR REPLACE VIEW v_top_visitor_locations AS
SELECT
  domain,
  city,
  state,
  state_code,
  country_code,
  COUNT(*) as total_visits,
  COUNT(DISTINCT session_id) as unique_sessions,
  MAX(created_at) as last_visit
FROM visitor_locations
WHERE created_at > NOW() - INTERVAL '30 days'
GROUP BY domain, city, state, state_code, country_code
ORDER BY total_visits DESC;
141 backend/migrations/076_worker_registry.sql Normal file
@@ -0,0 +1,141 @@
-- Migration 076: Worker Registry for Dynamic Workers
-- Workers register on startup, receive a friendly name, and report heartbeats

-- Name pool for workers (expandable, no hardcoding)
CREATE TABLE IF NOT EXISTS worker_name_pool (
  id SERIAL PRIMARY KEY,
  name VARCHAR(50) UNIQUE NOT NULL,
  in_use BOOLEAN DEFAULT FALSE,
  assigned_to VARCHAR(100), -- worker_id
  assigned_at TIMESTAMPTZ,
  created_at TIMESTAMPTZ DEFAULT NOW()
);

-- Seed with initial names (can add more via API)
INSERT INTO worker_name_pool (name) VALUES
  ('Alice'), ('Bella'), ('Clara'), ('Diana'), ('Elena'),
  ('Fiona'), ('Grace'), ('Hazel'), ('Iris'), ('Julia'),
  ('Katie'), ('Luna'), ('Mia'), ('Nora'), ('Olive'),
  ('Pearl'), ('Quinn'), ('Rosa'), ('Sara'), ('Tara'),
  ('Uma'), ('Vera'), ('Wendy'), ('Xena'), ('Yuki'), ('Zara'),
  ('Amber'), ('Blake'), ('Coral'), ('Dawn'), ('Echo'),
  ('Fleur'), ('Gem'), ('Haven'), ('Ivy'), ('Jade'),
  ('Kira'), ('Lotus'), ('Maple'), ('Nova'), ('Onyx'),
  ('Pixel'), ('Quest'), ('Raven'), ('Sage'), ('Terra'),
  ('Unity'), ('Violet'), ('Willow'), ('Xylo'), ('Yara'), ('Zen')
ON CONFLICT (name) DO NOTHING;

-- Worker registry - tracks active workers
CREATE TABLE IF NOT EXISTS worker_registry (
  id SERIAL PRIMARY KEY,
  worker_id VARCHAR(100) UNIQUE NOT NULL, -- e.g., "pod-abc123" or uuid
  friendly_name VARCHAR(50),              -- assigned from pool
  role VARCHAR(50) NOT NULL,              -- task role
  pod_name VARCHAR(100),                  -- k8s pod name
  hostname VARCHAR(100),                  -- machine hostname
  ip_address VARCHAR(50),                 -- worker IP
  status VARCHAR(20) DEFAULT 'starting',  -- starting, active, idle, offline, terminated
  started_at TIMESTAMPTZ DEFAULT NOW(),
  last_heartbeat_at TIMESTAMPTZ DEFAULT NOW(),
  last_task_at TIMESTAMPTZ,
  tasks_completed INTEGER DEFAULT 0,
  tasks_failed INTEGER DEFAULT 0,
  current_task_id INTEGER,
  metadata JSONB DEFAULT '{}',
  created_at TIMESTAMPTZ DEFAULT NOW(),
  updated_at TIMESTAMPTZ DEFAULT NOW()
);

-- Indexes for worker registry
CREATE INDEX IF NOT EXISTS idx_worker_registry_status ON worker_registry(status);
CREATE INDEX IF NOT EXISTS idx_worker_registry_role ON worker_registry(role);
CREATE INDEX IF NOT EXISTS idx_worker_registry_heartbeat ON worker_registry(last_heartbeat_at);

-- Function to assign a name to a new worker
CREATE OR REPLACE FUNCTION assign_worker_name(p_worker_id VARCHAR(100))
RETURNS VARCHAR(50) AS $$
DECLARE
  v_name VARCHAR(50);
BEGIN
  -- Try to get an unused name
  UPDATE worker_name_pool
  SET in_use = TRUE, assigned_to = p_worker_id, assigned_at = NOW()
  WHERE id = (
    SELECT id FROM worker_name_pool
    WHERE in_use = FALSE
    ORDER BY RANDOM()
    LIMIT 1
    FOR UPDATE SKIP LOCKED
  )
  RETURNING name INTO v_name;

  -- If no names available, generate one
  IF v_name IS NULL THEN
    v_name := 'Worker-' || SUBSTRING(p_worker_id FROM 1 FOR 8);
  END IF;

  RETURN v_name;
END;
$$ LANGUAGE plpgsql;

-- Function to release a worker's name back to the pool
CREATE OR REPLACE FUNCTION release_worker_name(p_worker_id VARCHAR(100))
RETURNS VOID AS $$
BEGIN
  UPDATE worker_name_pool
  SET in_use = FALSE, assigned_to = NULL, assigned_at = NULL
  WHERE assigned_to = p_worker_id;
END;
$$ LANGUAGE plpgsql;

-- Function to mark stale workers as offline
CREATE OR REPLACE FUNCTION mark_stale_workers(stale_threshold_minutes INTEGER DEFAULT 5)
RETURNS INTEGER AS $$
DECLARE
  v_count INTEGER;
BEGIN
  UPDATE worker_registry
  SET status = 'offline', updated_at = NOW()
  WHERE status IN ('active', 'idle', 'starting')
    AND last_heartbeat_at < NOW() - (stale_threshold_minutes || ' minutes')::INTERVAL;
  GET DIAGNOSTICS v_count = ROW_COUNT; -- number of workers marked offline

  -- Release names from offline workers
  PERFORM release_worker_name(worker_id)
  FROM worker_registry
  WHERE status = 'offline'
    AND last_heartbeat_at < NOW() - INTERVAL '30 minutes';

  RETURN COALESCE(v_count, 0);
END;
$$ LANGUAGE plpgsql;

-- View for dashboard
CREATE OR REPLACE VIEW v_active_workers AS
SELECT
  wr.id,
  wr.worker_id,
  wr.friendly_name,
  wr.role,
  wr.status,
  wr.pod_name,
  wr.hostname,
  wr.started_at,
  wr.last_heartbeat_at,
  wr.last_task_at,
  wr.tasks_completed,
  wr.tasks_failed,
  wr.current_task_id,
  EXTRACT(EPOCH FROM (NOW() - wr.last_heartbeat_at)) as seconds_since_heartbeat,
  CASE
    WHEN wr.status = 'offline' THEN 'offline'
    WHEN wr.last_heartbeat_at < NOW() - INTERVAL '2 minutes' THEN 'stale'
    WHEN wr.current_task_id IS NOT NULL THEN 'busy'
    ELSE 'ready'
  END as health_status
FROM worker_registry wr
WHERE wr.status != 'terminated'
ORDER BY wr.status = 'active' DESC, wr.last_heartbeat_at DESC;

COMMENT ON TABLE worker_registry IS 'Tracks all workers that have registered with the system';
COMMENT ON TABLE worker_name_pool IS 'Pool of friendly names for workers - expandable via API';
35 backend/migrations/077_click_events_location.sql Normal file
@@ -0,0 +1,35 @@
-- Migration: Add visitor location and dispensary name to click events
-- Captures where visitors are clicking from and which dispensary

-- Add visitor location columns
ALTER TABLE product_click_events
ADD COLUMN IF NOT EXISTS visitor_city VARCHAR(100);

ALTER TABLE product_click_events
ADD COLUMN IF NOT EXISTS visitor_state VARCHAR(10);

ALTER TABLE product_click_events
ADD COLUMN IF NOT EXISTS visitor_lat DECIMAL(10, 7);

ALTER TABLE product_click_events
ADD COLUMN IF NOT EXISTS visitor_lng DECIMAL(10, 7);

-- Add dispensary name for easier reporting
ALTER TABLE product_click_events
ADD COLUMN IF NOT EXISTS dispensary_name VARCHAR(255);

-- Create index for location-based analytics
CREATE INDEX IF NOT EXISTS idx_product_click_events_visitor_state
ON product_click_events(visitor_state)
WHERE visitor_state IS NOT NULL;

CREATE INDEX IF NOT EXISTS idx_product_click_events_visitor_city
ON product_click_events(visitor_city)
WHERE visitor_city IS NOT NULL;

-- Add comments
COMMENT ON COLUMN product_click_events.visitor_city IS 'City where the visitor is located (from IP geolocation)';
COMMENT ON COLUMN product_click_events.visitor_state IS 'State where the visitor is located (from IP geolocation)';
COMMENT ON COLUMN product_click_events.visitor_lat IS 'Visitor latitude (from IP geolocation)';
COMMENT ON COLUMN product_click_events.visitor_lng IS 'Visitor longitude (from IP geolocation)';
COMMENT ON COLUMN product_click_events.dispensary_name IS 'Name of the dispensary (denormalized for easier reporting)';
8 backend/migrations/078_proxy_consecutive_403.sql Normal file
@@ -0,0 +1,8 @@
-- Migration 078: Add consecutive_403_count to proxies table
-- Per workflow-12102025.md: Track consecutive 403s per proxy
-- After 3 consecutive 403s with different fingerprints → disable proxy

ALTER TABLE proxies ADD COLUMN IF NOT EXISTS consecutive_403_count INTEGER DEFAULT 0;

-- Add comment explaining the column
COMMENT ON COLUMN proxies.consecutive_403_count IS 'Tracks consecutive 403 blocks. Reset to 0 on success. Proxy disabled at 3.';
49 backend/migrations/079_task_schedules.sql Normal file
@@ -0,0 +1,49 @@
-- Migration 079: Task Schedules for Database-Driven Scheduler
-- Per TASK_WORKFLOW_2024-12-10.md: Replaces node-cron with DB-driven scheduling
--
-- 2024-12-10: Created for reliable, multi-replica-safe task scheduling

-- task_schedules: Stores schedule definitions and state
CREATE TABLE IF NOT EXISTS task_schedules (
  id SERIAL PRIMARY KEY,
  name VARCHAR(100) NOT NULL UNIQUE,
  role VARCHAR(50) NOT NULL, -- TaskRole: product_refresh, store_discovery, etc.
  description TEXT,

  -- Schedule configuration
  enabled BOOLEAN DEFAULT TRUE,
  interval_hours INTEGER NOT NULL DEFAULT 4,
  priority INTEGER DEFAULT 0,

  -- Optional scope filters
  state_code VARCHAR(2), -- NULL = all states
  platform VARCHAR(50),  -- NULL = all platforms

  -- Execution state (updated by scheduler)
  last_run_at TIMESTAMPTZ,
  next_run_at TIMESTAMPTZ,
  last_task_count INTEGER DEFAULT 0,
  last_error TEXT,

  created_at TIMESTAMPTZ DEFAULT NOW(),
  updated_at TIMESTAMPTZ DEFAULT NOW()
);

-- Indexes for scheduler queries
CREATE INDEX IF NOT EXISTS idx_task_schedules_enabled ON task_schedules(enabled) WHERE enabled = TRUE;
CREATE INDEX IF NOT EXISTS idx_task_schedules_next_run ON task_schedules(next_run_at) WHERE enabled = TRUE;

-- Insert default schedules
INSERT INTO task_schedules (name, role, interval_hours, priority, description, next_run_at)
VALUES
  ('product_refresh_all', 'product_refresh', 4, 0, 'Generate product refresh tasks for all crawl-enabled stores every 4 hours', NOW()),
  ('store_discovery_dutchie', 'store_discovery', 24, 5, 'Discover new Dutchie stores daily', NOW()),
  ('analytics_refresh', 'analytics_refresh', 6, 0, 'Refresh analytics materialized views every 6 hours', NOW())
ON CONFLICT (name) DO NOTHING;

-- Comment for documentation
COMMENT ON TABLE task_schedules IS 'Database-driven task scheduler configuration. Per TASK_WORKFLOW_2024-12-10.md:
- Schedules persist in DB (survive restarts)
- Uses SELECT FOR UPDATE SKIP LOCKED for multi-replica safety
- Scheduler polls every 60s and executes due schedules
- Creates tasks in worker_tasks for task-worker.ts to process';
58 backend/migrations/080_raw_crawl_payloads.sql Normal file
@@ -0,0 +1,58 @@
-- Migration 080: Raw Crawl Payloads Metadata Table
-- Per TASK_WORKFLOW_2024-12-10.md: Store full GraphQL payloads for historical analysis
--
-- Design Pattern: Metadata/Payload Separation
-- - Metadata (this table): Small, indexed, queryable
-- - Payload (filesystem): Gzipped JSON at storage_path
--
-- Benefits:
-- - Compare any two crawls to see what changed
-- - Replay/re-normalize historical data if logic changes
-- - Debug issues by seeing exactly what the API returned
-- - DB stays small, backups stay fast
--
-- Storage location: /storage/payloads/{year}/{month}/{day}/store_{id}_{timestamp}.json.gz
-- Compression: ~90% reduction (1.5MB -> 150KB per crawl)

CREATE TABLE IF NOT EXISTS raw_crawl_payloads (
  id SERIAL PRIMARY KEY,

  -- Links to crawl tracking
  crawl_run_id INTEGER REFERENCES crawl_runs(id) ON DELETE SET NULL,
  dispensary_id INTEGER NOT NULL REFERENCES dispensaries(id) ON DELETE CASCADE,

  -- File location (gzipped JSON)
  storage_path TEXT NOT NULL,

  -- Metadata for quick queries without loading file
  product_count INTEGER NOT NULL DEFAULT 0,
  size_bytes INTEGER,     -- Compressed size
  size_bytes_raw INTEGER, -- Uncompressed size

  -- Timestamps
  fetched_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
  created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),

  -- Optional: checksum for integrity verification
  checksum_sha256 VARCHAR(64)
);

-- Indexes for common queries
CREATE INDEX IF NOT EXISTS idx_raw_crawl_payloads_dispensary
ON raw_crawl_payloads(dispensary_id);

CREATE INDEX IF NOT EXISTS idx_raw_crawl_payloads_dispensary_fetched
ON raw_crawl_payloads(dispensary_id, fetched_at DESC);

CREATE INDEX IF NOT EXISTS idx_raw_crawl_payloads_fetched
ON raw_crawl_payloads(fetched_at DESC);

CREATE INDEX IF NOT EXISTS idx_raw_crawl_payloads_crawl_run
ON raw_crawl_payloads(crawl_run_id)
WHERE crawl_run_id IS NOT NULL;

-- Comments
COMMENT ON TABLE raw_crawl_payloads IS 'Metadata for raw GraphQL payloads stored on filesystem. Per TASK_WORKFLOW_2024-12-10.md: Full payloads enable historical diffs and replay.';
COMMENT ON COLUMN raw_crawl_payloads.storage_path IS 'Path to gzipped JSON file, e.g. /storage/payloads/2024/12/10/store_123_1702234567.json.gz';
COMMENT ON COLUMN raw_crawl_payloads.size_bytes IS 'Compressed file size in bytes';
COMMENT ON COLUMN raw_crawl_payloads.size_bytes_raw IS 'Uncompressed payload size in bytes';
37 backend/migrations/081_payload_fetch_columns.sql Normal file
@@ -0,0 +1,37 @@
-- Migration 081: Payload Fetch Columns
-- Per TASK_WORKFLOW_2024-12-10.md: Separates API fetch from data processing
--
-- New architecture:
-- - payload_fetch: Hits Dutchie API, saves raw payload to disk
-- - product_refresh: Reads local payload, normalizes, upserts to DB
--
-- This migration adds:
-- 1. payload column to worker_tasks (for task chaining data)
-- 2. processed_at column to raw_crawl_payloads (track when payload was processed)
-- 3. last_fetch_at column to dispensaries (track when last payload was fetched)

-- Add payload column to worker_tasks for task chaining
-- Used by payload_fetch to pass payload_id to product_refresh
ALTER TABLE worker_tasks
ADD COLUMN IF NOT EXISTS payload JSONB DEFAULT NULL;

COMMENT ON COLUMN worker_tasks.payload IS 'Per TASK_WORKFLOW_2024-12-10.md: Task chaining data (e.g., payload_id from payload_fetch to product_refresh)';

-- Add processed_at to raw_crawl_payloads
-- Tracks when the payload was processed by product_refresh
ALTER TABLE raw_crawl_payloads
ADD COLUMN IF NOT EXISTS processed_at TIMESTAMPTZ DEFAULT NULL;

COMMENT ON COLUMN raw_crawl_payloads.processed_at IS 'When this payload was processed by product_refresh handler';

-- Index for finding unprocessed payloads
CREATE INDEX IF NOT EXISTS idx_raw_crawl_payloads_unprocessed
ON raw_crawl_payloads(dispensary_id, fetched_at DESC)
WHERE processed_at IS NULL;

-- Add last_fetch_at to dispensaries
-- Tracks when the last payload was fetched (separate from last_crawl_at which is when processing completed)
ALTER TABLE dispensaries
ADD COLUMN IF NOT EXISTS last_fetch_at TIMESTAMPTZ DEFAULT NULL;

COMMENT ON COLUMN dispensaries.last_fetch_at IS 'Per TASK_WORKFLOW_2024-12-10.md: When last payload was fetched from API (separate from last_crawl_at which is when processing completed)';
27 backend/migrations/082_proxy_notification_trigger.sql Normal file
@@ -0,0 +1,27 @@
-- Migration: 082_proxy_notification_trigger
-- Date: 2024-12-11
-- Description: Add PostgreSQL NOTIFY trigger to alert workers when proxies are added

-- Create function to notify workers when active proxy is added/activated
CREATE OR REPLACE FUNCTION notify_proxy_added()
RETURNS TRIGGER AS $$
BEGIN
  -- Only notify if proxy is active
  IF NEW.active = true THEN
    PERFORM pg_notify('proxy_added', NEW.id::text);
  END IF;
  RETURN NEW;
END;
$$ LANGUAGE plpgsql;

-- Drop existing trigger if any
DROP TRIGGER IF EXISTS proxy_added_trigger ON proxies;

-- Create trigger on insert and update of active column
CREATE TRIGGER proxy_added_trigger
AFTER INSERT OR UPDATE OF active ON proxies
FOR EACH ROW
EXECUTE FUNCTION notify_proxy_added();

COMMENT ON FUNCTION notify_proxy_added() IS
'Sends PostgreSQL NOTIFY to proxy_added channel when an active proxy is added or activated. Workers LISTEN on this channel to wake up immediately.';
305 backend/node_modules/.package-lock.json generated vendored
(Generated, vendored lockfile diff, collapsed: package version bumped from 1.5.1 to 1.6.0; new entries added for @kubernetes/client-node, @jsep-plugin/assignment, @jsep-plugin/regex, @types/js-yaml, @types/node-fetch, @types/stream-buffers, bare-fs, bare-os, bare-path, bare-stream, bare-url, csv-parser, hpagent, ip2location-nodejs, isomorphic-ws, jose, and related transitive dependencies.)
|
||||||
|
"version": "1.4.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/jsep/-/jsep-1.4.0.tgz",
|
||||||
|
"integrity": "sha512-B7qPcEVE3NVkmSJbaYxvv4cHkVW7DQsZz13pUMrfS8z8Q/BuShN+gcTXrUlPiGqM2/t/EEaI030bpxMqY8gMlw==",
|
||||||
|
"engines": {
|
||||||
|
"node": ">= 10.16.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/json-parse-even-better-errors": {
|
"node_modules/json-parse-even-better-errors": {
|
||||||
"version": "2.3.1",
|
"version": "2.3.1",
|
||||||
"resolved": "https://registry.npmjs.org/json-parse-even-better-errors/-/json-parse-even-better-errors-2.3.1.tgz",
|
"resolved": "https://registry.npmjs.org/json-parse-even-better-errors/-/json-parse-even-better-errors-2.3.1.tgz",
|
||||||
@@ -2400,6 +2635,23 @@
|
|||||||
"graceful-fs": "^4.1.6"
|
"graceful-fs": "^4.1.6"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/jsonpath-plus": {
|
||||||
|
"version": "10.3.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/jsonpath-plus/-/jsonpath-plus-10.3.0.tgz",
|
||||||
|
"integrity": "sha512-8TNmfeTCk2Le33A3vRRwtuworG/L5RrgMvdjhKZxvyShO+mBu2fP50OWUjRLNtvw344DdDarFh9buFAZs5ujeA==",
|
||||||
|
"dependencies": {
|
||||||
|
"@jsep-plugin/assignment": "^1.3.0",
|
||||||
|
"@jsep-plugin/regex": "^1.0.4",
|
||||||
|
"jsep": "^1.4.0"
|
||||||
|
},
|
||||||
|
"bin": {
|
||||||
|
"jsonpath": "bin/jsonpath-cli.js",
|
||||||
|
"jsonpath-plus": "bin/jsonpath-cli.js"
|
||||||
|
},
|
||||||
|
"engines": {
|
||||||
|
"node": ">=18.0.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/jsonwebtoken": {
|
"node_modules/jsonwebtoken": {
|
||||||
"version": "9.0.2",
|
"version": "9.0.2",
|
||||||
"resolved": "https://registry.npmjs.org/jsonwebtoken/-/jsonwebtoken-9.0.2.tgz",
|
"resolved": "https://registry.npmjs.org/jsonwebtoken/-/jsonwebtoken-9.0.2.tgz",
|
||||||
@@ -2474,6 +2726,11 @@
|
|||||||
"resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz",
|
"resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz",
|
||||||
"integrity": "sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg=="
|
"integrity": "sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg=="
|
||||||
},
|
},
|
||||||
|
"node_modules/lodash.clonedeep": {
|
||||||
|
"version": "4.5.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/lodash.clonedeep/-/lodash.clonedeep-4.5.0.tgz",
|
||||||
|
"integrity": "sha512-H5ZhCF25riFd9uB5UCkVKo61m3S/xZk1x4wA6yp/L3RFP6Z/eHH1ymQcGLo7J3GMPfm0V/7m1tryHuGVxpqEBQ=="
|
||||||
|
},
|
||||||
"node_modules/lodash.defaults": {
|
"node_modules/lodash.defaults": {
|
||||||
"version": "4.2.0",
|
"version": "4.2.0",
|
||||||
"resolved": "https://registry.npmjs.org/lodash.defaults/-/lodash.defaults-4.2.0.tgz",
|
"resolved": "https://registry.npmjs.org/lodash.defaults/-/lodash.defaults-4.2.0.tgz",
|
||||||
@@ -2923,6 +3180,14 @@
|
|||||||
"url": "https://github.com/fb55/nth-check?sponsor=1"
|
"url": "https://github.com/fb55/nth-check?sponsor=1"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/oauth4webapi": {
|
||||||
|
"version": "3.8.3",
|
||||||
|
"resolved": "https://registry.npmjs.org/oauth4webapi/-/oauth4webapi-3.8.3.tgz",
|
||||||
|
"integrity": "sha512-pQ5BsX3QRTgnt5HxgHwgunIRaDXBdkT23tf8dfzmtTIL2LTpdmxgbpbBm0VgFWAIDlezQvQCTgnVIUmHupXHxw==",
|
||||||
|
"funding": {
|
||||||
|
"url": "https://github.com/sponsors/panva"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/object-assign": {
|
"node_modules/object-assign": {
|
||||||
"version": "4.1.1",
|
"version": "4.1.1",
|
||||||
"resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz",
|
"resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz",
|
||||||
@@ -2961,6 +3226,18 @@
|
|||||||
"wrappy": "1"
|
"wrappy": "1"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/openid-client": {
|
||||||
|
"version": "6.8.1",
|
||||||
|
"resolved": "https://registry.npmjs.org/openid-client/-/openid-client-6.8.1.tgz",
|
||||||
|
"integrity": "sha512-VoYT6enBo6Vj2j3Q5Ec0AezS+9YGzQo1f5Xc42lreMGlfP4ljiXPKVDvCADh+XHCV/bqPu/wWSiCVXbJKvrODw==",
|
||||||
|
"dependencies": {
|
||||||
|
"jose": "^6.1.0",
|
||||||
|
"oauth4webapi": "^3.8.2"
|
||||||
|
},
|
||||||
|
"funding": {
|
||||||
|
"url": "https://github.com/sponsors/panva"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/pac-proxy-agent": {
|
"node_modules/pac-proxy-agent": {
|
||||||
"version": "7.2.0",
|
"version": "7.2.0",
|
||||||
"resolved": "https://registry.npmjs.org/pac-proxy-agent/-/pac-proxy-agent-7.2.0.tgz",
|
"resolved": "https://registry.npmjs.org/pac-proxy-agent/-/pac-proxy-agent-7.2.0.tgz",
|
||||||
@@ -3864,6 +4141,11 @@
|
|||||||
"url": "https://github.com/privatenumber/resolve-pkg-maps?sponsor=1"
|
"url": "https://github.com/privatenumber/resolve-pkg-maps?sponsor=1"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/rfc4648": {
|
||||||
|
"version": "1.5.4",
|
||||||
|
"resolved": "https://registry.npmjs.org/rfc4648/-/rfc4648-1.5.4.tgz",
|
||||||
|
"integrity": "sha512-rRg/6Lb+IGfJqO05HZkN50UtY7K/JhxJag1kP23+zyMfrvoB0B7RWv06MbOzoc79RgCdNTiUaNsTT1AJZ7Z+cg=="
|
||||||
|
},
|
||||||
"node_modules/rimraf": {
|
"node_modules/rimraf": {
|
||||||
"version": "3.0.2",
|
"version": "3.0.2",
|
||||||
"resolved": "https://registry.npmjs.org/rimraf/-/rimraf-3.0.2.tgz",
|
"resolved": "https://registry.npmjs.org/rimraf/-/rimraf-3.0.2.tgz",
|
||||||
@@ -4294,6 +4576,14 @@
|
|||||||
"node": ">= 0.8"
|
"node": ">= 0.8"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/stream-buffers": {
|
||||||
|
"version": "3.0.3",
|
||||||
|
"resolved": "https://registry.npmjs.org/stream-buffers/-/stream-buffers-3.0.3.tgz",
|
||||||
|
"integrity": "sha512-pqMqwQCso0PBJt2PQmDO0cFj0lyqmiwOMiMSkVtRokl7e+ZTRYgDHKnuZNbqjiJXgsg4nuqtD/zxuo9KqTp0Yw==",
|
||||||
|
"engines": {
|
||||||
|
"node": ">= 0.10.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/streamx": {
|
"node_modules/streamx": {
|
||||||
"version": "2.23.0",
|
"version": "2.23.0",
|
||||||
"resolved": "https://registry.npmjs.org/streamx/-/streamx-2.23.0.tgz",
|
"resolved": "https://registry.npmjs.org/streamx/-/streamx-2.23.0.tgz",
|
||||||
@@ -4513,8 +4803,7 @@
|
|||||||
"node_modules/undici-types": {
|
"node_modules/undici-types": {
|
||||||
"version": "6.21.0",
|
"version": "6.21.0",
|
||||||
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.21.0.tgz",
|
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.21.0.tgz",
|
||||||
"integrity": "sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ==",
|
"integrity": "sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ=="
|
||||||
"devOptional": true
|
|
||||||
},
|
},
|
||||||
"node_modules/universalify": {
|
"node_modules/universalify": {
|
||||||
"version": "2.0.1",
|
"version": "2.0.1",
|
||||||
@@ -4537,6 +4826,14 @@
|
|||||||
"resolved": "https://registry.npmjs.org/urlpattern-polyfill/-/urlpattern-polyfill-10.0.0.tgz",
|
"resolved": "https://registry.npmjs.org/urlpattern-polyfill/-/urlpattern-polyfill-10.0.0.tgz",
|
||||||
"integrity": "sha512-H/A06tKD7sS1O1X2SshBVeA5FLycRpjqiBeqGKmBwBDBy28EnRjORxTNe269KSSr5un5qyWi1iL61wLxpd+ZOg=="
|
"integrity": "sha512-H/A06tKD7sS1O1X2SshBVeA5FLycRpjqiBeqGKmBwBDBy28EnRjORxTNe269KSSr5un5qyWi1iL61wLxpd+ZOg=="
|
||||||
},
|
},
|
||||||
|
"node_modules/user-agents": {
|
||||||
|
"version": "1.1.669",
|
||||||
|
"resolved": "https://registry.npmjs.org/user-agents/-/user-agents-1.1.669.tgz",
|
||||||
|
"integrity": "sha512-pbIzG+AOqCaIpySKJ4IAm1l0VyE4jMnK4y1thV8lm8PYxI+7X5uWcppOK7zY79TCKKTAnJH3/4gaVIZHsjrmJA==",
|
||||||
|
"dependencies": {
|
||||||
|
"lodash.clonedeep": "^4.5.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/util": {
|
"node_modules/util": {
|
||||||
"version": "0.12.5",
|
"version": "0.12.5",
|
||||||
"resolved": "https://registry.npmjs.org/util/-/util-0.12.5.tgz",
|
"resolved": "https://registry.npmjs.org/util/-/util-0.12.5.tgz",
|
||||||
|
|||||||
310  backend/package-lock.json  (generated)
@@ -1,13 +1,14 @@
 {
   "name": "dutchie-menus-backend",
-  "version": "1.5.1",
+  "version": "1.6.0",
   "lockfileVersion": 3,
   "requires": true,
   "packages": {
     "": {
       "name": "dutchie-menus-backend",
-      "version": "1.5.1",
+      "version": "1.6.0",
       "dependencies": {
+        "@kubernetes/client-node": "^1.4.0",
         "@types/bcryptjs": "^3.0.0",
         "axios": "^1.6.2",
         "bcrypt": "^5.1.1",
@@ -21,6 +22,7 @@
         "helmet": "^7.1.0",
         "https-proxy-agent": "^7.0.2",
         "ioredis": "^5.8.2",
+        "ip2location-nodejs": "^9.7.0",
         "ipaddr.js": "^2.2.0",
         "jsonwebtoken": "^9.0.2",
         "minio": "^7.1.3",
@@ -33,6 +35,7 @@
         "puppeteer-extra-plugin-stealth": "^2.11.2",
         "sharp": "^0.32.0",
         "socks-proxy-agent": "^8.0.2",
+        "user-agents": "^1.1.669",
         "uuid": "^9.0.1",
         "zod": "^3.22.4"
       },
@@ -491,6 +494,97 @@
       "resolved": "https://registry.npmjs.org/@ioredis/commands/-/commands-1.4.0.tgz",
       "integrity": "sha512-aFT2yemJJo+TZCmieA7qnYGQooOS7QfNmYrzGtsYd3g9j5iDP8AimYYAesf79ohjbLG12XxC4nG5DyEnC88AsQ=="
     },
+    "node_modules/@jsep-plugin/assignment": {
+      "version": "1.3.0",
+      "resolved": "https://registry.npmjs.org/@jsep-plugin/assignment/-/assignment-1.3.0.tgz",
+      "integrity": "sha512-VVgV+CXrhbMI3aSusQyclHkenWSAm95WaiKrMxRFam3JSUiIaQjoMIw2sEs/OX4XifnqeQUN4DYbJjlA8EfktQ==",
+      "engines": {
+        "node": ">= 10.16.0"
+      },
+      "peerDependencies": {
+        "jsep": "^0.4.0||^1.0.0"
+      }
+    },
+    "node_modules/@jsep-plugin/regex": {
+      "version": "1.0.4",
+      "resolved": "https://registry.npmjs.org/@jsep-plugin/regex/-/regex-1.0.4.tgz",
+      "integrity": "sha512-q7qL4Mgjs1vByCaTnDFcBnV9HS7GVPJX5vyVoCgZHNSC9rjwIlmbXG5sUuorR5ndfHAIlJ8pVStxvjXHbNvtUg==",
+      "engines": {
+        "node": ">= 10.16.0"
+      },
+      "peerDependencies": {
+        "jsep": "^0.4.0||^1.0.0"
+      }
+    },
+    "node_modules/@kubernetes/client-node": {
+      "version": "1.4.0",
+      "resolved": "https://registry.npmjs.org/@kubernetes/client-node/-/client-node-1.4.0.tgz",
+      "integrity": "sha512-Zge3YvF7DJi264dU1b3wb/GmzR99JhUpqTvp+VGHfwZT+g7EOOYNScDJNZwXy9cszyIGPIs0VHr+kk8e95qqrA==",
+      "dependencies": {
+        "@types/js-yaml": "^4.0.1",
+        "@types/node": "^24.0.0",
+        "@types/node-fetch": "^2.6.13",
+        "@types/stream-buffers": "^3.0.3",
+        "form-data": "^4.0.0",
+        "hpagent": "^1.2.0",
+        "isomorphic-ws": "^5.0.0",
+        "js-yaml": "^4.1.0",
+        "jsonpath-plus": "^10.3.0",
+        "node-fetch": "^2.7.0",
+        "openid-client": "^6.1.3",
+        "rfc4648": "^1.3.0",
+        "socks-proxy-agent": "^8.0.4",
+        "stream-buffers": "^3.0.2",
+        "tar-fs": "^3.0.9",
+        "ws": "^8.18.2"
+      }
+    },
+    "node_modules/@kubernetes/client-node/node_modules/@types/node": {
+      "version": "24.10.3",
+      "resolved": "https://registry.npmjs.org/@types/node/-/node-24.10.3.tgz",
+      "integrity": "sha512-gqkrWUsS8hcm0r44yn7/xZeV1ERva/nLgrLxFRUGb7aoNMIJfZJ3AC261zDQuOAKC7MiXai1WCpYc48jAHoShQ==",
+      "dependencies": {
+        "undici-types": "~7.16.0"
+      }
+    },
+    "node_modules/@kubernetes/client-node/node_modules/tar-fs": {
+      "version": "3.1.1",
+      "resolved": "https://registry.npmjs.org/tar-fs/-/tar-fs-3.1.1.tgz",
+      "integrity": "sha512-LZA0oaPOc2fVo82Txf3gw+AkEd38szODlptMYejQUhndHMLQ9M059uXR+AfS7DNo0NpINvSqDsvyaCrBVkptWg==",
+      "dependencies": {
+        "pump": "^3.0.0",
+        "tar-stream": "^3.1.5"
+      },
+      "optionalDependencies": {
+        "bare-fs": "^4.0.1",
+        "bare-path": "^3.0.0"
+      }
+    },
+    "node_modules/@kubernetes/client-node/node_modules/undici-types": {
+      "version": "7.16.0",
+      "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.16.0.tgz",
+      "integrity": "sha512-Zz+aZWSj8LE6zoxD+xrjh4VfkIG8Ya6LvYkZqtUQGJPZjYl53ypCaUwWqo7eI0x66KBGeRo+mlBEkMSeSZ38Nw=="
+    },
+    "node_modules/@kubernetes/client-node/node_modules/ws": {
+      "version": "8.18.3",
+      "resolved": "https://registry.npmjs.org/ws/-/ws-8.18.3.tgz",
+      "integrity": "sha512-PEIGCY5tSlUt50cqyMXfCzX+oOPqN0vuGqWzbcJ2xvnkzkq46oOpz7dQaTDBdfICb4N14+GARUDw2XV2N4tvzg==",
+      "engines": {
+        "node": ">=10.0.0"
+      },
+      "peerDependencies": {
+        "bufferutil": "^4.0.1",
+        "utf-8-validate": ">=5.0.2"
+      },
+      "peerDependenciesMeta": {
+        "bufferutil": {
+          "optional": true
+        },
+        "utf-8-validate": {
+          "optional": true
+        }
+      }
+    },
     "node_modules/@mapbox/node-pre-gyp": {
       "version": "1.0.11",
       "resolved": "https://registry.npmjs.org/@mapbox/node-pre-gyp/-/node-pre-gyp-1.0.11.tgz",
@@ -756,6 +850,11 @@
       "integrity": "sha512-r8Tayk8HJnX0FztbZN7oVqGccWgw98T/0neJphO91KkmOzug1KkofZURD4UaD5uH8AqcFLfdPErnBod0u71/qg==",
       "dev": true
     },
+    "node_modules/@types/js-yaml": {
+      "version": "4.0.9",
+      "resolved": "https://registry.npmjs.org/@types/js-yaml/-/js-yaml-4.0.9.tgz",
+      "integrity": "sha512-k4MGaQl5TGo/iipqb2UDG2UwjXziSWkh0uysQelTlJpX1qGlpUZYm8PnO4DxG1qBomtJUdYJ6qR6xdIah10JLg=="
+    },
     "node_modules/@types/jsonwebtoken": {
       "version": "9.0.10",
       "resolved": "https://registry.npmjs.org/@types/jsonwebtoken/-/jsonwebtoken-9.0.10.tgz",
@@ -781,7 +880,6 @@
       "version": "20.19.25",
       "resolved": "https://registry.npmjs.org/@types/node/-/node-20.19.25.tgz",
       "integrity": "sha512-ZsJzA5thDQMSQO788d7IocwwQbI8B5OPzmqNvpf3NY/+MHDAS759Wo0gd2WQeXYt5AAAQjzcrTVC6SKCuYgoCQ==",
-      "devOptional": true,
       "dependencies": {
         "undici-types": "~6.21.0"
       }
@@ -792,6 +890,15 @@
       "integrity": "sha512-0ikrnug3/IyneSHqCBeslAhlK2aBfYek1fGo4bP4QnZPmiqSGRK+Oy7ZMisLWkesffJvQ1cqAcBnJC+8+nxIAg==",
       "dev": true
     },
+    "node_modules/@types/node-fetch": {
+      "version": "2.6.13",
+      "resolved": "https://registry.npmjs.org/@types/node-fetch/-/node-fetch-2.6.13.tgz",
+      "integrity": "sha512-QGpRVpzSaUs30JBSGPjOg4Uveu384erbHBoT1zeONvyCfwQxIkUshLAOqN/k9EjGviPRmWTTe6aH2qySWKTVSw==",
+      "dependencies": {
+        "@types/node": "*",
+        "form-data": "^4.0.4"
+      }
+    },
     "node_modules/@types/pg": {
       "version": "8.15.6",
       "resolved": "https://registry.npmjs.org/@types/pg/-/pg-8.15.6.tgz",
@@ -845,6 +952,14 @@
         "@types/node": "*"
       }
     },
+    "node_modules/@types/stream-buffers": {
+      "version": "3.0.8",
+      "resolved": "https://registry.npmjs.org/@types/stream-buffers/-/stream-buffers-3.0.8.tgz",
+      "integrity": "sha512-J+7VaHKNvlNPJPEJXX/fKa9DZtR/xPMwuIbe+yNOwp1YB+ApUOBv2aUpEoBJEi8nJgbgs1x8e73ttg0r1rSUdw==",
+      "dependencies": {
+        "@types/node": "*"
+      }
+    },
     "node_modules/@types/uuid": {
       "version": "9.0.8",
       "resolved": "https://registry.npmjs.org/@types/uuid/-/uuid-9.0.8.tgz",
@@ -1025,6 +1140,78 @@
         }
       }
     },
+    "node_modules/bare-fs": {
+      "version": "4.5.2",
+      "resolved": "https://registry.npmjs.org/bare-fs/-/bare-fs-4.5.2.tgz",
+      "integrity": "sha512-veTnRzkb6aPHOvSKIOy60KzURfBdUflr5VReI+NSaPL6xf+XLdONQgZgpYvUuZLVQ8dCqxpBAudaOM1+KpAUxw==",
+      "optional": true,
+      "dependencies": {
+        "bare-events": "^2.5.4",
+        "bare-path": "^3.0.0",
+        "bare-stream": "^2.6.4",
+        "bare-url": "^2.2.2",
+        "fast-fifo": "^1.3.2"
+      },
+      "engines": {
+        "bare": ">=1.16.0"
+      },
+      "peerDependencies": {
+        "bare-buffer": "*"
+      },
+      "peerDependenciesMeta": {
+        "bare-buffer": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/bare-os": {
+      "version": "3.6.2",
+      "resolved": "https://registry.npmjs.org/bare-os/-/bare-os-3.6.2.tgz",
+      "integrity": "sha512-T+V1+1srU2qYNBmJCXZkUY5vQ0B4FSlL3QDROnKQYOqeiQR8UbjNHlPa+TIbM4cuidiN9GaTaOZgSEgsvPbh5A==",
+      "optional": true,
+      "engines": {
+        "bare": ">=1.14.0"
+      }
+    },
+    "node_modules/bare-path": {
+      "version": "3.0.0",
+      "resolved": "https://registry.npmjs.org/bare-path/-/bare-path-3.0.0.tgz",
+      "integrity": "sha512-tyfW2cQcB5NN8Saijrhqn0Zh7AnFNsnczRcuWODH0eYAXBsJ5gVxAUuNr7tsHSC6IZ77cA0SitzT+s47kot8Mw==",
+      "optional": true,
+      "dependencies": {
+        "bare-os": "^3.0.1"
+      }
+    },
+    "node_modules/bare-stream": {
+      "version": "2.7.0",
+      "resolved": "https://registry.npmjs.org/bare-stream/-/bare-stream-2.7.0.tgz",
+      "integrity": "sha512-oyXQNicV1y8nc2aKffH+BUHFRXmx6VrPzlnaEvMhram0nPBrKcEdcyBg5r08D0i8VxngHFAiVyn1QKXpSG0B8A==",
+      "optional": true,
+      "dependencies": {
+        "streamx": "^2.21.0"
+      },
+      "peerDependencies": {
+        "bare-buffer": "*",
+        "bare-events": "*"
+      },
+      "peerDependenciesMeta": {
+        "bare-buffer": {
+          "optional": true
+        },
+        "bare-events": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/bare-url": {
+      "version": "2.3.2",
+      "resolved": "https://registry.npmjs.org/bare-url/-/bare-url-2.3.2.tgz",
+      "integrity": "sha512-ZMq4gd9ngV5aTMa5p9+UfY0b3skwhHELaDkhEHetMdX0LRkW9kzaym4oo/Eh+Ghm0CCDuMTsRIGM/ytUc1ZYmw==",
+      "optional": true,
+      "dependencies": {
+        "bare-path": "^3.0.0"
+      }
+    },
     "node_modules/base64-js": {
       "version": "1.5.1",
       "resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz",
@@ -1531,6 +1718,17 @@
         "url": "https://github.com/sponsors/fb55"
       }
     },
+    "node_modules/csv-parser": {
+      "version": "3.2.0",
+      "resolved": "https://registry.npmjs.org/csv-parser/-/csv-parser-3.2.0.tgz",
+      "integrity": "sha512-fgKbp+AJbn1h2dcAHKIdKNSSjfp43BZZykXsCjzALjKy80VXQNHPFJ6T9Afwdzoj24aMkq8GwDS7KGcDPpejrA==",
+      "bin": {
+        "csv-parser": "bin/csv-parser"
+      },
+      "engines": {
+        "node": ">= 10"
+      }
+    },
     "node_modules/data-uri-to-buffer": {
       "version": "6.0.2",
       "resolved": "https://registry.npmjs.org/data-uri-to-buffer/-/data-uri-to-buffer-6.0.2.tgz",
@@ -2527,6 +2725,14 @@
         "node": ">=16.0.0"
       }
     },
+    "node_modules/hpagent": {
+      "version": "1.2.0",
+      "resolved": "https://registry.npmjs.org/hpagent/-/hpagent-1.2.0.tgz",
+      "integrity": "sha512-A91dYTeIB6NoXG+PxTQpCCDDnfHsW9kc06Lvpu1TEe9gnd6ZFeiBoRO9JvzEv6xK7EX97/dUE8g/vBMTqTS3CA==",
+      "engines": {
+        "node": ">=14"
+      }
+    },
     "node_modules/htmlparser2": {
       "version": "10.0.0",
       "resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-10.0.0.tgz",
@@ -2754,6 +2960,14 @@
         "node": ">= 12"
       }
     },
+    "node_modules/ip2location-nodejs": {
+      "version": "9.7.0",
+      "resolved": "https://registry.npmjs.org/ip2location-nodejs/-/ip2location-nodejs-9.7.0.tgz",
+      "integrity": "sha512-eQ4T5TXm1cx0+pQcRycPiuaiRuoDEMd9O89Be7Ugk555qi9UY9enXSznkkqr3kQRyUaXx7zj5dORC5LGTPOttA==",
+      "dependencies": {
+        "csv-parser": "^3.0.0"
+      }
+    },
     "node_modules/ipaddr.js": {
       "version": "2.2.0",
       "resolved": "https://registry.npmjs.org/ipaddr.js/-/ipaddr.js-2.2.0.tgz",
@@ -2882,6 +3096,22 @@
         "node": ">=0.10.0"
       }
     },
+    "node_modules/isomorphic-ws": {
+      "version": "5.0.0",
+      "resolved": "https://registry.npmjs.org/isomorphic-ws/-/isomorphic-ws-5.0.0.tgz",
+      "integrity": "sha512-muId7Zzn9ywDsyXgTIafTry2sV3nySZeUDe6YedVd1Hvuuep5AsIlqK+XefWpYTyJG5e503F2xIuT2lcU6rCSw==",
+      "peerDependencies": {
+        "ws": "*"
+      }
+    },
+    "node_modules/jose": {
+      "version": "6.1.3",
+      "resolved": "https://registry.npmjs.org/jose/-/jose-6.1.3.tgz",
+      "integrity": "sha512-0TpaTfihd4QMNwrz/ob2Bp7X04yuxJkjRGi4aKmOqwhov54i6u79oCv7T+C7lo70MKH6BesI3vscD1yb/yzKXQ==",
+      "funding": {
+        "url": "https://github.com/sponsors/panva"
+      }
+    },
     "node_modules/js-tokens": {
       "version": "4.0.0",
       "resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz",
@@ -2898,6 +3128,14 @@
         "js-yaml": "bin/js-yaml.js"
       }
     },
+    "node_modules/jsep": {
+      "version": "1.4.0",
+      "resolved": "https://registry.npmjs.org/jsep/-/jsep-1.4.0.tgz",
+      "integrity": "sha512-B7qPcEVE3NVkmSJbaYxvv4cHkVW7DQsZz13pUMrfS8z8Q/BuShN+gcTXrUlPiGqM2/t/EEaI030bpxMqY8gMlw==",
+      "engines": {
+        "node": ">= 10.16.0"
+      }
+    },
     "node_modules/json-parse-even-better-errors": {
       "version": "2.3.1",
       "resolved": "https://registry.npmjs.org/json-parse-even-better-errors/-/json-parse-even-better-errors-2.3.1.tgz",
@@ -2919,6 +3157,23 @@
         "graceful-fs": "^4.1.6"
       }
     },
+    "node_modules/jsonpath-plus": {
+      "version": "10.3.0",
+      "resolved": "https://registry.npmjs.org/jsonpath-plus/-/jsonpath-plus-10.3.0.tgz",
+      "integrity": "sha512-8TNmfeTCk2Le33A3vRRwtuworG/L5RrgMvdjhKZxvyShO+mBu2fP50OWUjRLNtvw344DdDarFh9buFAZs5ujeA==",
+      "dependencies": {
+        "@jsep-plugin/assignment": "^1.3.0",
+        "@jsep-plugin/regex": "^1.0.4",
+        "jsep": "^1.4.0"
+      },
+      "bin": {
+        "jsonpath": "bin/jsonpath-cli.js",
+        "jsonpath-plus": "bin/jsonpath-cli.js"
+      },
+      "engines": {
+        "node": ">=18.0.0"
+      }
+    },
     "node_modules/jsonwebtoken": {
       "version": "9.0.2",
       "resolved": "https://registry.npmjs.org/jsonwebtoken/-/jsonwebtoken-9.0.2.tgz",
@@ -2993,6 +3248,11 @@
       "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz",
       "integrity": "sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg=="
     },
+    "node_modules/lodash.clonedeep": {
+      "version": "4.5.0",
+      "resolved": "https://registry.npmjs.org/lodash.clonedeep/-/lodash.clonedeep-4.5.0.tgz",
+      "integrity": "sha512-H5ZhCF25riFd9uB5UCkVKo61m3S/xZk1x4wA6yp/L3RFP6Z/eHH1ymQcGLo7J3GMPfm0V/7m1tryHuGVxpqEBQ=="
+    },
     "node_modules/lodash.defaults": {
       "version": "4.2.0",
       "resolved": "https://registry.npmjs.org/lodash.defaults/-/lodash.defaults-4.2.0.tgz",
@@ -3442,6 +3702,14 @@
         "url": "https://github.com/fb55/nth-check?sponsor=1"
       }
     },
+    "node_modules/oauth4webapi": {
+      "version": "3.8.3",
+      "resolved": "https://registry.npmjs.org/oauth4webapi/-/oauth4webapi-3.8.3.tgz",
+      "integrity": "sha512-pQ5BsX3QRTgnt5HxgHwgunIRaDXBdkT23tf8dfzmtTIL2LTpdmxgbpbBm0VgFWAIDlezQvQCTgnVIUmHupXHxw==",
+      "funding": {
+        "url": "https://github.com/sponsors/panva"
+      }
+    },
     "node_modules/object-assign": {
       "version": "4.1.1",
       "resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz",
@@ -3480,6 +3748,18 @@
         "wrappy": "1"
       }
     },
+    "node_modules/openid-client": {
+      "version": "6.8.1",
+      "resolved": "https://registry.npmjs.org/openid-client/-/openid-client-6.8.1.tgz",
+      "integrity": "sha512-VoYT6enBo6Vj2j3Q5Ec0AezS+9YGzQo1f5Xc42lreMGlfP4ljiXPKVDvCADh+XHCV/bqPu/wWSiCVXbJKvrODw==",
+      "dependencies": {
+        "jose": "^6.1.0",
+        "oauth4webapi": "^3.8.2"
+      },
+      "funding": {
+        "url": "https://github.com/sponsors/panva"
+      }
+    },
     "node_modules/pac-proxy-agent": {
       "version": "7.2.0",
       "resolved": "https://registry.npmjs.org/pac-proxy-agent/-/pac-proxy-agent-7.2.0.tgz",
@@ -4396,6 +4676,11 @@
         "url": "https://github.com/privatenumber/resolve-pkg-maps?sponsor=1"
       }
     },
+    "node_modules/rfc4648": {
+      "version": "1.5.4",
+      "resolved": "https://registry.npmjs.org/rfc4648/-/rfc4648-1.5.4.tgz",
+      "integrity": "sha512-rRg/6Lb+IGfJqO05HZkN50UtY7K/JhxJag1kP23+zyMfrvoB0B7RWv06MbOzoc79RgCdNTiUaNsTT1AJZ7Z+cg=="
+    },
     "node_modules/rimraf": {
       "version": "3.0.2",
       "resolved": "https://registry.npmjs.org/rimraf/-/rimraf-3.0.2.tgz",
@@ -4826,6 +5111,14 @@
         "node": ">= 0.8"
       }
     },
+    "node_modules/stream-buffers": {
+      "version": "3.0.3",
+      "resolved": "https://registry.npmjs.org/stream-buffers/-/stream-buffers-3.0.3.tgz",
+      "integrity": "sha512-pqMqwQCso0PBJt2PQmDO0cFj0lyqmiwOMiMSkVtRokl7e+ZTRYgDHKnuZNbqjiJXgsg4nuqtD/zxuo9KqTp0Yw==",
+      "engines": {
+        "node": ">= 0.10.0"
+      }
+    },
     "node_modules/streamx": {
       "version": "2.23.0",
       "resolved": "https://registry.npmjs.org/streamx/-/streamx-2.23.0.tgz",
@@ -5045,8 +5338,7 @@
     "node_modules/undici-types": {
       "version": "6.21.0",
       "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.21.0.tgz",
-      "integrity": "sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ==",
-      "devOptional": true
+      "integrity": "sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ=="
     },
     "node_modules/universalify": {
       "version": "2.0.1",
@@ -5069,6 +5361,14 @@
       "resolved": "https://registry.npmjs.org/urlpattern-polyfill/-/urlpattern-polyfill-10.0.0.tgz",
       "integrity": "sha512-H/A06tKD7sS1O1X2SshBVeA5FLycRpjqiBeqGKmBwBDBy28EnRjORxTNe269KSSr5un5qyWi1iL61wLxpd+ZOg=="
     },
+    "node_modules/user-agents": {
+      "version": "1.1.669",
+      "resolved": "https://registry.npmjs.org/user-agents/-/user-agents-1.1.669.tgz",
+      "integrity": "sha512-pbIzG+AOqCaIpySKJ4IAm1l0VyE4jMnK4y1thV8lm8PYxI+7X5uWcppOK7zY79TCKKTAnJH3/4gaVIZHsjrmJA==",
+      "dependencies": {
+        "lodash.clonedeep": "^4.5.0"
+      }
+    },
     "node_modules/util": {
       "version": "0.12.5",
       "resolved": "https://registry.npmjs.org/util/-/util-0.12.5.tgz",
backend/package.json
@@ -1,6 +1,6 @@
 {
   "name": "dutchie-menus-backend",
-  "version": "1.5.1",
+  "version": "1.6.0",
   "description": "Backend API for Dutchie Menus scraper and management",
   "main": "dist/index.js",
   "scripts": {
@@ -22,6 +22,7 @@
     "seed:dt:cities:bulk": "tsx src/scripts/seed-dt-cities-bulk.ts"
   },
   "dependencies": {
+    "@kubernetes/client-node": "^1.4.0",
    "@types/bcryptjs": "^3.0.0",
     "axios": "^1.6.2",
     "bcrypt": "^5.1.1",
@@ -35,6 +36,7 @@
     "helmet": "^7.1.0",
     "https-proxy-agent": "^7.0.2",
     "ioredis": "^5.8.2",
+    "ip2location-nodejs": "^9.7.0",
     "ipaddr.js": "^2.2.0",
     "jsonwebtoken": "^9.0.2",
     "minio": "^7.1.3",
@@ -47,6 +49,7 @@
     "puppeteer-extra-plugin-stealth": "^2.11.2",
     "sharp": "^0.32.0",
     "socks-proxy-agent": "^8.0.2",
+    "user-agents": "^1.1.669",
     "uuid": "^9.0.1",
     "zod": "^3.22.4"
   },
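None of the code that calls the new @kubernetes/client-node dependency appears in this part of the compare, so as orientation only, here is a minimal bootstrap sketch using the library's documented KubeConfig and CoreV1Api entry points. The object-argument call style follows the 1.x client added here; the 'default' namespace is a placeholder, not a value taken from this repo.

    import * as k8s from '@kubernetes/client-node';

    async function listPods(): Promise<void> {
      const kc = new k8s.KubeConfig();
      kc.loadFromDefault(); // in-cluster config when deployed, ~/.kube/config locally

      const core = kc.makeApiClient(k8s.CoreV1Api);
      // 1.x clients take a single options object and resolve to the list body.
      const pods = await core.listNamespacedPod({ namespace: 'default' });
      console.log(pods.items.map((p) => p.metadata?.name));
    }

    listPods().catch(console.error);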
|
|||||||
BIN
backend/public/downloads/cannaiq-menus-1.5.4.zip
Normal file
BIN
backend/public/downloads/cannaiq-menus-1.5.4.zip
Normal file
Binary file not shown.
BIN
backend/public/downloads/cannaiq-menus-1.6.0.zip
Normal file
BIN
backend/public/downloads/cannaiq-menus-1.6.0.zip
Normal file
Binary file not shown.
1
backend/public/downloads/cannaiq-menus-latest.zip
Symbolic link
1
backend/public/downloads/cannaiq-menus-latest.zip
Symbolic link
@@ -0,0 +1 @@
|
|||||||
|
cannaiq-menus-1.6.0.zip
|
||||||
65  backend/scripts/download-ip2location.sh  (Executable file)
@@ -0,0 +1,65 @@
+#!/bin/bash
+# Download IP2Location LITE DB3 (City-level) database
+# Free for commercial use with attribution
+# https://lite.ip2location.com/database/db3-ip-country-region-city
+
+set -e
+
+DATA_DIR="${1:-./data/ip2location}"
+DB_FILE="IP2LOCATION-LITE-DB3.BIN"
+
+mkdir -p "$DATA_DIR"
+cd "$DATA_DIR"
+
+echo "Downloading IP2Location LITE DB3 database..."
+
+# IP2Location LITE DB3 - includes city, region, country, lat/lng
+# You need to register at https://lite.ip2location.com/ to get a download token
+# Then set IP2LOCATION_TOKEN environment variable
+
+if [ -z "$IP2LOCATION_TOKEN" ]; then
+  echo ""
+  echo "ERROR: IP2LOCATION_TOKEN not set"
+  echo ""
+  echo "To download the database:"
+  echo "1. Register free at https://lite.ip2location.com/"
+  echo "2. Get your download token from the dashboard"
+  echo "3. Run: IP2LOCATION_TOKEN=your_token ./scripts/download-ip2location.sh"
+  echo ""
+  exit 1
+fi
+
+# Download DB3.LITE (IPv4 + City)
+DOWNLOAD_URL="https://www.ip2location.com/download/?token=${IP2LOCATION_TOKEN}&file=DB3LITEBIN"
+
+echo "Downloading from IP2Location..."
+curl -L -o ip2location.zip "$DOWNLOAD_URL"
+
+echo "Extracting..."
+unzip -o ip2location.zip
+
+# Rename to standard name
+if [ -f "IP2LOCATION-LITE-DB3.BIN" ]; then
+  echo "Database ready: $DATA_DIR/IP2LOCATION-LITE-DB3.BIN"
+elif [ -f "IP-COUNTRY-REGION-CITY.BIN" ]; then
+  mv "IP-COUNTRY-REGION-CITY.BIN" "$DB_FILE"
+  echo "Database ready: $DATA_DIR/$DB_FILE"
+else
+  # Find whatever BIN file was extracted
+  BIN_FILE=$(ls *.BIN 2>/dev/null | head -1)
+  if [ -n "$BIN_FILE" ]; then
+    mv "$BIN_FILE" "$DB_FILE"
+    echo "Database ready: $DATA_DIR/$DB_FILE"
+  else
+    echo "ERROR: No BIN file found in archive"
+    ls -la
+    exit 1
+  fi
+fi
+
+# Cleanup
+rm -f ip2location.zip *.txt LICENSE* README*
+
+echo ""
+echo "Done! Database saved to: $DATA_DIR/$DB_FILE"
+echo "Update monthly by re-running this script."
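The script only fetches the BIN database; the lookup side lives in the ip2location-nodejs dependency added above. A minimal consumption sketch, assuming the script's default DATA_DIR and the package's documented IP2Location class (open/getAll/close); the test IP is arbitrary:

    import { IP2Location } from 'ip2location-nodejs';

    const db = new IP2Location();
    db.open('./data/ip2location/IP2LOCATION-LITE-DB3.BIN'); // BIN produced by the script above

    // getAll returns the full city-level record; field names follow the package docs.
    const geo = db.getAll('8.8.8.8');
    console.log(geo);

    db.close();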
@@ -29,6 +29,12 @@ const TRUSTED_ORIGINS = [
   'http://localhost:5173',
 ];
 
+// Pattern-based trusted origins (wildcards)
+const TRUSTED_ORIGIN_PATTERNS = [
+  /^https:\/\/.*\.cannabrands\.app$/, // *.cannabrands.app
+  /^https:\/\/.*\.cannaiq\.co$/,      // *.cannaiq.co
+];
+
 // Trusted IPs for internal pod-to-pod communication
 const TRUSTED_IPS = [
   '127.0.0.1',
@@ -42,9 +48,17 @@ const TRUSTED_IPS = [
 function isTrustedRequest(req: Request): boolean {
   // Check origin header
   const origin = req.headers.origin;
-  if (origin && TRUSTED_ORIGINS.includes(origin)) {
+  if (origin) {
+    if (TRUSTED_ORIGINS.includes(origin)) {
       return true;
     }
+    // Check pattern-based origins (wildcards like *.cannabrands.app)
+    for (const pattern of TRUSTED_ORIGIN_PATTERNS) {
+      if (pattern.test(origin)) {
+        return true;
+      }
+    }
+  }
 
   // Check referer header (for same-origin requests without CORS)
   const referer = req.headers.referer;
@@ -54,6 +68,18 @@ function isTrustedRequest(req: Request): boolean {
         return true;
       }
     }
+    // Check pattern-based referers
+    try {
+      const refererUrl = new URL(referer);
+      const refererOrigin = refererUrl.origin;
+      for (const pattern of TRUSTED_ORIGIN_PATTERNS) {
+        if (pattern.test(refererOrigin)) {
+          return true;
+        }
+      }
+    } catch {
+      // Invalid referer URL, skip
+    }
   }
 
   // Check IP for internal requests (pod-to-pod, localhost)
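The two hunks above extend isTrustedRequest so that exact-match origins and wildcard patterns are both honored, first for the Origin header and then for the Referer fallback. A self-contained sketch of what the new pattern check accepts; the regexes are copied from the diff, the sample origins are made up:

    // Standalone check of the wildcard-origin behaviour added above.
    const TRUSTED_ORIGIN_PATTERNS = [
      /^https:\/\/.*\.cannabrands\.app$/,
      /^https:\/\/.*\.cannaiq\.co$/,
    ];

    function matchesTrustedPattern(origin: string): boolean {
      return TRUSTED_ORIGIN_PATTERNS.some((pattern) => pattern.test(origin));
    }

    console.log(matchesTrustedPattern('https://store.cannabrands.app')); // true
    console.log(matchesTrustedPattern('https://app.cannaiq.co'));        // true
    console.log(matchesTrustedPattern('http://store.cannabrands.app'));  // false (http, not https)
    console.log(matchesTrustedPattern('https://evil.example.com'));      // false

Because the patterns are anchored with ^ and $ and tested against the full origin (which never contains a path), a hostile origin cannot smuggle a trusted suffix into a path segment.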
@@ -127,22 +153,10 @@ export async function authenticateUser(email: string, password: string): Promise
 }
 
 export async function authMiddleware(req: AuthRequest, res: Response, next: NextFunction) {
-  // Allow trusted origins/IPs to bypass auth (internal services, same-origin)
-  if (isTrustedRequest(req)) {
-    req.user = {
-      id: 0,
-      email: 'internal@system',
-      role: 'internal'
-    };
-    return next();
-  }
-
   const authHeader = req.headers.authorization;
 
-  if (!authHeader || !authHeader.startsWith('Bearer ')) {
-    return res.status(401).json({ error: 'No token provided' });
-  }
-
+  // If a Bearer token is provided, always try to use it first (logged-in user)
+  if (authHeader && authHeader.startsWith('Bearer ')) {
     const token = authHeader.substring(7);
 
     // Try JWT first
@@ -161,56 +175,44 @@ export async function authMiddleware(req: AuthRequest, res: Response, next: NextFunction) {
         WHERE token = $1
       `, [token]);
 
-      if (result.rows.length === 0) {
+      if (result.rows.length > 0) {
+        const apiToken = result.rows[0];
+        if (!apiToken.active) {
+          return res.status(401).json({ error: 'API token is inactive' });
+        }
+        if (apiToken.expires_at && new Date(apiToken.expires_at) < new Date()) {
+          return res.status(401).json({ error: 'API token has expired' });
+        }
+        req.user = {
+          id: 0,
+          email: `api:${apiToken.name}`,
+          role: 'api_token'
+        };
+        req.apiToken = apiToken;
+        return next();
+      }
+    } catch (err) {
+      console.error('API token lookup error:', err);
+    }
+
+    // Token provided but invalid
     return res.status(401).json({ error: 'Invalid token' });
   }
 
-      const apiToken = result.rows[0];
-
-      // Check if token is active
-      if (!apiToken.active) {
-        return res.status(401).json({ error: 'Token is disabled' });
-      }
-
-      // Check if token is expired
-      if (apiToken.expires_at && new Date(apiToken.expires_at) < new Date()) {
-        return res.status(401).json({ error: 'Token has expired' });
-      }
-
-      // Check allowed endpoints
-      if (apiToken.allowed_endpoints && apiToken.allowed_endpoints.length > 0) {
-        const isAllowed = apiToken.allowed_endpoints.some((pattern: string) => {
-          // Simple wildcard matching
-          const regex = new RegExp('^' + pattern.replace('*', '.*') + '$');
-          return regex.test(req.path);
-        });
-
-        if (!isAllowed) {
-          return res.status(403).json({ error: 'Endpoint not allowed for this token' });
-        }
-      }
-
-      // Set API token on request for tracking
-      req.apiToken = {
-        id: apiToken.id,
-        name: apiToken.name,
-        rate_limit: apiToken.rate_limit
-      };
-
-      // Set a generic user for compatibility with existing code
+  // No token provided - check trusted origins for API access (WordPress, etc.)
+  if (isTrustedRequest(req)) {
     req.user = {
-      id: apiToken.id,
-      email: `api-token-${apiToken.id}@system`,
-      role: 'api'
+      id: 0,
+      email: 'internal@system',
+      role: 'internal'
    };
+    return next();
+  }
 
-      next();
-    } catch (error) {
-      console.error('Error verifying API token:', error);
-      return res.status(500).json({ error: 'Authentication failed' });
-    }
+  return res.status(401).json({ error: 'No token provided' });
 }
 
 
 /**
  * Require specific role(s) to access endpoint.
  *
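The net effect of these middleware hunks is a reordering: an explicit Bearer token is now always tried first, and the trusted-origin bypass only applies when no token was sent, so an invalid or expired token can no longer be masked by a trusted origin. A compressed sketch of the new precedence, with hypothetical verifyToken and isTrusted helpers standing in for the real JWT/API-token checks and isTrustedRequest:

    import type { Request, Response, NextFunction } from 'express';

    type User = { id: number; email: string; role: string };

    function sketchAuthOrder(
      req: Request & { user?: User },
      res: Response,
      next: NextFunction,
      verifyToken: (token: string) => User | null, // stands in for the JWT + API-token lookups
      isTrusted: (req: Request) => boolean         // stands in for isTrustedRequest
    ) {
      const header = req.headers.authorization;

      // 1. A presented Bearer token always wins, valid or not.
      if (header && header.startsWith('Bearer ')) {
        const user = verifyToken(header.substring(7));
        if (user) {
          req.user = user;
          return next();
        }
        return res.status(401).json({ error: 'Invalid token' });
      }

      // 2. Only tokenless requests may fall back to origin/referer/IP trust.
      if (isTrusted(req)) {
        req.user = { id: 0, email: 'internal@system', role: 'internal' };
        return next();
      }

      // 3. Everything else is rejected.
      return res.status(401).json({ error: 'No token provided' });
    }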
141  backend/src/db/auto-migrate.ts  (Normal file)
@@ -0,0 +1,141 @@
+/**
+ * Auto-Migration System
+ *
+ * Runs SQL migration files from the migrations/ folder automatically on server startup.
+ * Uses a schema_migrations table to track which migrations have been applied.
+ *
+ * Safe to run multiple times - only applies new migrations.
+ */
+
+import { Pool } from 'pg';
+import fs from 'fs';
+import path from 'path';
+
+const MIGRATIONS_DIR = path.join(__dirname, '../../migrations');
+
+/**
+ * Ensure schema_migrations table exists
+ */
+async function ensureMigrationsTable(pool: Pool): Promise<void> {
+  await pool.query(`
+    CREATE TABLE IF NOT EXISTS schema_migrations (
+      id SERIAL PRIMARY KEY,
+      name VARCHAR(255) UNIQUE NOT NULL,
+      applied_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
+    )
+  `);
+}
+
+/**
+ * Get list of already-applied migrations
+ */
+async function getAppliedMigrations(pool: Pool): Promise<Set<string>> {
+  const result = await pool.query('SELECT name FROM schema_migrations');
+  return new Set(result.rows.map(row => row.name));
+}
+
+/**
+ * Get list of migration files from disk
+ */
+function getMigrationFiles(): string[] {
+  if (!fs.existsSync(MIGRATIONS_DIR)) {
+    console.log('[AutoMigrate] No migrations directory found');
+    return [];
+  }
+
+  return fs.readdirSync(MIGRATIONS_DIR)
+    .filter(f => f.endsWith('.sql'))
+    .sort(); // Sort alphabetically (001_, 002_, etc.)
+}
+
+/**
+ * Run a single migration file
+ */
+async function runMigration(pool: Pool, filename: string): Promise<void> {
+  const filepath = path.join(MIGRATIONS_DIR, filename);
+  const sql = fs.readFileSync(filepath, 'utf8');
+
+  const client = await pool.connect();
+  try {
+    await client.query('BEGIN');
+
+    // Run the migration SQL
+    await client.query(sql);
+
+    // Record that this migration was applied
+    await client.query(
+      'INSERT INTO schema_migrations (name) VALUES ($1) ON CONFLICT (name) DO NOTHING',
+      [filename]
+    );
+
+    await client.query('COMMIT');
+    console.log(`[AutoMigrate] ✓ Applied: ${filename}`);
+  } catch (error: any) {
+    await client.query('ROLLBACK');
+    console.error(`[AutoMigrate] ✗ Failed: ${filename}`);
+    throw error;
+  } finally {
+    client.release();
+  }
+}
+
+/**
+ * Run all pending migrations
+ *
+ * @param pool - Database connection pool
+ * @returns Number of migrations applied
+ */
+export async function runAutoMigrations(pool: Pool): Promise<number> {
+  console.log('[AutoMigrate] Checking for pending migrations...');
+
+  try {
+    // Ensure migrations table exists
+    await ensureMigrationsTable(pool);
+
+    // Get applied and available migrations
+    const applied = await getAppliedMigrations(pool);
+    const available = getMigrationFiles();
+
+    // Find pending migrations
+    const pending = available.filter(f => !applied.has(f));
+
+    if (pending.length === 0) {
+      console.log('[AutoMigrate] No pending migrations');
+      return 0;
+    }
+
+    console.log(`[AutoMigrate] Found ${pending.length} pending migrations`);
+
+    // Run each pending migration in order
+    for (const filename of pending) {
+      await runMigration(pool, filename);
+    }
+
+    console.log(`[AutoMigrate] Successfully applied ${pending.length} migrations`);
+    return pending.length;
+
+  } catch (error: any) {
+    console.error('[AutoMigrate] Migration failed:', error.message);
+    // Don't crash the server - log and continue
+    // The specific failing migration will have been rolled back
+    return -1;
+  }
+}
+
+/**
+ * Check migration status without running anything
+ */
+export async function checkMigrationStatus(pool: Pool): Promise<{
+  applied: string[];
+  pending: string[];
+}> {
+  await ensureMigrationsTable(pool);
+
+  const applied = await getAppliedMigrations(pool);
+  const available = getMigrationFiles();
+
+  return {
+    applied: available.filter(f => applied.has(f)),
+    pending: available.filter(f => !applied.has(f)),
+  };
+}
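auto-migrate.ts only exports the runner; its startup call site is not part of this section. A hypothetical wiring sketch, assuming the server entry point already builds a pg Pool and can import runAutoMigrations from the new module (the import path and pool settings are illustrative):

    import { Pool } from 'pg';
    import { runAutoMigrations } from './db/auto-migrate';

    async function startServer(): Promise<void> {
      const pool = new Pool({ connectionString: process.env.DATABASE_URL });

      // Returns -1 on failure (logged, server keeps running), else the count applied.
      const applied = await runAutoMigrations(pool);
      if (applied >= 0) {
        console.log(`migrations applied: ${applied}`);
      }

      // ...start the HTTP server here...
    }

    startServer().catch(console.error);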
200  backend/src/db/run-migrations.ts  (Normal file)
@@ -0,0 +1,200 @@
+#!/usr/bin/env npx tsx
+/**
+ * Database Migration Runner
+ *
+ * Runs SQL migrations from backend/migrations/*.sql in order.
+ * Tracks applied migrations in schema_migrations table.
+ *
+ * Usage:
+ *   npx tsx src/db/run-migrations.ts
+ *
+ * Environment:
+ *   DATABASE_URL or CANNAIQ_DB_* variables
+ */
+
+import { Pool } from 'pg';
+import * as fs from 'fs/promises';
+import * as path from 'path';
+import dotenv from 'dotenv';
+
+dotenv.config();
+
+function getConnectionString(): string {
+  if (process.env.DATABASE_URL) {
+    return process.env.DATABASE_URL;
+  }
+  if (process.env.CANNAIQ_DB_URL) {
+    return process.env.CANNAIQ_DB_URL;
+  }
+
+  const host = process.env.CANNAIQ_DB_HOST || 'localhost';
+  const port = process.env.CANNAIQ_DB_PORT || '54320';
+  const name = process.env.CANNAIQ_DB_NAME || 'dutchie_menus';
+  const user = process.env.CANNAIQ_DB_USER || 'dutchie';
+  const pass = process.env.CANNAIQ_DB_PASS || 'dutchie_local_pass';
+
+  return `postgresql://${user}:${pass}@${host}:${port}/${name}`;
+}
+
+interface MigrationFile {
+  filename: string;
+  number: number;
+  path: string;
+}
+
+async function getMigrationFiles(migrationsDir: string): Promise<MigrationFile[]> {
+  const files = await fs.readdir(migrationsDir);
+
+  const migrations: MigrationFile[] = files
+    .filter(f => f.endsWith('.sql'))
+    .map(filename => {
+      // Extract number from filename like "005_api_tokens.sql" or "073_proxy_timezone.sql"
+      const match = filename.match(/^(\d+)_/);
+      if (!match) return null;
+
+      return {
+        filename,
+        number: parseInt(match[1], 10),
+        path: path.join(migrationsDir, filename),
+      };
+    })
+    .filter((m): m is MigrationFile => m !== null)
+    .sort((a, b) => a.number - b.number);
+
+  return migrations;
+}
+
+async function ensureMigrationsTable(pool: Pool): Promise<void> {
+  // Migrate to filename-based tracking (handles duplicate version numbers)
+  // Check if old version-based PK exists
+  const pkCheck = await pool.query(`
+    SELECT constraint_name FROM information_schema.table_constraints
+    WHERE table_name = 'schema_migrations' AND constraint_type = 'PRIMARY KEY'
+  `);
+
+  if (pkCheck.rows.length === 0) {
+    // Table doesn't exist, create with filename as PK
+    await pool.query(`
+      CREATE TABLE IF NOT EXISTS schema_migrations (
+        filename VARCHAR(255) NOT NULL PRIMARY KEY,
+        version VARCHAR(10),
+        name VARCHAR(255),
+        applied_at TIMESTAMPTZ DEFAULT NOW()
+      )
+    `);
+  } else {
+    // Table exists - add filename column if missing
+    await pool.query(`
+      ALTER TABLE schema_migrations ADD COLUMN IF NOT EXISTS filename VARCHAR(255)
+    `);
+    // Populate filename from version+name for existing rows
+    await pool.query(`
+      UPDATE schema_migrations SET filename = version || '_' || name || '.sql'
+      WHERE filename IS NULL
+    `);
+  }
+}
+
+async function getAppliedMigrations(pool: Pool): Promise<Set<string>> {
+  // Try filename first, fall back to version_name combo
+  const result = await pool.query(`
+    SELECT COALESCE(filename, version || '_' || name || '.sql') as filename
+    FROM schema_migrations
+  `);
+  return new Set(result.rows.map(r => r.filename));
+}
+
+async function applyMigration(pool: Pool, migration: MigrationFile): Promise<void> {
+  const sql = await fs.readFile(migration.path, 'utf-8');
+
+  // Extract version and name from filename like "005_api_tokens.sql"
+  const version = String(migration.number).padStart(3, '0');
+  const name = migration.filename.replace(/^\d+_/, '').replace(/\.sql$/, '');
+
+  const client = await pool.connect();
+  try {
+    await client.query('BEGIN');
+
+    // Run the migration SQL
+    await client.query(sql);
+
+    // Record that it was applied - use INSERT with ON CONFLICT for safety
+    await client.query(`
+      INSERT INTO schema_migrations (filename, version, name)
+      VALUES ($1, $2, $3)
+      ON CONFLICT DO NOTHING
+    `, [migration.filename, version, name]);
+
+    await client.query('COMMIT');
+  } catch (error) {
+    await client.query('ROLLBACK');
+    throw error;
+  } finally {
+    client.release();
+  }
+}
+
+async function main() {
+  const pool = new Pool({ connectionString: getConnectionString() });
+
+  // Migrations directory relative to this file
+  const migrationsDir = path.resolve(__dirname, '../../migrations');
+
+  console.log('╔════════════════════════════════════════════════════════════╗');
+  console.log('║                 DATABASE MIGRATION RUNNER                  ║');
+  console.log('╚════════════════════════════════════════════════════════════╝');
+  console.log(`Migrations dir: ${migrationsDir}`);
+  console.log('');
+
+  try {
+    // Ensure tracking table exists
+    await ensureMigrationsTable(pool);
+
+    // Get all migration files
+    const allMigrations = await getMigrationFiles(migrationsDir);
+    console.log(`Found ${allMigrations.length} migration files`);
+
+    // Get already-applied migrations
+    const applied = await getAppliedMigrations(pool);
+    console.log(`Already applied: ${applied.size} migrations`);
+    console.log('');
+
+    // Find pending migrations (compare by filename)
+    const pending = allMigrations.filter(m => !applied.has(m.filename));
+
+    if (pending.length === 0) {
+      console.log('✅ No pending migrations. Database is up to date.');
+      await pool.end();
+      return;
+    }
+
+    console.log(`Pending migrations: ${pending.length}`);
+    console.log('─'.repeat(60));
+
+    // Apply each pending migration
+    for (const migration of pending) {
+      process.stdout.write(`  ${migration.filename}... `);
+      try {
+        await applyMigration(pool, migration);
+        console.log('✅');
+      } catch (error: any) {
+        console.log('❌');
+        console.error(`\nError applying ${migration.filename}:`);
+        console.error(error.message);
+        process.exit(1);
+      }
+    }
+
+    console.log('');
+    console.log('═'.repeat(60));
+    console.log(`✅ Applied ${pending.length} migrations successfully`);
+
+  } catch (error: any) {
+    console.error('Migration runner failed:', error.message);
+    process.exit(1);
+  } finally {
|
await pool.end();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
main();
|
||||||
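The runner above tracks applied migrations by filename, so re-running it is a no-op. A minimal invocation sketch — the script path and npm script name are assumptions, not shown in this diff:

// package.json (hypothetical script name and path):
//   "scripts": { "migrate": "ts-node src/db/run-migrations.ts" }
//
// Point it at a database and run; already-applied files are skipped:
//   DATABASE_URL=postgresql://user:pass@localhost:5432/dutchie_menus npm run migrate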
@@ -172,6 +172,9 @@ export async function runFullDiscovery(
     console.log(`Errors: ${totalErrors}`);
   }
 
+  // Per TASK_WORKFLOW_2024-12-10.md: Track new dispensary IDs for task chaining
+  let newDispensaryIds: number[] = [];
+
   // Step 4: Auto-validate and promote discovered locations
   if (!dryRun && totalLocationsUpserted > 0) {
     console.log('\n[Discovery] Step 4: Auto-promoting discovered locations...');
@@ -180,6 +183,13 @@ export async function runFullDiscovery(
     console.log(`  Created: ${promotionResult.created} new dispensaries`);
     console.log(`  Updated: ${promotionResult.updated} existing dispensaries`);
     console.log(`  Rejected: ${promotionResult.rejected} (validation failed)`);
+
+    // Per TASK_WORKFLOW_2024-12-10.md: Capture new IDs for task chaining
+    newDispensaryIds = promotionResult.newDispensaryIds;
+    if (newDispensaryIds.length > 0) {
+      console.log(`  New store IDs for crawl: [${newDispensaryIds.join(', ')}]`);
+    }
+
     if (promotionResult.rejectedRecords.length > 0) {
       console.log(`  Rejection reasons:`);
       promotionResult.rejectedRecords.slice(0, 5).forEach(r => {
@@ -191,12 +201,132 @@ export async function runFullDiscovery(
     }
   }
 
+  // Step 5: Detect dropped stores (in DB but not in discovery results)
+  if (!dryRun) {
+    console.log('\n[Discovery] Step 5: Detecting dropped stores...');
+    const droppedResult = await detectDroppedStores(pool, stateCode);
+    if (droppedResult.droppedCount > 0) {
+      console.log(`[Discovery] Found ${droppedResult.droppedCount} dropped stores:`);
+      droppedResult.droppedStores.slice(0, 10).forEach(s => {
+        console.log(`  - ${s.name} (${s.city}, ${s.state}) - last seen: ${s.lastSeenAt}`);
+      });
+      if (droppedResult.droppedCount > 10) {
+        console.log(`  ... and ${droppedResult.droppedCount - 10} more`);
+      }
+    } else {
+      console.log(`[Discovery] No dropped stores detected`);
+    }
+  }
+
   return {
     cities: cityResult,
     locations: locationResults,
     totalLocationsFound,
     totalLocationsUpserted,
     durationMs,
+    // Per TASK_WORKFLOW_2024-12-10.md: Return new IDs for task chaining
+    newDispensaryIds,
+  };
+}
+
+// ============================================================
+// DROPPED STORE DETECTION
+// ============================================================
+
+export interface DroppedStoreResult {
+  droppedCount: number;
+  droppedStores: Array<{
+    id: number;
+    name: string;
+    city: string;
+    state: string;
+    platformDispensaryId: string;
+    lastSeenAt: string;
+  }>;
+}
+
+/**
+ * Detect stores that exist in dispensaries but were not found in discovery.
+ * Marks them as status='dropped' for manual review.
+ *
+ * A store is considered "dropped" if:
+ * 1. It has a platform_dispensary_id (was verified via Dutchie)
+ * 2. It was NOT seen in the latest discovery crawl (last_seen_at in discovery < 24h ago)
+ * 3. It's currently marked as 'open' status
+ */
+export async function detectDroppedStores(
+  pool: Pool,
+  stateCode?: string
+): Promise<DroppedStoreResult> {
+  // Find dispensaries that:
+  // 1. Have platform_dispensary_id (verified Dutchie stores)
+  // 2. Are currently 'open' status
+  // 3. Have a linked discovery record that wasn't seen in the last discovery run
+  //    (last_seen_at in dutchie_discovery_locations is older than 24 hours)
+  const params: any[] = [];
+  let stateFilter = '';
+
+  if (stateCode) {
+    stateFilter = ` AND d.state = $1`;
+    params.push(stateCode);
+  }
+
+  const query = `
+    WITH recently_seen AS (
+      SELECT DISTINCT platform_location_id
+      FROM dutchie_discovery_locations
+      WHERE last_seen_at > NOW() - INTERVAL '24 hours'
+        AND active = true
+    )
+    SELECT
+      d.id,
+      d.name,
+      d.city,
+      d.state,
+      d.platform_dispensary_id,
+      d.updated_at as last_seen_at
+    FROM dispensaries d
+    WHERE d.platform_dispensary_id IS NOT NULL
+      AND d.platform = 'dutchie'
+      AND (d.status = 'open' OR d.status IS NULL)
+      AND d.crawl_enabled = true
+      AND d.platform_dispensary_id NOT IN (SELECT platform_location_id FROM recently_seen)
+      ${stateFilter}
+    ORDER BY d.name
+  `;
+
+  const result = await pool.query(query, params);
+  const droppedStores = result.rows;
+
+  // Mark these stores as 'dropped' status
+  if (droppedStores.length > 0) {
+    const ids = droppedStores.map(s => s.id);
+    await pool.query(`
+      UPDATE dispensaries
+      SET status = 'dropped', updated_at = NOW()
+      WHERE id = ANY($1::int[])
+    `, [ids]);
+
+    // Log to promotion log for audit
+    for (const store of droppedStores) {
+      await pool.query(`
+        INSERT INTO dutchie_promotion_log
+          (dispensary_id, action, state_code, store_name, triggered_by)
+        VALUES ($1, 'dropped', $2, $3, 'discovery_detection')
+      `, [store.id, store.state, store.name]);
+    }
+  }
+
+  return {
+    droppedCount: droppedStores.length,
+    droppedStores: droppedStores.map(s => ({
+      id: s.id,
+      name: s.name,
+      city: s.city,
+      state: s.state,
+      platformDispensaryId: s.platform_dispensary_id,
+      lastSeenAt: s.last_seen_at,
+    })),
   };
 }

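detectDroppedStores can also be invoked outside runFullDiscovery, e.g. from a one-off audit script. A sketch, assuming the function is exported from this module (the import path is an assumption); note it mutates status='dropped' as a side effect — there is no dry-run flag:

import { Pool } from 'pg';
import { detectDroppedStores } from './discovery'; // import path is an assumption

async function auditDroppedStores(stateCode?: string): Promise<void> {
  const pool = new Pool({ connectionString: process.env.DATABASE_URL });
  try {
    // Marks matching dispensaries status='dropped' and writes audit log rows
    const result = await detectDroppedStores(pool, stateCode);
    console.log(`${result.droppedCount} stores marked dropped`);
    for (const s of result.droppedStores.slice(0, 10)) {
      console.log(`  ${s.name} (${s.city}, ${s.state})`);
    }
  } finally {
    await pool.end();
  }
}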
@@ -127,6 +127,8 @@ export interface PromotionSummary {
     errors: string[];
   }>;
   durationMs: number;
+  // Per TASK_WORKFLOW_2024-12-10.md: Track new dispensary IDs for task chaining
+  newDispensaryIds: number[];
 }
 
 /**
@@ -469,6 +471,8 @@ export async function promoteDiscoveredLocations(
 
   const results: PromotionResult[] = [];
   const rejectedRecords: PromotionSummary['rejectedRecords'] = [];
+  // Per TASK_WORKFLOW_2024-12-10.md: Track new dispensary IDs for task chaining
+  const newDispensaryIds: number[] = [];
   let created = 0;
   let updated = 0;
   let skipped = 0;
@@ -525,6 +529,8 @@ export async function promoteDiscoveredLocations(
 
       if (promotionResult.action === 'created') {
         created++;
+        // Per TASK_WORKFLOW_2024-12-10.md: Track new IDs for task chaining
+        newDispensaryIds.push(promotionResult.dispensaryId);
       } else {
         updated++;
       }
@@ -548,6 +554,8 @@ export async function promoteDiscoveredLocations(
     results,
     rejectedRecords,
     durationMs: Date.now() - startTime,
+    // Per TASK_WORKFLOW_2024-12-10.md: Return new IDs for task chaining
+    newDispensaryIds,
   };
 }

@@ -211,6 +211,8 @@ export interface FullDiscoveryResult {
   totalLocationsFound: number;
   totalLocationsUpserted: number;
   durationMs: number;
+  // Per TASK_WORKFLOW_2024-12-10.md: Track new dispensary IDs for task chaining
+  newDispensaryIds?: number[];
 }
 
 // ============================================================

@@ -16,6 +16,12 @@ import {
   NormalizedBrand,
   NormalizationResult,
 } from './types';
+import {
+  downloadProductImage,
+  ProductImageContext,
+  isImageStorageReady,
+  LocalImageSizes,
+} from '../utils/image-storage';
 
 const BATCH_SIZE = 100;
 
@@ -23,10 +29,21 @@ const BATCH_SIZE = 100;
 // PRODUCT UPSERTS
 // ============================================================
 
+export interface NewProductInfo {
+  id: number;                    // store_products.id
+  externalProductId: string;     // provider_product_id
+  name: string;
+  brandName: string | null;
+  primaryImageUrl: string | null;
+  hasLocalImage?: boolean;       // True if local_image_path is already set
+}
+
 export interface UpsertProductsResult {
   upserted: number;
   new: number;
   updated: number;
+  newProducts: NewProductInfo[];           // Details of newly created products
+  productsNeedingImages: NewProductInfo[]; // Products (new or updated) that need image downloads
 }
 
 /**
@@ -41,12 +58,14 @@ export async function upsertStoreProducts(
   options: { dryRun?: boolean } = {}
 ): Promise<UpsertProductsResult> {
   if (products.length === 0) {
-    return { upserted: 0, new: 0, updated: 0 };
+    return { upserted: 0, new: 0, updated: 0, newProducts: [], productsNeedingImages: [] };
   }
 
   const { dryRun = false } = options;
   let newCount = 0;
   let updatedCount = 0;
+  const newProducts: NewProductInfo[] = [];
+  const productsNeedingImages: NewProductInfo[] = [];
 
   // Process in batches
   for (let i = 0; i < products.length; i += BATCH_SIZE) {
@@ -71,7 +90,7 @@ export async function upsertStoreProducts(
           name_raw, brand_name_raw, category_raw, subcategory_raw,
           price_rec, price_med, price_rec_special, price_med_special,
           is_on_special, discount_percent,
-          is_in_stock, stock_status,
+          is_in_stock, stock_status, stock_quantity, total_quantity_available,
           thc_percent, cbd_percent,
           image_url,
           first_seen_at, last_seen_at, updated_at
@@ -80,9 +99,9 @@ export async function upsertStoreProducts(
           $5, $6, $7, $8,
           $9, $10, $11, $12,
           $13, $14,
-          $15, $16,
-          $17, $18,
-          $19,
+          $15, $16, $17, $17,
+          $18, $19,
+          $20,
           NOW(), NOW(), NOW()
         )
         ON CONFLICT (dispensary_id, provider, provider_product_id)
@@ -99,12 +118,14 @@ export async function upsertStoreProducts(
           discount_percent = EXCLUDED.discount_percent,
           is_in_stock = EXCLUDED.is_in_stock,
           stock_status = EXCLUDED.stock_status,
+          stock_quantity = EXCLUDED.stock_quantity,
+          total_quantity_available = EXCLUDED.total_quantity_available,
           thc_percent = EXCLUDED.thc_percent,
           cbd_percent = EXCLUDED.cbd_percent,
           image_url = EXCLUDED.image_url,
           last_seen_at = NOW(),
           updated_at = NOW()
-        RETURNING (xmax = 0) as is_new`,
+        RETURNING id, (xmax = 0) as is_new, (local_image_path IS NOT NULL) as has_local_image`,
         [
           product.dispensaryId,
           product.platform,
@@ -122,6 +143,7 @@ export async function upsertStoreProducts(
           productPricing?.discountPercent,
           productAvailability?.inStock ?? true,
           productAvailability?.stockStatus || 'unknown',
+          productAvailability?.quantity ?? null, // stock_quantity and total_quantity_available
           // Clamp THC/CBD to valid percentage range (0-100) - some products report mg as %
           product.thcPercent !== null && product.thcPercent <= 100 ? product.thcPercent : null,
           product.cbdPercent !== null && product.cbdPercent <= 100 ? product.cbdPercent : null,
@@ -129,10 +151,30 @@ export async function upsertStoreProducts(
         ]
       );
 
-      if (result.rows[0]?.is_new) {
+      const row = result.rows[0];
+      const productInfo: NewProductInfo = {
+        id: row.id,
+        externalProductId: product.externalProductId,
+        name: product.name,
+        brandName: product.brandName,
+        primaryImageUrl: product.primaryImageUrl,
+        hasLocalImage: row.has_local_image,
+      };
+
+      if (row.is_new) {
         newCount++;
+        // Track new products
+        newProducts.push(productInfo);
+        // New products always need images (if they have a source URL)
+        if (product.primaryImageUrl && !row.has_local_image) {
+          productsNeedingImages.push(productInfo);
+        }
       } else {
         updatedCount++;
+        // Updated products need images only if they don't have a local image yet
+        if (product.primaryImageUrl && !row.has_local_image) {
+          productsNeedingImages.push(productInfo);
+        }
       }
     }
 
@@ -149,6 +191,8 @@ export async function upsertStoreProducts(
     upserted: newCount + updatedCount,
     new: newCount,
     updated: updatedCount,
+    newProducts,
+    productsNeedingImages,
   };
 }

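The `RETURNING id, (xmax = 0) as is_new, ...` clause above leans on a PostgreSQL implementation detail: a freshly inserted row has system column xmax = 0, while a row rewritten by ON CONFLICT ... DO UPDATE carries a non-zero xmax, so one round trip distinguishes insert from update. A stripped-down illustration (column list shortened, variable names hypothetical):

const res = await pool.query(`
  INSERT INTO store_products (dispensary_id, provider, provider_product_id, name_raw)
  VALUES ($1, $2, $3, $4)
  ON CONFLICT (dispensary_id, provider, provider_product_id)
  DO UPDATE SET name_raw = EXCLUDED.name_raw, updated_at = NOW()
  RETURNING (xmax = 0) AS is_new
`, [dispensaryId, 'dutchie', externalId, name]);
// res.rows[0].is_new === true  -> the row was inserted
// res.rows[0].is_new === false -> the row existed and was updated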
@@ -564,6 +608,19 @@ export async function upsertBrands(
 // FULL HYDRATION
 // ============================================================
 
+export interface ImageDownloadResult {
+  downloaded: number;
+  skipped: number;
+  failed: number;
+  bytesTotal: number;
+}
+
+export interface DispensaryContext {
+  stateCode: string;
+  storeSlug: string;
+  hasExistingProducts?: boolean; // True if store already has products with local images
+}
+
 export interface HydratePayloadResult {
   productsUpserted: number;
   productsNew: number;
@@ -574,6 +631,154 @@ export interface HydratePayloadResult {
   variantsUpserted: number;
   variantsNew: number;
   variantSnapshotsCreated: number;
+  imagesDownloaded: number;
+  imagesSkipped: number;
+  imagesFailed: number;
+  imagesBytesTotal: number;
+}
+
+/**
+ * Helper to create slug from string
+ */
+function slugify(str: string): string {
+  return str
+    .toLowerCase()
+    .replace(/[^a-z0-9]+/g, '-')
+    .replace(/^-+|-+$/g, '')
+    .substring(0, 50) || 'unknown';
+}
+
+/**
+ * Download images for new products and update their local paths
+ */
+export async function downloadProductImages(
+  pool: Pool,
+  newProducts: NewProductInfo[],
+  dispensaryContext: DispensaryContext,
+  options: { dryRun?: boolean; concurrency?: number } = {}
+): Promise<ImageDownloadResult> {
+  const { dryRun = false, concurrency = 5 } = options;
+
+  // Filter products that have images to download
+  const productsWithImages = newProducts.filter(p => p.primaryImageUrl);
+
+  if (productsWithImages.length === 0) {
+    return { downloaded: 0, skipped: 0, failed: 0, bytesTotal: 0 };
+  }
+
+  // Check if image storage is ready
+  if (!isImageStorageReady()) {
+    console.warn('[ImageDownload] Image storage not initialized, skipping downloads');
+    return { downloaded: 0, skipped: productsWithImages.length, failed: 0, bytesTotal: 0 };
+  }
+
+  if (dryRun) {
+    console.log(`[DryRun] Would download ${productsWithImages.length} images`);
+    return { downloaded: 0, skipped: productsWithImages.length, failed: 0, bytesTotal: 0 };
+  }
+
+  let downloaded = 0;
+  let skipped = 0;
+  let failed = 0;
+  let bytesTotal = 0;
+
+  // Process in batches with concurrency limit
+  for (let i = 0; i < productsWithImages.length; i += concurrency) {
+    const batch = productsWithImages.slice(i, i + concurrency);
+
+    const results = await Promise.allSettled(
+      batch.map(async (product) => {
+        const ctx: ProductImageContext = {
+          stateCode: dispensaryContext.stateCode,
+          storeSlug: dispensaryContext.storeSlug,
+          brandSlug: slugify(product.brandName || 'unknown'),
+          productId: product.externalProductId,
+        };
+
+        const result = await downloadProductImage(product.primaryImageUrl!, ctx, { skipIfExists: true });
+
+        if (result.success) {
+          // Update the database with local image path
+          const imagesJson = JSON.stringify({
+            full: result.urls!.full,
+            medium: result.urls!.medium,
+            thumb: result.urls!.thumb,
+          });
+
+          await pool.query(
+            `UPDATE store_products
+             SET local_image_path = $1, images = $2
+             WHERE id = $3`,
+            [result.urls!.full, imagesJson, product.id]
+          );
+        }
+
+        return result;
+      })
+    );
+
+    for (const result of results) {
+      if (result.status === 'fulfilled') {
+        const downloadResult = result.value;
+        if (downloadResult.success) {
+          if (downloadResult.skipped) {
+            skipped++;
+          } else {
+            downloaded++;
+            bytesTotal += downloadResult.bytesDownloaded || 0;
+          }
+        } else {
+          failed++;
+          console.warn(`[ImageDownload] Failed: ${downloadResult.error}`);
+        }
+      } else {
+        failed++;
+        console.error(`[ImageDownload] Error:`, result.reason);
+      }
+    }
+  }
+
+  console.log(`[ImageDownload] Downloaded: ${downloaded}, Skipped: ${skipped}, Failed: ${failed}, Bytes: ${bytesTotal}`);
+  return { downloaded, skipped, failed, bytesTotal };
+}
+
+/**
+ * Get dispensary context for image paths
+ * Also checks if this dispensary already has products with local images
+ * to skip unnecessary filesystem checks for existing stores
+ */
+async function getDispensaryContext(pool: Pool, dispensaryId: number): Promise<DispensaryContext | null> {
+  try {
+    const result = await pool.query(
+      `SELECT
+         d.state,
+         d.slug,
+         d.name,
+         EXISTS(
+           SELECT 1 FROM store_products sp
+           WHERE sp.dispensary_id = d.id
+             AND sp.local_image_path IS NOT NULL
+           LIMIT 1
+         ) as has_local_images
+       FROM dispensaries d
+       WHERE d.id = $1`,
+      [dispensaryId]
+    );
+
+    if (result.rows.length === 0) {
+      return null;
+    }
+
+    const row = result.rows[0];
+    return {
+      stateCode: row.state || 'unknown',
+      storeSlug: row.slug || slugify(row.name || `store-${dispensaryId}`),
+      hasExistingProducts: row.has_local_images,
+    };
+  } catch (error) {
+    console.error('[getDispensaryContext] Error:', error);
+    return null;
+  }
 }
 
 /**

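downloadProductImages bounds parallelism by slicing the work into fixed-size batches and awaiting Promise.allSettled per slice, so one rejected download never aborts its batch. A direct-call sketch (context values are samples; hydrateToCanonical below wires this up automatically):

const ctx: DispensaryContext = { stateCode: 'az', storeSlug: 'example-store' }; // sample values
const stats = await downloadProductImages(pool, upsertResult.productsNeedingImages, ctx, {
  concurrency: 5, // at most 5 downloads in flight per batch
});
console.log(`downloaded=${stats.downloaded} skipped=${stats.skipped} failed=${stats.failed}`);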
@@ -584,9 +789,9 @@ export async function hydrateToCanonical(
   dispensaryId: number,
   normResult: NormalizationResult,
   crawlRunId: number | null,
-  options: { dryRun?: boolean } = {}
+  options: { dryRun?: boolean; downloadImages?: boolean } = {}
 ): Promise<HydratePayloadResult> {
-  const { dryRun = false } = options;
+  const { dryRun = false, downloadImages: shouldDownloadImages = true } = options;
 
   // 1. Upsert brands
   const brandResult = await upsertBrands(pool, normResult.brands, { dryRun });
@@ -634,6 +839,36 @@ export async function hydrateToCanonical(
     { dryRun }
   );
 
+  // 6. Download images for products that need them
+  // This includes:
+  //   - New products (always need images)
+  //   - Updated products that don't have local images yet (backfill)
+  // This avoids:
+  //   - Filesystem checks for products that already have local images
+  //   - Unnecessary HTTP requests for products with existing images
+  let imageResult: ImageDownloadResult = { downloaded: 0, skipped: 0, failed: 0, bytesTotal: 0 };
+
+  if (shouldDownloadImages && productResult.productsNeedingImages.length > 0) {
+    const dispensaryContext = await getDispensaryContext(pool, dispensaryId);
+
+    if (dispensaryContext) {
+      const newCount = productResult.productsNeedingImages.filter(p => !p.hasLocalImage).length;
+      const backfillCount = productResult.productsNeedingImages.length - newCount;
+      console.log(`[Hydration] Downloading images for ${productResult.productsNeedingImages.length} products (${productResult.new} new, ${backfillCount} backfill)...`);
+      imageResult = await downloadProductImages(
+        pool,
+        productResult.productsNeedingImages,
+        dispensaryContext,
+        { dryRun }
+      );
+    } else {
+      console.warn(`[Hydration] Could not get dispensary context for ID ${dispensaryId}, skipping image downloads`);
+    }
+  } else if (productResult.productsNeedingImages.length === 0 && productResult.upserted > 0) {
+    // All products already have local images
+    console.log(`[Hydration] All ${productResult.upserted} products already have local images, skipping downloads`);
+  }
+
   return {
     productsUpserted: productResult.upserted,
     productsNew: productResult.new,
@@ -644,5 +879,9 @@ export async function hydrateToCanonical(
     variantsUpserted: variantResult.upserted,
     variantsNew: variantResult.new,
     variantSnapshotsCreated: variantResult.snapshotsCreated,
+    imagesDownloaded: imageResult.downloaded,
+    imagesSkipped: imageResult.skipped,
+    imagesFailed: imageResult.failed,
+    imagesBytesTotal: imageResult.bytesTotal,
   };
 }

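Because the new downloadImages option defaults to true, existing hydrateToCanonical callers keep fetching images; a caller that only wants the database writes can opt out. Sketch (argument values are placeholders):

const result = await hydrateToCanonical(pool, dispensaryId, normResult, crawlRunId, {
  downloadImages: false, // DB upserts only, e.g. when a separate image job runs later
});
console.log(`${result.productsUpserted} products upserted, ${result.imagesDownloaded} images`);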
@@ -6,7 +6,12 @@ import { initializeMinio, isMinioEnabled } from './utils/minio';
 import { initializeImageStorage } from './utils/image-storage';
 import { logger } from './services/logger';
 import { cleanupOrphanedJobs } from './services/proxyTestQueue';
+// Per TASK_WORKFLOW_2024-12-10.md: Database-driven task scheduler
+import { taskScheduler } from './services/task-scheduler';
+import { runAutoMigrations } from './db/auto-migrate';
+import { getPool } from './db/pool';
 import healthRoutes from './routes/health';
+import imageProxyRoutes from './routes/image-proxy';
 
 dotenv.config();
 
@@ -29,9 +34,44 @@ app.use(express.json());
 const LOCAL_IMAGES_PATH = process.env.LOCAL_IMAGES_PATH || './public/images';
 app.use('/images', express.static(LOCAL_IMAGES_PATH));
 
+// Image proxy with on-demand resizing
+// Usage: /img/products/az/store/brand/product/image.webp?w=200&h=200
+app.use('/img', imageProxyRoutes);
+
 // Serve static downloads (plugin files, etc.)
 // Uses ./public/downloads relative to working directory (works for both Docker and local dev)
 const LOCAL_DOWNLOADS_PATH = process.env.LOCAL_DOWNLOADS_PATH || './public/downloads';
+
+// Dynamic "latest" redirect for WordPress plugin - finds highest version automatically
+app.get('/downloads/cannaiq-menus-latest.zip', (req, res) => {
+  const fs = require('fs');
+  const path = require('path');
+  try {
+    const files = fs.readdirSync(LOCAL_DOWNLOADS_PATH);
+    const pluginFiles = files
+      .filter((f: string) => f.match(/^cannaiq-menus-\d+\.\d+\.\d+\.zip$/))
+      .sort((a: string, b: string) => {
+        const vA = a.match(/(\d+)\.(\d+)\.(\d+)/);
+        const vB = b.match(/(\d+)\.(\d+)\.(\d+)/);
+        if (!vA || !vB) return 0;
+        for (let i = 1; i <= 3; i++) {
+          const diff = parseInt(vB[i]) - parseInt(vA[i]);
+          if (diff !== 0) return diff;
+        }
+        return 0;
+      });
+
+    if (pluginFiles.length > 0) {
+      const latestFile = pluginFiles[0];
+      res.redirect(302, `/downloads/${latestFile}`);
+    } else {
+      res.status(404).json({ error: 'No plugin versions found' });
+    }
+  } catch (err) {
+    res.status(500).json({ error: 'Failed to find latest plugin' });
+  }
+});
+
 app.use('/downloads', express.static(LOCAL_DOWNLOADS_PATH));
 
 // Simple health check for load balancers/K8s probes
@@ -71,6 +111,7 @@ import apiPermissionsRoutes from './routes/api-permissions';
 import parallelScrapeRoutes from './routes/parallel-scrape';
 import crawlerSandboxRoutes from './routes/crawler-sandbox';
 import versionRoutes from './routes/version';
+import deployStatusRoutes from './routes/deploy-status';
 import publicApiRoutes from './routes/public-api';
 import usersRoutes from './routes/users';
 import staleProcessesRoutes from './routes/stale-processes';
@@ -90,7 +131,6 @@ import { createStatesRouter } from './routes/states';
 import { createAnalyticsV2Router } from './routes/analytics-v2';
 import { createDiscoveryRoutes } from './discovery';
 import pipelineRoutes from './routes/pipeline';
-import { getPool } from './db/pool';
 
 // Consumer API routes (findadispo.com, findagram.co)
 import consumerAuthRoutes from './routes/consumer-auth';
@@ -102,6 +142,11 @@ import eventsRoutes from './routes/events';
 import clickAnalyticsRoutes from './routes/click-analytics';
 import seoRoutes from './routes/seo';
 import priceAnalyticsRoutes from './routes/price-analytics';
+import tasksRoutes from './routes/tasks';
+import workerRegistryRoutes from './routes/worker-registry';
+// Per TASK_WORKFLOW_2024-12-10.md: Raw payload access API
+import payloadsRoutes from './routes/payloads';
+import k8sRoutes from './routes/k8s';
 
 // Mark requests from trusted domains (cannaiq.co, findagram.co, findadispo.com)
 // These domains can access the API without authentication
@@ -144,6 +189,8 @@ app.use('/api/api-permissions', apiPermissionsRoutes);
 app.use('/api/parallel-scrape', parallelScrapeRoutes);
 app.use('/api/crawler-sandbox', crawlerSandboxRoutes);
 app.use('/api/version', versionRoutes);
+app.use('/api/admin/deploy-status', deployStatusRoutes);
+console.log('[DeployStatus] Routes registered at /api/admin/deploy-status');
 app.use('/api/users', usersRoutes);
 app.use('/api/stale-processes', staleProcessesRoutes);
 // Admin routes - orchestrator actions
@@ -172,6 +219,22 @@ app.use('/api/monitor', workersRoutes);
 app.use('/api/job-queue', jobQueueRoutes);
 console.log('[Workers] Routes registered at /api/workers, /api/monitor, and /api/job-queue');
 
+// Task queue management - worker tasks with capacity planning
+app.use('/api/tasks', tasksRoutes);
+console.log('[Tasks] Routes registered at /api/tasks');
+
+// Worker registry - dynamic worker registration, heartbeats, and name management
+app.use('/api/worker-registry', workerRegistryRoutes);
+console.log('[WorkerRegistry] Routes registered at /api/worker-registry');
+
+// Per TASK_WORKFLOW_2024-12-10.md: Raw payload access API
+app.use('/api/payloads', payloadsRoutes);
+console.log('[Payloads] Routes registered at /api/payloads');
+
+// K8s control routes - worker scaling from admin UI
+app.use('/api/k8s', k8sRoutes);
+console.log('[K8s] Routes registered at /api/k8s');
+
 // Phase 3: Analytics V2 - Enhanced analytics with rec/med state segmentation
 try {
   const analyticsV2Router = createAnalyticsV2Router(getPool());
@@ -258,6 +321,17 @@ async function startServer() {
   try {
     logger.info('system', 'Starting server...');
 
+    // Run auto-migrations before anything else
+    const pool = getPool();
+    const migrationsApplied = await runAutoMigrations(pool);
+    if (migrationsApplied > 0) {
+      logger.info('system', `Applied ${migrationsApplied} database migrations`);
+    } else if (migrationsApplied === 0) {
+      logger.info('system', 'Database schema up to date');
+    } else {
+      logger.warn('system', 'Some migrations failed - check logs');
+    }
+
     await initializeMinio();
     await initializeImageStorage();
     logger.info('system', isMinioEnabled() ? 'MinIO storage initialized' : 'Local filesystem storage initialized');
@@ -265,6 +339,17 @@ async function startServer() {
     // Clean up any orphaned proxy test jobs from previous server runs
     await cleanupOrphanedJobs();
 
+    // Per TASK_WORKFLOW_2024-12-10.md: Start database-driven task scheduler
+    // This replaces node-cron - schedules are stored in DB and survive restarts
+    // Uses SELECT FOR UPDATE SKIP LOCKED for multi-replica safety
+    try {
+      await taskScheduler.start();
+      logger.info('system', 'Task scheduler started');
+    } catch (err: any) {
+      // Non-fatal - scheduler can recover on next poll
+      logger.warn('system', `Task scheduler startup warning: ${err.message}`);
+    }
+
    app.listen(PORT, () => {
      logger.info('system', `Server running on port ${PORT}`);
      console.log(`🚀 Server running on port ${PORT}`);

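The latest-plugin handler compares major/minor/patch numerically, so cannaiq-menus-0.10.0.zip sorts ahead of cannaiq-menus-0.9.9.zip. A smoke-test sketch against the redirect (host and port are placeholders — the actual PORT is configured elsewhere):

const res = await fetch('http://localhost:3000/downloads/cannaiq-menus-latest.zip', {
  redirect: 'manual', // keep the 302 instead of following it
});
console.log(res.status, res.headers.get('location'));
// expected: 302 /downloads/cannaiq-menus-<highest-version>.zip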
@@ -5,8 +5,8 @@ import { Request, Response, NextFunction } from 'express';
  * These are our own frontends that should have unrestricted access.
  */
 const TRUSTED_DOMAINS = [
-  'cannaiq.co',
-  'www.cannaiq.co',
+  '*.cannaiq.co',
+  '*.cannabrands.app',
   'findagram.co',
   'www.findagram.co',
   'findadispo.com',
@@ -32,6 +32,24 @@ function extractDomain(header: string): string | null {
   }
 }
 
+/**
+ * Checks if a domain matches any trusted domain (supports *.domain.com wildcards)
+ */
+function isTrustedDomain(domain: string): boolean {
+  for (const trusted of TRUSTED_DOMAINS) {
+    if (trusted.startsWith('*.')) {
+      // Wildcard: *.example.com matches example.com and any subdomain
+      const baseDomain = trusted.slice(2);
+      if (domain === baseDomain || domain.endsWith('.' + baseDomain)) {
+        return true;
+      }
+    } else if (domain === trusted) {
+      return true;
+    }
+  }
+  return false;
+}
+
 /**
  * Checks if the request comes from a trusted domain
  */
@@ -42,7 +60,7 @@ function isRequestFromTrustedDomain(req: Request): boolean {
   // Check Origin header first (preferred for CORS requests)
   if (origin) {
     const domain = extractDomain(origin);
-    if (domain && TRUSTED_DOMAINS.includes(domain)) {
+    if (domain && isTrustedDomain(domain)) {
       return true;
     }
   }
@@ -50,7 +68,7 @@ function isRequestFromTrustedDomain(req: Request): boolean {
   // Fallback to Referer header
   if (referer) {
     const domain = extractDomain(referer);
-    if (domain && TRUSTED_DOMAINS.includes(domain)) {
+    if (domain && isTrustedDomain(domain)) {
      return true;
    }
  }

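With wildcard entries, isTrustedDomain accepts both the apex domain and any subdomain, but only across a '.' boundary. Sample evaluations of the function as defined above:

isTrustedDomain('cannaiq.co');        // true  - '*.cannaiq.co' matches the apex itself
isTrustedDomain('admin.cannaiq.co');  // true  - subdomain of a wildcard entry
isTrustedDomain('findagram.co');      // true  - exact (non-wildcard) entry
isTrustedDomain('evil-cannaiq.co');   // false - '-cannaiq.co' suffix is not '.cannaiq.co'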
@@ -702,12 +702,10 @@ export class StateQueryService {
   async getNationalSummary(): Promise<NationalSummary> {
     const stateMetrics = await this.getAllStateMetrics();
 
+    // Get all states count and aggregate metrics
     const result = await this.pool.query(`
       SELECT
         COUNT(DISTINCT s.code) AS total_states,
-        COUNT(DISTINCT CASE WHEN EXISTS (
-          SELECT 1 FROM dispensaries d WHERE d.state = s.code AND d.menu_type IS NOT NULL
-        ) THEN s.code END) AS active_states,
         (SELECT COUNT(*) FROM dispensaries WHERE state IS NOT NULL) AS total_stores,
         (SELECT COUNT(*) FROM store_products sp
          JOIN dispensaries d ON sp.dispensary_id = d.id
@@ -725,7 +723,7 @@ export class StateQueryService {
 
     return {
       totalStates: parseInt(data.total_states),
-      activeStates: parseInt(data.active_states),
+      activeStates: parseInt(data.total_states), // Same as totalStates - all states shown
      totalStores: parseInt(data.total_stores),
      totalProducts: parseInt(data.total_products),
      totalBrands: parseInt(data.total_brands),

@@ -5,22 +5,35 @@
|
|||||||
*
|
*
|
||||||
* DO NOT MODIFY THIS FILE WITHOUT EXPLICIT AUTHORIZATION.
|
* DO NOT MODIFY THIS FILE WITHOUT EXPLICIT AUTHORIZATION.
|
||||||
*
|
*
|
||||||
* This is the canonical HTTP client for all Dutchie communication.
|
* Updated: 2025-12-10 per workflow-12102025.md
|
||||||
* All Dutchie workers (Alice, Bella, etc.) MUST use this client.
|
*
|
||||||
|
* KEY BEHAVIORS (per workflow-12102025.md):
|
||||||
|
* 1. startSession() gets identity from PROXY LOCATION, not task params
|
||||||
|
* 2. On 403: immediately get new IP + new fingerprint, then retry
|
||||||
|
* 3. After 3 consecutive 403s on same proxy → disable it (burned)
|
||||||
|
* 4. Language is always English (en-US)
|
||||||
*
|
*
|
||||||
* IMPLEMENTATION:
|
* IMPLEMENTATION:
|
||||||
* - Uses curl via child_process.execSync (bypasses TLS fingerprinting)
|
* - Uses curl via child_process.execSync (bypasses TLS fingerprinting)
|
||||||
* - NO Puppeteer, NO axios, NO fetch
|
* - NO Puppeteer, NO axios, NO fetch
|
||||||
* - Fingerprint rotation on 403
|
* - Uses intoli/user-agents via CrawlRotator for realistic fingerprints
|
||||||
* - Residential IP compatible
|
* - Residential IP compatible
|
||||||
*
|
*
|
||||||
* USAGE:
|
* USAGE:
|
||||||
* import { curlPost, curlGet, executeGraphQL } from '@dutchie/client';
|
* import { curlPost, curlGet, executeGraphQL, startSession } from '@dutchie/client';
|
||||||
*
|
*
|
||||||
* ============================================================
|
* ============================================================
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import { execSync } from 'child_process';
|
import { execSync } from 'child_process';
|
||||||
|
import {
|
||||||
|
buildOrderedHeaders,
|
||||||
|
buildRefererFromMenuUrl,
|
||||||
|
getCurlBinary,
|
||||||
|
isCurlImpersonateAvailable,
|
||||||
|
HeaderContext,
|
||||||
|
BrowserType,
|
||||||
|
} from '../../services/http-fingerprint';
|
||||||
|
|
||||||
// ============================================================
|
// ============================================================
|
||||||
// TYPES
|
// TYPES
|
||||||
@@ -32,6 +45,8 @@ export interface CurlResponse {
|
|||||||
error?: string;
|
error?: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Per workflow-12102025.md: fingerprint comes from CrawlRotator's BrowserFingerprint
|
||||||
|
// We keep a simplified interface here for header building
|
||||||
export interface Fingerprint {
|
export interface Fingerprint {
|
||||||
userAgent: string;
|
userAgent: string;
|
||||||
acceptLanguage: string;
|
acceptLanguage: string;
|
||||||
@@ -57,15 +72,13 @@ export const DUTCHIE_CONFIG = {
|
|||||||
|
|
||||||
// ============================================================
|
// ============================================================
|
||||||
// PROXY SUPPORT
|
// PROXY SUPPORT
|
||||||
// ============================================================
|
// Per workflow-12102025.md:
|
||||||
// Integrates with the CrawlRotator system from proxy-rotator.ts
|
// - On 403: recordBlock() → increment consecutive_403_count
|
||||||
// On 403 errors:
|
// - After 3 consecutive 403s → proxy disabled
|
||||||
// 1. Record failure on current proxy
|
// - Immediately rotate to new IP + new fingerprint on 403
|
||||||
// 2. Rotate to next proxy
|
|
||||||
// 3. Retry with new proxy
|
|
||||||
// ============================================================
|
// ============================================================
|
||||||
|
|
||||||
import type { CrawlRotator, Proxy } from '../../services/crawl-rotator';
|
import type { CrawlRotator, BrowserFingerprint } from '../../services/crawl-rotator';
|
||||||
|
|
||||||
let currentProxy: string | null = null;
|
let currentProxy: string | null = null;
|
||||||
let crawlRotator: CrawlRotator | null = null;
|
let crawlRotator: CrawlRotator | null = null;
|
||||||
@@ -92,13 +105,12 @@ export function getProxy(): string | null {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Set CrawlRotator for proxy rotation on 403s
|
* Set CrawlRotator for proxy rotation on 403s
|
||||||
* This enables automatic proxy rotation when blocked
|
* Per workflow-12102025.md: enables automatic rotation when blocked
|
||||||
*/
|
*/
|
||||||
export function setCrawlRotator(rotator: CrawlRotator | null): void {
|
export function setCrawlRotator(rotator: CrawlRotator | null): void {
|
||||||
crawlRotator = rotator;
|
crawlRotator = rotator;
|
||||||
if (rotator) {
|
if (rotator) {
|
||||||
console.log('[Dutchie Client] CrawlRotator attached - proxy rotation enabled');
|
console.log('[Dutchie Client] CrawlRotator attached - proxy rotation enabled');
|
||||||
// Set initial proxy from rotator
|
|
||||||
const proxy = rotator.proxy.getCurrent();
|
const proxy = rotator.proxy.getCurrent();
|
||||||
if (proxy) {
|
if (proxy) {
|
||||||
currentProxy = rotator.proxy.getProxyUrl(proxy);
|
currentProxy = rotator.proxy.getProxyUrl(proxy);
|
||||||
@@ -115,30 +127,41 @@ export function getCrawlRotator(): CrawlRotator | null {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Rotate to next proxy (called on 403)
|
* Handle 403 block - per workflow-12102025.md:
|
||||||
|
* 1. Record block on current proxy (increments consecutive_403_count)
|
||||||
|
* 2. Immediately rotate to new proxy (new IP)
|
||||||
|
* 3. Rotate fingerprint
|
||||||
|
* Returns false if no more proxies available
|
||||||
*/
|
*/
|
||||||
async function rotateProxyOn403(error?: string): Promise<boolean> {
|
async function handle403Block(): Promise<boolean> {
|
||||||
if (!crawlRotator) {
|
if (!crawlRotator) {
|
||||||
|
console.warn('[Dutchie Client] No CrawlRotator - cannot handle 403');
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Record failure on current proxy
|
// Per workflow-12102025.md: record block (tracks consecutive 403s)
|
||||||
await crawlRotator.recordFailure(error || '403 Forbidden');
|
const wasDisabled = await crawlRotator.recordBlock();
|
||||||
|
if (wasDisabled) {
|
||||||
|
console.log('[Dutchie Client] Current proxy was disabled (3 consecutive 403s)');
|
||||||
|
}
|
||||||
|
|
||||||
|
// Per workflow-12102025.md: immediately get new IP + new fingerprint
|
||||||
|
const { proxy: nextProxy, fingerprint } = crawlRotator.rotateBoth();
|
||||||
|
|
||||||
// Rotate to next proxy
|
|
||||||
const nextProxy = crawlRotator.rotateProxy();
|
|
||||||
if (nextProxy) {
|
if (nextProxy) {
|
||||||
currentProxy = crawlRotator.proxy.getProxyUrl(nextProxy);
|
currentProxy = crawlRotator.proxy.getProxyUrl(nextProxy);
|
||||||
console.log(`[Dutchie Client] Rotated proxy: ${currentProxy.replace(/:[^:@]+@/, ':***@')}`);
|
console.log(`[Dutchie Client] Rotated to new proxy: ${currentProxy.replace(/:[^:@]+@/, ':***@')}`);
|
||||||
|
console.log(`[Dutchie Client] New fingerprint: ${fingerprint.userAgent.slice(0, 50)}...`);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
console.warn('[Dutchie Client] No more proxies available');
|
console.error('[Dutchie Client] No more proxies available!');
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Record success on current proxy
|
* Record success on current proxy
|
||||||
|
* Per workflow-12102025.md: resets consecutive_403_count
|
||||||
*/
|
*/
|
||||||
async function recordProxySuccess(responseTimeMs?: number): Promise<void> {
|
async function recordProxySuccess(responseTimeMs?: number): Promise<void> {
|
||||||
if (crawlRotator) {
|
if (crawlRotator) {
|
||||||
@@ -163,69 +186,89 @@ export const GRAPHQL_HASHES = {
|
|||||||
};
|
};
|
||||||
|
|
||||||
// ============================================================
|
// ============================================================
|
||||||
// FINGERPRINTS - Browser profiles for anti-detect
|
// SESSION MANAGEMENT
|
||||||
|
// Per workflow-12102025.md:
|
||||||
|
// - Session identity comes from PROXY LOCATION
|
||||||
|
-// - NOT from task params (no stateCode/timezone params)
+// - Language is always English
 // ============================================================

-const FINGERPRINTS: Fingerprint[] = [
-  // Chrome Windows (latest) - typical residential user, use first
-  {
-    userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
-    acceptLanguage: 'en-US,en;q=0.9',
-    secChUa: '"Google Chrome";v="131", "Chromium";v="131", "Not_A Brand";v="24"',
-    secChUaPlatform: '"Windows"',
-    secChUaMobile: '?0',
-  },
-  // Chrome Mac (latest)
-  {
-    userAgent: 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
-    acceptLanguage: 'en-US,en;q=0.9',
-    secChUa: '"Google Chrome";v="131", "Chromium";v="131", "Not_A Brand";v="24"',
-    secChUaPlatform: '"macOS"',
-    secChUaMobile: '?0',
-  },
-  // Chrome Windows (120)
-  {
-    userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
-    acceptLanguage: 'en-US,en;q=0.9',
-    secChUa: '"Chromium";v="120", "Google Chrome";v="120", "Not-A.Brand";v="99"',
-    secChUaPlatform: '"Windows"',
-    secChUaMobile: '?0',
-  },
-  // Firefox Windows
-  {
-    userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:133.0) Gecko/20100101 Firefox/133.0',
-    acceptLanguage: 'en-US,en;q=0.5',
-  },
-  // Safari Mac
-  {
-    userAgent: 'Mozilla/5.0 (Macintosh; Intel Mac OS X 14_2) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Safari/605.1.15',
-    acceptLanguage: 'en-US,en;q=0.9',
-  },
-  // Edge Windows
-  {
-    userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 Edg/131.0.0.0',
-    acceptLanguage: 'en-US,en;q=0.9',
-    secChUa: '"Microsoft Edge";v="131", "Chromium";v="131", "Not_A Brand";v="24"',
-    secChUaPlatform: '"Windows"',
-    secChUaMobile: '?0',
-  },
-];
-
-let currentFingerprintIndex = 0;
-
-export function getFingerprint(): Fingerprint {
-  return FINGERPRINTS[currentFingerprintIndex];
-}
-
-export function rotateFingerprint(): Fingerprint {
-  currentFingerprintIndex = (currentFingerprintIndex + 1) % FINGERPRINTS.length;
-  const fp = FINGERPRINTS[currentFingerprintIndex];
-  console.log(`[Dutchie Client] Rotated to fingerprint: ${fp.userAgent.slice(0, 50)}...`);
-  return fp;
-}
-
-export function resetFingerprint(): void {
-  currentFingerprintIndex = 0;
-}
+export interface CrawlSession {
+  sessionId: string;
+  fingerprint: BrowserFingerprint;
+  proxyUrl: string | null;
+  proxyTimezone?: string;
+  proxyState?: string;
+  startedAt: Date;
+  // Per workflow-12102025.md: Dynamic Referer per dispensary
+  menuUrl?: string;
+  referer: string;
+}
+
+let currentSession: CrawlSession | null = null;
+
+/**
+ * Start a new crawl session
+ *
+ * Per workflow-12102025.md:
+ * - NO state/timezone params - identity comes from proxy location
+ * - Gets fingerprint from CrawlRotator (uses intoli/user-agents)
+ * - Language is always English (en-US)
+ * - Dynamic Referer per dispensary (from menuUrl)
+ *
+ * @param menuUrl - The dispensary's menu URL for dynamic Referer header
+ */
+export function startSession(menuUrl?: string): CrawlSession {
+  if (!crawlRotator) {
+    throw new Error('[Dutchie Client] Cannot start session without CrawlRotator');
+  }
+
+  // Per workflow-12102025.md: get identity from proxy location
+  const proxyLocation = crawlRotator.getProxyLocation();
+  const fingerprint = crawlRotator.userAgent.getCurrent();
+
+  // Per workflow-12102025.md: Dynamic Referer per dispensary
+  const referer = buildRefererFromMenuUrl(menuUrl);
+
+  currentSession = {
+    sessionId: `session_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`,
+    fingerprint,
+    proxyUrl: currentProxy,
+    proxyTimezone: proxyLocation?.timezone,
+    proxyState: proxyLocation?.state,
+    startedAt: new Date(),
+    menuUrl,
+    referer,
+  };
+
+  console.log(`[Dutchie Client] Started session ${currentSession.sessionId}`);
+  console.log(`[Dutchie Client] Browser: ${fingerprint.browserName} (${fingerprint.deviceCategory})`);
+  console.log(`[Dutchie Client] DNT: ${fingerprint.httpFingerprint.hasDNT ? 'enabled' : 'disabled'}`);
+  console.log(`[Dutchie Client] TLS: ${fingerprint.httpFingerprint.curlImpersonateBinary}`);
+  console.log(`[Dutchie Client] Referer: ${referer}`);
+  if (proxyLocation?.timezone) {
+    console.log(`[Dutchie Client] Proxy: ${proxyLocation.state || 'unknown'} (${proxyLocation.timezone})`);
+  }
+
+  return currentSession;
+}
+
+/**
+ * End the current crawl session
+ */
+export function endSession(): void {
+  if (currentSession) {
+    const duration = Math.round((Date.now() - currentSession.startedAt.getTime()) / 1000);
+    console.log(`[Dutchie Client] Ended session ${currentSession.sessionId} (${duration}s)`);
+    currentSession = null;
+  }
+}
+
+/**
+ * Get current active session
+ */
+export function getCurrentSession(): CrawlSession | null {
+  return currentSession;
+}

 // ============================================================
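Taken as a whole, this change swaps the hard-coded fingerprint pool for an explicit crawl session. A minimal sketch of the intended lifecycle, assuming the module-level `crawlRotator`, `currentProxy`, and `buildRefererFromMenuUrl` wired up elsewhere in this diff:

```ts
// Hypothetical caller; only startSession/endSession come from this diff.
import { startSession, endSession } from './client';

async function withCrawlSession<T>(menuUrl: string, work: () => Promise<T>): Promise<T> {
  const session = startSession(menuUrl); // picks fingerprint + proxy identity, builds Referer
  try {
    console.log(`session ${session.sessionId} -> referer ${session.referer}`);
    return await work(); // executeGraphQL / fetchPage calls run against this session
  } finally {
    endSession(); // logs duration and clears the module-level session
  }
}
```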
@@ -233,48 +276,80 @@ export function resetFingerprint(): void {
 // ============================================================

 /**
- * Build headers for Dutchie requests
+ * Per workflow-12102025.md: Build headers using HTTP fingerprint system
+ * Returns headers in browser-specific order with all natural variations
  */
-export function buildHeaders(refererPath: string, fingerprint?: Fingerprint): Record<string, string> {
-  const fp = fingerprint || getFingerprint();
-  const refererUrl = `https://dutchie.com${refererPath}`;
-
-  const headers: Record<string, string> = {
-    'accept': 'application/json, text/plain, */*',
-    'accept-language': fp.acceptLanguage,
-    'content-type': 'application/json',
-    'origin': 'https://dutchie.com',
-    'referer': refererUrl,
-    'user-agent': fp.userAgent,
-    'apollographql-client-name': 'Marketplace (production)',
-  };
-
-  if (fp.secChUa) {
-    headers['sec-ch-ua'] = fp.secChUa;
-    headers['sec-ch-ua-mobile'] = fp.secChUaMobile || '?0';
-    headers['sec-ch-ua-platform'] = fp.secChUaPlatform || '"Windows"';
-    headers['sec-fetch-dest'] = 'empty';
-    headers['sec-fetch-mode'] = 'cors';
-    headers['sec-fetch-site'] = 'same-site';
-  }
-
-  return headers;
+export function buildHeaders(isPost: boolean, contentLength?: number): { headers: Record<string, string>; orderedHeaders: string[] } {
+  if (!currentSession || !crawlRotator) {
+    throw new Error('[Dutchie Client] Cannot build headers without active session');
+  }
+
+  const fp = currentSession.fingerprint;
+  const httpFp = fp.httpFingerprint;
+
+  // Per workflow-12102025.md: Build context for ordered headers
+  const context: HeaderContext = {
+    userAgent: fp.userAgent,
+    secChUa: fp.secChUa,
+    secChUaPlatform: fp.secChUaPlatform,
+    secChUaMobile: fp.secChUaMobile,
+    referer: currentSession.referer,
+    isPost,
+    contentLength,
+  };
+
+  // Per workflow-12102025.md: Get ordered headers from HTTP fingerprint service
+  return buildOrderedHeaders(httpFp, context);
 }

 /**
- * Execute HTTP POST using curl (bypasses TLS fingerprinting)
+ * Per workflow-12102025.md: Get curl binary for current session's browser
+ * Uses curl-impersonate for TLS fingerprint matching
  */
-export function curlPost(url: string, body: any, headers: Record<string, string>, timeout = 30000): CurlResponse {
-  const filteredHeaders = Object.entries(headers)
-    .filter(([k]) => k.toLowerCase() !== 'accept-encoding')
-    .map(([k, v]) => `-H '${k}: ${v}'`)
+function getCurlBinaryForSession(): string {
+  if (!currentSession) {
+    return 'curl'; // Fallback to standard curl
+  }
+
+  const browserType = currentSession.fingerprint.browserName as BrowserType;
+
+  // Per workflow-12102025.md: Check if curl-impersonate is available
+  if (isCurlImpersonateAvailable(browserType)) {
+    return getCurlBinary(browserType);
+  }
+
+  // Fallback to standard curl with warning
+  console.warn(`[Dutchie Client] curl-impersonate not available for ${browserType}, using standard curl`);
+  return 'curl';
+}
+
+/**
+ * Per workflow-12102025.md: Execute HTTP POST using curl/curl-impersonate
+ * - Uses browser-specific TLS fingerprint via curl-impersonate
+ * - Headers sent in browser-specific order
+ * - Dynamic Referer per dispensary
+ */
+export function curlPost(url: string, body: any, timeout = 30000): CurlResponse {
+  const bodyJson = JSON.stringify(body);
+
+  // Per workflow-12102025.md: Build ordered headers for POST request
+  const { headers, orderedHeaders } = buildHeaders(true, bodyJson.length);
+
+  // Per workflow-12102025.md: Build header args in browser-specific order
+  const headerArgs = orderedHeaders
+    .filter(h => h !== 'Host' && h !== 'Content-Length') // curl handles these
+    .map(h => `-H '${h}: ${headers[h]}'`)
     .join(' ');

-  const bodyJson = JSON.stringify(body).replace(/'/g, "'\\''");
+  const bodyEscaped = bodyJson.replace(/'/g, "'\\''");
   const timeoutSec = Math.ceil(timeout / 1000);
   const separator = '___HTTP_STATUS___';
   const proxyArg = getProxyArg();
-  const cmd = `curl -s --compressed ${proxyArg} -w '${separator}%{http_code}' --max-time ${timeoutSec} ${filteredHeaders} -d '${bodyJson}' '${url}'`;
+
+  // Per workflow-12102025.md: Use curl-impersonate for TLS fingerprint matching
+  const curlBinary = getCurlBinaryForSession();
+
+  const cmd = `${curlBinary} -s --compressed ${proxyArg} -w '${separator}%{http_code}' --max-time ${timeoutSec} ${headerArgs} -d '${bodyEscaped}' '${url}'`;

   try {
     const output = execSync(cmd, {
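One detail in this hunk worth a second look: the POST body is embedded in a single-quoted shell argument, so every `'` must become `'\''` (close the quote, emit an escaped literal quote, reopen). A tiny self-contained illustration with an invented payload:

```ts
const body = { query: "it's a test" };
const bodyJson = JSON.stringify(body);               // {"query":"it's a test"}
const bodyEscaped = bodyJson.replace(/'/g, "'\\''"); // same escaping as the diff above
// The payload survives as one shell argument:
//   -d '{"query":"it'\''s a test"}'
console.log(`-d '${bodyEscaped}'`);
```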
@@ -313,19 +388,29 @@ export function curlPost(url: string, body: any, headers: Record<string, string>
 }

 /**
- * Execute HTTP GET using curl (bypasses TLS fingerprinting)
- * Returns HTML or JSON depending on response content-type
+ * Per workflow-12102025.md: Execute HTTP GET using curl/curl-impersonate
+ * - Uses browser-specific TLS fingerprint via curl-impersonate
+ * - Headers sent in browser-specific order
+ * - Dynamic Referer per dispensary
  */
-export function curlGet(url: string, headers: Record<string, string>, timeout = 30000): CurlResponse {
-  const filteredHeaders = Object.entries(headers)
-    .filter(([k]) => k.toLowerCase() !== 'accept-encoding')
-    .map(([k, v]) => `-H '${k}: ${v}'`)
+export function curlGet(url: string, timeout = 30000): CurlResponse {
+  // Per workflow-12102025.md: Build ordered headers for GET request
+  const { headers, orderedHeaders } = buildHeaders(false);
+
+  // Per workflow-12102025.md: Build header args in browser-specific order
+  const headerArgs = orderedHeaders
+    .filter(h => h !== 'Host' && h !== 'Content-Length') // curl handles these
+    .map(h => `-H '${h}: ${headers[h]}'`)
     .join(' ');

   const timeoutSec = Math.ceil(timeout / 1000);
   const separator = '___HTTP_STATUS___';
   const proxyArg = getProxyArg();
-  const cmd = `curl -s --compressed ${proxyArg} -w '${separator}%{http_code}' --max-time ${timeoutSec} ${filteredHeaders} '${url}'`;
+
+  // Per workflow-12102025.md: Use curl-impersonate for TLS fingerprint matching
+  const curlBinary = getCurlBinaryForSession();
+
+  const cmd = `${curlBinary} -s --compressed ${proxyArg} -w '${separator}%{http_code}' --max-time ${timeoutSec} ${headerArgs} '${url}'`;

   try {
     const output = execSync(cmd, {
@@ -345,7 +430,6 @@ export function curlGet(url: string, headers: Record<string, string>, timeout =
     const responseBody = output.slice(0, separatorIndex);
     const statusCode = parseInt(output.slice(separatorIndex + separator.length).trim(), 10);

-    // Try to parse as JSON, otherwise return as string (HTML)
     try {
       return { status: statusCode, data: JSON.parse(responseBody) };
     } catch {
@@ -362,16 +446,22 @@ export function curlGet(url: string, headers: Record<string, string>, timeout =
 // ============================================================
 // GRAPHQL EXECUTION
+// Per workflow-12102025.md:
+// - On 403: immediately rotate IP + fingerprint (no delay first)
+// - Then retry
 // ============================================================

 export interface ExecuteGraphQLOptions {
   maxRetries?: number;
   retryOn403?: boolean;
-  cName?: string; // Optional - used for Referer header, defaults to 'cities'
+  cName?: string;
 }

 /**
- * Execute GraphQL query with curl (bypasses TLS fingerprinting)
+ * Per workflow-12102025.md: Execute GraphQL query with curl/curl-impersonate
+ * - Uses browser-specific TLS fingerprint
+ * - Headers in browser-specific order
+ * - On 403: immediately rotate IP + fingerprint, then retry
  */
 export async function executeGraphQL(
   operationName: string,
@@ -379,7 +469,12 @@ export async function executeGraphQL(
   hash: string,
   options: ExecuteGraphQLOptions
 ): Promise<any> {
-  const { maxRetries = 3, retryOn403 = true, cName = 'cities' } = options;
+  const { maxRetries = 3, retryOn403 = true } = options;
+
+  // Per workflow-12102025.md: Session must be active for requests
+  if (!currentSession) {
+    throw new Error('[Dutchie Client] Cannot execute GraphQL without active session - call startSession() first');
+  }

   const body = {
     operationName,
@@ -393,14 +488,14 @@ export async function executeGraphQL(
   let attempt = 0;

   while (attempt <= maxRetries) {
-    const fingerprint = getFingerprint();
-    const headers = buildHeaders(`/embedded-menu/${cName}`, fingerprint);
-
     console.log(`[Dutchie Client] curl POST ${operationName} (attempt ${attempt + 1}/${maxRetries + 1})`);

-    const response = curlPost(DUTCHIE_CONFIG.graphqlEndpoint, body, headers, DUTCHIE_CONFIG.timeout);
+    const startTime = Date.now();
+    // Per workflow-12102025.md: curlPost now uses ordered headers and curl-impersonate
+    const response = curlPost(DUTCHIE_CONFIG.graphqlEndpoint, body, DUTCHIE_CONFIG.timeout);
+    const responseTime = Date.now() - startTime;

-    console.log(`[Dutchie Client] Response status: ${response.status}`);
+    console.log(`[Dutchie Client] Response status: ${response.status} (${responseTime}ms)`);

     if (response.error) {
       console.error(`[Dutchie Client] curl error: ${response.error}`);
@@ -413,6 +508,9 @@ export async function executeGraphQL(
     }

     if (response.status === 200) {
+      // Per workflow-12102025.md: success resets consecutive 403 count
+      await recordProxySuccess(responseTime);
+
       if (response.data?.errors?.length > 0) {
         console.warn(`[Dutchie Client] GraphQL errors: ${JSON.stringify(response.data.errors[0])}`);
       }
@@ -420,10 +518,20 @@ export async function executeGraphQL(
     }

     if (response.status === 403 && retryOn403) {
-      console.warn(`[Dutchie Client] 403 blocked - rotating fingerprint...`);
-      rotateFingerprint();
+      // Per workflow-12102025.md: immediately rotate IP + fingerprint
+      console.warn(`[Dutchie Client] 403 blocked - immediately rotating proxy + fingerprint...`);
+      const hasMoreProxies = await handle403Block();
+
+      if (!hasMoreProxies) {
+        throw new Error('All proxies exhausted - no more IPs available');
+      }
+
+      // Per workflow-12102025.md: Update session referer after rotation
+      currentSession.referer = buildRefererFromMenuUrl(currentSession.menuUrl);
+
       attempt++;
-      await sleep(1000 * attempt);
+      // Per workflow-12102025.md: small backoff after rotation
+      await sleep(500);
       continue;
     }

@@ -452,8 +560,10 @@ export interface FetchPageOptions {
 }

 /**
- * Fetch HTML page from Dutchie (for city pages, dispensary pages, etc.)
- * Returns raw HTML string
+ * Per workflow-12102025.md: Fetch HTML page from Dutchie
+ * - Uses browser-specific TLS fingerprint
+ * - Headers in browser-specific order
+ * - Same 403 handling as GraphQL
  */
 export async function fetchPage(
   path: string,
@@ -462,32 +572,22 @@ export async function fetchPage(
   const { maxRetries = 3, retryOn403 = true } = options;
   const url = `${DUTCHIE_CONFIG.baseUrl}${path}`;

+  // Per workflow-12102025.md: Session must be active for requests
+  if (!currentSession) {
+    throw new Error('[Dutchie Client] Cannot fetch page without active session - call startSession() first');
+  }
+
   let attempt = 0;

   while (attempt <= maxRetries) {
-    const fingerprint = getFingerprint();
-    const headers: Record<string, string> = {
-      'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',
-      'accept-language': fingerprint.acceptLanguage,
-      'user-agent': fingerprint.userAgent,
-    };
-
-    if (fingerprint.secChUa) {
-      headers['sec-ch-ua'] = fingerprint.secChUa;
-      headers['sec-ch-ua-mobile'] = fingerprint.secChUaMobile || '?0';
-      headers['sec-ch-ua-platform'] = fingerprint.secChUaPlatform || '"Windows"';
-      headers['sec-fetch-dest'] = 'document';
-      headers['sec-fetch-mode'] = 'navigate';
-      headers['sec-fetch-site'] = 'none';
-      headers['sec-fetch-user'] = '?1';
-      headers['upgrade-insecure-requests'] = '1';
-    }
-
+    // Per workflow-12102025.md: curlGet now uses ordered headers and curl-impersonate
     console.log(`[Dutchie Client] curl GET ${path} (attempt ${attempt + 1}/${maxRetries + 1})`);

-    const response = curlGet(url, headers, DUTCHIE_CONFIG.timeout);
+    const startTime = Date.now();
+    const response = curlGet(url, DUTCHIE_CONFIG.timeout);
+    const responseTime = Date.now() - startTime;

-    console.log(`[Dutchie Client] Response status: ${response.status}`);
+    console.log(`[Dutchie Client] Response status: ${response.status} (${responseTime}ms)`);

     if (response.error) {
       console.error(`[Dutchie Client] curl error: ${response.error}`);
@@ -499,14 +599,26 @@ export async function fetchPage(
     }

     if (response.status === 200) {
+      // Per workflow-12102025.md: success resets consecutive 403 count
+      await recordProxySuccess(responseTime);
       return { html: response.data, status: response.status };
     }

     if (response.status === 403 && retryOn403) {
-      console.warn(`[Dutchie Client] 403 blocked - rotating fingerprint...`);
-      rotateFingerprint();
+      // Per workflow-12102025.md: immediately rotate IP + fingerprint
+      console.warn(`[Dutchie Client] 403 blocked - immediately rotating proxy + fingerprint...`);
+      const hasMoreProxies = await handle403Block();
+
+      if (!hasMoreProxies) {
+        throw new Error('All proxies exhausted - no more IPs available');
+      }
+
+      // Per workflow-12102025.md: Update session after rotation
+      currentSession.referer = buildRefererFromMenuUrl(currentSession.menuUrl);
+
       attempt++;
-      await sleep(1000 * attempt);
+      // Per workflow-12102025.md: small backoff after rotation
+      await sleep(500);
       continue;
     }

@@ -6,18 +6,20 @@
  */

 export {
-  // HTTP Client
+  // HTTP Client (per workflow-12102025.md: uses curl-impersonate + ordered headers)
   curlPost,
   curlGet,
   executeGraphQL,
   fetchPage,
   extractNextData,

-  // Headers & Fingerprints
+  // Headers (per workflow-12102025.md: browser-specific ordering)
   buildHeaders,
-  getFingerprint,
-  rotateFingerprint,
-  resetFingerprint,
+
+  // Session Management (per workflow-12102025.md: menuUrl for dynamic Referer)
+  startSession,
+  endSession,
+  getCurrentSession,

   // Proxy
   setProxy,
@@ -32,6 +34,7 @@ export {
   // Types
   type CurlResponse,
   type Fingerprint,
+  type CrawlSession,
   type ExecuteGraphQLOptions,
   type FetchPageOptions,
 } from './client';
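With the barrel updated, callers move from manual fingerprint rotation to a session-scoped flow. A hedged sketch of the new pattern — the import path and the GraphQL variables argument are assumptions, not taken from this diff:

```ts
import { startSession, executeGraphQL, fetchPage, endSession } from './client';

async function crawlMenu(menuUrl: string, operationName: string, hash: string) {
  startSession(menuUrl); // binds fingerprint, proxy identity, and dynamic Referer
  try {
    const page = await fetchPage('/dispensary/example-store'); // illustrative path
    // Middle argument (query variables) is assumed; the diff shows name/hash/options.
    const data = await executeGraphQL(operationName, {}, hash, { maxRetries: 3 });
    return { page, data };
  } finally {
    endSession();
  }
}
```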
@@ -7,15 +7,23 @@
  * Routes are prefixed with /api/analytics/v2
  *
  * Phase 3: Analytics Engine + Rec/Med by State
+ *
+ * SECURITY: All routes require authentication via authMiddleware.
+ * Access is granted to:
+ * - Trusted origins (cannaiq.co, findadispo.com, etc.)
+ * - Trusted IPs (localhost, internal pods)
+ * - Valid JWT or API tokens
  */

 import { Router, Request, Response } from 'express';
 import { Pool } from 'pg';
+import { authMiddleware } from '../auth/middleware';
 import { PriceAnalyticsService } from '../services/analytics/PriceAnalyticsService';
 import { BrandPenetrationService } from '../services/analytics/BrandPenetrationService';
 import { CategoryAnalyticsService } from '../services/analytics/CategoryAnalyticsService';
 import { StoreAnalyticsService } from '../services/analytics/StoreAnalyticsService';
 import { StateAnalyticsService } from '../services/analytics/StateAnalyticsService';
+import { BrandIntelligenceService } from '../services/analytics/BrandIntelligenceService';
 import { TimeWindow, LegalType } from '../services/analytics/types';

 function parseTimeWindow(window?: string): TimeWindow {
@@ -35,12 +43,17 @@ function parseLegalType(legalType?: string): LegalType {
 export function createAnalyticsV2Router(pool: Pool): Router {
   const router = Router();

+  // SECURITY: Apply auth middleware to ALL routes
+  // This gate ensures only authenticated requests can access analytics data
+  router.use(authMiddleware);
+
   // Initialize services
   const priceService = new PriceAnalyticsService(pool);
   const brandService = new BrandPenetrationService(pool);
   const categoryService = new CategoryAnalyticsService(pool);
   const storeService = new StoreAnalyticsService(pool);
   const stateService = new StateAnalyticsService(pool);
+  const brandIntelligenceService = new BrandIntelligenceService(pool);

   // ============================================================
   // PRICE ANALYTICS
@@ -231,6 +244,76 @@ export function createAnalyticsV2Router(pool: Pool): Router {
     }
   });

+  /**
+   * GET /brand/:name/promotions
+   * Get brand promotional history - tracks specials, discounts, duration, and sales estimates
+   *
+   * Query params:
+   * - window: 7d|30d|90d (default: 90d)
+   * - state: state code filter (e.g., AZ)
+   * - category: category filter (e.g., Flower)
+   */
+  router.get('/brand/:name/promotions', async (req: Request, res: Response) => {
+    try {
+      const brandName = decodeURIComponent(req.params.name);
+      const window = parseTimeWindow(req.query.window as string) || '90d';
+      const stateCode = req.query.state as string | undefined;
+      const category = req.query.category as string | undefined;
+
+      const result = await brandService.getBrandPromotionalHistory(brandName, {
+        window,
+        stateCode,
+        category,
+      });
+      res.json(result);
+    } catch (error) {
+      console.error('[AnalyticsV2] Brand promotions error:', error);
+      res.status(500).json({ error: 'Failed to fetch brand promotional history' });
+    }
+  });
+
+  /**
+   * GET /brand/:name/intelligence
+   * Get comprehensive B2B brand intelligence dashboard data
+   *
+   * Returns all brand metrics in a single unified response:
+   * - Performance Snapshot (active SKUs, revenue, stores, market share)
+   * - Alerts/Slippage (lost stores, delisted SKUs, competitor takeovers)
+   * - Product Velocity (daily rates, velocity status)
+   * - Retail Footprint (penetration, whitespace opportunities)
+   * - Competitive Landscape (price position, market share trend)
+   * - Inventory Health (days of stock, risk levels)
+   * - Promotion Effectiveness (baseline vs promo velocity, ROI)
+   *
+   * Query params:
+   * - window: 7d|30d|90d (default: 30d)
+   * - state: state code filter (e.g., AZ)
+   * - category: category filter (e.g., Flower)
+   */
+  router.get('/brand/:name/intelligence', async (req: Request, res: Response) => {
+    try {
+      const brandName = decodeURIComponent(req.params.name);
+      const window = parseTimeWindow(req.query.window as string);
+      const stateCode = req.query.state as string | undefined;
+      const category = req.query.category as string | undefined;
+
+      const result = await brandIntelligenceService.getBrandIntelligence(brandName, {
+        window,
+        stateCode,
+        category,
+      });
+
+      if (!result) {
+        return res.status(404).json({ error: 'Brand not found' });
+      }
+
+      res.json(result);
+    } catch (error) {
+      console.error('[AnalyticsV2] Brand intelligence error:', error);
+      res.status(500).json({ error: 'Failed to fetch brand intelligence' });
+    }
+  });
+
   // ============================================================
   // CATEGORY ANALYTICS
   // ============================================================
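A hedged example of calling the two new brand endpoints from a browser or Node client — base URL, token, and brand name are placeholders:

```ts
const base = '/api/analytics/v2';
const headers = { Authorization: 'Bearer <token>' }; // any accepted auth works here

const promotions = await fetch(
  `${base}/brand/${encodeURIComponent('Example Brand')}/promotions?window=90d&state=AZ`,
  { headers }
).then(r => r.json());

const intelRes = await fetch(
  `${base}/brand/${encodeURIComponent('Example Brand')}/intelligence?window=30d`,
  { headers }
);
if (intelRes.status === 404) throw new Error('Brand not found');
const dashboard = await intelRes.json();
```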
@@ -400,6 +483,31 @@ export function createAnalyticsV2Router(pool: Pool): Router {
     }
   });

+  /**
+   * GET /store/:id/quantity-changes
+   * Get quantity changes for a store (increases/decreases)
+   * Useful for estimating sales (decreases) or restocks (increases)
+   *
+   * Query params:
+   * - window: 7d|30d|90d (default: 7d)
+   * - direction: increase|decrease|all (default: all)
+   * - limit: number (default: 100)
+   */
+  router.get('/store/:id/quantity-changes', async (req: Request, res: Response) => {
+    try {
+      const dispensaryId = parseInt(req.params.id);
+      const window = parseTimeWindow(req.query.window as string);
+      const direction = (req.query.direction as 'increase' | 'decrease' | 'all') || 'all';
+      const limit = req.query.limit ? parseInt(req.query.limit as string) : 100;
+
+      const result = await storeService.getQuantityChanges(dispensaryId, { window, direction, limit });
+      res.json(result);
+    } catch (error) {
+      console.error('[AnalyticsV2] Store quantity changes error:', error);
+      res.status(500).json({ error: 'Failed to fetch store quantity changes' });
+    }
+  });
+
   /**
    * GET /store/:id/inventory
    * Get store inventory composition
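Since decreases approximate sales and increases approximate restocks, a consumer might aggregate the decrease rows. A sketch with an assumed response shape — the handler above returns whatever `storeService.getQuantityChanges` produces, so the field names here are illustrative only:

```ts
// Response field names are assumptions, not confirmed by this diff.
const res = await fetch('/api/analytics/v2/store/42/quantity-changes?window=7d&direction=decrease');
const { changes } = await res.json();
const estimatedUnitsSold = (changes ?? []).reduce(
  (sum: number, c: { delta?: number }) => sum + Math.abs(c.delta ?? 0),
  0
);
console.log(`~${estimatedUnitsSold} units sold in the last 7 days`);
```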
@@ -47,4 +47,27 @@ router.post('/refresh', authMiddleware, async (req: AuthRequest, res) => {
   res.json({ token });
 });

+// Verify password for sensitive actions (requires current user to be authenticated)
+router.post('/verify-password', authMiddleware, async (req: AuthRequest, res) => {
+  try {
+    const { password } = req.body;
+
+    if (!password) {
+      return res.status(400).json({ error: 'Password required' });
+    }
+
+    // Re-authenticate the current user with the provided password
+    const user = await authenticateUser(req.user!.email, password);
+
+    if (!user) {
+      return res.status(401).json({ error: 'Invalid password', verified: false });
+    }
+
+    res.json({ verified: true });
+  } catch (error) {
+    console.error('Password verification error:', error);
+    res.status(500).json({ error: 'Internal server error' });
+  }
+});
+
 export default router;
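A sketch of how a front end might gate a sensitive action on this endpoint — the `/api/auth` prefix is an assumption; only `/verify-password` appears in the diff:

```ts
async function confirmPassword(password: string, token: string): Promise<boolean> {
  const res = await fetch('/api/auth/verify-password', { // route prefix assumed
    method: 'POST',
    headers: { 'Content-Type': 'application/json', Authorization: `Bearer ${token}` },
    body: JSON.stringify({ password }),
  });
  if (res.status === 401) return false; // { error: 'Invalid password', verified: false }
  if (!res.ok) throw new Error('Password verification failed');
  return (await res.json()).verified === true;
}
```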
@@ -5,31 +5,35 @@ import { pool } from '../db/pool';
 const router = Router();
 router.use(authMiddleware);

-// Get categories (flat list)
+// Get categories (flat list) - derived from actual product data
 router.get('/', async (req, res) => {
   try {
-    const { store_id } = req.query;
+    const { store_id, in_stock_only } = req.query;

     let query = `
       SELECT
-        c.*,
-        COUNT(DISTINCT p.id) as product_count,
-        pc.name as parent_name
-      FROM categories c
-      LEFT JOIN store_products p ON c.name = p.category_raw
-      LEFT JOIN categories pc ON c.parent_id = pc.id
+        category_raw as name,
+        category_raw as slug,
+        COUNT(*) as product_count,
+        COUNT(*) FILTER (WHERE is_in_stock = true) as in_stock_count
+      FROM store_products
+      WHERE category_raw IS NOT NULL
     `;

     const params: any[] = [];

     if (store_id) {
-      query += ' WHERE c.store_id = $1';
       params.push(store_id);
+      query += ` AND dispensary_id = $${params.length}`;
+    }
+
+    if (in_stock_only === 'true') {
+      query += ` AND is_in_stock = true`;
     }

     query += `
-      GROUP BY c.id, pc.name
-      ORDER BY c.display_order, c.name
+      GROUP BY category_raw
+      ORDER BY category_raw
     `;

     const result = await pool.query(query, params);
@@ -40,49 +44,85 @@ router.get('/', async (req, res) => {
   }
 });

-// Get category tree (hierarchical)
+// Get category tree (hierarchical) - category -> subcategory structure from product data
 router.get('/tree', async (req, res) => {
   try {
-    const { store_id } = req.query;
+    const { store_id, in_stock_only } = req.query;

-    if (!store_id) {
-      return res.status(400).json({ error: 'store_id is required' });
-    }
-
-    // Get all categories for the store
-    const result = await pool.query(`
+    // Get category + subcategory combinations with counts
+    let query = `
       SELECT
-        c.*,
-        COUNT(DISTINCT p.id) as product_count
-      FROM categories c
-      LEFT JOIN store_products p ON c.name = p.category_raw AND p.is_in_stock = true AND p.dispensary_id = $1
-      WHERE c.store_id = $1
-      GROUP BY c.id
-      ORDER BY c.display_order, c.name
-    `, [store_id]);
-
-    // Build tree structure
-    const categories = result.rows;
-    const categoryMap = new Map();
-    const tree: any[] = [];
-
-    // First pass: create map
-    categories.forEach((cat: { id: number; parent_id?: number }) => {
-      categoryMap.set(cat.id, { ...cat, children: [] });
-    });
-
-    // Second pass: build tree
-    categories.forEach((cat: { id: number; parent_id?: number }) => {
-      const node = categoryMap.get(cat.id);
-      if (cat.parent_id) {
-        const parent = categoryMap.get(cat.parent_id);
-        if (parent) {
-          parent.children.push(node);
-        }
-      } else {
-        tree.push(node);
-      }
-    });
+        category_raw as category,
+        subcategory_raw as subcategory,
+        COUNT(*) as product_count,
+        COUNT(*) FILTER (WHERE is_in_stock = true) as in_stock_count
+      FROM store_products
+      WHERE category_raw IS NOT NULL
+    `;
+
+    const params: any[] = [];
+
+    if (store_id) {
+      params.push(store_id);
+      query += ` AND dispensary_id = $${params.length}`;
+    }
+
+    if (in_stock_only === 'true') {
+      query += ` AND is_in_stock = true`;
+    }
+
+    query += `
+      GROUP BY category_raw, subcategory_raw
+      ORDER BY category_raw, subcategory_raw
+    `;
+
+    const result = await pool.query(query, params);
+
+    // Build tree structure: category -> subcategories
+    const categoryMap = new Map<string, {
+      name: string;
+      slug: string;
+      product_count: number;
+      in_stock_count: number;
+      subcategories: Array<{
+        name: string;
+        slug: string;
+        product_count: number;
+        in_stock_count: number;
+      }>;
+    }>();
+
+    for (const row of result.rows) {
+      const category = row.category;
+      const subcategory = row.subcategory;
+      const count = parseInt(row.product_count);
+      const inStockCount = parseInt(row.in_stock_count);
+
+      if (!categoryMap.has(category)) {
+        categoryMap.set(category, {
+          name: category,
+          slug: category.toLowerCase().replace(/\s+/g, '-'),
+          product_count: 0,
+          in_stock_count: 0,
+          subcategories: []
+        });
+      }
+
+      const cat = categoryMap.get(category)!;
+      cat.product_count += count;
+      cat.in_stock_count += inStockCount;
+
+      if (subcategory) {
+        cat.subcategories.push({
+          name: subcategory,
+          slug: subcategory.toLowerCase().replace(/\s+/g, '-'),
+          product_count: count,
+          in_stock_count: inStockCount
+        });
+      }
+    }
+
+    const tree = Array.from(categoryMap.values());

     res.json({ tree });
   } catch (error) {
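The handler above implies a response of this shape — values invented for illustration:

```ts
const exampleTreeResponse = {
  tree: [
    {
      name: 'Flower',
      slug: 'flower',
      product_count: 320,   // summed across subcategory rows
      in_stock_count: 274,
      subcategories: [
        { name: 'Pre-Rolls', slug: 'pre-rolls', product_count: 120, in_stock_count: 98 },
        { name: 'Ground', slug: 'ground', product_count: 40, in_stock_count: 31 },
      ],
    },
  ],
};
```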
@@ -91,4 +131,91 @@ router.get('/tree', async (req, res) => {
   }
 });

+// Get all unique subcategories for a category
+router.get('/:category/subcategories', async (req, res) => {
+  try {
+    const { category } = req.params;
+    const { store_id, in_stock_only } = req.query;
+
+    let query = `
+      SELECT
+        subcategory_raw as name,
+        subcategory_raw as slug,
+        COUNT(*) as product_count,
+        COUNT(*) FILTER (WHERE is_in_stock = true) as in_stock_count
+      FROM store_products
+      WHERE category_raw = $1
+        AND subcategory_raw IS NOT NULL
+    `;
+
+    const params: any[] = [category];
+
+    if (store_id) {
+      params.push(store_id);
+      query += ` AND dispensary_id = $${params.length}`;
+    }
+
+    if (in_stock_only === 'true') {
+      query += ` AND is_in_stock = true`;
+    }
+
+    query += `
+      GROUP BY subcategory_raw
+      ORDER BY subcategory_raw
+    `;
+
+    const result = await pool.query(query, params);
+    res.json({
+      category,
+      subcategories: result.rows
+    });
+  } catch (error) {
+    console.error('Error fetching subcategories:', error);
+    res.status(500).json({ error: 'Failed to fetch subcategories' });
+  }
+});
+
+// Get global category summary (across all stores)
+router.get('/summary', async (req, res) => {
+  try {
+    const { state } = req.query;
+
+    let query = `
+      SELECT
+        sp.category_raw as category,
+        COUNT(DISTINCT sp.id) as product_count,
+        COUNT(DISTINCT sp.dispensary_id) as store_count,
+        COUNT(*) FILTER (WHERE sp.is_in_stock = true) as in_stock_count
+      FROM store_products sp
+    `;
+
+    const params: any[] = [];
+
+    if (state) {
+      query += `
+        JOIN dispensaries d ON sp.dispensary_id = d.id
+        WHERE sp.category_raw IS NOT NULL
+          AND d.state = $1
+      `;
+      params.push(state);
+    } else {
+      query += ` WHERE sp.category_raw IS NOT NULL`;
+    }
+
+    query += `
+      GROUP BY sp.category_raw
+      ORDER BY product_count DESC
+    `;
+
+    const result = await pool.query(query, params);
+    res.json({
+      categories: result.rows,
+      total_categories: result.rows.length
+    });
+  } catch (error) {
+    console.error('Error fetching category summary:', error);
+    res.status(500).json({ error: 'Failed to fetch category summary' });
+  }
+});
+
 export default router;
backend/src/routes/deploy-status.ts (new file, 269 lines)
@@ -0,0 +1,269 @@
+import { Router, Request, Response } from 'express';
+import axios from 'axios';
+
+const router = Router();
+
+// Woodpecker API config - uses env vars or falls back
+const WOODPECKER_SERVER = process.env.WOODPECKER_SERVER || 'https://ci.cannabrands.app';
+const WOODPECKER_TOKEN = process.env.WOODPECKER_TOKEN;
+const GITEA_SERVER = process.env.GITEA_SERVER || 'https://code.cannabrands.app';
+const GITEA_TOKEN = process.env.GITEA_TOKEN;
+const REPO_OWNER = 'Creationshop';
+const REPO_NAME = 'dispensary-scraper';
+
+interface PipelineStep {
+  name: string;
+  state: 'pending' | 'running' | 'success' | 'failure' | 'skipped';
+  started?: number;
+  stopped?: number;
+}
+
+interface PipelineInfo {
+  number: number;
+  status: string;
+  event: string;
+  branch: string;
+  message: string;
+  commit: string;
+  author: string;
+  created: number;
+  started?: number;
+  finished?: number;
+  steps?: PipelineStep[];
+}
+
+interface DeployStatusResponse {
+  running: {
+    sha: string;
+    sha_full: string;
+    build_time: string;
+    image_tag: string;
+  };
+  latest: {
+    sha: string;
+    sha_full: string;
+    message: string;
+    author: string;
+    timestamp: string;
+  } | null;
+  is_latest: boolean;
+  commits_behind: number;
+  pipeline: PipelineInfo | null;
+  error?: string;
+}
+
+/**
+ * Fetch latest commit from Gitea
+ */
+async function getLatestCommit(): Promise<{
+  sha: string;
+  message: string;
+  author: string;
+  timestamp: string;
+} | null> {
+  if (!GITEA_TOKEN) {
+    console.warn('[DeployStatus] GITEA_TOKEN not set, skipping latest commit fetch');
+    return null;
+  }
+
+  try {
+    const response = await axios.get(
+      `${GITEA_SERVER}/api/v1/repos/${REPO_OWNER}/${REPO_NAME}/commits?limit=1`,
+      {
+        headers: { Authorization: `token ${GITEA_TOKEN}` },
+        timeout: 5000,
+      }
+    );
+
+    if (response.data && response.data.length > 0) {
+      const commit = response.data[0];
+      return {
+        sha: commit.sha,
+        message: commit.commit?.message?.split('\n')[0] || '',
+        author: commit.commit?.author?.name || commit.author?.login || 'unknown',
+        timestamp: commit.commit?.author?.date || commit.created,
+      };
+    }
+  } catch (error: any) {
+    console.error('[DeployStatus] Failed to fetch latest commit:', error.message);
+  }
+
+  return null;
+}
+
+/**
+ * Fetch latest pipeline from Woodpecker
+ */
+async function getLatestPipeline(): Promise<PipelineInfo | null> {
+  if (!WOODPECKER_TOKEN) {
+    console.warn('[DeployStatus] WOODPECKER_TOKEN not set, skipping pipeline fetch');
+    return null;
+  }
+
+  try {
+    // Get latest pipeline
+    const listResponse = await axios.get(
+      `${WOODPECKER_SERVER}/api/repos/${REPO_OWNER}/${REPO_NAME}/pipelines?page=1&per_page=1`,
+      {
+        headers: { Authorization: `Bearer ${WOODPECKER_TOKEN}` },
+        timeout: 5000,
+      }
+    );
+
+    if (!listResponse.data || listResponse.data.length === 0) {
+      return null;
+    }
+
+    const pipeline = listResponse.data[0];
+
+    // Get pipeline steps
+    let steps: PipelineStep[] = [];
+    try {
+      const stepsResponse = await axios.get(
+        `${WOODPECKER_SERVER}/api/repos/${REPO_OWNER}/${REPO_NAME}/pipelines/${pipeline.number}`,
+        {
+          headers: { Authorization: `Bearer ${WOODPECKER_TOKEN}` },
+          timeout: 5000,
+        }
+      );
+
+      if (stepsResponse.data?.workflows) {
+        for (const workflow of stepsResponse.data.workflows) {
+          if (workflow.children) {
+            for (const step of workflow.children) {
+              steps.push({
+                name: step.name,
+                state: step.state,
+                started: step.start_time,
+                stopped: step.end_time,
+              });
+            }
+          }
+        }
+      }
+    } catch (stepError) {
+      // Steps fetch failed, continue without them
+    }
+
+    return {
+      number: pipeline.number,
+      status: pipeline.status,
+      event: pipeline.event,
+      branch: pipeline.branch,
+      message: pipeline.message?.split('\n')[0] || '',
+      commit: pipeline.commit?.slice(0, 8) || '',
+      author: pipeline.author || 'unknown',
+      created: pipeline.created_at,
+      started: pipeline.started_at,
+      finished: pipeline.finished_at,
+      steps,
+    };
+  } catch (error: any) {
+    console.error('[DeployStatus] Failed to fetch pipeline:', error.message);
+  }
+
+  return null;
+}
+
+/**
+ * Count commits between two SHAs
+ */
+async function countCommitsBetween(fromSha: string, toSha: string): Promise<number> {
+  if (!GITEA_TOKEN || !fromSha || !toSha) return 0;
+  if (fromSha === toSha) return 0;
+
+  try {
+    const response = await axios.get(
+      `${GITEA_SERVER}/api/v1/repos/${REPO_OWNER}/${REPO_NAME}/commits?sha=${toSha}&limit=50`,
+      {
+        headers: { Authorization: `token ${GITEA_TOKEN}` },
+        timeout: 5000,
+      }
+    );
+
+    if (response.data) {
+      const commits = response.data;
+      for (let i = 0; i < commits.length; i++) {
+        if (commits[i].sha.startsWith(fromSha)) {
+          return i;
+        }
+      }
+      // If not found in first 50, assume more than 50 behind
+      return commits.length;
+    }
+  } catch (error: any) {
+    console.error('[DeployStatus] Failed to count commits:', error.message);
+  }
+
+  return 0;
+}
+
+/**
+ * GET /api/admin/deploy-status
+ * Returns deployment status with version comparison and CI info
+ */
+router.get('/', async (req: Request, res: Response) => {
+  try {
+    // Get running version from env vars (set during Docker build)
+    const runningSha = process.env.APP_GIT_SHA || 'unknown';
+    const running = {
+      sha: runningSha.slice(0, 8),
+      sha_full: runningSha,
+      build_time: process.env.APP_BUILD_TIME || new Date().toISOString(),
+      image_tag: process.env.CONTAINER_IMAGE_TAG?.slice(0, 8) || 'local',
+    };
+
+    // Fetch latest commit and pipeline in parallel
+    const [latestCommit, pipeline] = await Promise.all([
+      getLatestCommit(),
+      getLatestPipeline(),
+    ]);
+
+    // Build latest info
+    const latest = latestCommit ? {
+      sha: latestCommit.sha.slice(0, 8),
+      sha_full: latestCommit.sha,
+      message: latestCommit.message,
+      author: latestCommit.author,
+      timestamp: latestCommit.timestamp,
+    } : null;
+
+    // Determine if running latest
+    const isLatest = latest
+      ? runningSha.startsWith(latest.sha_full.slice(0, 8)) ||
+        latest.sha_full.startsWith(runningSha.slice(0, 8))
+      : true;
+
+    // Count commits behind
+    const commitsBehind = isLatest
+      ? 0
+      : await countCommitsBetween(runningSha, latest?.sha_full || '');
+
+    const response: DeployStatusResponse = {
+      running,
+      latest,
+      is_latest: isLatest,
+      commits_behind: commitsBehind,
+      pipeline,
+    };
+
+    res.json(response);
+  } catch (error: any) {
+    console.error('[DeployStatus] Error:', error);
+    res.status(500).json({
+      error: error.message,
+      running: {
+        sha: process.env.APP_GIT_SHA?.slice(0, 8) || 'unknown',
+        sha_full: process.env.APP_GIT_SHA || 'unknown',
+        build_time: process.env.APP_BUILD_TIME || 'unknown',
+        image_tag: process.env.CONTAINER_IMAGE_TAG?.slice(0, 8) || 'local',
+      },
+      latest: null,
+      is_latest: true,
+      commits_behind: 0,
+      pipeline: null,
+    });
+  }
+});
+
+export default router;
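For orientation, a plausible payload from `GET /api/admin/deploy-status` when the pod is one commit behind — every value below is invented:

```ts
const exampleDeployStatus = {
  running: { sha: 'abc12345', sha_full: 'abc12345...', build_time: '2025-12-10T04:12:00Z', image_tag: 'abc12345' },
  latest: { sha: 'def67890', sha_full: 'def67890...', message: 'fix: retry on 403', author: 'dev', timestamp: '2025-12-10T05:01:00Z' },
  is_latest: false,
  commits_behind: 1,
  pipeline: { number: 120, status: 'success', event: 'push', branch: 'main', message: 'fix: retry on 403', commit: 'def67890', author: 'dev', created: 1733806860 },
};
```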
@@ -8,10 +8,12 @@ router.use(authMiddleware);
 // Valid menu_type values
 const VALID_MENU_TYPES = ['dutchie', 'treez', 'jane', 'weedmaps', 'leafly', 'meadow', 'blaze', 'flowhub', 'dispense', 'cova', 'other', 'unknown'];

-// Get all dispensaries
+// Get all dispensaries (with pagination)
 router.get('/', async (req, res) => {
   try {
-    const { menu_type, city, state, crawl_enabled, dutchie_verified } = req.query;
+    const { menu_type, city, state, crawl_enabled, dutchie_verified, status, limit, offset, search } = req.query;
+    const pageLimit = Math.min(parseInt(limit as string) || 50, 500);
+    const pageOffset = parseInt(offset as string) || 0;

     let query = `
       SELECT
@@ -98,15 +100,40 @@ router.get('/', async (req, res) => {
       }
     }

-    if (conditions.length > 0) {
-      query += ` WHERE ${conditions.join(' AND ')}`;
+    // Filter by status (e.g., 'dropped', 'open', 'closed')
+    if (status) {
+      conditions.push(`status = $${params.length + 1}`);
+      params.push(status);
     }

+    // Search filter (name, dba_name, city, company_name)
+    if (search) {
+      conditions.push(`(name ILIKE $${params.length + 1} OR dba_name ILIKE $${params.length + 1} OR city ILIKE $${params.length + 1})`);
+      params.push(`%${search}%`);
+    }
+
+    // Build WHERE clause
+    const whereClause = conditions.length > 0 ? ` WHERE ${conditions.join(' AND ')}` : '';
+
+    // Get total count first
+    const countResult = await pool.query(`SELECT COUNT(*) FROM dispensaries${whereClause}`, params);
+    const total = parseInt(countResult.rows[0].count);
+
+    // Add pagination
+    query += whereClause;
     query += ` ORDER BY name`;
+    query += ` LIMIT $${params.length + 1} OFFSET $${params.length + 2}`;
+    params.push(pageLimit, pageOffset);

     const result = await pool.query(query, params);

-    res.json({ dispensaries: result.rows, total: result.rowCount });
+    res.json({
+      dispensaries: result.rows,
+      total,
+      limit: pageLimit,
+      offset: pageOffset,
+      hasMore: pageOffset + result.rows.length < total
+    });
   } catch (error) {
     console.error('Error fetching dispensaries:', error);
     res.status(500).json({ error: 'Failed to fetch dispensaries' });
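A sketch of walking the new paginated contract from a client — the route prefix is assumed:

```ts
async function fetchAllDispensaries(state: string) {
  const all: any[] = [];
  const limit = 500; // the handler caps pageLimit at 500
  for (let offset = 0; ; offset += limit) {
    const res = await fetch(`/api/dispensaries?state=${state}&limit=${limit}&offset=${offset}`);
    const page = await res.json();
    all.push(...page.dispensaries);
    if (!page.hasMore) return all;
  }
}
```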
@@ -140,6 +167,7 @@ router.get('/stats/crawl-status', async (req, res) => {
       COUNT(*) FILTER (WHERE crawl_enabled = false OR crawl_enabled IS NULL) as disabled_count,
       COUNT(*) FILTER (WHERE dutchie_verified = true) as verified_count,
       COUNT(*) FILTER (WHERE dutchie_verified = false OR dutchie_verified IS NULL) as unverified_count,
+      COUNT(*) FILTER (WHERE status = 'dropped') as dropped_count,
       COUNT(*) as total_count
     FROM dispensaries
   `;
@@ -169,6 +197,34 @@ router.get('/stats/crawl-status', async (req, res) => {
   }
 });

+// Get dropped stores count (for dashboard alert)
+router.get('/stats/dropped', async (req, res) => {
+  try {
+    const result = await pool.query(`
+      SELECT
+        COUNT(*) as dropped_count,
+        json_agg(json_build_object(
+          'id', id,
+          'name', name,
+          'city', city,
+          'state', state,
+          'dropped_at', updated_at
+        ) ORDER BY updated_at DESC) FILTER (WHERE status = 'dropped') as dropped_stores
+      FROM dispensaries
+      WHERE status = 'dropped'
+    `);
+
+    const row = result.rows[0];
+    res.json({
+      dropped_count: parseInt(row.dropped_count) || 0,
+      dropped_stores: row.dropped_stores || []
+    });
+  } catch (error) {
+    console.error('Error fetching dropped stores:', error);
+    res.status(500).json({ error: 'Failed to fetch dropped stores' });
+  }
+});
+
 // Get single dispensary by slug or ID
 router.get('/:slugOrId', async (req, res) => {
   try {
@@ -22,11 +22,17 @@ interface ProductClickEventPayload {
   store_id?: string;
   brand_id?: string;
   campaign_id?: string;
+  dispensary_name?: string;
   action: 'view' | 'open_store' | 'open_product' | 'compare' | 'other';
   source: string;
   page_type?: string; // Page where event occurred (e.g., StoreDetailPage, BrandsIntelligence)
   url_path?: string; // URL path for debugging
   occurred_at?: string;
+  // Visitor location (from frontend IP geolocation)
+  visitor_city?: string;
+  visitor_state?: string;
+  visitor_lat?: number;
+  visitor_lng?: number;
 }

 /**
@@ -77,13 +83,14 @@ router.post('/product-click', optionalAuthMiddleware, async (req: Request, res:
     // Insert the event with enhanced fields
     await pool.query(
       `INSERT INTO product_click_events
-       (product_id, store_id, brand_id, campaign_id, action, source, user_id, ip_address, user_agent, occurred_at, event_type, page_type, url_path, device_type)
-       VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14)`,
+       (product_id, store_id, brand_id, campaign_id, dispensary_name, action, source, user_id, ip_address, user_agent, occurred_at, event_type, page_type, url_path, device_type, visitor_city, visitor_state, visitor_lat, visitor_lng)
+       VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19)`,
       [
         payload.product_id,
         payload.store_id || null,
         payload.brand_id || null,
         payload.campaign_id || null,
+        payload.dispensary_name || null,
         payload.action,
         payload.source,
         userId,
@@ -93,7 +100,11 @@ router.post('/product-click', optionalAuthMiddleware, async (req: Request, res:
|
|||||||
'product_click', // event_type
|
'product_click', // event_type
|
||||||
payload.page_type || null,
|
payload.page_type || null,
|
||||||
payload.url_path || null,
|
payload.url_path || null,
|
||||||
deviceType
|
deviceType,
|
||||||
|
payload.visitor_city || null,
|
||||||
|
payload.visitor_state || null,
|
||||||
|
payload.visitor_lat || null,
|
||||||
|
payload.visitor_lng || null
|
||||||
]
|
]
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|||||||
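A sketch of the richer event a frontend could now send, exercising dispensary_name and the visitor_* geolocation fields; the /api/analytics prefix and all field values are illustrative, not taken from this diff.

// Hypothetical tracking call with the new payload fields.
// The /api/analytics mount point is assumed; only /product-click appears in the diff.
async function trackProductClick() {
  await fetch('/api/analytics/product-click', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      product_id: 'prod_123',           // illustrative IDs
      store_id: 'store_456',
      dispensary_name: 'Example Dispensary',
      action: 'open_product',
      source: 'web',
      page_type: 'StoreDetailPage',
      url_path: '/stores/example',
      visitor_city: 'Phoenix',          // from frontend IP geolocation
      visitor_state: 'AZ',
      visitor_lat: 33.4484,
      visitor_lng: -112.074,
    }),
  });
}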
@@ -45,6 +45,8 @@ interface ApiHealth extends HealthStatus {
   uptime: number;
   timestamp: string;
   version: string;
+  build_sha: string | null;
+  build_time: string | null;
 }

 interface DbHealth extends HealthStatus {
@@ -113,6 +115,8 @@ async function getApiHealth(): Promise<ApiHealth> {
     uptime: Math.floor((Date.now() - serverStartTime) / 1000),
     timestamp: new Date().toISOString(),
     version: packageVersion,
+    build_sha: process.env.APP_GIT_SHA && process.env.APP_GIT_SHA !== 'unknown' ? process.env.APP_GIT_SHA : null,
+    build_time: process.env.APP_BUILD_TIME && process.env.APP_BUILD_TIME !== 'unknown' ? process.env.APP_BUILD_TIME : null,
   };
 }

@@ -138,14 +142,16 @@ async function getDbHealth(): Promise<DbHealth> {

 async function getRedisHealth(): Promise<RedisHealth> {
   const start = Date.now();
+  const isLocal = process.env.NODE_ENV === 'development' || process.env.NODE_ENV === 'local' || !process.env.NODE_ENV;

   // Check if Redis is configured
   if (!process.env.REDIS_URL && !process.env.REDIS_HOST) {
+    // Redis is optional in local dev, required in prod/staging
     return {
-      status: 'ok', // Redis is optional
+      status: isLocal ? 'ok' : 'error',
       connected: false,
       latency_ms: 0,
-      error: 'Redis not configured',
+      error: isLocal ? 'Redis not configured (optional in local)' : 'Redis not configured (required in production)',
     };
   }

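The new behavior reduces to a small pure function over NODE_ENV; a sketch of just that gating logic, assuming any env other than development/local/unset counts as production-like.

// Minimal sketch of the Redis gating introduced in getRedisHealth.
type RedisGate = { status: 'ok' | 'error'; error: string };

function gateUnconfiguredRedis(nodeEnv: string | undefined): RedisGate {
  const isLocal = nodeEnv === 'development' || nodeEnv === 'local' || !nodeEnv;
  return isLocal
    ? { status: 'ok', error: 'Redis not configured (optional in local)' }
    : { status: 'error', error: 'Redis not configured (required in production)' };
}

// gateUnconfiguredRedis(undefined)    -> { status: 'ok', ... }
// gateUnconfiguredRedis('production') -> { status: 'error', ... }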
214 backend/src/routes/image-proxy.ts Normal file
@@ -0,0 +1,214 @@
+/**
+ * Image Proxy Route
+ *
+ * On-demand image resizing service. Serves images with URL-based transforms.
+ *
+ * Usage:
+ *   /img/<path>?w=200&h=200&q=80&fit=cover
+ *
+ * Parameters:
+ *   w - width (pixels)
+ *   h - height (pixels)
+ *   q - quality (1-100, default 80)
+ *   fit - resize fit: cover, contain, fill, inside, outside (default: inside)
+ *   blur - blur sigma (0.3-1000)
+ *   gray - grayscale (1 = enabled)
+ *   format - output format: webp, jpeg, png, avif (default: webp)
+ *
+ * Examples:
+ *   /img/products/az/store/brand/product/image.webp?w=200
+ *   /img/products/az/store/brand/product/image.webp?w=600&h=400&fit=cover
+ *   /img/products/az/store/brand/product/image.webp?w=100&blur=5&gray=1
+ */
+
+import { Router, Request, Response } from 'express';
+import * as fs from 'fs/promises';
+import * as path from 'path';
+// @ts-ignore
+const sharp = require('sharp');
+
+const router = Router();
+
+// Base path for images
+function getImagesBasePath(): string {
+  if (process.env.IMAGES_PATH) {
+    return process.env.IMAGES_PATH;
+  }
+  if (process.env.STORAGE_BASE_PATH) {
+    return path.join(process.env.STORAGE_BASE_PATH, 'images');
+  }
+  return './storage/images';
+}
+
+const IMAGES_BASE_PATH = getImagesBasePath();
+
+// Allowed fit modes
+const ALLOWED_FITS = ['cover', 'contain', 'fill', 'inside', 'outside'] as const;
+type FitMode = typeof ALLOWED_FITS[number];
+
+// Allowed formats
+const ALLOWED_FORMATS = ['webp', 'jpeg', 'jpg', 'png', 'avif'] as const;
+type OutputFormat = typeof ALLOWED_FORMATS[number];
+
+// Cache headers (1 year for immutable content-addressed images)
+const CACHE_MAX_AGE = 31536000; // 1 year in seconds
+
+interface TransformParams {
+  width?: number;
+  height?: number;
+  quality: number;
+  fit: FitMode;
+  blur?: number;
+  grayscale: boolean;
+  format: OutputFormat;
+}
+
+function parseTransformParams(query: any): TransformParams {
+  return {
+    width: query.w ? Math.min(Math.max(parseInt(query.w, 10), 1), 4000) : undefined,
+    height: query.h ? Math.min(Math.max(parseInt(query.h, 10), 1), 4000) : undefined,
+    quality: query.q ? Math.min(Math.max(parseInt(query.q, 10), 1), 100) : 80,
+    fit: ALLOWED_FITS.includes(query.fit) ? query.fit : 'inside',
+    blur: query.blur ? Math.min(Math.max(parseFloat(query.blur), 0.3), 1000) : undefined,
+    grayscale: query.gray === '1' || query.grayscale === '1',
+    format: ALLOWED_FORMATS.includes(query.format) ? query.format : 'webp',
+  };
+}
+
+function getContentType(format: OutputFormat): string {
+  switch (format) {
+    case 'jpeg':
+    case 'jpg':
+      return 'image/jpeg';
+    case 'png':
+      return 'image/png';
+    case 'avif':
+      return 'image/avif';
+    case 'webp':
+    default:
+      return 'image/webp';
+  }
+}
+
+/**
+ * Image proxy endpoint
+ * GET /img/*
+ */
+router.get('/*', async (req: Request, res: Response) => {
+  try {
+    // Get the image path from URL (everything after /img/)
+    const imagePath = req.params[0];
+
+    if (!imagePath) {
+      return res.status(400).json({ error: 'Image path required' });
+    }
+
+    // Security: prevent directory traversal
+    const normalizedPath = path.normalize(imagePath).replace(/^(\.\.(\/|\\|$))+/, '');
+    const basePath = path.resolve(IMAGES_BASE_PATH);
+    const fullPath = path.resolve(path.join(IMAGES_BASE_PATH, normalizedPath));
+
+    // Ensure path is within base directory
+    if (!fullPath.startsWith(basePath)) {
+      console.error(`[ImageProxy] Path traversal attempt: ${fullPath} not in ${basePath}`);
+      return res.status(403).json({ error: 'Access denied' });
+    }
+
+    // Check if file exists
+    try {
+      await fs.access(fullPath);
+    } catch {
+      return res.status(404).json({ error: 'Image not found' });
+    }
+
+    // Parse transform parameters
+    const params = parseTransformParams(req.query);
+
+    // Check if any transforms are requested
+    const hasTransforms = params.width || params.height || params.blur || params.grayscale;
+
+    // Read the original image
+    const imageBuffer = await fs.readFile(fullPath);
+
+    let outputBuffer: Buffer;
+
+    if (hasTransforms) {
+      // Apply transforms
+      let pipeline = sharp(imageBuffer);
+
+      // Resize
+      if (params.width || params.height) {
+        pipeline = pipeline.resize(params.width, params.height, {
+          fit: params.fit,
+          withoutEnlargement: true,
+        });
+      }
+
+      // Blur
+      if (params.blur) {
+        pipeline = pipeline.blur(params.blur);
+      }
+
+      // Grayscale
+      if (params.grayscale) {
+        pipeline = pipeline.grayscale();
+      }
+
+      // Output format
+      switch (params.format) {
+        case 'jpeg':
+        case 'jpg':
+          pipeline = pipeline.jpeg({ quality: params.quality });
+          break;
+        case 'png':
+          pipeline = pipeline.png({ quality: params.quality });
+          break;
+        case 'avif':
+          pipeline = pipeline.avif({ quality: params.quality });
+          break;
+        case 'webp':
+        default:
+          pipeline = pipeline.webp({ quality: params.quality });
+      }
+
+      outputBuffer = await pipeline.toBuffer();
+    } else {
+      // No transforms - serve original (but maybe convert format)
+      if (params.format !== 'webp' || params.quality !== 80) {
+        let pipeline = sharp(imageBuffer);
+        switch (params.format) {
+          case 'jpeg':
+          case 'jpg':
+            pipeline = pipeline.jpeg({ quality: params.quality });
+            break;
+          case 'png':
+            pipeline = pipeline.png({ quality: params.quality });
+            break;
+          case 'avif':
+            pipeline = pipeline.avif({ quality: params.quality });
+            break;
+          case 'webp':
+          default:
+            pipeline = pipeline.webp({ quality: params.quality });
+        }
+        outputBuffer = await pipeline.toBuffer();
+      } else {
+        outputBuffer = imageBuffer;
+      }
+    }
+
+    // Set headers
+    res.setHeader('Content-Type', getContentType(params.format));
+    res.setHeader('Cache-Control', `public, max-age=${CACHE_MAX_AGE}, immutable`);
+    res.setHeader('X-Image-Size', outputBuffer.length);
+
+    // Send image
+    res.send(outputBuffer);
+
+  } catch (error: any) {
+    console.error('[ImageProxy] Error:', error.message);
+    res.status(500).json({ error: 'Failed to process image' });
+  }
+});
+
+export default router;
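Callers can compose transform URLs from the documented query params; a small helper sketch, with the /img prefix taken from the docblock above and the helper itself purely illustrative.

// Hypothetical helper for composing image-proxy URLs.
// Param names (w, h, q, fit, blur, gray, format) mirror the route's documented query params.
function buildImageUrl(
  imagePath: string,
  opts: { w?: number; h?: number; q?: number; fit?: string; blur?: number; gray?: boolean; format?: string } = {},
): string {
  const qs = new URLSearchParams();
  if (opts.w) qs.set('w', String(opts.w));
  if (opts.h) qs.set('h', String(opts.h));
  if (opts.q) qs.set('q', String(opts.q));
  if (opts.fit) qs.set('fit', opts.fit);
  if (opts.blur) qs.set('blur', String(opts.blur));
  if (opts.gray) qs.set('gray', '1');
  if (opts.format) qs.set('format', opts.format);
  const query = qs.toString();
  return `/img/${imagePath}${query ? `?${query}` : ''}`;
}

// buildImageUrl('products/az/store/brand/product/image.webp', { w: 600, h: 400, fit: 'cover' })
// -> '/img/products/az/store/brand/product/image.webp?w=600&h=400&fit=cover'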
@@ -14,35 +14,56 @@ router.use(authMiddleware);
 /**
  * GET /api/admin/intelligence/brands
  * List all brands with state presence, store counts, and pricing
+ * Query params:
+ * - state: Filter by state (e.g., "AZ")
+ * - limit: Max results (default 500)
+ * - offset: Pagination offset
  */
 router.get('/brands', async (req: Request, res: Response) => {
   try {
-    const { limit = '500', offset = '0' } = req.query;
+    const { limit = '500', offset = '0', state } = req.query;
     const limitNum = Math.min(parseInt(limit as string, 10), 1000);
     const offsetNum = parseInt(offset as string, 10);
+
+    // Build WHERE clause based on state filter
+    let stateFilter = '';
+    const params: any[] = [limitNum, offsetNum];
+    if (state && state !== 'all') {
+      stateFilter = 'AND d.state = $3';
+      params.push(state);
+    }
+
     const { rows } = await pool.query(`
       SELECT
         sp.brand_name_raw as brand_name,
         array_agg(DISTINCT d.state) FILTER (WHERE d.state IS NOT NULL) as states,
         COUNT(DISTINCT d.id) as store_count,
         COUNT(DISTINCT sp.id) as sku_count,
-        ROUND(AVG(sp.price_rec)::numeric, 2) FILTER (WHERE sp.price_rec > 0) as avg_price_rec,
-        ROUND(AVG(sp.price_med)::numeric, 2) FILTER (WHERE sp.price_med > 0) as avg_price_med
+        ROUND(AVG(sp.price_rec) FILTER (WHERE sp.price_rec > 0)::numeric, 2) as avg_price_rec,
+        ROUND(AVG(sp.price_med) FILTER (WHERE sp.price_med > 0)::numeric, 2) as avg_price_med
       FROM store_products sp
       JOIN dispensaries d ON sp.dispensary_id = d.id
       WHERE sp.brand_name_raw IS NOT NULL AND sp.brand_name_raw != ''
+      ${stateFilter}
       GROUP BY sp.brand_name_raw
       ORDER BY store_count DESC, sku_count DESC
       LIMIT $1 OFFSET $2
-    `, [limitNum, offsetNum]);
+    `, params);

-    // Get total count
+    // Get total count with same state filter
+    const countParams: any[] = [];
+    let countStateFilter = '';
+    if (state && state !== 'all') {
+      countStateFilter = 'AND d.state = $1';
+      countParams.push(state);
+    }
     const { rows: countRows } = await pool.query(`
-      SELECT COUNT(DISTINCT brand_name_raw) as total
-      FROM store_products
-      WHERE brand_name_raw IS NOT NULL AND brand_name_raw != ''
-    `);
+      SELECT COUNT(DISTINCT sp.brand_name_raw) as total
+      FROM store_products sp
+      JOIN dispensaries d ON sp.dispensary_id = d.id
+      WHERE sp.brand_name_raw IS NOT NULL AND sp.brand_name_raw != ''
+      ${countStateFilter}
+    `, countParams);

     res.json({
       brands: rows.map((r: any) => ({
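Against the new state filter, an admin UI call might look like this; the path comes from the route's docblock, while the bearer-token header is an assumption since the diff only shows authMiddleware in front of the router.

// Hypothetical admin-UI call using the new state query param.
async function fetchBrands(state: string, token: string) {
  const res = await fetch(`/api/admin/intelligence/brands?state=${encodeURIComponent(state)}&limit=100`, {
    headers: { Authorization: `Bearer ${token}` }, // assumed auth scheme
  });
  if (!res.ok) throw new Error(`Brands request failed: ${res.status}`);
  return res.json();
}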
@@ -147,29 +168,63 @@ router.get('/brands/:brandName/penetration', async (req: Request, res: Response)
 /**
  * GET /api/admin/intelligence/pricing
  * Get pricing analytics by category
+ * Query params:
+ * - state: Filter by state (e.g., "AZ")
  */
 router.get('/pricing', async (req: Request, res: Response) => {
   try {
-    const { rows: categoryRows } = await pool.query(`
+    const { state } = req.query;
+
+    // Build WHERE clause based on state filter
+    let stateFilter = '';
+    const categoryParams: any[] = [];
+    const stateQueryParams: any[] = [];
+    const overallParams: any[] = [];
+
+    if (state && state !== 'all') {
+      stateFilter = 'AND d.state = $1';
+      categoryParams.push(state);
+      overallParams.push(state);
+    }
+
+    // Category pricing with optional state filter
+    const categoryQuery = state && state !== 'all'
+      ? `
       SELECT
         sp.category_raw as category,
         ROUND(AVG(sp.price_rec)::numeric, 2) as avg_price,
-        MIN(sp.price_rec) FILTER (WHERE sp.price_rec > 0) as min_price,
+        MIN(sp.price_rec) as min_price,
         MAX(sp.price_rec) as max_price,
-        ROUND(PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY sp.price_rec)::numeric, 2)
-          FILTER (WHERE sp.price_rec > 0) as median_price,
+        ROUND(PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY sp.price_rec)::numeric, 2) as median_price,
+        COUNT(*) as product_count
+      FROM store_products sp
+      JOIN dispensaries d ON sp.dispensary_id = d.id
+      WHERE sp.category_raw IS NOT NULL AND sp.price_rec > 0 ${stateFilter}
+      GROUP BY sp.category_raw
+      ORDER BY product_count DESC
+      `
+      : `
+      SELECT
+        sp.category_raw as category,
+        ROUND(AVG(sp.price_rec)::numeric, 2) as avg_price,
+        MIN(sp.price_rec) as min_price,
+        MAX(sp.price_rec) as max_price,
+        ROUND(PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY sp.price_rec)::numeric, 2) as median_price,
         COUNT(*) as product_count
       FROM store_products sp
       WHERE sp.category_raw IS NOT NULL AND sp.price_rec > 0
       GROUP BY sp.category_raw
       ORDER BY product_count DESC
-    `);
+      `;
+
+    const { rows: categoryRows } = await pool.query(categoryQuery, categoryParams);
+
+    // State pricing
     const { rows: stateRows } = await pool.query(`
       SELECT
         d.state,
         ROUND(AVG(sp.price_rec)::numeric, 2) as avg_price,
-        MIN(sp.price_rec) FILTER (WHERE sp.price_rec > 0) as min_price,
+        MIN(sp.price_rec) as min_price,
         MAX(sp.price_rec) as max_price,
         COUNT(DISTINCT sp.id) as product_count
       FROM store_products sp
@@ -179,6 +234,31 @@ router.get('/pricing', async (req: Request, res: Response) => {
       ORDER BY avg_price DESC
     `);

+    // Overall stats with optional state filter
+    const overallQuery = state && state !== 'all'
+      ? `
+      SELECT
+        ROUND(AVG(sp.price_rec)::numeric, 2) as avg_price,
+        MIN(sp.price_rec) as min_price,
+        MAX(sp.price_rec) as max_price,
+        COUNT(*) as total_products
+      FROM store_products sp
+      JOIN dispensaries d ON sp.dispensary_id = d.id
+      WHERE sp.price_rec > 0 ${stateFilter}
+      `
+      : `
+      SELECT
+        ROUND(AVG(sp.price_rec)::numeric, 2) as avg_price,
+        MIN(sp.price_rec) as min_price,
+        MAX(sp.price_rec) as max_price,
+        COUNT(*) as total_products
+      FROM store_products sp
+      WHERE sp.price_rec > 0
+      `;
+
+    const { rows: overallRows } = await pool.query(overallQuery, overallParams);
+    const overall = overallRows[0];
+
     res.json({
       byCategory: categoryRows.map((r: any) => ({
         category: r.category,
@@ -195,6 +275,12 @@ router.get('/pricing', async (req: Request, res: Response) => {
         maxPrice: r.max_price ? parseFloat(r.max_price) : null,
         productCount: parseInt(r.product_count, 10),
       })),
+      overall: {
+        avgPrice: overall?.avg_price ? parseFloat(overall.avg_price) : null,
+        minPrice: overall?.min_price ? parseFloat(overall.min_price) : null,
+        maxPrice: overall?.max_price ? parseFloat(overall.max_price) : null,
+        totalProducts: parseInt(overall?.total_products || '0', 10),
+      },
     });
   } catch (error: any) {
     console.error('[Intelligence] Error fetching pricing:', error.message);
@@ -205,9 +291,23 @@ router.get('/pricing', async (req: Request, res: Response) => {
 /**
  * GET /api/admin/intelligence/stores
  * Get store intelligence summary
+ * Query params:
+ * - state: Filter by state (e.g., "AZ")
+ * - limit: Max results (default 200)
  */
 router.get('/stores', async (req: Request, res: Response) => {
   try {
+    const { state, limit = '200' } = req.query;
+    const limitNum = Math.min(parseInt(limit as string, 10), 500);
+
+    // Build WHERE clause based on state filter
+    let stateFilter = '';
+    const params: any[] = [limitNum];
+    if (state && state !== 'all') {
+      stateFilter = 'AND d.state = $2';
+      params.push(state);
+    }
+
     const { rows: storeRows } = await pool.query(`
       SELECT
         d.id,
@@ -217,17 +317,22 @@ router.get('/stores', async (req: Request, res: Response) => {
         d.state,
         d.menu_type,
         d.crawl_enabled,
-        COUNT(DISTINCT sp.id) as product_count,
+        c.name as chain_name,
+        COUNT(DISTINCT sp.id) as sku_count,
         COUNT(DISTINCT sp.brand_name_raw) as brand_count,
         ROUND(AVG(sp.price_rec)::numeric, 2) as avg_price,
-        MAX(sp.updated_at) as last_product_update
+        MAX(sp.updated_at) as last_crawl,
+        (SELECT COUNT(*) FROM store_product_snapshots sps
+         WHERE sps.store_product_id IN (SELECT id FROM store_products WHERE dispensary_id = d.id)) as snapshot_count
       FROM dispensaries d
       LEFT JOIN store_products sp ON sp.dispensary_id = d.id
-      WHERE d.state IS NOT NULL
-      GROUP BY d.id, d.name, d.dba_name, d.city, d.state, d.menu_type, d.crawl_enabled
-      ORDER BY product_count DESC
-      LIMIT 200
-    `);
+      LEFT JOIN chains c ON d.chain_id = c.id
+      WHERE d.state IS NOT NULL AND d.crawl_enabled = true
+      ${stateFilter}
+      GROUP BY d.id, d.name, d.dba_name, d.city, d.state, d.menu_type, d.crawl_enabled, c.name
+      ORDER BY sku_count DESC
+      LIMIT $1
+    `, params);

     res.json({
       stores: storeRows.map((r: any) => ({
@@ -238,10 +343,13 @@ router.get('/stores', async (req: Request, res: Response) => {
         state: r.state,
         menuType: r.menu_type,
         crawlEnabled: r.crawl_enabled,
-        productCount: parseInt(r.product_count || '0', 10),
+        chainName: r.chain_name || null,
+        skuCount: parseInt(r.sku_count || '0', 10),
+        snapshotCount: parseInt(r.snapshot_count || '0', 10),
         brandCount: parseInt(r.brand_count || '0', 10),
         avgPrice: r.avg_price ? parseFloat(r.avg_price) : null,
-        lastProductUpdate: r.last_product_update,
+        lastCrawl: r.last_crawl,
+        crawlFrequencyHours: 4, // Default crawl frequency
       })),
       total: storeRows.length,
     });
@@ -143,6 +143,152 @@ router.get('/', async (req: Request, res: Response) => {
   }
 });

+/**
+ * GET /api/job-queue/available - List dispensaries available for crawling
+ * Query: { state_code?: string, limit?: number }
+ * NOTE: Must be defined BEFORE /:id route to avoid conflict
+ */
+router.get('/available', async (req: Request, res: Response) => {
+  try {
+    const { state_code, limit = '100' } = req.query;
+
+    let query = `
+      SELECT
+        d.id,
+        d.name,
+        d.city,
+        s.code as state_code,
+        d.platform_dispensary_id,
+        d.crawl_enabled,
+        (SELECT MAX(created_at) FROM dispensary_crawl_jobs WHERE dispensary_id = d.id AND status = 'completed') as last_crawl,
+        EXISTS (
+          SELECT 1 FROM dispensary_crawl_jobs
+          WHERE dispensary_id = d.id AND status IN ('pending', 'running')
+        ) as has_pending_job
+      FROM dispensaries d
+      LEFT JOIN states s ON s.id = d.state_id
+      WHERE d.crawl_enabled = true
+        AND d.platform_dispensary_id IS NOT NULL
+    `;
+    const params: any[] = [];
+    let paramIndex = 1;
+
+    if (state_code) {
+      params.push((state_code as string).toUpperCase());
+      query += ` AND s.code = $${paramIndex++}`;
+    }
+
+    query += ` ORDER BY d.name LIMIT $${paramIndex}`;
+    params.push(parseInt(limit as string));
+
+    const { rows } = await pool.query(query, params);
+
+    // Get counts by state
+    const { rows: stateCounts } = await pool.query(`
+      SELECT s.code, COUNT(*) as count
+      FROM dispensaries d
+      JOIN states s ON s.id = d.state_id
+      WHERE d.crawl_enabled = true
+        AND d.platform_dispensary_id IS NOT NULL
+      GROUP BY s.code
+      ORDER BY count DESC
+    `);
+
+    res.json({
+      success: true,
+      dispensaries: rows,
+      total: rows.length,
+      by_state: stateCounts
+    });
+  } catch (error: any) {
+    console.error('[JobQueue] Error listing available:', error);
+    res.status(500).json({ success: false, error: error.message });
+  }
+});
+
+/**
+ * GET /api/job-queue/history - Get recent job history with results
+ * Query: { state_code?: string, status?: string, limit?: number, hours?: number }
+ * NOTE: Must be defined BEFORE /:id route to avoid conflict
+ */
+router.get('/history', async (req: Request, res: Response) => {
+  try {
+    const {
+      state_code,
+      status,
+      limit = '50',
+      hours = '24'
+    } = req.query;
+
+    let query = `
+      SELECT
+        j.id,
+        j.dispensary_id,
+        d.name as dispensary_name,
+        s.code as state_code,
+        j.job_type,
+        j.status,
+        j.products_found,
+        j.error_message,
+        j.started_at,
+        j.completed_at,
+        j.duration_ms,
+        j.created_at
+      FROM dispensary_crawl_jobs j
+      LEFT JOIN dispensaries d ON d.id = j.dispensary_id
+      LEFT JOIN states s ON s.id = d.state_id
+      WHERE j.created_at > NOW() - INTERVAL '${parseInt(hours as string)} hours'
+    `;
+    const params: any[] = [];
+    let paramIndex = 1;
+
+    if (status && status !== 'all') {
+      params.push(status);
+      query += ` AND j.status = $${paramIndex++}`;
+    }
+
+    if (state_code) {
+      params.push((state_code as string).toUpperCase());
+      query += ` AND s.code = $${paramIndex++}`;
+    }
+
+    query += ` ORDER BY j.created_at DESC LIMIT $${paramIndex}`;
+    params.push(parseInt(limit as string));
+
+    const { rows } = await pool.query(query, params);
+
+    // Get summary stats
+    const { rows: stats } = await pool.query(`
+      SELECT
+        COUNT(*) FILTER (WHERE status = 'completed') as completed,
+        COUNT(*) FILTER (WHERE status = 'failed') as failed,
+        COUNT(*) FILTER (WHERE status = 'running') as running,
+        COUNT(*) FILTER (WHERE status = 'pending') as pending,
+        SUM(products_found) FILTER (WHERE status = 'completed') as total_products,
+        AVG(duration_ms) FILTER (WHERE status = 'completed') as avg_duration_ms
+      FROM dispensary_crawl_jobs
+      WHERE created_at > NOW() - INTERVAL '${parseInt(hours as string)} hours'
+    `);
+
+    res.json({
+      success: true,
+      jobs: rows,
+      summary: {
+        completed: parseInt(stats[0].completed) || 0,
+        failed: parseInt(stats[0].failed) || 0,
+        running: parseInt(stats[0].running) || 0,
+        pending: parseInt(stats[0].pending) || 0,
+        total_products: parseInt(stats[0].total_products) || 0,
+        avg_duration_ms: Math.round(parseFloat(stats[0].avg_duration_ms)) || null
+      },
+      hours: parseInt(hours as string)
+    });
+  } catch (error: any) {
+    console.error('[JobQueue] Error getting history:', error);
+    res.status(500).json({ success: false, error: error.message });
+  }
+});
+
 /**
  * GET /api/job-queue/stats - Queue statistics
  */
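A dashboard might poll the new history endpoint like this; path and query params mirror the docblock, the wrapper itself is illustrative.

// Hypothetical poller for the job history endpoint.
async function fetchJobHistory(stateCode?: string, hours = 24) {
  const qs = new URLSearchParams({ hours: String(hours), limit: '50' });
  if (stateCode) qs.set('state_code', stateCode);
  const res = await fetch(`/api/job-queue/history?${qs}`);
  if (!res.ok) throw new Error(`History request failed: ${res.status}`);
  const { jobs, summary } = await res.json();
  return { jobs, summary }; // summary carries completed/failed/running/pending counts
}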
@@ -397,6 +543,9 @@ router.post('/bulk-priority', async (req: Request, res: Response) => {

 /**
  * POST /api/job-queue/enqueue - Add a new job to the queue
+ *
+ * 2024-12-10: Rewired to use worker_tasks via taskService.
+ * Legacy dispensary_crawl_jobs code commented out below.
  */
 router.post('/enqueue', async (req: Request, res: Response) => {
   try {
@@ -406,6 +555,59 @@ router.post('/enqueue', async (req: Request, res: Response) => {
       return res.status(400).json({ success: false, error: 'dispensary_id is required' });
     }

+    // 2024-12-10: Map legacy job_type to new task role
+    const roleMap: Record<string, string> = {
+      'dutchie_product_crawl': 'product_refresh',
+      'menu_detection': 'entry_point_discovery',
+      'menu_detection_single': 'entry_point_discovery',
+      'product_discovery': 'product_discovery',
+      'store_discovery': 'store_discovery',
+    };
+    const role = roleMap[job_type] || 'product_refresh';
+
+    // 2024-12-10: Use taskService to create task in worker_tasks table
+    const { taskService } = await import('../tasks/task-service');
+
+    // Check if task already pending for this dispensary
+    const existingTasks = await taskService.listTasks({
+      dispensary_id,
+      role: role as any,
+      status: ['pending', 'claimed', 'running'],
+      limit: 1,
+    });
+
+    if (existingTasks.length > 0) {
+      return res.json({
+        success: true,
+        task_id: existingTasks[0].id,
+        message: 'Task already queued'
+      });
+    }
+
+    const task = await taskService.createTask({
+      role: role as any,
+      dispensary_id,
+      priority,
+    });
+
+    res.json({ success: true, task_id: task.id, message: 'Task enqueued' });
+  } catch (error: any) {
+    console.error('[JobQueue] Error enqueuing task:', error);
+    res.status(500).json({ success: false, error: error.message });
+  }
+});
+
+/*
+ * LEGACY CODE - 2024-12-10: Commented out, was using orphaned dispensary_crawl_jobs table
+ *
+router.post('/enqueue', async (req: Request, res: Response) => {
+  try {
+    const { dispensary_id, job_type = 'dutchie_product_crawl', priority = 0 } = req.body;
+
+    if (!dispensary_id) {
+      return res.status(400).json({ success: false, error: 'dispensary_id is required' });
+    }
+
     // Check if job already pending for this dispensary
     const existing = await pool.query(`
       SELECT id FROM dispensary_crawl_jobs
@@ -439,6 +641,7 @@ router.post('/enqueue', async (req: Request, res: Response) => {
     res.status(500).json({ success: false, error: error.message });
   }
 });
+*/

 /**
  * POST /api/job-queue/pause - Pause queue processing
@@ -463,5 +666,167 @@ router.get('/paused', async (_req: Request, res: Response) => {
   res.json({ success: true, queue_paused: queuePaused });
 });

+/**
+ * POST /api/job-queue/enqueue-batch - Queue multiple dispensaries at once
+ * Body: { dispensary_ids: number[], job_type?: string, priority?: number }
+ *
+ * 2024-12-10: Rewired to use worker_tasks via taskService.
+ */
+router.post('/enqueue-batch', async (req: Request, res: Response) => {
+  try {
+    const { dispensary_ids, job_type = 'dutchie_product_crawl', priority = 0 } = req.body;
+
+    if (!Array.isArray(dispensary_ids) || dispensary_ids.length === 0) {
+      return res.status(400).json({ success: false, error: 'dispensary_ids array is required' });
+    }
+
+    if (dispensary_ids.length > 500) {
+      return res.status(400).json({ success: false, error: 'Maximum 500 dispensaries per batch' });
+    }
+
+    // 2024-12-10: Map legacy job_type to new task role
+    const roleMap: Record<string, string> = {
+      'dutchie_product_crawl': 'product_refresh',
+      'menu_detection': 'entry_point_discovery',
+      'product_discovery': 'product_discovery',
+    };
+    const role = roleMap[job_type] || 'product_refresh';
+
+    // 2024-12-10: Use taskService to create tasks in worker_tasks table
+    const { taskService } = await import('../tasks/task-service');
+
+    const tasks = dispensary_ids.map(dispensary_id => ({
+      role: role as any,
+      dispensary_id,
+      priority,
+    }));
+
+    const createdCount = await taskService.createTasks(tasks);
+
+    res.json({
+      success: true,
+      queued: createdCount,
+      requested: dispensary_ids.length,
+      message: `Queued ${createdCount} of ${dispensary_ids.length} dispensaries`
+    });
+  } catch (error: any) {
+    console.error('[JobQueue] Error batch enqueuing:', error);
+    res.status(500).json({ success: false, error: error.message });
+  }
+});
+
+/**
+ * POST /api/job-queue/enqueue-state - Queue all crawl-enabled dispensaries for a state
+ * Body: { state_code: string, job_type?: string, priority?: number, limit?: number }
+ *
+ * 2024-12-10: Rewired to use worker_tasks via taskService.
+ */
+router.post('/enqueue-state', async (req: Request, res: Response) => {
+  try {
+    const { state_code, job_type = 'dutchie_product_crawl', priority = 0, limit = 200 } = req.body;
+
+    if (!state_code) {
+      return res.status(400).json({ success: false, error: 'state_code is required (e.g., "AZ")' });
+    }
+
+    // 2024-12-10: Map legacy job_type to new task role
+    const roleMap: Record<string, string> = {
+      'dutchie_product_crawl': 'product_refresh',
+      'menu_detection': 'entry_point_discovery',
+      'product_discovery': 'product_discovery',
+    };
+    const role = roleMap[job_type] || 'product_refresh';
+
+    // Get dispensary IDs for the state
+    const dispensaryResult = await pool.query(`
+      SELECT d.id
+      FROM dispensaries d
+      JOIN states s ON s.id = d.state_id
+      WHERE s.code = $1
+        AND d.crawl_enabled = true
+        AND d.platform_dispensary_id IS NOT NULL
+      LIMIT $2
+    `, [state_code.toUpperCase(), limit]);
+
+    const dispensary_ids = dispensaryResult.rows.map((r: any) => r.id);
+
+    // 2024-12-10: Use taskService to create tasks in worker_tasks table
+    const { taskService } = await import('../tasks/task-service');
+
+    const tasks = dispensary_ids.map((dispensary_id: number) => ({
+      role: role as any,
+      dispensary_id,
+      priority,
+    }));
+
+    const createdCount = await taskService.createTasks(tasks);
+
+    // Get total available count
+    const countResult = await pool.query(`
+      SELECT COUNT(*) as total
+      FROM dispensaries d
+      JOIN states s ON s.id = d.state_id
+      WHERE s.code = $1
+        AND d.crawl_enabled = true
+        AND d.platform_dispensary_id IS NOT NULL
+    `, [state_code.toUpperCase()]);
+
+    res.json({
+      success: true,
+      queued: createdCount,
+      total_available: parseInt(countResult.rows[0].total),
+      state: state_code.toUpperCase(),
+      role,
+      message: `Queued ${createdCount} dispensaries for ${state_code.toUpperCase()}`
+    });
+  } catch (error: any) {
+    console.error('[JobQueue] Error enqueuing state:', error);
+    res.status(500).json({ success: false, error: error.message });
+  }
+});
+
+/**
+ * POST /api/job-queue/clear-pending - Clear all pending jobs (optionally filtered)
+ * Body: { state_code?: string, job_type?: string }
+ */
+router.post('/clear-pending', async (req: Request, res: Response) => {
+  try {
+    const { state_code, job_type } = req.body;
+
+    let query = `
+      UPDATE dispensary_crawl_jobs
+      SET status = 'cancelled', completed_at = NOW(), updated_at = NOW()
+      WHERE status = 'pending'
+    `;
+    const params: any[] = [];
+    let paramIndex = 1;
+
+    if (job_type) {
+      params.push(job_type);
+      query += ` AND job_type = $${paramIndex++}`;
+    }
+
+    if (state_code) {
+      params.push((state_code as string).toUpperCase());
+      query += ` AND dispensary_id IN (
+        SELECT d.id FROM dispensaries d
+        JOIN states s ON s.id = d.state_id
+        WHERE s.code = $${paramIndex++}
+      )`;
+    }
+
+    const result = await pool.query(query, params);
+
+    res.json({
+      success: true,
+      cleared: result.rowCount,
+      message: `Cancelled ${result.rowCount} pending jobs`
+    });
+  } catch (error: any) {
+    console.error('[JobQueue] Error clearing pending:', error);
+    res.status(500).json({ success: false, error: error.message });
+  }
+});
+
 export default router;
 export { queuePaused };
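Queueing an entire state from a script could look like the sketch below; the body shape follows the route's docblock, and the priority value is illustrative.

// Hypothetical one-off script call to the batch state enqueue route.
async function enqueueState(stateCode: string) {
  const res = await fetch('/api/job-queue/enqueue-state', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ state_code: stateCode, job_type: 'dutchie_product_crawl', priority: 5 }),
  });
  const body = await res.json();
  console.log(body.message); // e.g. "Queued 120 dispensaries for AZ"
  return body;
}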
140 backend/src/routes/k8s.ts Normal file
@@ -0,0 +1,140 @@
+/**
+ * Kubernetes Control Routes
+ *
+ * Provides admin UI control over k8s resources like worker scaling.
+ * Uses in-cluster config when running in k8s, or kubeconfig locally.
+ */
+
+import { Router, Request, Response } from 'express';
+import * as k8s from '@kubernetes/client-node';
+
+const router = Router();
+
+// K8s client setup - lazy initialization
+let appsApi: k8s.AppsV1Api | null = null;
+let k8sError: string | null = null;
+
+function getK8sClient(): k8s.AppsV1Api | null {
+  if (appsApi) return appsApi;
+  if (k8sError) return null;
+
+  try {
+    const kc = new k8s.KubeConfig();
+
+    // Try in-cluster config first (when running in k8s)
+    try {
+      kc.loadFromCluster();
+      console.log('[K8s] Loaded in-cluster config');
+    } catch {
+      // Fall back to default kubeconfig (local dev)
+      try {
+        kc.loadFromDefault();
+        console.log('[K8s] Loaded default kubeconfig');
+      } catch (e) {
+        k8sError = 'No k8s config available';
+        console.log('[K8s] No config available - k8s routes disabled');
+        return null;
+      }
+    }
+
+    appsApi = kc.makeApiClient(k8s.AppsV1Api);
+    return appsApi;
+  } catch (e: any) {
+    k8sError = e.message;
+    console.error('[K8s] Failed to initialize client:', e.message);
+    return null;
+  }
+}
+
+const NAMESPACE = process.env.K8S_NAMESPACE || 'dispensary-scraper';
+const WORKER_DEPLOYMENT = 'scraper-worker';
+
+/**
+ * GET /api/k8s/workers
+ * Get current worker deployment status
+ */
+router.get('/workers', async (_req: Request, res: Response) => {
+  const client = getK8sClient();
+
+  if (!client) {
+    return res.json({
+      success: true,
+      available: false,
+      error: k8sError || 'K8s not available',
+      replicas: 0,
+      readyReplicas: 0,
+    });
+  }
+
+  try {
+    const deployment = await client.readNamespacedDeployment({
+      name: WORKER_DEPLOYMENT,
+      namespace: NAMESPACE,
+    });
+
+    res.json({
+      success: true,
+      available: true,
+      replicas: deployment.spec?.replicas || 0,
+      readyReplicas: deployment.status?.readyReplicas || 0,
+      availableReplicas: deployment.status?.availableReplicas || 0,
+      updatedReplicas: deployment.status?.updatedReplicas || 0,
+    });
+  } catch (e: any) {
+    console.error('[K8s] Error getting deployment:', e.message);
+    res.status(500).json({
+      success: false,
+      error: e.message,
+    });
+  }
+});
+
+/**
+ * POST /api/k8s/workers/scale
+ * Scale worker deployment
+ * Body: { replicas: number }
+ */
+router.post('/workers/scale', async (req: Request, res: Response) => {
+  const client = getK8sClient();
+
+  if (!client) {
+    return res.status(503).json({
+      success: false,
+      error: k8sError || 'K8s not available',
+    });
+  }
+
+  const { replicas } = req.body;
+
+  if (typeof replicas !== 'number' || replicas < 0 || replicas > 50) {
+    return res.status(400).json({
+      success: false,
+      error: 'replicas must be a number between 0 and 50',
+    });
+  }
+
+  try {
+    // Patch the deployment to set replicas
+    await client.patchNamespacedDeploymentScale({
+      name: WORKER_DEPLOYMENT,
+      namespace: NAMESPACE,
+      body: { spec: { replicas } },
+    });
+
+    console.log(`[K8s] Scaled ${WORKER_DEPLOYMENT} to ${replicas} replicas`);
+
+    res.json({
+      success: true,
+      replicas,
+      message: `Scaled to ${replicas} workers`,
+    });
+  } catch (e: any) {
+    console.error('[K8s] Error scaling deployment:', e.message);
+    res.status(500).json({
+      success: false,
+      error: e.message,
+    });
+  }
+});
+
+export default router;
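Driving the scale route from an admin UI might look like this; the 0-50 bound mirrors the route's own validation, and the helper itself is illustrative.

// Hypothetical admin-UI helper for the worker scale endpoint.
async function scaleWorkers(replicas: number) {
  if (replicas < 0 || replicas > 50) throw new Error('replicas must be between 0 and 50'); // matches server-side check
  const res = await fetch('/api/k8s/workers/scale', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ replicas }),
  });
  if (!res.ok) throw new Error(`Scale request failed: ${res.status}`);
  return res.json(); // { success: true, replicas, message: 'Scaled to N workers' }
}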
@@ -291,6 +291,107 @@ router.get('/stores/:id/summary', async (req: Request, res: Response) => {
   }
 });

+/**
+ * GET /api/markets/stores/:id/crawl-history
+ * Get crawl history for a specific store
+ */
+router.get('/stores/:id/crawl-history', async (req: Request, res: Response) => {
+  try {
+    const { id } = req.params;
+    const { limit = '50' } = req.query;
+    const dispensaryId = parseInt(id, 10);
+    const limitNum = Math.min(parseInt(limit as string, 10), 100);
+
+    // Get crawl history from crawl_orchestration_traces
+    const { rows: historyRows } = await pool.query(`
+      SELECT
+        id,
+        run_id,
+        profile_key,
+        crawler_module,
+        state_at_start,
+        state_at_end,
+        total_steps,
+        duration_ms,
+        success,
+        error_message,
+        products_found,
+        started_at,
+        completed_at
+      FROM crawl_orchestration_traces
+      WHERE dispensary_id = $1
+      ORDER BY started_at DESC
+      LIMIT $2
+    `, [dispensaryId, limitNum]);
+
+    // Get next scheduled crawl if available
+    const { rows: scheduleRows } = await pool.query(`
+      SELECT
+        js.id as schedule_id,
+        js.job_name,
+        js.enabled,
+        js.base_interval_minutes,
+        js.jitter_minutes,
+        js.next_run_at,
+        js.last_run_at,
+        js.last_status
+      FROM job_schedules js
+      WHERE js.enabled = true
+        AND js.job_config->>'dispensaryId' = $1::text
+      ORDER BY js.next_run_at
+      LIMIT 1
+    `, [dispensaryId.toString()]);
+
+    // Get dispensary info for slug
+    const { rows: dispRows } = await pool.query(`
+      SELECT
+        id,
+        name,
+        dba_name,
+        slug,
+        state,
+        city,
+        menu_type,
+        platform_dispensary_id,
+        last_menu_scrape
+      FROM dispensaries
+      WHERE id = $1
+    `, [dispensaryId]);
+
+    res.json({
+      dispensary: dispRows[0] || null,
+      history: historyRows.map(row => ({
+        id: row.id,
+        runId: row.run_id,
+        profileKey: row.profile_key,
+        crawlerModule: row.crawler_module,
+        stateAtStart: row.state_at_start,
+        stateAtEnd: row.state_at_end,
+        totalSteps: row.total_steps,
+        durationMs: row.duration_ms,
+        success: row.success,
+        errorMessage: row.error_message,
+        productsFound: row.products_found,
+        startedAt: row.started_at?.toISOString() || null,
+        completedAt: row.completed_at?.toISOString() || null,
+      })),
+      nextSchedule: scheduleRows[0] ? {
+        scheduleId: scheduleRows[0].schedule_id,
+        jobName: scheduleRows[0].job_name,
+        enabled: scheduleRows[0].enabled,
+        baseIntervalMinutes: scheduleRows[0].base_interval_minutes,
+        jitterMinutes: scheduleRows[0].jitter_minutes,
+        nextRunAt: scheduleRows[0].next_run_at?.toISOString() || null,
+        lastRunAt: scheduleRows[0].last_run_at?.toISOString() || null,
+        lastStatus: scheduleRows[0].last_status,
+      } : null,
+    });
+  } catch (error: any) {
+    console.error('[Markets] Error fetching crawl history:', error.message);
+    res.status(500).json({ error: error.message });
+  }
+});
+
 /**
  * GET /api/markets/stores/:id/products
  * Get products for a store with filtering and pagination
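A store-detail page could pull the new history as sketched below; the path comes from the route's docblock, everything else is illustrative.

// Hypothetical call for a store's crawl history.
async function fetchCrawlHistory(storeId: number, limit = 20) {
  const res = await fetch(`/api/markets/stores/${storeId}/crawl-history?limit=${limit}`);
  if (!res.ok) throw new Error(`Crawl history request failed: ${res.status}`);
  const { history, nextSchedule } = await res.json();
  return { history, nextSchedule }; // entries carry success, durationMs, productsFound
}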
@@ -78,14 +78,14 @@ router.get('/metrics', async (_req: Request, res: Response) => {

 /**
  * GET /api/admin/orchestrator/states
- * Returns array of states with at least one known dispensary
+ * Returns array of states with at least one crawl-enabled dispensary
  */
 router.get('/states', async (_req: Request, res: Response) => {
   try {
     const { rows } = await pool.query(`
       SELECT DISTINCT state, COUNT(*) as store_count
       FROM dispensaries
-      WHERE state IS NOT NULL
+      WHERE state IS NOT NULL AND crawl_enabled = true
       GROUP BY state
       ORDER BY state
     `);
334 backend/src/routes/payloads.ts Normal file
@@ -0,0 +1,334 @@
+/**
+ * Payload Routes
+ *
+ * Per TASK_WORKFLOW_2024-12-10.md: API access to raw crawl payloads.
+ *
+ * Endpoints:
+ * - GET /api/payloads - List payload metadata (paginated)
+ * - GET /api/payloads/:id - Get payload metadata by ID
+ * - GET /api/payloads/:id/data - Get full payload JSON
+ * - GET /api/payloads/store/:dispensaryId - List payloads for a store
+ * - GET /api/payloads/store/:dispensaryId/latest - Get latest payload for a store
+ * - GET /api/payloads/store/:dispensaryId/diff - Diff two payloads
+ */
+
+import { Router, Request, Response } from 'express';
+import { getPool } from '../db/pool';
+import {
+  loadRawPayloadById,
+  getLatestPayload,
+  getRecentPayloads,
+  listPayloadMetadata,
+} from '../utils/payload-storage';
+import { Pool } from 'pg';
+
+const router = Router();
+
+// Get pool instance for queries
+const getDbPool = (): Pool => getPool() as unknown as Pool;
+
+/**
+ * GET /api/payloads
+ * List payload metadata (paginated)
+ */
+router.get('/', async (req: Request, res: Response) => {
+  try {
+    const pool = getDbPool();
+    const limit = Math.min(parseInt(req.query.limit as string) || 50, 100);
+    const offset = parseInt(req.query.offset as string) || 0;
+    const dispensaryId = req.query.dispensary_id ? parseInt(req.query.dispensary_id as string) : undefined;
+
+    const payloads = await listPayloadMetadata(pool, {
+      dispensaryId,
+      limit,
+      offset,
+    });
+
+    res.json({
+      success: true,
+      payloads,
+      pagination: { limit, offset },
+    });
+  } catch (error: any) {
+    console.error('[Payloads] List error:', error.message);
+    res.status(500).json({ success: false, error: error.message });
+  }
+});
+
+/**
+ * GET /api/payloads/:id
+ * Get payload metadata by ID
+ */
+router.get('/:id', async (req: Request, res: Response) => {
+  try {
+    const pool = getDbPool();
+    const id = parseInt(req.params.id);
+
+    const result = await pool.query(`
+      SELECT
+        p.id,
+        p.dispensary_id,
+        p.crawl_run_id,
+        p.storage_path,
+        p.product_count,
+        p.size_bytes,
+        p.size_bytes_raw,
+        p.fetched_at,
+        p.processed_at,
+        p.checksum_sha256,
+        d.name as dispensary_name
+      FROM raw_crawl_payloads p
+      LEFT JOIN dispensaries d ON d.id = p.dispensary_id
+      WHERE p.id = $1
+    `, [id]);
+
+    if (result.rows.length === 0) {
+      return res.status(404).json({ success: false, error: 'Payload not found' });
+    }
+
+    res.json({
+      success: true,
+      payload: result.rows[0],
+    });
+  } catch (error: any) {
+    console.error('[Payloads] Get error:', error.message);
+    res.status(500).json({ success: false, error: error.message });
+  }
+});
+
+/**
+ * GET /api/payloads/:id/data
+ * Get full payload JSON (decompressed from disk)
+ */
+router.get('/:id/data', async (req: Request, res: Response) => {
+  try {
+    const pool = getDbPool();
+    const id = parseInt(req.params.id);
+
+    const result = await loadRawPayloadById(pool, id);
+
+    if (!result) {
+      return res.status(404).json({ success: false, error: 'Payload not found' });
+    }
+
+    res.json({
+      success: true,
+      metadata: result.metadata,
+      data: result.payload,
+    });
+  } catch (error: any) {
+    console.error('[Payloads] Get data error:', error.message);
+    res.status(500).json({ success: false, error: error.message });
+  }
+});
+
+/**
+ * GET /api/payloads/store/:dispensaryId
+ * List payloads for a specific store
+ */
+router.get('/store/:dispensaryId', async (req: Request, res: Response) => {
+  try {
+    const pool = getDbPool();
+    const dispensaryId = parseInt(req.params.dispensaryId);
+    const limit = Math.min(parseInt(req.query.limit as string) || 20, 100);
+    const offset = parseInt(req.query.offset as string) || 0;
+
+    const payloads = await listPayloadMetadata(pool, {
+      dispensaryId,
+      limit,
+      offset,
+    });
+
+    res.json({
+      success: true,
+      dispensaryId,
+      payloads,
+      pagination: { limit, offset },
+    });
+  } catch (error: any) {
+    console.error('[Payloads] Store list error:', error.message);
+    res.status(500).json({ success: false, error: error.message });
+  }
+});
+
+/**
+ * GET /api/payloads/store/:dispensaryId/latest
+ * Get the latest payload for a store (with full data)
+ */
+router.get('/store/:dispensaryId/latest', async (req: Request, res: Response) => {
+  try {
+    const pool = getDbPool();
+    const dispensaryId = parseInt(req.params.dispensaryId);
+
+    const result = await getLatestPayload(pool, dispensaryId);
+
+    if (!result) {
+      return res.status(404).json({
+        success: false,
+        error: `No payloads found for dispensary ${dispensaryId}`,
+      });
+    }
+
+    res.json({
+      success: true,
+      metadata: result.metadata,
+      data: result.payload,
+    });
+  } catch (error: any) {
+    console.error('[Payloads] Latest error:', error.message);
+    res.status(500).json({ success: false, error: error.message });
+  }
+});
+
+/**
+ * GET /api/payloads/store/:dispensaryId/diff
+ * Compare two payloads for a store
+ *
+ * Query params:
+ * - from: payload ID (older)
+ * - to: payload ID (newer) - optional, defaults to latest
+ */
+router.get('/store/:dispensaryId/diff', async (req: Request, res: Response) => {
+  try {
+    const pool = getDbPool();
+    const dispensaryId = parseInt(req.params.dispensaryId);
+    const fromId = req.query.from ? parseInt(req.query.from as string) : undefined;
+    const toId = req.query.to ? parseInt(req.query.to as string) : undefined;
+
+    let fromPayload: any;
+    let toPayload: any;
+
+    if (fromId && toId) {
+      // Load specific payloads
+      const [from, to] = await Promise.all([
+        loadRawPayloadById(pool, fromId),
+        loadRawPayloadById(pool, toId),
+      ]);
+      fromPayload = from;
+      toPayload = to;
+    } else {
+      // Load two most recent
+      const recent = await getRecentPayloads(pool, dispensaryId, 2);
+      if (recent.length < 2) {
+        return res.status(400).json({
+          success: false,
+          error: 'Need at least 2 payloads to diff. Only found ' + recent.length,
+        });
+      }
+      toPayload = recent[0]; // Most recent
+      fromPayload = recent[1]; // Previous
+    }
+
+    if (!fromPayload || !toPayload) {
+      return res.status(404).json({ success: false, error: 'One or both payloads not found' });
+    }
+
+    // Build product maps by ID
+    const fromProducts = new Map<string, any>();
+    const toProducts = new Map<string, any>();
+
+    for (const p of fromPayload.payload.products || []) {
+      const id = p._id || p.id;
+      if (id) fromProducts.set(id, p);
+    }
+
+    for (const p of toPayload.payload.products || []) {
+      const id = p._id || p.id;
+      if (id) toProducts.set(id, p);
+    }
+
+    // Find differences
+    const added: any[] = [];
+    const removed: any[] = [];
+    const priceChanges: any[] = [];
+    const stockChanges: any[] = [];
+
+    // Products in "to" but not in "from" = added
+    for (const [id, product] of toProducts) {
+      if (!fromProducts.has(id)) {
+        added.push({
+          id,
+          name: product.name,
+          brand: product.brand?.name,
+          price: product.Prices?.[0]?.price,
+        });
+      }
+    }
+
+    // Products in "from" but not in "to" = removed
+    for (const [id, product] of fromProducts) {
+      if (!toProducts.has(id)) {
+        removed.push({
+          id,
+          name: product.name,
+          brand: product.brand?.name,
+          price: product.Prices?.[0]?.price,
+        });
+      }
+    }
+
+    // Products in both - check for changes
+    for (const [id, toProduct] of toProducts) {
+      const fromProduct = fromProducts.get(id);
+      if (!fromProduct) continue;
+
+      const fromPrice = fromProduct.Prices?.[0]?.price;
+      const toPrice = toProduct.Prices?.[0]?.price;
+
+      if (fromPrice !== toPrice) {
+        priceChanges.push({
+          id,
|
name: toProduct.name,
|
||||||
|
brand: toProduct.brand?.name,
|
||||||
|
oldPrice: fromPrice,
|
||||||
|
newPrice: toPrice,
|
||||||
|
change: toPrice && fromPrice ? toPrice - fromPrice : null,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
const fromStock = fromProduct.Status || fromProduct.status;
|
||||||
|
const toStock = toProduct.Status || toProduct.status;
|
||||||
|
|
||||||
|
if (fromStock !== toStock) {
|
||||||
|
stockChanges.push({
|
||||||
|
id,
|
||||||
|
name: toProduct.name,
|
||||||
|
brand: toProduct.brand?.name,
|
||||||
|
oldStatus: fromStock,
|
||||||
|
newStatus: toStock,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
res.json({
|
||||||
|
success: true,
|
||||||
|
from: {
|
||||||
|
id: fromPayload.metadata.id,
|
||||||
|
fetchedAt: fromPayload.metadata.fetchedAt,
|
||||||
|
productCount: fromPayload.metadata.productCount,
|
||||||
|
},
|
||||||
|
to: {
|
||||||
|
id: toPayload.metadata.id,
|
||||||
|
fetchedAt: toPayload.metadata.fetchedAt,
|
||||||
|
productCount: toPayload.metadata.productCount,
|
||||||
|
},
|
||||||
|
diff: {
|
||||||
|
added: added.length,
|
||||||
|
removed: removed.length,
|
||||||
|
priceChanges: priceChanges.length,
|
||||||
|
stockChanges: stockChanges.length,
|
||||||
|
},
|
||||||
|
details: {
|
||||||
|
added,
|
||||||
|
removed,
|
||||||
|
priceChanges,
|
||||||
|
stockChanges,
|
||||||
|
},
|
||||||
|
});
|
||||||
|
} catch (error: any) {
|
||||||
|
console.error('[Payloads] Diff error:', error.message);
|
||||||
|
res.status(500).json({ success: false, error: error.message });
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
export default router;
|
||||||
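Usage sketch for the diff endpoint above (not part of the diff; IDs are hypothetical and the /api/payloads mount point is assumed from the route comments):

// Compare the two most recent payloads for dispensary 42.
const res = await fetch('/api/payloads/store/42/diff');
const { diff, details } = await res.json();
// diff holds the counts; details holds the added/removed/priceChanges/stockChanges arrays.

// Or pin both sides explicitly:
await fetch('/api/payloads/store/42/diff?from=101&to=107');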
@@ -1,11 +1,29 @@
 import { Router } from 'express';
 import { authMiddleware } from '../auth/middleware';
 import { pool } from '../db/pool';
-import { getImageUrl } from '../utils/minio';

 const router = Router();
 router.use(authMiddleware);

+/**
+ * Convert local image path to proxy URL
+ * /images/products/... -> /img/products/...
+ */
+function getImageUrl(localPath: string): string {
+  if (!localPath) return '';
+  // If already a full URL, return as-is
+  if (localPath.startsWith('http')) return localPath;
+  // Convert /images/ path to /img/ proxy path
+  if (localPath.startsWith('/images/')) {
+    return '/img' + localPath.substring(7);
+  }
+  // Handle paths without leading slash
+  if (localPath.startsWith('images/')) {
+    return '/img/' + localPath.substring(7);
+  }
+  return '/img/' + localPath;
+}
+
 // Freshness threshold: data older than this is considered stale
 const STALE_THRESHOLD_HOURS = 4;
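Behavior sketch for the getImageUrl helper added above (inputs hypothetical, outputs follow directly from the code):

getImageUrl('/images/products/abc.jpg');      // -> '/img/products/abc.jpg'
getImageUrl('images/products/abc.jpg');       // -> '/img/products/abc.jpg'
getImageUrl('https://cdn.example.com/x.jpg'); // -> returned unchanged
getImageUrl('');                              // -> ''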
@@ -2,7 +2,7 @@ import { Router } from 'express';
 import { authMiddleware, requireRole } from '../auth/middleware';
 import { pool } from '../db/pool';
 import { testProxy, addProxy, addProxiesFromList } from '../services/proxy';
-import { createProxyTestJob, getProxyTestJob, getActiveProxyTestJob, cancelProxyTestJob } from '../services/proxyTestQueue';
+import { createProxyTestJob, getProxyTestJob, getActiveProxyTestJob, cancelProxyTestJob, ProxyTestMode } from '../services/proxyTestQueue';

 const router = Router();
 router.use(authMiddleware);
@@ -11,9 +11,10 @@ router.use(authMiddleware);
 router.get('/', async (req, res) => {
   try {
     const result = await pool.query(`
-      SELECT id, host, port, protocol, active, is_anonymous,
+      SELECT id, host, port, protocol, username, password, active, is_anonymous,
              last_tested_at, test_result, response_time_ms, created_at,
-             city, state, country, country_code, location_updated_at
+             city, state, country, country_code, location_updated_at,
+             COALESCE(max_connections, 1) as max_connections
       FROM proxies
       ORDER BY created_at DESC
     `);
@@ -166,13 +167,39 @@ router.post('/:id/test', requireRole('superadmin', 'admin'), async (req, res) =>
 });

 // Start proxy test job
+// Query params: mode=all|failed|inactive, concurrency=10
 router.post('/test-all', requireRole('superadmin', 'admin'), async (req, res) => {
   try {
-    const jobId = await createProxyTestJob();
-    res.json({ jobId, message: 'Proxy test job started' });
-  } catch (error) {
+    const mode = (req.query.mode as ProxyTestMode) || 'all';
+    const concurrency = parseInt(req.query.concurrency as string) || 10;
+
+    // Validate mode
+    if (!['all', 'failed', 'inactive'].includes(mode)) {
+      return res.status(400).json({ error: 'Invalid mode. Use: all, failed, or inactive' });
+    }
+
+    // Validate concurrency (1-50)
+    if (concurrency < 1 || concurrency > 50) {
+      return res.status(400).json({ error: 'Concurrency must be between 1 and 50' });
+    }
+
+    const { jobId, totalProxies } = await createProxyTestJob(mode, concurrency);
+    res.json({ jobId, total: totalProxies, mode, concurrency, message: `Proxy test job started (mode: ${mode}, concurrency: ${concurrency})` });
+  } catch (error: any) {
     console.error('Error starting proxy test job:', error);
-    res.status(500).json({ error: 'Failed to start proxy test job' });
+    res.status(500).json({ error: error.message || 'Failed to start proxy test job' });
   }
 });
+
+// Convenience endpoint: Test only failed proxies
+router.post('/test-failed', requireRole('superadmin', 'admin'), async (req, res) => {
+  try {
+    const concurrency = parseInt(req.query.concurrency as string) || 10;
+    const { jobId, totalProxies } = await createProxyTestJob('failed', concurrency);
+    res.json({ jobId, total: totalProxies, mode: 'failed', concurrency, message: 'Retesting failed proxies...' });
+  } catch (error: any) {
+    console.error('Error starting failed proxy test:', error);
+    res.status(500).json({ error: error.message || 'Failed to start proxy test job' });
+  }
+});
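Usage sketch (assumes this router is mounted at /api/proxies, which this excerpt does not show):

// Retest only previously-failed proxies with higher parallelism.
const res = await fetch('/api/proxies/test-all?mode=failed&concurrency=25', { method: 'POST' });
const { jobId, total } = await res.json();
// Track or cancel the job via the test-job/:jobId endpoints referenced in the next hunk.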
@@ -197,7 +224,7 @@ router.post('/test-job/:jobId/cancel', requireRole('superadmin', 'admin'), async
 router.put('/:id', requireRole('superadmin', 'admin'), async (req, res) => {
   try {
     const { id } = req.params;
-    const { host, port, protocol, username, password, active } = req.body;
+    const { host, port, protocol, username, password, active, max_connections } = req.body;

     const result = await pool.query(`
       UPDATE proxies
@@ -207,10 +234,11 @@ router.put('/:id', requireRole('superadmin', 'admin'), async (req, res) => {
         username = COALESCE($4, username),
         password = COALESCE($5, password),
         active = COALESCE($6, active),
+        max_connections = COALESCE($7, max_connections),
         updated_at = CURRENT_TIMESTAMP
-      WHERE id = $7
+      WHERE id = $8
       RETURNING *
-    `, [host, port, protocol, username, password, active, id]);
+    `, [host, port, protocol, username, password, active, max_connections, id]);

     if (result.rows.length === 0) {
       return res.status(404).json({ error: 'Proxy not found' });
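Usage sketch for the updated PUT handler (proxy ID and value hypothetical). Because every column goes through COALESCE, any field omitted from the body is left unchanged:

await fetch('/api/proxies/17', {
  method: 'PUT',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({ max_connections: 4 }), // partial update: only max_connections changes
});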
backend/src/routes/tasks.ts (new file, 639 lines)
@@ -0,0 +1,639 @@
/**
 * Task Queue API Routes
 *
 * Endpoints for managing worker tasks, viewing capacity metrics,
 * and generating batch tasks.
 */

import { Router, Request, Response } from 'express';
import {
  taskService,
  TaskRole,
  TaskStatus,
  TaskFilter,
} from '../tasks/task-service';
import { pool } from '../db/pool';
import {
  isTaskPoolPaused,
  pauseTaskPool,
  resumeTaskPool,
  getTaskPoolStatus,
} from '../tasks/task-pool-state';

const router = Router();

/**
 * GET /api/tasks
 * List tasks with optional filters
 *
 * Query params:
 * - role: Filter by role
 * - status: Filter by status (comma-separated for multiple)
 * - dispensary_id: Filter by dispensary
 * - worker_id: Filter by worker
 * - limit: Max results (default 100)
 * - offset: Pagination offset
 */
router.get('/', async (req: Request, res: Response) => {
  try {
    const filter: TaskFilter = {};

    if (req.query.role) {
      filter.role = req.query.role as TaskRole;
    }

    if (req.query.status) {
      const statuses = (req.query.status as string).split(',') as TaskStatus[];
      filter.status = statuses.length === 1 ? statuses[0] : statuses;
    }

    if (req.query.dispensary_id) {
      filter.dispensary_id = parseInt(req.query.dispensary_id as string, 10);
    }

    if (req.query.worker_id) {
      filter.worker_id = req.query.worker_id as string;
    }

    if (req.query.limit) {
      filter.limit = parseInt(req.query.limit as string, 10);
    }

    if (req.query.offset) {
      filter.offset = parseInt(req.query.offset as string, 10);
    }

    const tasks = await taskService.listTasks(filter);
    res.json({ tasks, count: tasks.length });
  } catch (error: unknown) {
    console.error('Error listing tasks:', error);
    res.status(500).json({ error: 'Failed to list tasks' });
  }
});
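Usage sketch for GET /api/tasks (filter values hypothetical):

// Pending and running product_refresh tasks for one store, first page of 50.
const res = await fetch('/api/tasks?role=product_refresh&status=pending,running&dispensary_id=42&limit=50');
const { tasks, count } = await res.json();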
/**
 * GET /api/tasks/counts
 * Get task counts by status
 */
router.get('/counts', async (_req: Request, res: Response) => {
  try {
    const counts = await taskService.getTaskCounts();
    res.json(counts);
  } catch (error: unknown) {
    console.error('Error getting task counts:', error);
    res.status(500).json({ error: 'Failed to get task counts' });
  }
});

/**
 * GET /api/tasks/capacity
 * Get capacity metrics for all roles
 */
router.get('/capacity', async (_req: Request, res: Response) => {
  try {
    const metrics = await taskService.getCapacityMetrics();
    res.json({ metrics });
  } catch (error: unknown) {
    console.error('Error getting capacity metrics:', error);
    res.status(500).json({ error: 'Failed to get capacity metrics' });
  }
});

/**
 * GET /api/tasks/capacity/:role
 * Get capacity metrics for a specific role
 */
router.get('/capacity/:role', async (req: Request, res: Response) => {
  try {
    const role = req.params.role as TaskRole;
    const capacity = await taskService.getRoleCapacity(role);

    if (!capacity) {
      return res.status(404).json({ error: 'Role not found or no data' });
    }

    // Calculate workers needed for different SLAs
    const workersFor1Hour = await taskService.calculateWorkersNeeded(role, 1);
    const workersFor4Hours = await taskService.calculateWorkersNeeded(role, 4);
    const workersFor8Hours = await taskService.calculateWorkersNeeded(role, 8);

    res.json({
      ...capacity,
      workers_needed: {
        for_1_hour: workersFor1Hour,
        for_4_hours: workersFor4Hours,
        for_8_hours: workersFor8Hours,
      },
    });
  } catch (error: unknown) {
    console.error('Error getting role capacity:', error);
    res.status(500).json({ error: 'Failed to get role capacity' });
  }
});
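The workers_needed numbers come from taskService.calculateWorkersNeeded, whose implementation is not shown in this diff. A plausible reading, stated purely as an assumption, is throughput math of the form:

// workersNeeded ~= ceil((pending_tasks * avg_task_duration_sec) / (sla_hours * 3600))
// e.g. 1200 pending tasks at ~90s each against a 4-hour SLA:
Math.ceil((1200 * 90) / (4 * 3600)); // -> 8 workers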
/**
 * GET /api/tasks/:id
 * Get a specific task by ID
 */
router.get('/:id', async (req: Request, res: Response) => {
  try {
    const taskId = parseInt(req.params.id, 10);
    const task = await taskService.getTask(taskId);

    if (!task) {
      return res.status(404).json({ error: 'Task not found' });
    }

    res.json(task);
  } catch (error: unknown) {
    console.error('Error getting task:', error);
    res.status(500).json({ error: 'Failed to get task' });
  }
});

/**
 * DELETE /api/tasks/:id
 * Delete a specific task by ID
 * Only allows deletion of failed, completed, or pending tasks (not running)
 */
router.delete('/:id', async (req: Request, res: Response) => {
  try {
    const taskId = parseInt(req.params.id, 10);

    // First check if task exists and its status
    const task = await taskService.getTask(taskId);
    if (!task) {
      return res.status(404).json({ error: 'Task not found' });
    }

    // Don't allow deleting running tasks
    if (task.status === 'running' || task.status === 'claimed') {
      return res.status(400).json({ error: 'Cannot delete a running or claimed task' });
    }

    // Delete the task
    await pool.query('DELETE FROM worker_tasks WHERE id = $1', [taskId]);

    res.json({ success: true, message: `Task ${taskId} deleted` });
  } catch (error: unknown) {
    console.error('Error deleting task:', error);
    res.status(500).json({ error: 'Failed to delete task' });
  }
});

/**
 * POST /api/tasks
 * Create a new task
 *
 * Body:
 * - role: TaskRole (required)
 * - dispensary_id: number (optional)
 * - platform: string (optional)
 * - priority: number (optional, default 0)
 * - scheduled_for: ISO date string (optional)
 */
router.post('/', async (req: Request, res: Response) => {
  try {
    const { role, dispensary_id, platform, priority, scheduled_for } = req.body;

    if (!role) {
      return res.status(400).json({ error: 'Role is required' });
    }

    // Check if store already has an active task
    if (dispensary_id) {
      const hasActive = await taskService.hasActiveTask(dispensary_id);
      if (hasActive) {
        return res.status(409).json({
          error: 'Store already has an active task',
          dispensary_id,
        });
      }
    }

    const task = await taskService.createTask({
      role,
      dispensary_id,
      platform,
      priority,
      scheduled_for: scheduled_for ? new Date(scheduled_for) : undefined,
    });

    res.status(201).json(task);
  } catch (error: unknown) {
    console.error('Error creating task:', error);
    res.status(500).json({ error: 'Failed to create task' });
  }
});
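Creation sketch (body values hypothetical; the handler returns 409 when the store already has an active task):

await fetch('/api/tasks', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({ role: 'product_refresh', dispensary_id: 42, priority: 5 }),
});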
/**
 * POST /api/tasks/generate/resync
 * Generate daily resync tasks for all active stores
 *
 * Body:
 * - batches_per_day: number (optional, default 6 = every 4 hours)
 * - date: ISO date string (optional, default today)
 */
router.post('/generate/resync', async (req: Request, res: Response) => {
  try {
    const { batches_per_day, date } = req.body;
    const batchesPerDay = batches_per_day ?? 6;
    const targetDate = date ? new Date(date) : new Date();

    const createdCount = await taskService.generateDailyResyncTasks(
      batchesPerDay,
      targetDate
    );

    res.json({
      success: true,
      tasks_created: createdCount,
      batches_per_day: batchesPerDay,
      date: targetDate.toISOString().split('T')[0],
    });
  } catch (error: unknown) {
    console.error('Error generating resync tasks:', error);
    res.status(500).json({ error: 'Failed to generate resync tasks' });
  }
});

/**
 * POST /api/tasks/generate/discovery
 * Generate store discovery tasks for a platform
 *
 * Body:
 * - platform: string (required, e.g., 'dutchie')
 * - state_code: string (optional, e.g., 'AZ')
 * - priority: number (optional)
 */
router.post('/generate/discovery', async (req: Request, res: Response) => {
  try {
    const { platform, state_code, priority } = req.body;

    if (!platform) {
      return res.status(400).json({ error: 'Platform is required' });
    }

    const task = await taskService.createStoreDiscoveryTask(
      platform,
      state_code,
      priority ?? 0
    );

    res.status(201).json(task);
  } catch (error: unknown) {
    console.error('Error creating discovery task:', error);
    res.status(500).json({ error: 'Failed to create discovery task' });
  }
});

/**
 * POST /api/tasks/recover-stale
 * Recover stale tasks from dead workers
 *
 * Body:
 * - threshold_minutes: number (optional, default 10)
 */
router.post('/recover-stale', async (req: Request, res: Response) => {
  try {
    const { threshold_minutes } = req.body;
    const recovered = await taskService.recoverStaleTasks(threshold_minutes ?? 10);

    res.json({
      success: true,
      tasks_recovered: recovered,
    });
  } catch (error: unknown) {
    console.error('Error recovering stale tasks:', error);
    res.status(500).json({ error: 'Failed to recover stale tasks' });
  }
});

/**
 * GET /api/tasks/role/:role/last-completion
 * Get the last completion time for a role
 */
router.get('/role/:role/last-completion', async (req: Request, res: Response) => {
  try {
    const role = req.params.role as TaskRole;
    const lastCompletion = await taskService.getLastCompletion(role);

    res.json({
      role,
      last_completion: lastCompletion?.toISOString() ?? null,
      time_since: lastCompletion
        ? Math.floor((Date.now() - lastCompletion.getTime()) / 1000)
        : null,
    });
  } catch (error: unknown) {
    console.error('Error getting last completion:', error);
    res.status(500).json({ error: 'Failed to get last completion' });
  }
});

/**
 * GET /api/tasks/role/:role/recent
 * Get recent completions for a role
 */
router.get('/role/:role/recent', async (req: Request, res: Response) => {
  try {
    const role = req.params.role as TaskRole;
    const limit = parseInt(req.query.limit as string, 10) || 10;

    const tasks = await taskService.getRecentCompletions(role, limit);
    res.json({ tasks });
  } catch (error: unknown) {
    console.error('Error getting recent completions:', error);
    res.status(500).json({ error: 'Failed to get recent completions' });
  }
});

/**
 * GET /api/tasks/store/:dispensaryId/active
 * Check if a store has an active task
 */
router.get('/store/:dispensaryId/active', async (req: Request, res: Response) => {
  try {
    const dispensaryId = parseInt(req.params.dispensaryId, 10);
    const hasActive = await taskService.hasActiveTask(dispensaryId);

    res.json({
      dispensary_id: dispensaryId,
      has_active_task: hasActive,
    });
  } catch (error: unknown) {
    console.error('Error checking active task:', error);
    res.status(500).json({ error: 'Failed to check active task' });
  }
});
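Operational sketch: recover-stale is a no-op when nothing is stale, so a small interval timer can run it alongside the API (interval and threshold hypothetical):

setInterval(() => {
  fetch('/api/tasks/recover-stale', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ threshold_minutes: 10 }),
  }).catch((err) => console.error('recover-stale failed:', err));
}, 5 * 60 * 1000); // every 5 minutes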
// ============================================================
// MIGRATION ROUTES - Disable old job systems
// ============================================================

/**
 * GET /api/tasks/migration/status
 * Get status of old job systems vs new task queue
 */
router.get('/migration/status', async (_req: Request, res: Response) => {
  try {
    // Get old job system counts
    const [schedules, crawlJobs, rawPayloads, taskCounts] = await Promise.all([
      pool.query(`
        SELECT
          COUNT(*) as total,
          COUNT(*) FILTER (WHERE enabled = true) as enabled
        FROM job_schedules
      `),
      pool.query(`
        SELECT
          COUNT(*) as total,
          COUNT(*) FILTER (WHERE status = 'pending') as pending,
          COUNT(*) FILTER (WHERE status = 'running') as running
        FROM dispensary_crawl_jobs
      `),
      pool.query(`
        SELECT
          COUNT(*) as total,
          COUNT(*) FILTER (WHERE processed = false) as unprocessed
        FROM raw_payloads
      `),
      taskService.getTaskCounts(),
    ]);

    res.json({
      old_systems: {
        job_schedules: {
          total: parseInt(schedules.rows[0].total) || 0,
          enabled: parseInt(schedules.rows[0].enabled) || 0,
        },
        dispensary_crawl_jobs: {
          total: parseInt(crawlJobs.rows[0].total) || 0,
          pending: parseInt(crawlJobs.rows[0].pending) || 0,
          running: parseInt(crawlJobs.rows[0].running) || 0,
        },
        raw_payloads: {
          total: parseInt(rawPayloads.rows[0].total) || 0,
          unprocessed: parseInt(rawPayloads.rows[0].unprocessed) || 0,
        },
      },
      new_task_queue: taskCounts,
      recommendation: schedules.rows[0].enabled > 0
        ? 'Disable old job schedules before switching to new task queue'
        : 'Ready to use new task queue',
    });
  } catch (error: unknown) {
    console.error('Error getting migration status:', error);
    res.status(500).json({ error: 'Failed to get migration status' });
  }
});

/**
 * POST /api/tasks/migration/disable-old-schedules
 * Disable all old job schedules to prepare for new task queue
 */
router.post('/migration/disable-old-schedules', async (_req: Request, res: Response) => {
  try {
    const result = await pool.query(`
      UPDATE job_schedules
      SET enabled = false,
          updated_at = NOW()
      WHERE enabled = true
      RETURNING id, job_name
    `);

    res.json({
      success: true,
      disabled_count: result.rowCount,
      disabled_schedules: result.rows.map(r => ({ id: r.id, job_name: r.job_name })),
    });
  } catch (error: unknown) {
    console.error('Error disabling old schedules:', error);
    res.status(500).json({ error: 'Failed to disable old schedules' });
  }
});

/**
 * POST /api/tasks/migration/cancel-pending-crawl-jobs
 * Cancel all pending crawl jobs from the old system
 */
router.post('/migration/cancel-pending-crawl-jobs', async (_req: Request, res: Response) => {
  try {
    const result = await pool.query(`
      UPDATE dispensary_crawl_jobs
      SET status = 'cancelled',
          completed_at = NOW(),
          updated_at = NOW()
      WHERE status = 'pending'
      RETURNING id
    `);

    res.json({
      success: true,
      cancelled_count: result.rowCount,
    });
  } catch (error: unknown) {
    console.error('Error cancelling pending crawl jobs:', error);
    res.status(500).json({ error: 'Failed to cancel pending crawl jobs' });
  }
});

/**
 * POST /api/tasks/migration/create-resync-tasks
 * Create product_refresh tasks for all crawl-enabled dispensaries
 */
router.post('/migration/create-resync-tasks', async (req: Request, res: Response) => {
  try {
    const { priority = 0, state_code } = req.body;

    let query = `
      SELECT id, name FROM dispensaries
      WHERE crawl_enabled = true
        AND platform_dispensary_id IS NOT NULL
    `;
    const params: any[] = [];

    if (state_code) {
      query += `
        AND state_id = (SELECT id FROM states WHERE code = $1)
      `;
      params.push(state_code.toUpperCase());
    }

    query += ` ORDER BY id`;

    const dispensaries = await pool.query(query, params);
    let created = 0;

    for (const disp of dispensaries.rows) {
      // Check if already has pending/running task
      const hasActive = await taskService.hasActiveTask(disp.id);
      if (!hasActive) {
        await taskService.createTask({
          role: 'product_refresh',
          dispensary_id: disp.id,
          platform: 'dutchie',
          priority,
        });
        created++;
      }
    }

    res.json({
      success: true,
      tasks_created: created,
      dispensaries_checked: dispensaries.rows.length,
      state_filter: state_code || 'all',
    });
  } catch (error: unknown) {
    console.error('Error creating resync tasks:', error);
    res.status(500).json({ error: 'Failed to create resync tasks' });
  }
});

/**
 * POST /api/tasks/migration/full-migrate
 * One-click migration: disable old systems, create new tasks
 */
router.post('/migration/full-migrate', async (req: Request, res: Response) => {
  try {
    const results: any = {
      success: true,
      steps: [],
    };

    // Step 1: Disable old job schedules
    const disableResult = await pool.query(`
      UPDATE job_schedules
      SET enabled = false, updated_at = NOW()
      WHERE enabled = true
      RETURNING id
    `);
    results.steps.push({
      step: 'disable_job_schedules',
      count: disableResult.rowCount,
    });

    // Step 2: Cancel pending crawl jobs
    const cancelResult = await pool.query(`
      UPDATE dispensary_crawl_jobs
      SET status = 'cancelled', completed_at = NOW(), updated_at = NOW()
      WHERE status = 'pending'
      RETURNING id
    `);
    results.steps.push({
      step: 'cancel_pending_crawl_jobs',
      count: cancelResult.rowCount,
    });

    // Step 3: Generate initial resync tasks
    const resyncCount = await taskService.generateDailyResyncTasks(6);
    results.steps.push({
      step: 'generate_resync_tasks',
      count: resyncCount,
    });

    // Step 4: Create store discovery task
    const discoveryTask = await taskService.createStoreDiscoveryTask('dutchie', undefined, 0);
    results.steps.push({
      step: 'create_discovery_task',
      task_id: discoveryTask.id,
    });

    // Step 5: Create analytics refresh task
    const analyticsTask = await taskService.createTask({
      role: 'analytics_refresh',
      priority: 0,
    });
    results.steps.push({
      step: 'create_analytics_task',
      task_id: analyticsTask.id,
    });

    results.message = 'Migration complete. New task workers will pick up tasks.';
    res.json(results);
  } catch (error: unknown) {
    console.error('Error during full migration:', error);
    res.status(500).json({ error: 'Failed to complete migration' });
  }
});
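full-migrate chains the same operations as the individual migration endpoints, roughly equivalent to this sequence (sketch; the one-click version additionally seeds the discovery and analytics tasks in the same request):

await fetch('/api/tasks/migration/disable-old-schedules', { method: 'POST' });
await fetch('/api/tasks/migration/cancel-pending-crawl-jobs', { method: 'POST' });
await fetch('/api/tasks/generate/resync', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({ batches_per_day: 6 }),
});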
/**
 * GET /api/tasks/pool/status
 * Check if task pool is paused
 */
router.get('/pool/status', async (_req: Request, res: Response) => {
  const status = getTaskPoolStatus();
  res.json({
    success: true,
    ...status,
  });
});

/**
 * POST /api/tasks/pool/pause
 * Pause the task pool - workers won't pick up new tasks
 */
router.post('/pool/pause', async (_req: Request, res: Response) => {
  pauseTaskPool();
  res.json({
    success: true,
    paused: true,
    message: 'Task pool paused - workers will not pick up new tasks',
  });
});

/**
 * POST /api/tasks/pool/resume
 * Resume the task pool - workers will pick up tasks again
 */
router.post('/pool/resume', async (_req: Request, res: Response) => {
  resumeTaskPool();
  res.json({
    success: true,
    paused: false,
    message: 'Task pool resumed - workers will pick up new tasks',
  });
});

export default router;
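Pool-control sketch for a maintenance window (paths as above):

await fetch('/api/tasks/pool/pause', { method: 'POST' });  // workers stop claiming new tasks
// ...wait for running tasks to drain, do maintenance...
await fetch('/api/tasks/pool/resume', { method: 'POST' });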
@@ -14,23 +14,36 @@ router.get('/', async (req: AuthRequest, res) => {
   try {
     const { search, domain } = req.query;

-    let query = `
-      SELECT id, email, role, first_name, last_name, phone, domain, created_at, updated_at
-      FROM users
-      WHERE 1=1
-    `;
+    // Check which columns exist (schema-tolerant)
+    const columnsResult = await pool.query(`
+      SELECT column_name FROM information_schema.columns
+      WHERE table_name = 'users' AND column_name IN ('first_name', 'last_name', 'phone', 'domain')
+    `);
+    const existingColumns = new Set(columnsResult.rows.map((r: any) => r.column_name));
+
+    // Build column list based on what exists
+    const selectCols = ['id', 'email', 'role', 'created_at', 'updated_at'];
+    if (existingColumns.has('first_name')) selectCols.push('first_name');
+    if (existingColumns.has('last_name')) selectCols.push('last_name');
+    if (existingColumns.has('phone')) selectCols.push('phone');
+    if (existingColumns.has('domain')) selectCols.push('domain');
+
+    let query = `SELECT ${selectCols.join(', ')} FROM users WHERE 1=1`;
     const params: any[] = [];
     let paramIndex = 1;

-    // Search by email, first_name, or last_name
+    // Search by email (and optionally first_name, last_name if they exist)
     if (search && typeof search === 'string') {
-      query += ` AND (email ILIKE $${paramIndex} OR first_name ILIKE $${paramIndex} OR last_name ILIKE $${paramIndex})`;
+      const searchClauses = ['email ILIKE $' + paramIndex];
+      if (existingColumns.has('first_name')) searchClauses.push('first_name ILIKE $' + paramIndex);
+      if (existingColumns.has('last_name')) searchClauses.push('last_name ILIKE $' + paramIndex);
+      query += ` AND (${searchClauses.join(' OR ')})`;
       params.push(`%${search}%`);
       paramIndex++;
     }

-    // Filter by domain
-    if (domain && typeof domain === 'string') {
+    // Filter by domain (if column exists)
+    if (domain && typeof domain === 'string' && existingColumns.has('domain')) {
       query += ` AND domain = $${paramIndex}`;
       params.push(domain);
       paramIndex++;
@@ -50,8 +63,22 @@ router.get('/', async (req: AuthRequest, res) => {
 router.get('/:id', async (req: AuthRequest, res) => {
   try {
     const { id } = req.params;
+
+    // Check which columns exist (schema-tolerant)
+    const columnsResult = await pool.query(`
+      SELECT column_name FROM information_schema.columns
+      WHERE table_name = 'users' AND column_name IN ('first_name', 'last_name', 'phone', 'domain')
+    `);
+    const existingColumns = new Set(columnsResult.rows.map((r: any) => r.column_name));
+
+    const selectCols = ['id', 'email', 'role', 'created_at', 'updated_at'];
+    if (existingColumns.has('first_name')) selectCols.push('first_name');
+    if (existingColumns.has('last_name')) selectCols.push('last_name');
+    if (existingColumns.has('phone')) selectCols.push('phone');
+    if (existingColumns.has('domain')) selectCols.push('domain');
+
     const result = await pool.query(`
-      SELECT id, email, role, first_name, last_name, phone, domain, created_at, updated_at
+      SELECT ${selectCols.join(', ')}
       FROM users
      WHERE id = $1
     `, [id]);
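Illustration of what the schema-tolerant branch generates: on a database missing only the phone column, the list query would read (reconstructed from the code above, not captured from a live run):

// SELECT id, email, role, created_at, updated_at, first_name, last_name, domain
// FROM users WHERE 1=1 AND (email ILIKE $1 OR first_name ILIKE $1 OR last_name ILIKE $1)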
@@ -1,18 +1,32 @@
 import { Router, Request, Response } from 'express';
+import { readFileSync } from 'fs';
+import { join } from 'path';

 const router = Router();

+// Read package.json version at startup
+let packageVersion = 'unknown';
+try {
+  const packageJson = JSON.parse(readFileSync(join(__dirname, '../../package.json'), 'utf-8'));
+  packageVersion = packageJson.version || 'unknown';
+} catch {
+  // Fallback if package.json not found
+}
+
 /**
  * GET /api/version
  * Returns build version information for display in admin UI
  */
 router.get('/', async (req: Request, res: Response) => {
   try {
+    const gitSha = process.env.APP_GIT_SHA || 'unknown';
     const versionInfo = {
-      build_version: process.env.APP_BUILD_VERSION || 'dev',
-      git_sha: process.env.APP_GIT_SHA || 'local',
-      build_time: process.env.APP_BUILD_TIME || new Date().toISOString(),
-      image_tag: process.env.CONTAINER_IMAGE_TAG || 'local',
+      version: packageVersion,
+      build_version: process.env.APP_BUILD_VERSION?.slice(0, 8) || 'dev',
+      git_sha: gitSha.slice(0, 8) || 'unknown',
+      git_sha_full: gitSha,
+      build_time: process.env.APP_BUILD_TIME || 'unknown',
+      image_tag: process.env.CONTAINER_IMAGE_TAG?.slice(0, 8) || 'local',
     };

     res.json(versionInfo);
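Shape of the new /api/version response (placeholder values, shown for structure only):

// {
//   "version": "0.0.0",            // from package.json
//   "build_version": "dev",        // APP_BUILD_VERSION, truncated to 8 chars
//   "git_sha": "abcd1234",         // short SHA
//   "git_sha_full": "abcd1234...", // full APP_GIT_SHA
//   "build_time": "unknown",
//   "image_tag": "local"
// }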
backend/src/routes/worker-registry.ts (new file, 862 lines)
@@ -0,0 +1,862 @@
/**
 * Worker Registry API Routes
 *
 * Dynamic worker management - workers register on startup, get assigned names,
 * and report heartbeats. Everything is API-driven, no hardcoding.
 *
 * Endpoints:
 * POST /api/worker-registry/register - Worker reports for duty
 * POST /api/worker-registry/heartbeat - Worker heartbeat
 * POST /api/worker-registry/deregister - Worker signing off
 * GET /api/worker-registry/workers - List all workers (for dashboard)
 * GET /api/worker-registry/workers/:id - Get specific worker
 * POST /api/worker-registry/cleanup - Mark stale workers offline
 *
 * GET /api/worker-registry/names - List all names in pool
 * POST /api/worker-registry/names - Add names to pool
 * DELETE /api/worker-registry/names/:name - Remove name from pool
 *
 * GET /api/worker-registry/roles - List available task roles
 * POST /api/worker-registry/roles - Add a new role (future)
 */

import { Router, Request, Response } from 'express';
import { pool } from '../db/pool';
import os from 'os';

const router = Router();

// ============================================================
// WORKER REGISTRATION
// ============================================================

/**
 * POST /api/worker-registry/register
 * Worker reports for duty - gets assigned a friendly name
 *
 * Body:
 * - role: string (optional) - task role, or null for role-agnostic workers
 * - worker_id: string (optional) - custom ID, auto-generated if not provided
 * - pod_name: string (optional) - k8s pod name
 * - hostname: string (optional) - machine hostname
 * - metadata: object (optional) - additional worker info
 *
 * Returns:
 * - worker_id: assigned worker ID
 * - friendly_name: assigned name from pool
 * - role: confirmed role (or null if agnostic)
 * - message: welcome message
 */
router.post('/register', async (req: Request, res: Response) => {
  try {
    const {
      role = null, // Role is now optional - null means agnostic
      worker_id,
      pod_name,
      hostname,
      ip_address,
      metadata = {}
    } = req.body;

    // Generate worker_id if not provided
    const finalWorkerId = worker_id || `worker-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
    const finalHostname = hostname || os.hostname();
    const clientIp = ip_address || req.ip || req.socket.remoteAddress;

    // Check if worker already registered
    const existing = await pool.query(
      'SELECT id, friendly_name, status FROM worker_registry WHERE worker_id = $1',
      [finalWorkerId]
    );

    if (existing.rows.length > 0) {
      // Re-activate existing worker
      const { rows } = await pool.query(`
        UPDATE worker_registry
        SET status = 'active',
            role = $1,
            pod_name = $2,
            hostname = $3,
            ip_address = $4,
            last_heartbeat_at = NOW(),
            started_at = NOW(),
            metadata = $5,
            updated_at = NOW()
        WHERE worker_id = $6
        RETURNING id, worker_id, friendly_name, role
      `, [role, pod_name, finalHostname, clientIp, metadata, finalWorkerId]);

      const worker = rows[0];
      const roleMsg = role ? `for ${role}` : 'as role-agnostic';
      console.log(`[WorkerRegistry] Worker "${worker.friendly_name}" (${finalWorkerId}) re-registered ${roleMsg}`);

      return res.json({
        success: true,
        worker_id: worker.worker_id,
        friendly_name: worker.friendly_name,
        role: worker.role,
        message: role
          ? `Welcome back, ${worker.friendly_name}! You are assigned to ${role}.`
          : `Welcome back, ${worker.friendly_name}! You are ready to take any task.`
      });
    }

    // Assign a friendly name
    const nameResult = await pool.query('SELECT assign_worker_name($1) as name', [finalWorkerId]);
    const friendlyName = nameResult.rows[0].name;

    // Register the worker
    const { rows } = await pool.query(`
      INSERT INTO worker_registry (
        worker_id, friendly_name, role, pod_name, hostname, ip_address, status, metadata
      ) VALUES ($1, $2, $3, $4, $5, $6, 'active', $7)
      RETURNING id, worker_id, friendly_name, role
    `, [finalWorkerId, friendlyName, role, pod_name, finalHostname, clientIp, metadata]);

    const worker = rows[0];
    const roleMsg = role ? `for ${role}` : 'as role-agnostic';
    console.log(`[WorkerRegistry] New worker "${friendlyName}" (${finalWorkerId}) reporting for duty ${roleMsg}`);

    res.json({
      success: true,
      worker_id: worker.worker_id,
      friendly_name: worker.friendly_name,
      role: worker.role,
      message: role
        ? `Hello ${friendlyName}! You are now registered for ${role}. Ready for work!`
        : `Hello ${friendlyName}! You are ready to take any task from the pool.`
    });
  } catch (error: any) {
    console.error('[WorkerRegistry] Registration error:', error);
    res.status(500).json({ success: false, error: error.message });
  }
});
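Registration sketch (role and pod name hypothetical):

const reg = await (await fetch('/api/worker-registry/register', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({ role: 'product_refresh', pod_name: 'worker-abc123' }),
})).json();
// reg.friendly_name is drawn from the name pool; keep reg.worker_id for heartbeats.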
/**
 * POST /api/worker-registry/heartbeat
 * Worker sends heartbeat to stay alive
 *
 * Body:
 * - worker_id: string (required)
 * - current_task_id: number (optional) - task currently being processed (primary task)
 * - current_task_ids: number[] (optional) - all tasks currently being processed (concurrent)
 * - active_task_count: number (optional) - number of tasks currently running
 * - max_concurrent_tasks: number (optional) - max concurrent tasks this worker can handle
 * - status: string (optional) - 'active', 'idle'
 * - resources: object (optional) - memory_mb, cpu_user_ms, cpu_system_ms, etc.
 */
router.post('/heartbeat', async (req: Request, res: Response) => {
  try {
    const {
      worker_id,
      current_task_id,
      current_task_ids,
      active_task_count,
      max_concurrent_tasks,
      status = 'active',
      resources
    } = req.body;

    if (!worker_id) {
      return res.status(400).json({ success: false, error: 'worker_id is required' });
    }

    // Build metadata object with all the new fields
    const metadata: Record<string, unknown> = {};
    if (resources) Object.assign(metadata, resources);
    if (current_task_ids) metadata.current_task_ids = current_task_ids;
    if (active_task_count !== undefined) metadata.active_task_count = active_task_count;
    if (max_concurrent_tasks !== undefined) metadata.max_concurrent_tasks = max_concurrent_tasks;

    // Store resources in metadata jsonb column
    const { rows } = await pool.query(`
      UPDATE worker_registry
      SET last_heartbeat_at = NOW(),
          current_task_id = $1,
          status = $2,
          metadata = COALESCE(metadata, '{}'::jsonb) || COALESCE($4::jsonb, '{}'::jsonb),
          updated_at = NOW()
      WHERE worker_id = $3
      RETURNING id, friendly_name, status
    `, [current_task_id || null, status, worker_id, Object.keys(metadata).length > 0 ? JSON.stringify(metadata) : null]);

    if (rows.length === 0) {
      return res.status(404).json({ success: false, error: 'Worker not found - please register first' });
    }

    res.json({
      success: true,
      worker: rows[0]
    });
  } catch (error: any) {
    console.error('[WorkerRegistry] Heartbeat error:', error);
    res.status(500).json({ success: false, error: error.message });
  }
});
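Heartbeat sketch (interval and resource fields hypothetical; whatever is sent in resources is merged into the metadata jsonb column):

setInterval(() => {
  fetch('/api/worker-registry/heartbeat', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      worker_id: reg.worker_id,
      active_task_count: 2,
      max_concurrent_tasks: 4,
      resources: { memory_mb: 512 },
    }),
  }).catch(() => { /* a few missed heartbeats and the worker reads as stale */ });
}, 30_000);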
/**
|
||||||
|
* POST /api/worker-registry/task-completed
|
||||||
|
* Worker reports task completion
|
||||||
|
*
|
||||||
|
* Body:
|
||||||
|
* - worker_id: string (required)
|
||||||
|
* - success: boolean (required)
|
||||||
|
*/
|
||||||
|
router.post('/task-completed', async (req: Request, res: Response) => {
|
||||||
|
try {
|
||||||
|
const { worker_id, success } = req.body;
|
||||||
|
|
||||||
|
if (!worker_id) {
|
||||||
|
return res.status(400).json({ success: false, error: 'worker_id is required' });
|
||||||
|
}
|
||||||
|
|
||||||
|
const incrementField = success ? 'tasks_completed' : 'tasks_failed';
|
||||||
|
|
||||||
|
const { rows } = await pool.query(`
|
||||||
|
UPDATE worker_registry
|
||||||
|
SET ${incrementField} = ${incrementField} + 1,
|
||||||
|
last_task_at = NOW(),
|
||||||
|
current_task_id = NULL,
|
||||||
|
status = 'idle',
|
||||||
|
updated_at = NOW()
|
||||||
|
WHERE worker_id = $1
|
||||||
|
RETURNING id, friendly_name, tasks_completed, tasks_failed
|
||||||
|
`, [worker_id]);
|
||||||
|
|
||||||
|
if (rows.length === 0) {
|
||||||
|
return res.status(404).json({ success: false, error: 'Worker not found' });
|
||||||
|
}
|
||||||
|
|
||||||
|
res.json({ success: true, worker: rows[0] });
|
||||||
|
} catch (error: any) {
|
||||||
|
res.status(500).json({ success: false, error: error.message });
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
/**
|
||||||
|
* POST /api/worker-registry/deregister
|
||||||
|
* Worker signing off (graceful shutdown)
|
||||||
|
*
|
||||||
|
* Body:
|
||||||
|
* - worker_id: string (required)
|
||||||
|
*/
|
||||||
|
router.post('/deregister', async (req: Request, res: Response) => {
|
||||||
|
try {
|
||||||
|
const { worker_id } = req.body;
|
||||||
|
|
||||||
|
if (!worker_id) {
|
||||||
|
return res.status(400).json({ success: false, error: 'worker_id is required' });
|
||||||
|
}
|
||||||
|
|
||||||
|
// Release the name back to the pool
|
||||||
|
await pool.query('SELECT release_worker_name($1)', [worker_id]);
|
||||||
|
|
||||||
|
// Mark as terminated
|
||||||
|
const { rows } = await pool.query(`
|
||||||
|
UPDATE worker_registry
|
||||||
|
SET status = 'terminated',
|
||||||
|
current_task_id = NULL,
|
||||||
|
updated_at = NOW()
|
||||||
|
WHERE worker_id = $1
|
||||||
|
RETURNING id, friendly_name
|
||||||
|
`, [worker_id]);
|
||||||
|
|
||||||
|
if (rows.length === 0) {
|
||||||
|
return res.status(404).json({ success: false, error: 'Worker not found' });
|
||||||
|
}
|
||||||
|
|
||||||
|
console.log(`[WorkerRegistry] Worker "${rows[0].friendly_name}" (${worker_id}) signed off`);
|
||||||
|
|
||||||
|
res.json({
|
||||||
|
success: true,
|
||||||
|
message: `Goodbye ${rows[0].friendly_name}! Thanks for your work.`
|
||||||
|
});
|
||||||
|
} catch (error: any) {
|
||||||
|
console.error('[WorkerRegistry] Deregister error:', error);
|
||||||
|
res.status(500).json({ success: false, error: error.message });
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
// ============================================================
// WORKER LISTING (for Dashboard)
// ============================================================

/**
 * GET /api/worker-registry/workers
 * List all workers (for dashboard)
 *
 * Query params:
 * - status: filter by status (active, idle, offline, all)
 * - role: filter by role
 * - include_terminated: include terminated workers (default: false)
 */
router.get('/workers', async (req: Request, res: Response) => {
  try {
    // Check if worker_registry table exists
    const tableCheck = await pool.query(`
      SELECT EXISTS (
        SELECT FROM information_schema.tables
        WHERE table_name = 'worker_registry'
      ) as exists
    `);

    if (!tableCheck.rows[0].exists) {
      // Return empty result if table doesn't exist yet
      return res.json({
        success: true,
        workers: [],
        summary: {
          active_count: 0,
          idle_count: 0,
          offline_count: 0,
          total_count: 0,
          active_roles: 0
        }
      });
    }

    const { status, role, include_terminated = 'false' } = req.query;

    let whereClause = include_terminated === 'true' ? 'WHERE 1=1' : "WHERE status != 'terminated'";
    const params: any[] = [];
    let paramIndex = 1;

    if (status && status !== 'all') {
      whereClause += ` AND status = $${paramIndex}`;
      params.push(status);
      paramIndex++;
    }

    if (role) {
      whereClause += ` AND role = $${paramIndex}`;
      params.push(role);
      paramIndex++;
    }

    const { rows } = await pool.query(`
      SELECT
        id,
        worker_id,
        friendly_name,
        role,
        pod_name,
        hostname,
        ip_address,
        status,
        started_at,
        last_heartbeat_at,
        last_task_at,
        tasks_completed,
        tasks_failed,
        current_task_id,
        -- Concurrent task fields from metadata
        (metadata->>'current_task_ids')::jsonb as current_task_ids,
        (metadata->>'active_task_count')::int as active_task_count,
        (metadata->>'max_concurrent_tasks')::int as max_concurrent_tasks,
        -- Decommission fields
        COALESCE(decommission_requested, false) as decommission_requested,
        decommission_reason,
        -- Full metadata for resources
        metadata,
        EXTRACT(EPOCH FROM (NOW() - last_heartbeat_at)) as seconds_since_heartbeat,
        CASE
          WHEN status = 'offline' OR status = 'terminated' THEN status
          WHEN last_heartbeat_at < NOW() - INTERVAL '2 minutes' THEN 'stale'
          WHEN current_task_id IS NOT NULL THEN 'busy'
          WHEN (metadata->>'active_task_count')::int > 0 THEN 'busy'
          ELSE 'ready'
        END as health_status,
        created_at
      FROM worker_registry
      ${whereClause}
      ORDER BY
        CASE status
          WHEN 'active' THEN 1
          WHEN 'idle' THEN 2
          WHEN 'offline' THEN 3
          ELSE 4
        END,
        last_heartbeat_at DESC
    `, params);

    // Get summary counts
    const { rows: summary } = await pool.query(`
      SELECT
        COUNT(*) FILTER (WHERE status = 'active') as active_count,
        COUNT(*) FILTER (WHERE status = 'idle') as idle_count,
        COUNT(*) FILTER (WHERE status = 'offline') as offline_count,
        COUNT(*) FILTER (WHERE status != 'terminated') as total_count,
        COUNT(DISTINCT role) FILTER (WHERE status IN ('active', 'idle')) as active_roles
      FROM worker_registry
    `);

    res.json({
      success: true,
      workers: rows,
      summary: summary[0]
    });
  } catch (error: any) {
    console.error('[WorkerRegistry] List workers error:', error);
    res.status(500).json({ success: false, error: error.message });
  }
});
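
// Example call against the route above (illustrative sketch; host/port and the
// '/api/worker-registry' mount point are assumptions, not shown in this diff):
//
//   const resp = await fetch(
//     'http://localhost:3000/api/worker-registry/workers?status=active&role=product_refresh'
//   );
//   const { workers, summary } = await resp.json();
//   console.log(`${summary.active_count} active`, workers.map((w: any) => w.friendly_name));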

/**
 * GET /api/worker-registry/workers/:workerId
 * Get specific worker details
 */
router.get('/workers/:workerId', async (req: Request, res: Response) => {
  try {
    const { workerId } = req.params;

    const { rows } = await pool.query(`
      SELECT * FROM worker_registry WHERE worker_id = $1
    `, [workerId]);

    if (rows.length === 0) {
      return res.status(404).json({ success: false, error: 'Worker not found' });
    }

    res.json({ success: true, worker: rows[0] });
  } catch (error: any) {
    res.status(500).json({ success: false, error: error.message });
  }
});

/**
 * DELETE /api/worker-registry/workers/:workerId
 * Remove a worker (admin action)
 */
router.delete('/workers/:workerId', async (req: Request, res: Response) => {
  try {
    const { workerId } = req.params;

    // Release name
    await pool.query('SELECT release_worker_name($1)', [workerId]);

    // Delete worker
    const { rows } = await pool.query(`
      DELETE FROM worker_registry WHERE worker_id = $1 RETURNING friendly_name
    `, [workerId]);

    if (rows.length === 0) {
      return res.status(404).json({ success: false, error: 'Worker not found' });
    }

    res.json({ success: true, message: `Worker ${rows[0].friendly_name} removed` });
  } catch (error: any) {
    res.status(500).json({ success: false, error: error.message });
  }
});

/**
 * POST /api/worker-registry/cleanup
 * Mark stale workers as offline
 *
 * Body:
 * - stale_threshold_minutes: number (default: 5)
 */
router.post('/cleanup', async (req: Request, res: Response) => {
  try {
    const { stale_threshold_minutes = 5 } = req.body;

    const { rows } = await pool.query(
      'SELECT mark_stale_workers($1) as count',
      [stale_threshold_minutes]
    );

    res.json({
      success: true,
      stale_workers_marked: rows[0].count,
      message: `Marked ${rows[0].count} stale workers as offline`
    });
  } catch (error: any) {
    res.status(500).json({ success: false, error: error.message });
  }
});
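
// Sketch (assumption): mark_stale_workers() is a SQL function defined in a
// migration that is not part of this diff. Its likely shape, expressed as an
// equivalent inline query — flip workers with an old heartbeat to 'offline'
// and report how many rows changed:
//
//   const result = await pool.query(`
//     UPDATE worker_registry
//     SET status = 'offline'
//     WHERE status NOT IN ('offline', 'terminated')
//       AND last_heartbeat_at < NOW() - ($1 || ' minutes')::interval
//   `, [stale_threshold_minutes]);
//   // result.rowCount stands in for the function's return value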

// ============================================================
// NAME POOL MANAGEMENT
// ============================================================

/**
 * GET /api/worker-registry/names
 * List all names in the pool
 */
router.get('/names', async (_req: Request, res: Response) => {
  try {
    const { rows } = await pool.query(`
      SELECT
        id,
        name,
        in_use,
        assigned_to,
        assigned_at
      FROM worker_name_pool
      ORDER BY in_use DESC, name ASC
    `);

    const { rows: summary } = await pool.query(`
      SELECT
        COUNT(*) as total,
        COUNT(*) FILTER (WHERE in_use = true) as in_use,
        COUNT(*) FILTER (WHERE in_use = false) as available
      FROM worker_name_pool
    `);

    res.json({
      success: true,
      names: rows,
      summary: summary[0]
    });
  } catch (error: any) {
    res.status(500).json({ success: false, error: error.message });
  }
});

/**
 * POST /api/worker-registry/names
 * Add names to the pool
 *
 * Body:
 * - names: string[] (required) - array of names to add
 */
router.post('/names', async (req: Request, res: Response) => {
  try {
    const { names } = req.body;

    if (!names || !Array.isArray(names) || names.length === 0) {
      return res.status(400).json({ success: false, error: 'names array is required' });
    }

    const values = names.map(n => `('${n.replace(/'/g, "''")}')`).join(', ');

    const { rowCount } = await pool.query(`
      INSERT INTO worker_name_pool (name)
      VALUES ${values}
      ON CONFLICT (name) DO NOTHING
    `);

    res.json({
      success: true,
      added: rowCount,
      message: `Added ${rowCount} new names to the pool`
    });
  } catch (error: any) {
    res.status(500).json({ success: false, error: error.message });
  }
});
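
// Note: the VALUES list above is built by hand-escaping single quotes. A
// parameterized variant (sketch, not part of the original diff) avoids string
// interpolation entirely by passing the array as one text[] parameter:
//
//   const { rowCount } = await pool.query(`
//     INSERT INTO worker_name_pool (name)
//     SELECT unnest($1::text[])
//     ON CONFLICT (name) DO NOTHING
//   `, [names]);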

/**
 * DELETE /api/worker-registry/names/:name
 * Remove a name from the pool (only if not in use)
 */
router.delete('/names/:name', async (req: Request, res: Response) => {
  try {
    const { name } = req.params;

    const { rows } = await pool.query(`
      DELETE FROM worker_name_pool
      WHERE name = $1 AND in_use = false
      RETURNING name
    `, [name]);

    if (rows.length === 0) {
      return res.status(400).json({
        success: false,
        error: 'Name not found or currently in use'
      });
    }

    res.json({ success: true, message: `Name "${name}" removed from pool` });
  } catch (error: any) {
    res.status(500).json({ success: false, error: error.message });
  }
});

// ============================================================
// ROLE MANAGEMENT
// ============================================================

/**
 * GET /api/worker-registry/roles
 * List available task roles
 */
router.get('/roles', async (_req: Request, res: Response) => {
  // These are the roles the task handlers support
  const roles = [
    {
      id: 'product_refresh',
      name: 'Product Refresh',
      description: 'Re-crawl dispensary products for price/stock changes',
      handler: 'handleProductRefresh'
    },
    {
      id: 'product_discovery',
      name: 'Product Discovery',
      description: 'Initial product discovery for new dispensaries',
      handler: 'handleProductDiscovery'
    },
    {
      id: 'store_discovery',
      name: 'Store Discovery',
      description: 'Discover new dispensary locations',
      handler: 'handleStoreDiscovery'
    },
    {
      id: 'entry_point_discovery',
      name: 'Entry Point Discovery',
      description: 'Resolve platform IDs from menu URLs',
      handler: 'handleEntryPointDiscovery'
    },
    {
      id: 'analytics_refresh',
      name: 'Analytics Refresh',
      description: 'Refresh materialized views and analytics',
      handler: 'handleAnalyticsRefresh'
    }
  ];

  // Get active worker counts per role
  try {
    const { rows } = await pool.query(`
      SELECT role, COUNT(*) as worker_count
      FROM worker_registry
      WHERE status IN ('active', 'idle')
      GROUP BY role
    `);

    const countMap = new Map(rows.map(r => [r.role, parseInt(r.worker_count)]));

    const rolesWithCounts = roles.map(r => ({
      ...r,
      active_workers: countMap.get(r.id) || 0
    }));

    res.json({ success: true, roles: rolesWithCounts });
  } catch {
    // If table doesn't exist yet, just return roles without counts
    res.json({ success: true, roles: roles.map(r => ({ ...r, active_workers: 0 })) });
  }
});

/**
 * GET /api/worker-registry/capacity
 * Get capacity planning info
 */
router.get('/capacity', async (_req: Request, res: Response) => {
  try {
    // Get worker counts by role
    const { rows: workerCounts } = await pool.query(`
      SELECT role, COUNT(*) as count
      FROM worker_registry
      WHERE status IN ('active', 'idle')
      GROUP BY role
    `);

    // Get pending task counts by role (if worker_tasks exists)
    let taskCounts: any[] = [];
    try {
      const result = await pool.query(`
        SELECT role, COUNT(*) as pending_count
        FROM worker_tasks
        WHERE status = 'pending'
        GROUP BY role
      `);
      taskCounts = result.rows;
    } catch {
      // worker_tasks might not exist yet
    }

    // Get crawl-enabled store count
    const storeCountResult = await pool.query(`
      SELECT COUNT(*) as count
      FROM dispensaries
      WHERE crawl_enabled = true AND platform_dispensary_id IS NOT NULL
    `);
    const totalStores = parseInt(storeCountResult.rows[0].count);

    const workerMap = new Map(workerCounts.map(r => [r.role, parseInt(r.count)]));
    const taskMap = new Map(taskCounts.map(r => [r.role, parseInt(r.pending_count)]));

    const roles = ['product_refresh', 'product_discovery', 'store_discovery', 'entry_point_discovery', 'analytics_refresh'];

    const capacity = roles.map(role => ({
      role,
      active_workers: workerMap.get(role) || 0,
      pending_tasks: taskMap.get(role) || 0,
      // Rough estimate: 20 seconds per task, 4-hour cycle
      tasks_per_worker_per_cycle: 720,
      workers_needed_for_all_stores: Math.ceil(totalStores / 720)
    }));

    res.json({
      success: true,
      total_stores: totalStores,
      capacity
    });
  } catch (error: any) {
    res.status(500).json({ success: false, error: error.message });
  }
});
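
// Worked version of the estimate above (sketch; the constants restate the
// comment's assumptions, they are not read from config):
//
//   const CYCLE_SECONDS = 4 * 60 * 60;                      // 14,400
//   const SECONDS_PER_TASK = 20;
//   const tasksPerCycle = CYCLE_SECONDS / SECONDS_PER_TASK; // 720
//   Math.ceil(3000 / tasksPerCycle);                        // e.g. 3,000 stores -> 5 workers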

// ============================================================
// WORKER LIFECYCLE MANAGEMENT
// ============================================================

/**
 * POST /api/worker-registry/workers/:workerId/decommission
 * Request graceful decommission of a worker (will stop after current task)
 */
router.post('/workers/:workerId/decommission', async (req: Request, res: Response) => {
  try {
    const { workerId } = req.params;
    const { reason, issued_by } = req.body;

    // Update worker_registry to flag for decommission
    const result = await pool.query(
      `UPDATE worker_registry
       SET decommission_requested = true,
           decommission_reason = $2,
           decommission_requested_at = NOW()
       WHERE worker_id = $1
       RETURNING friendly_name, status, current_task_id`,
      [workerId, reason || 'Manual decommission from admin']
    );

    if (result.rows.length === 0) {
      return res.status(404).json({ success: false, error: 'Worker not found' });
    }

    const worker = result.rows[0];

    // Also log to worker_commands for audit trail
    await pool.query(
      `INSERT INTO worker_commands (worker_id, command, reason, issued_by)
       VALUES ($1, 'decommission', $2, $3)
       ON CONFLICT DO NOTHING`,
      [workerId, reason || 'Manual decommission', issued_by || 'admin']
    ).catch(() => {
      // Table might not exist yet - ignore
    });

    res.json({
      success: true,
      message: worker.current_task_id
        ? `Worker ${worker.friendly_name} will stop after completing task #${worker.current_task_id}`
        : `Worker ${worker.friendly_name} will stop on next poll`,
      worker: {
        friendly_name: worker.friendly_name,
        status: worker.status,
        current_task_id: worker.current_task_id,
        decommission_requested: true
      }
    });
  } catch (error: any) {
    res.status(500).json({ success: false, error: error.message });
  }
});
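
// Sketch (assumption): how a worker's poll loop might honor the flag set above.
// The real worker code is not part of this diff; only worker_registry and its
// decommission_requested column are taken from it.
//
//   async function shouldShutDown(workerId: string): Promise<boolean> {
//     const { rows } = await pool.query(
//       'SELECT decommission_requested FROM worker_registry WHERE worker_id = $1',
//       [workerId]
//     );
//     return rows[0]?.decommission_requested === true;
//   }
//   // In the loop: finish the current task, check shouldShutDown(), and exit
//   // cleanly instead of claiming another task when it returns true.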

/**
 * POST /api/worker-registry/workers/:workerId/cancel-decommission
 * Cancel a pending decommission request
 */
router.post('/workers/:workerId/cancel-decommission', async (req: Request, res: Response) => {
  try {
    const { workerId } = req.params;

    const result = await pool.query(
      `UPDATE worker_registry
       SET decommission_requested = false,
           decommission_reason = NULL,
           decommission_requested_at = NULL
       WHERE worker_id = $1
       RETURNING friendly_name`,
      [workerId]
    );

    if (result.rows.length === 0) {
      return res.status(404).json({ success: false, error: 'Worker not found' });
    }

    res.json({
      success: true,
      message: `Decommission cancelled for ${result.rows[0].friendly_name}`
    });
  } catch (error: any) {
    res.status(500).json({ success: false, error: error.message });
  }
});

/**
 * POST /api/worker-registry/spawn
 * Spawn a new worker in the current pod (only works in multi-worker-per-pod mode)
 * For now, this is a placeholder - actual spawning requires the pod supervisor
 */
router.post('/spawn', async (req: Request, res: Response) => {
  try {
    const { pod_name, role } = req.body;

    // For now, we can't actually spawn workers from the API
    // This would require a supervisor process in each pod that listens for spawn commands
    // Instead, return instructions for how to scale
    res.json({
      success: false,
      error: 'Direct worker spawning not yet implemented',
      instructions: 'To add workers, scale the K8s deployment: kubectl scale deployment/scraper-worker --replicas=N'
    });
  } catch (error: any) {
    res.status(500).json({ success: false, error: error.message });
  }
});

/**
 * GET /api/worker-registry/pods
 * Get workers grouped by pod
 */
router.get('/pods', async (_req: Request, res: Response) => {
  try {
    const { rows } = await pool.query(`
      SELECT
        COALESCE(pod_name, 'Unknown') as pod_name,
        COUNT(*) as worker_count,
        COUNT(*) FILTER (WHERE current_task_id IS NOT NULL) as busy_count,
        COUNT(*) FILTER (WHERE current_task_id IS NULL) as idle_count,
        SUM(tasks_completed) as total_completed,
        SUM(tasks_failed) as total_failed,
        SUM((metadata->>'memory_rss_mb')::int) as total_memory_mb,
        array_agg(json_build_object(
          'worker_id', worker_id,
          'friendly_name', friendly_name,
          'status', status,
          'current_task_id', current_task_id,
          'tasks_completed', tasks_completed,
          'tasks_failed', tasks_failed,
          'decommission_requested', COALESCE(decommission_requested, false),
          'last_heartbeat_at', last_heartbeat_at
        )) as workers
      FROM worker_registry
      WHERE status NOT IN ('offline', 'terminated')
      GROUP BY pod_name
      ORDER BY pod_name
    `);

    res.json({
      success: true,
      pods: rows.map(row => ({
        pod_name: row.pod_name,
        worker_count: parseInt(row.worker_count),
        busy_count: parseInt(row.busy_count),
        idle_count: parseInt(row.idle_count),
        total_completed: parseInt(row.total_completed) || 0,
        total_failed: parseInt(row.total_failed) || 0,
        total_memory_mb: parseInt(row.total_memory_mb) || 0,
        workers: row.workers
      }))
    });
  } catch (error: any) {
    res.status(500).json({ success: false, error: error.message });
  }
});

export default router;
@@ -17,13 +17,234 @@
  * GET /api/monitor/jobs - Get recent job history
  * GET /api/monitor/active-jobs - Get currently running jobs
  * GET /api/monitor/summary - Get monitoring summary
+ *
+ * K8s Scaling (added 2024-12-10):
+ * GET /api/workers/k8s/replicas - Get current replica count
+ * POST /api/workers/k8s/scale - Scale worker replicas up/down
  */

 import { Router, Request, Response } from 'express';
 import { pool } from '../db/pool';
+import * as k8s from '@kubernetes/client-node';

 const router = Router();
+
+// ============================================================
+// K8S SCALING CONFIGURATION (added 2024-12-10)
+// Per TASK_WORKFLOW_2024-12-10.md: Admin can scale workers from UI
+// ============================================================
+
+const K8S_NAMESPACE = process.env.K8S_NAMESPACE || 'dispensary-scraper';
+const K8S_DEPLOYMENT_NAME = process.env.K8S_WORKER_DEPLOYMENT || 'scraper-worker';
+
+// Initialize K8s client - uses in-cluster config when running in K8s,
+// or kubeconfig when running locally
+let k8sAppsApi: k8s.AppsV1Api | null = null;
+
+function getK8sClient(): k8s.AppsV1Api | null {
+  if (k8sAppsApi) return k8sAppsApi;
+
+  try {
+    const kc = new k8s.KubeConfig();
+
+    // Try in-cluster config first (when running as a pod)
+    // Falls back to default kubeconfig (~/.kube/config) for local dev
+    try {
+      kc.loadFromCluster();
+    } catch {
+      kc.loadFromDefault();
+    }
+
+    k8sAppsApi = kc.makeApiClient(k8s.AppsV1Api);
+    return k8sAppsApi;
+  } catch (err: any) {
+    console.warn('[Workers] K8s client not available:', err.message);
+    return null;
+  }
+}
+
+// ============================================================
+// K8S SCALING ROUTES (added 2024-12-10)
+// Per TASK_WORKFLOW_2024-12-10.md: Admin can scale workers from UI
+// ============================================================
+
+/**
+ * GET /api/workers/k8s/replicas - Get current worker replica count
+ * Returns current and desired replica counts from the Deployment
+ */
+router.get('/k8s/replicas', async (_req: Request, res: Response) => {
+  const client = getK8sClient();
+
+  if (!client) {
+    return res.status(503).json({
+      success: false,
+      error: 'K8s client not available (not running in cluster or no kubeconfig)',
+      replicas: null,
+    });
+  }
+
+  try {
+    const response = await client.readNamespacedDeployment({
+      name: K8S_DEPLOYMENT_NAME,
+      namespace: K8S_NAMESPACE,
+    });
+
+    const deployment = response;
+    res.json({
+      success: true,
+      replicas: {
+        current: deployment.status?.readyReplicas || 0,
+        desired: deployment.spec?.replicas || 0,
+        available: deployment.status?.availableReplicas || 0,
+        updated: deployment.status?.updatedReplicas || 0,
+      },
+      deployment: K8S_DEPLOYMENT_NAME,
+      namespace: K8S_NAMESPACE,
+    });
+  } catch (err: any) {
+    console.error('[Workers] K8s replicas error:', err.body?.message || err.message);
+    res.status(500).json({
+      success: false,
+      error: err.body?.message || err.message,
+    });
+  }
+});
+
+/**
+ * POST /api/workers/k8s/scale - Scale worker replicas
+ * Body: { replicas: number } - desired replica count (0-20)
+ */
+router.post('/k8s/scale', async (req: Request, res: Response) => {
+  const client = getK8sClient();
+
+  if (!client) {
+    return res.status(503).json({
+      success: false,
+      error: 'K8s client not available (not running in cluster or no kubeconfig)',
+    });
+  }
+
+  const { replicas } = req.body;
+
+  // Validate replica count
+  if (typeof replicas !== 'number' || replicas < 0 || replicas > 20) {
+    return res.status(400).json({
+      success: false,
+      error: 'replicas must be a number between 0 and 20',
+    });
+  }
+
+  try {
+    // Get current state first
+    const currentResponse = await client.readNamespacedDeploymentScale({
+      name: K8S_DEPLOYMENT_NAME,
+      namespace: K8S_NAMESPACE,
+    });
+    const currentReplicas = currentResponse.spec?.replicas || 0;
+
+    // Update scale using replaceNamespacedDeploymentScale
+    await client.replaceNamespacedDeploymentScale({
+      name: K8S_DEPLOYMENT_NAME,
+      namespace: K8S_NAMESPACE,
+      body: {
+        apiVersion: 'autoscaling/v1',
+        kind: 'Scale',
+        metadata: {
+          name: K8S_DEPLOYMENT_NAME,
+          namespace: K8S_NAMESPACE,
+        },
+        spec: {
+          replicas: replicas,
+        },
+      },
+    });
+
+    console.log(`[Workers] Scaled ${K8S_DEPLOYMENT_NAME} from ${currentReplicas} to ${replicas} replicas`);
+
+    res.json({
+      success: true,
+      message: `Scaled from ${currentReplicas} to ${replicas} replicas`,
+      previous: currentReplicas,
+      desired: replicas,
+      deployment: K8S_DEPLOYMENT_NAME,
+      namespace: K8S_NAMESPACE,
+    });
+  } catch (err: any) {
+    console.error('[Workers] K8s scale error:', err.body?.message || err.message);
+    res.status(500).json({
+      success: false,
+      error: err.body?.message || err.message,
+    });
+  }
+});
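
// Example call against the route above (illustrative sketch; host/port and the
// '/api/workers' mount point are assumptions, not shown in this diff):
//
//   await fetch('http://localhost:3000/api/workers/k8s/scale', {
//     method: 'POST',
//     headers: { 'Content-Type': 'application/json' },
//     body: JSON.stringify({ replicas: 5 }),
//   });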
+
+/**
+ * POST /api/workers/k8s/scale-up - Scale up worker replicas by 1
+ * Convenience endpoint for adding a single worker
+ */
+router.post('/k8s/scale-up', async (_req: Request, res: Response) => {
+  const client = getK8sClient();
+
+  if (!client) {
+    return res.status(503).json({
+      success: false,
+      error: 'K8s client not available (not running in cluster or no kubeconfig)',
+    });
+  }
+
+  try {
+    // Get current replica count
+    const currentResponse = await client.readNamespacedDeploymentScale({
+      name: K8S_DEPLOYMENT_NAME,
+      namespace: K8S_NAMESPACE,
+    });
+    const currentReplicas = currentResponse.spec?.replicas || 0;
+    const newReplicas = currentReplicas + 1;
+
+    // Cap at 20 replicas
+    if (newReplicas > 20) {
+      return res.status(400).json({
+        success: false,
+        error: 'Maximum replica count (20) reached',
+      });
+    }
+
+    // Scale up by 1
+    await client.replaceNamespacedDeploymentScale({
+      name: K8S_DEPLOYMENT_NAME,
+      namespace: K8S_NAMESPACE,
+      body: {
+        apiVersion: 'autoscaling/v1',
+        kind: 'Scale',
+        metadata: {
+          name: K8S_DEPLOYMENT_NAME,
+          namespace: K8S_NAMESPACE,
+        },
+        spec: {
+          replicas: newReplicas,
+        },
+      },
+    });
+
+    console.log(`[Workers] Scaled up ${K8S_DEPLOYMENT_NAME} from ${currentReplicas} to ${newReplicas} replicas`);
+
+    res.json({
+      success: true,
+      message: `Added worker (${currentReplicas} → ${newReplicas} replicas)`,
+      previous: currentReplicas,
+      desired: newReplicas,
+      deployment: K8S_DEPLOYMENT_NAME,
+      namespace: K8S_NAMESPACE,
+    });
+  } catch (err: any) {
+    console.error('[Workers] K8s scale-up error:', err.body?.message || err.message);
+    res.status(500).json({
+      success: false,
+      error: err.body?.message || err.message,
+    });
+  }
+});
+
 // ============================================================
 // STATIC ROUTES (must come before parameterized routes)
 // ============================================================

backend/src/scripts/crawl-single-store.ts (new file, 259 lines)
@@ -0,0 +1,259 @@
#!/usr/bin/env npx tsx
/**
 * Crawl Single Store - Verbose test showing each step
 *
 * Usage:
 *   DATABASE_URL="postgresql://dutchie:dutchie_local_pass@localhost:54320/dutchie_menus" \
 *   npx tsx src/scripts/crawl-single-store.ts <dispensaryId>
 *
 * Example:
 *   DATABASE_URL="..." npx tsx src/scripts/crawl-single-store.ts 112
 */

import { Pool } from 'pg';
import dotenv from 'dotenv';
import {
  executeGraphQL,
  startSession,
  endSession,
  setCrawlRotator,
  GRAPHQL_HASHES,
  DUTCHIE_CONFIG,
} from '../platforms/dutchie';
import { CrawlRotator } from '../services/crawl-rotator';

dotenv.config();

// ============================================================
// DATABASE CONNECTION
// ============================================================

function getConnectionString(): string {
  if (process.env.DATABASE_URL) {
    return process.env.DATABASE_URL;
  }
  if (process.env.CANNAIQ_DB_URL) {
    return process.env.CANNAIQ_DB_URL;
  }
  const host = process.env.CANNAIQ_DB_HOST || 'localhost';
  const port = process.env.CANNAIQ_DB_PORT || '54320';
  const name = process.env.CANNAIQ_DB_NAME || 'dutchie_menus';
  const user = process.env.CANNAIQ_DB_USER || 'dutchie';
  const pass = process.env.CANNAIQ_DB_PASS || 'dutchie_local_pass';
  return `postgresql://${user}:${pass}@${host}:${port}/${name}`;
}

const pool = new Pool({ connectionString: getConnectionString() });

// ============================================================
// MAIN
// ============================================================

async function main() {
  const dispensaryId = parseInt(process.argv[2], 10);

  if (!dispensaryId) {
    console.error('Usage: npx tsx src/scripts/crawl-single-store.ts <dispensaryId>');
    console.error('Example: npx tsx src/scripts/crawl-single-store.ts 112');
    process.exit(1);
  }

  console.log('');
  console.log('╔════════════════════════════════════════════════════════════╗');
  console.log('║            SINGLE STORE CRAWL - VERBOSE OUTPUT             ║');
  console.log('╚════════════════════════════════════════════════════════════╝');
  console.log('');

  try {
    // ============================================================
    // STEP 1: Get dispensary info from database
    // ============================================================
    console.log('┌─────────────────────────────────────────────────────────────┐');
    console.log('│ STEP 1: Load Dispensary Info from Database                  │');
    console.log('└─────────────────────────────────────────────────────────────┘');

    const dispResult = await pool.query(`
      SELECT
        id,
        name,
        platform_dispensary_id,
        menu_url,
        menu_type,
        city,
        state
      FROM dispensaries
      WHERE id = $1
    `, [dispensaryId]);

    if (dispResult.rows.length === 0) {
      throw new Error(`Dispensary ${dispensaryId} not found`);
    }

    const disp = dispResult.rows[0];
    console.log(`  Dispensary ID: ${disp.id}`);
    console.log(`  Name:          ${disp.name}`);
    console.log(`  City, State:   ${disp.city}, ${disp.state}`);
    console.log(`  Menu Type:     ${disp.menu_type}`);
    console.log(`  Platform ID:   ${disp.platform_dispensary_id}`);
    console.log(`  Menu URL:      ${disp.menu_url}`);

    if (!disp.platform_dispensary_id) {
      throw new Error('Dispensary does not have a platform_dispensary_id - cannot crawl');
    }

    // Extract cName from menu_url
    const cNameMatch = disp.menu_url?.match(/\/(?:embedded-menu|dispensary)\/([^/?]+)/);
    const cName = cNameMatch ? cNameMatch[1] : 'dispensary';
    console.log(`  cName (derived): ${cName}`);
    console.log('');
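
    // What the cName regex above extracts (illustrative URLs, not from this diff):
    //   'https://dutchie.com/embedded-menu/harvest-of-tempe' -> 'harvest-of-tempe'
    //   '/dispensary/zen-leaf-mesa?tab=flower'               -> 'zen-leaf-mesa'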

    // ============================================================
    // STEP 2: Start stealth session
    // Per workflow-12102025.md: Initialize CrawlRotator and start session with menuUrl
    // ============================================================
    console.log('┌─────────────────────────────────────────────────────────────┐');
    console.log('│ STEP 2: Start Stealth Session                               │');
    console.log('└─────────────────────────────────────────────────────────────┘');

    // Per workflow-12102025.md: Initialize CrawlRotator (required for sessions)
    const rotator = new CrawlRotator();
    setCrawlRotator(rotator);

    // Per workflow-12102025.md: startSession takes menuUrl for dynamic Referer
    const session = startSession(disp.menu_url);

    const fp = session.fingerprint;
    console.log(`  Session ID:      ${session.sessionId}`);
    console.log(`  Browser:         ${fp.browserName} (${fp.deviceCategory})`);
    console.log(`  User-Agent:      ${fp.userAgent.slice(0, 60)}...`);
    console.log(`  Accept-Language: ${fp.acceptLanguage}`);
    console.log(`  Referer:         ${session.referer}`);
    console.log(`  DNT:             ${fp.httpFingerprint.hasDNT ? 'enabled' : 'disabled'}`);
    console.log(`  TLS:             ${fp.httpFingerprint.curlImpersonateBinary}`);
    console.log('');

    // ============================================================
    // STEP 3: Execute GraphQL query
    // ============================================================
    console.log('┌─────────────────────────────────────────────────────────────┐');
    console.log('│ STEP 3: Execute GraphQL Query (FilteredProducts)            │');
    console.log('└─────────────────────────────────────────────────────────────┘');

    const variables = {
      includeEnterpriseSpecials: false,
      productsFilter: {
        dispensaryId: disp.platform_dispensary_id,
        pricingType: 'rec',
        Status: 'Active',
        types: [],
        useCache: true,
        isDefaultSort: true,
        sortBy: 'popularSortIdx',
        sortDirection: 1,
        bypassOnlineThresholds: true,
        isKioskMenu: false,
        removeProductsBelowOptionThresholds: false,
      },
      page: 0,
      perPage: 100,
    };

    console.log(`  Endpoint:     ${DUTCHIE_CONFIG.graphqlEndpoint}`);
    console.log(`  Operation:    FilteredProducts`);
    console.log(`  Hash:         ${GRAPHQL_HASHES.FilteredProducts.slice(0, 20)}...`);
    console.log(`  dispensaryId: ${variables.productsFilter.dispensaryId}`);
    console.log(`  pricingType:  ${variables.productsFilter.pricingType}`);
    console.log(`  Status:       ${variables.productsFilter.Status}`);
    console.log(`  perPage:      ${variables.perPage}`);
    console.log('');
    console.log('  Sending request...');

    const startTime = Date.now();
    const result = await executeGraphQL(
      'FilteredProducts',
      variables,
      GRAPHQL_HASHES.FilteredProducts,
      { cName, maxRetries: 3 }
    );
    const elapsed = Date.now() - startTime;

    console.log(`  Response time: ${elapsed}ms`);
    console.log('');

    // ============================================================
    // STEP 4: Process response
    // ============================================================
    console.log('┌─────────────────────────────────────────────────────────────┐');
    console.log('│ STEP 4: Process Response                                    │');
    console.log('└─────────────────────────────────────────────────────────────┘');

    const data = result?.data?.filteredProducts;
    if (!data) {
      console.log('  ERROR: No data returned from GraphQL');
      console.log('  Raw result:', JSON.stringify(result, null, 2).slice(0, 500));
      endSession();
      return;
    }

    const products = data.products || [];
    const totalCount = data.queryInfo?.totalCount || 0;
    const totalPages = Math.ceil(totalCount / 100);

    console.log(`  Total products:   ${totalCount}`);
    console.log(`  Products in page: ${products.length}`);
    console.log(`  Total pages:      ${totalPages}`);
    console.log('');

    // Show first few products
    console.log('  First 5 products:');
    console.log('  ─────────────────────────────────────────────────────────');
    for (let i = 0; i < Math.min(5, products.length); i++) {
      const p = products[i];
      const name = (p.name || 'Unknown').slice(0, 40);
      const brand = (p.brand?.name || 'Unknown').slice(0, 15);
      const price = p.Prices?.[0]?.price || p.medPrice || p.recPrice || 'N/A';
      const category = p.type || p.category || 'N/A';
      console.log(`  ${i + 1}. ${name.padEnd(42)} | ${brand.padEnd(17)} | $${price}`);
    }
    console.log('');

    // ============================================================
    // STEP 5: End session
    // ============================================================
    console.log('┌─────────────────────────────────────────────────────────────┐');
    console.log('│ STEP 5: End Session                                         │');
    console.log('└─────────────────────────────────────────────────────────────┘');

    endSession();
    console.log('');

    // ============================================================
    // SUMMARY
    // ============================================================
    console.log('╔════════════════════════════════════════════════════════════╗');
    console.log('║                          SUMMARY                           ║');
    console.log('╠════════════════════════════════════════════════════════════╣');
    console.log(`║ Store:           ${disp.name.slice(0, 38).padEnd(38)}    ║`);
    console.log(`║ Products Found:  ${String(totalCount).padEnd(38)}    ║`);
    console.log(`║ Response Time:   ${(elapsed + 'ms').padEnd(38)}    ║`);
    console.log(`║ Status:          ${'SUCCESS'.padEnd(38)}    ║`);
    console.log('╚════════════════════════════════════════════════════════════╝');

  } catch (error: any) {
    console.error('');
    console.error('╔════════════════════════════════════════════════════════════╗');
    console.error('║                           ERROR                            ║');
    console.error('╚════════════════════════════════════════════════════════════╝');
    console.error(`  ${error.message}`);
    if (error.stack) {
      console.error('');
      console.error('Stack trace:');
      console.error(error.stack.split('\n').slice(0, 5).join('\n'));
    }
    process.exit(1);
  } finally {
    await pool.end();
  }
}

main();
@@ -23,6 +23,7 @@ import {
   DutchieNormalizer,
   hydrateToCanonical,
 } from '../hydration';
+import { initializeImageStorage } from '../utils/image-storage';

 dotenv.config();

@@ -137,6 +138,11 @@ async function main() {
   console.log(`Test Crawl to Canonical - Dispensary ${dispensaryId}`);
   console.log('============================================================\n');

+  // Initialize image storage
+  console.log('[Init] Initializing image storage...');
+  await initializeImageStorage();
+  console.log('  Image storage ready\n');
+
   try {
     // Step 1: Get dispensary info
     console.log('[Step 1] Getting dispensary info...');

backend/src/scripts/test-image-proxy.ts (new file, 80 lines)
@@ -0,0 +1,80 @@
#!/usr/bin/env npx tsx
/**
 * Test Image Proxy - Standalone test without backend
 *
 * Usage:
 *   npx tsx src/scripts/test-image-proxy.ts
 */

import express from 'express';
import axios from 'axios';
import imageProxyRoutes from '../routes/image-proxy';

const app = express();
const PORT = 3099;

// Mount the image proxy
app.use('/img', imageProxyRoutes);

// Start server, then run the test requests against it
app.listen(PORT, async () => {
  console.log(`Test image proxy running on http://localhost:${PORT}`);
  console.log('');
  console.log('Testing image proxy...');
  console.log('');

  // Test cases
  const tests = [
    {
      name: 'Original image',
      url: '/img/products/az/az-deeply-rooted/clout-king/68b4b20a0f9ef3e90eb51e96/image-268a6e44.webp',
    },
    {
      name: 'Resize to 200px width',
      url: '/img/products/az/az-deeply-rooted/clout-king/68b4b20a0f9ef3e90eb51e96/image-268a6e44.webp?w=200',
    },
    {
      name: 'Resize to 100x100 cover',
      url: '/img/products/az/az-deeply-rooted/clout-king/68b4b20a0f9ef3e90eb51e96/image-268a6e44.webp?w=100&h=100&fit=cover',
    },
    {
      name: 'Grayscale + blur',
      url: '/img/products/az/az-deeply-rooted/clout-king/68b4b20a0f9ef3e90eb51e96/image-268a6e44.webp?w=200&gray=1&blur=2',
    },
    {
      name: 'Convert to JPEG',
      url: '/img/products/az/az-deeply-rooted/clout-king/68b4b20a0f9ef3e90eb51e96/image-268a6e44.webp?w=200&format=jpeg&q=70',
    },
    {
      name: 'Non-existent image',
      url: '/img/products/az/nonexistent/image.webp',
    },
  ];

  for (const test of tests) {
    try {
      const response = await axios.get(`http://localhost:${PORT}${test.url}`, {
        responseType: 'arraybuffer',
        validateStatus: () => true,
      });

      const contentType = response.headers['content-type'];
      const size = response.data.length;
      const status = response.status;

      console.log(`${test.name}:`);
      console.log(`  URL: ${test.url.slice(0, 80)}${test.url.length > 80 ? '...' : ''}`);
      console.log(`  Status: ${status}`);
      console.log(`  Content-Type: ${contentType}`);
      console.log(`  Size: ${(size / 1024).toFixed(1)} KB`);
      console.log('');
    } catch (error: any) {
      console.log(`${test.name}: ERROR - ${error.message}`);
      console.log('');
    }
  }

  console.log('Tests complete!');
  process.exit(0);
});

backend/src/scripts/test-stealth-session.ts (new file, 155 lines)
@@ -0,0 +1,155 @@
/**
 * Test script for stealth session management
 *
 * Per workflow-12102025.md:
 * - Tests HTTP fingerprinting (browser-specific headers + ordering)
 * - Tests UA generation (device distribution, browser filtering)
 * - Tests dynamic Referer per dispensary
 *
 * Usage:
 *   npx tsx src/scripts/test-stealth-session.ts
 */

import {
  startSession,
  endSession,
  getCurrentSession,
  buildHeaders,
  setCrawlRotator,
} from '../platforms/dutchie';

import { CrawlRotator } from '../services/crawl-rotator';
import {
  generateHTTPFingerprint,
  buildRefererFromMenuUrl,
  BrowserType,
} from '../services/http-fingerprint';

console.log('='.repeat(60));
console.log('STEALTH SESSION TEST (per workflow-12102025.md)');
console.log('='.repeat(60));

// Initialize CrawlRotator (required for sessions)
console.log('\n[Setup] Initializing CrawlRotator...');
const rotator = new CrawlRotator();
setCrawlRotator(rotator);
console.log('  CrawlRotator initialized');

// Test 1: HTTP Fingerprint Generation
console.log('\n[Test 1] HTTP Fingerprint Generation:');
const browsers: BrowserType[] = ['Chrome', 'Firefox', 'Safari', 'Edge'];

for (const browser of browsers) {
  const httpFp = generateHTTPFingerprint(browser);
  console.log(`  ${browser}:`);
  console.log(`    TLS binary:   ${httpFp.curlImpersonateBinary}`);
  console.log(`    DNT:          ${httpFp.hasDNT ? 'enabled' : 'disabled'}`);
  console.log(`    Header order: ${httpFp.headerOrder.slice(0, 5).join(', ')}...`);
}

// Test 2: Dynamic Referer from menu URLs
console.log('\n[Test 2] Dynamic Referer from Menu URLs:');
const testUrls = [
  'https://dutchie.com/embedded-menu/harvest-of-tempe',
  'https://dutchie.com/dispensary/zen-leaf-mesa',
  '/embedded-menu/deeply-rooted',
  '/dispensary/curaleaf-phoenix',
  null,
  undefined,
];

for (const url of testUrls) {
  const referer = buildRefererFromMenuUrl(url);
  console.log(`  ${url || '(null/undefined)'}`);
  console.log(`    → ${referer}`);
}

// Test 3: Session with Dynamic Referer
console.log('\n[Test 3] Session with Dynamic Referer:');
const testMenuUrl = 'https://dutchie.com/dispensary/harvest-of-tempe';
console.log(`  Starting session with menuUrl: ${testMenuUrl}`);

const session1 = startSession(testMenuUrl);
console.log(`  Session ID: ${session1.sessionId}`);
console.log(`  Browser:    ${session1.fingerprint.browserName}`);
console.log(`  Device:     ${session1.fingerprint.deviceCategory}`);
console.log(`  Referer:    ${session1.referer}`);
console.log(`  DNT:        ${session1.fingerprint.httpFingerprint.hasDNT ? 'enabled' : 'disabled'}`);
console.log(`  TLS:        ${session1.fingerprint.httpFingerprint.curlImpersonateBinary}`);

// Test 4: Build Headers (browser-specific order)
console.log('\n[Test 4] Build Headers (browser-specific order):');
const { headers, orderedHeaders } = buildHeaders(true, 1000);
console.log(`  Headers built for ${session1.fingerprint.browserName}:`);
console.log(`  Order: ${orderedHeaders.join(' → ')}`);
console.log(`  Sample headers:`);
console.log(`    User-Agent:      ${headers['User-Agent']?.slice(0, 50)}...`);
console.log(`    Accept:          ${headers['Accept']}`);
console.log(`    Accept-Language: ${headers['Accept-Language']}`);
console.log(`    Referer:         ${headers['Referer']}`);
if (headers['sec-ch-ua']) {
  console.log(`    sec-ch-ua:       ${headers['sec-ch-ua']}`);
}
if (headers['DNT']) {
  console.log(`    DNT:             ${headers['DNT']}`);
}

endSession();

// Test 5: Multiple Sessions (UA variety)
console.log('\n[Test 5] Multiple Sessions (UA & fingerprint variety):');
const sessions: {
  browser: string;
  device: string;
  hasDNT: boolean;
}[] = [];

for (let i = 0; i < 10; i++) {
  const session = startSession(`/dispensary/store-${i}`);
  sessions.push({
    browser: session.fingerprint.browserName,
    device: session.fingerprint.deviceCategory,
    hasDNT: session.fingerprint.httpFingerprint.hasDNT,
  });
  endSession();
}

// Count distribution
const browserCounts: Record<string, number> = {};
const deviceCounts: Record<string, number> = {};
let dntCount = 0;

for (const s of sessions) {
  browserCounts[s.browser] = (browserCounts[s.browser] || 0) + 1;
  deviceCounts[s.device] = (deviceCounts[s.device] || 0) + 1;
  if (s.hasDNT) dntCount++;
}

console.log(`  10 sessions created:`);
console.log(`  Browsers: ${JSON.stringify(browserCounts)}`);
console.log(`  Devices:  ${JSON.stringify(deviceCounts)}`);
console.log(`  DNT enabled: ${dntCount}/10 (expected ~30%)`);

// Test 6: Device distribution check (per workflow-12102025.md: 62/36/2)
console.log('\n[Test 6] Device Distribution (larger sample):');
const deviceSamples: string[] = [];

for (let i = 0; i < 100; i++) {
  const session = startSession();
  deviceSamples.push(session.fingerprint.deviceCategory);
  endSession();
}

const mobileCount = deviceSamples.filter(d => d === 'mobile').length;
const desktopCount = deviceSamples.filter(d => d === 'desktop').length;
const tabletCount = deviceSamples.filter(d => d === 'tablet').length;

console.log(`  100 sessions (expected: 62% mobile, 36% desktop, 2% tablet):`);
console.log(`    Mobile:  ${mobileCount}%`);
console.log(`    Desktop: ${desktopCount}%`);
console.log(`    Tablet:  ${tabletCount}%`);
console.log(`    Distribution: ${Math.abs(mobileCount - 62) < 15 && Math.abs(desktopCount - 36) < 15 ? '✅ Reasonable' : '⚠️ Off target'}`);

console.log('\n' + '='.repeat(60));
console.log('TEST COMPLETE');
console.log('='.repeat(60));
backend/src/services/analytics/BrandIntelligenceService.ts
Normal file
1202
backend/src/services/analytics/BrandIntelligenceService.ts
Normal file
File diff suppressed because it is too large
Load Diff
@@ -26,6 +26,8 @@ import {
   PenetrationDataPoint,
   BrandMarketPosition,
   BrandRecVsMedFootprint,
+  BrandPromotionalSummary,
+  BrandPromotionalEvent,
 } from './types';

 export class BrandPenetrationService {
@@ -44,16 +46,17 @@ export class BrandPenetrationService {
     // Get current brand presence
     const currentResult = await this.pool.query(`
       SELECT
-        sp.brand_name,
+        sp.brand_name_raw AS brand_name,
         COUNT(DISTINCT sp.dispensary_id) AS total_dispensaries,
         COUNT(*) AS total_skus,
         ROUND(COUNT(*)::NUMERIC / NULLIF(COUNT(DISTINCT sp.dispensary_id), 0), 2) AS avg_skus_per_dispensary,
         ARRAY_AGG(DISTINCT s.code) FILTER (WHERE s.code IS NOT NULL) AS states_present
       FROM store_products sp
-      LEFT JOIN states s ON s.id = sp.state_id
-      WHERE sp.brand_name = $1
+      JOIN dispensaries d ON d.id = sp.dispensary_id
+      LEFT JOIN states s ON s.id = d.state_id
+      WHERE sp.brand_name_raw = $1
         AND sp.is_in_stock = TRUE
-      GROUP BY sp.brand_name
+      GROUP BY sp.brand_name_raw
     `, [brandName]);

     if (currentResult.rows.length === 0) {
@@ -72,7 +75,7 @@
         DATE(sps.captured_at) AS date,
         COUNT(DISTINCT sps.dispensary_id) AS dispensary_count
       FROM store_product_snapshots sps
-      WHERE sps.brand_name = $1
+      WHERE sps.brand_name_raw = $1
         AND sps.captured_at >= $2
         AND sps.captured_at <= $3
         AND sps.is_in_stock = TRUE
@@ -123,8 +126,9 @@
         COUNT(DISTINCT sp.dispensary_id) AS dispensary_count,
         COUNT(*) AS sku_count
       FROM store_products sp
-      JOIN states s ON s.id = sp.state_id
-      WHERE sp.brand_name = $1
+      JOIN dispensaries d ON d.id = sp.dispensary_id
+      JOIN states s ON s.id = d.state_id
+      WHERE sp.brand_name_raw = $1
         AND sp.is_in_stock = TRUE
       GROUP BY s.code, s.name, s.recreational_legal, s.medical_legal
     ),
@@ -133,7 +137,8 @@
         s.code AS state_code,
         COUNT(DISTINCT sp.dispensary_id) AS total_dispensaries
       FROM store_products sp
-      JOIN states s ON s.id = sp.state_id
+      JOIN dispensaries d ON d.id = sp.dispensary_id
+      JOIN states s ON s.id = d.state_id
       WHERE sp.is_in_stock = TRUE
       GROUP BY s.code
     )
@@ -169,7 +174,7 @@
     let filters = '';

     if (options.category) {
-      filters += ` AND sp.category = $${paramIdx}`;
+      filters += ` AND sp.category_raw = $${paramIdx}`;
       params.push(options.category);
       paramIdx++;
     }
@@ -183,31 +188,33 @@
     const result = await this.pool.query(`
       WITH brand_metrics AS (
         SELECT
-          sp.brand_name,
-          sp.category,
+          sp.brand_name_raw AS brand_name,
+          sp.category_raw AS category,
           s.code AS state_code,
           COUNT(*) AS sku_count,
           COUNT(DISTINCT sp.dispensary_id) AS dispensary_count,
           AVG(sp.price_rec) AS avg_price
         FROM store_products sp
-        JOIN states s ON s.id = sp.state_id
-        WHERE sp.brand_name = $1
+        JOIN dispensaries d ON d.id = sp.dispensary_id
+        JOIN states s ON s.id = d.state_id
+        WHERE sp.brand_name_raw = $1
           AND sp.is_in_stock = TRUE
-          AND sp.category IS NOT NULL
+          AND sp.category_raw IS NOT NULL
           ${filters}
-        GROUP BY sp.brand_name, sp.category, s.code
+        GROUP BY sp.brand_name_raw, sp.category_raw, s.code
       ),
       category_totals AS (
         SELECT
-          sp.category,
+          sp.category_raw AS category,
           s.code AS state_code,
           COUNT(*) AS total_skus,
           AVG(sp.price_rec) AS category_avg_price
         FROM store_products sp
-        JOIN states s ON s.id = sp.state_id
+        JOIN dispensaries d ON d.id = sp.dispensary_id
+        JOIN states s ON s.id = d.state_id
         WHERE sp.is_in_stock = TRUE
-          AND sp.category IS NOT NULL
+          AND sp.category_raw IS NOT NULL
-        GROUP BY sp.category, s.code
+        GROUP BY sp.category_raw, s.code
       )
       SELECT
         bm.*,
@@ -243,8 +250,9 @@
         COUNT(DISTINCT sp.dispensary_id) AS dispensary_count,
         ROUND(COUNT(*)::NUMERIC / NULLIF(COUNT(DISTINCT sp.dispensary_id), 0), 2) AS avg_skus
       FROM store_products sp
-      JOIN states s ON s.id = sp.state_id
-      WHERE sp.brand_name = $1
+      JOIN dispensaries d ON d.id = sp.dispensary_id
+      JOIN states s ON s.id = d.state_id
+      WHERE sp.brand_name_raw = $1
        AND sp.is_in_stock = TRUE
         AND s.recreational_legal = TRUE
     ),
@@ -255,8 +263,9 @@
         COUNT(DISTINCT sp.dispensary_id) AS dispensary_count,
         ROUND(COUNT(*)::NUMERIC / NULLIF(COUNT(DISTINCT sp.dispensary_id), 0), 2) AS avg_skus
       FROM store_products sp
-      JOIN states s ON s.id = sp.state_id
-      WHERE sp.brand_name = $1
+      JOIN dispensaries d ON d.id = sp.dispensary_id
+      JOIN states s ON s.id = d.state_id
+      WHERE sp.brand_name_raw = $1
         AND sp.is_in_stock = TRUE
         AND s.medical_legal = TRUE
         AND (s.recreational_legal = FALSE OR s.recreational_legal IS NULL)
@@ -311,23 +320,24 @@
     }

     if (category) {
-      filters += ` AND sp.category = $${paramIdx}`;
+      filters += ` AND sp.category_raw = $${paramIdx}`;
       params.push(category);
       paramIdx++;
     }

     const result = await this.pool.query(`
       SELECT
-        sp.brand_name,
+        sp.brand_name_raw AS brand_name,
         COUNT(DISTINCT sp.dispensary_id) AS dispensary_count,
         COUNT(*) AS sku_count,
         COUNT(DISTINCT s.code) AS state_count
       FROM store_products sp
-      LEFT JOIN states s ON s.id = sp.state_id
-      WHERE sp.brand_name IS NOT NULL
+      JOIN dispensaries d ON d.id = sp.dispensary_id
+      LEFT JOIN states s ON s.id = d.state_id
+      WHERE sp.brand_name_raw IS NOT NULL
         AND sp.is_in_stock = TRUE
         ${filters}
-      GROUP BY sp.brand_name
+      GROUP BY sp.brand_name_raw
       ORDER BY dispensary_count DESC, sku_count DESC
       LIMIT $1
     `, params);
@@ -358,23 +368,23 @@
     const result = await this.pool.query(`
       WITH start_counts AS (
         SELECT
-          brand_name,
+          brand_name_raw AS brand_name,
           COUNT(DISTINCT dispensary_id) AS dispensary_count
         FROM store_product_snapshots
         WHERE captured_at >= $1 AND captured_at < $1 + INTERVAL '1 day'
|
||||||
AND brand_name IS NOT NULL
|
AND brand_name_raw IS NOT NULL
|
||||||
AND is_in_stock = TRUE
|
AND is_in_stock = TRUE
|
||||||
GROUP BY brand_name
|
GROUP BY brand_name_raw
|
||||||
),
|
),
|
||||||
end_counts AS (
|
end_counts AS (
|
||||||
SELECT
|
SELECT
|
||||||
brand_name,
|
brand_name_raw AS brand_name,
|
||||||
COUNT(DISTINCT dispensary_id) AS dispensary_count
|
COUNT(DISTINCT dispensary_id) AS dispensary_count
|
||||||
FROM store_product_snapshots
|
FROM store_product_snapshots
|
||||||
WHERE captured_at >= $2 - INTERVAL '1 day' AND captured_at <= $2
|
WHERE captured_at >= $2 - INTERVAL '1 day' AND captured_at <= $2
|
||||||
AND brand_name IS NOT NULL
|
AND brand_name_raw IS NOT NULL
|
||||||
AND is_in_stock = TRUE
|
AND is_in_stock = TRUE
|
||||||
GROUP BY brand_name
|
GROUP BY brand_name_raw
|
||||||
)
|
)
|
||||||
SELECT
|
SELECT
|
||||||
COALESCE(sc.brand_name, ec.brand_name) AS brand_name,
|
COALESCE(sc.brand_name, ec.brand_name) AS brand_name,
|
||||||
@@ -401,6 +411,225 @@ export class BrandPenetrationService {
|
|||||||
change_percent: row.change_percent ? parseFloat(row.change_percent) : 0,
|
change_percent: row.change_percent ? parseFloat(row.change_percent) : 0,
|
||||||
}));
|
}));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get brand promotional history
|
||||||
|
*
|
||||||
|
* Tracks when products went on special, how long, what discount,
|
||||||
|
* and estimated quantity sold during the promotion.
|
||||||
|
*/
|
||||||
|
async getBrandPromotionalHistory(
|
||||||
|
brandName: string,
|
||||||
|
options: { window?: TimeWindow; customRange?: DateRange; stateCode?: string; category?: string } = {}
|
||||||
|
): Promise<BrandPromotionalSummary> {
|
||||||
|
const { window = '90d', customRange, stateCode, category } = options;
|
||||||
|
const { start, end } = getDateRangeFromWindow(window, customRange);
|
||||||
|
|
||||||
|
// Build filters
|
||||||
|
const params: any[] = [brandName, start, end];
|
||||||
|
let paramIdx = 4;
|
||||||
|
let filters = '';
|
||||||
|
|
||||||
|
if (stateCode) {
|
||||||
|
filters += ` AND s.code = $${paramIdx}`;
|
||||||
|
params.push(stateCode);
|
||||||
|
paramIdx++;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (category) {
|
||||||
|
filters += ` AND sp.category_raw = $${paramIdx}`;
|
||||||
|
params.push(category);
|
||||||
|
paramIdx++;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Find promotional events by detecting when is_on_special transitions to TRUE
|
||||||
|
// and tracking until it transitions back to FALSE
|
||||||
|
const eventsResult = await this.pool.query(`
|
||||||
|
WITH snapshot_with_lag AS (
|
||||||
|
SELECT
|
||||||
|
sps.id,
|
||||||
|
sps.store_product_id,
|
||||||
|
sps.dispensary_id,
|
||||||
|
sps.brand_name_raw,
|
||||||
|
sps.name_raw,
|
||||||
|
sps.category_raw,
|
||||||
|
sps.is_on_special,
|
||||||
|
sps.price_rec,
|
||||||
|
sps.price_rec_special,
|
||||||
|
sps.stock_quantity,
|
||||||
|
sps.captured_at,
|
||||||
|
LAG(sps.is_on_special) OVER (
|
||||||
|
PARTITION BY sps.store_product_id
|
||||||
|
ORDER BY sps.captured_at
|
||||||
|
) AS prev_is_on_special,
|
||||||
|
LAG(sps.stock_quantity) OVER (
|
||||||
|
PARTITION BY sps.store_product_id
|
||||||
|
ORDER BY sps.captured_at
|
||||||
|
) AS prev_stock_quantity
|
||||||
|
FROM store_product_snapshots sps
|
||||||
|
JOIN store_products sp ON sp.id = sps.store_product_id
|
||||||
|
JOIN dispensaries dd ON dd.id = sp.dispensary_id
|
||||||
|
LEFT JOIN states s ON s.id = dd.state_id
|
||||||
|
WHERE sps.brand_name_raw = $1
|
||||||
|
AND sps.captured_at >= $2
|
||||||
|
AND sps.captured_at <= $3
|
||||||
|
${filters}
|
||||||
|
),
|
||||||
|
special_starts AS (
|
||||||
|
-- Find when specials START (transition from not-on-special to on-special)
|
||||||
|
SELECT
|
||||||
|
store_product_id,
|
||||||
|
dispensary_id,
|
||||||
|
name_raw,
|
||||||
|
category_raw,
|
||||||
|
captured_at AS special_start,
|
||||||
|
price_rec AS regular_price,
|
||||||
|
price_rec_special AS special_price,
|
||||||
|
stock_quantity AS quantity_at_start
|
||||||
|
FROM snapshot_with_lag
|
||||||
|
WHERE is_on_special = TRUE
|
||||||
|
AND (prev_is_on_special = FALSE OR prev_is_on_special IS NULL)
|
||||||
|
AND price_rec_special IS NOT NULL
|
||||||
|
AND price_rec IS NOT NULL
|
||||||
|
),
|
||||||
|
special_ends AS (
|
||||||
|
-- Find when specials END (transition from on-special to not-on-special)
|
||||||
|
SELECT
|
||||||
|
store_product_id,
|
||||||
|
captured_at AS special_end,
|
||||||
|
prev_stock_quantity AS quantity_at_end
|
||||||
|
FROM snapshot_with_lag
|
||||||
|
WHERE is_on_special = FALSE
|
||||||
|
AND prev_is_on_special = TRUE
|
||||||
|
),
|
||||||
|
matched_events AS (
|
||||||
|
SELECT
|
||||||
|
ss.store_product_id,
|
||||||
|
ss.dispensary_id,
|
||||||
|
ss.name_raw AS product_name,
|
||||||
|
ss.category_raw AS category,
|
||||||
|
ss.special_start,
|
||||||
|
se.special_end,
|
||||||
|
ss.regular_price,
|
||||||
|
ss.special_price,
|
||||||
|
ss.quantity_at_start,
|
||||||
|
COALESCE(se.quantity_at_end, ss.quantity_at_start) AS quantity_at_end
|
||||||
|
FROM special_starts ss
|
||||||
|
LEFT JOIN special_ends se ON se.store_product_id = ss.store_product_id
|
||||||
|
AND se.special_end > ss.special_start
|
||||||
|
AND se.special_end = (
|
||||||
|
SELECT MIN(se2.special_end)
|
||||||
|
FROM special_ends se2
|
||||||
|
WHERE se2.store_product_id = ss.store_product_id
|
||||||
|
AND se2.special_end > ss.special_start
|
||||||
|
)
|
||||||
|
)
|
||||||
|
SELECT
|
||||||
|
me.store_product_id,
|
||||||
|
me.dispensary_id,
|
||||||
|
d.name AS dispensary_name,
|
||||||
|
s.code AS state_code,
|
||||||
|
me.product_name,
|
||||||
|
me.category,
|
||||||
|
me.special_start,
|
||||||
|
me.special_end,
|
||||||
|
EXTRACT(DAY FROM COALESCE(me.special_end, NOW()) - me.special_start)::INT AS duration_days,
|
||||||
|
me.regular_price,
|
||||||
|
me.special_price,
|
||||||
|
ROUND(((me.regular_price - me.special_price) / NULLIF(me.regular_price, 0)) * 100, 1) AS discount_percent,
|
||||||
|
me.quantity_at_start,
|
||||||
|
me.quantity_at_end,
|
||||||
|
GREATEST(0, COALESCE(me.quantity_at_start, 0) - COALESCE(me.quantity_at_end, 0)) AS quantity_sold_estimate
|
||||||
|
FROM matched_events me
|
||||||
|
JOIN dispensaries d ON d.id = me.dispensary_id
|
||||||
|
LEFT JOIN states s ON s.id = d.state_id
|
||||||
|
ORDER BY me.special_start DESC
|
||||||
|
`, params);
|
||||||
|
|
||||||
|
const events: BrandPromotionalEvent[] = eventsResult.rows.map((row: any) => ({
|
||||||
|
product_name: row.product_name,
|
||||||
|
store_product_id: parseInt(row.store_product_id),
|
||||||
|
dispensary_id: parseInt(row.dispensary_id),
|
||||||
|
dispensary_name: row.dispensary_name,
|
||||||
|
state_code: row.state_code || 'Unknown',
|
||||||
|
category: row.category,
|
||||||
|
special_start: row.special_start.toISOString().split('T')[0],
|
||||||
|
special_end: row.special_end ? row.special_end.toISOString().split('T')[0] : null,
|
||||||
|
duration_days: row.duration_days ? parseInt(row.duration_days) : null,
|
||||||
|
regular_price: parseFloat(row.regular_price) || 0,
|
||||||
|
special_price: parseFloat(row.special_price) || 0,
|
||||||
|
discount_percent: parseFloat(row.discount_percent) || 0,
|
||||||
|
quantity_at_start: row.quantity_at_start ? parseInt(row.quantity_at_start) : null,
|
||||||
|
quantity_at_end: row.quantity_at_end ? parseInt(row.quantity_at_end) : null,
|
||||||
|
quantity_sold_estimate: row.quantity_sold_estimate ? parseInt(row.quantity_sold_estimate) : null,
|
||||||
|
}));
|
||||||
|
|
||||||
|
// Calculate summary stats
|
||||||
|
const totalEvents = events.length;
|
||||||
|
const uniqueProducts = new Set(events.map(e => e.store_product_id)).size;
|
||||||
|
const uniqueDispensaries = new Set(events.map(e => e.dispensary_id)).size;
|
||||||
|
const uniqueStates = [...new Set(events.map(e => e.state_code))];
|
||||||
|
|
||||||
|
const avgDiscount = totalEvents > 0
|
||||||
|
? events.reduce((sum, e) => sum + e.discount_percent, 0) / totalEvents
|
||||||
|
: 0;
|
||||||
|
|
||||||
|
const durations = events.filter(e => e.duration_days !== null).map(e => e.duration_days!);
|
||||||
|
const avgDuration = durations.length > 0
|
||||||
|
? durations.reduce((sum, d) => sum + d, 0) / durations.length
|
||||||
|
: null;
|
||||||
|
|
||||||
|
const totalQuantitySold = events
|
||||||
|
.filter(e => e.quantity_sold_estimate !== null)
|
||||||
|
.reduce((sum, e) => sum + (e.quantity_sold_estimate || 0), 0);
|
||||||
|
|
||||||
|
// Calculate frequency
|
||||||
|
const windowDays = Math.ceil((end.getTime() - start.getTime()) / (1000 * 60 * 60 * 24));
|
||||||
|
const weeklyAvg = windowDays > 0 ? (totalEvents / windowDays) * 7 : 0;
|
||||||
|
const monthlyAvg = windowDays > 0 ? (totalEvents / windowDays) * 30 : 0;
|
||||||
|
|
||||||
|
// Group by category
|
||||||
|
const categoryMap = new Map<string, { count: number; discounts: number[]; quantity: number }>();
|
||||||
|
for (const event of events) {
|
||||||
|
const cat = event.category || 'Uncategorized';
|
||||||
|
if (!categoryMap.has(cat)) {
|
||||||
|
categoryMap.set(cat, { count: 0, discounts: [], quantity: 0 });
|
||||||
|
}
|
||||||
|
const entry = categoryMap.get(cat)!;
|
||||||
|
entry.count++;
|
||||||
|
entry.discounts.push(event.discount_percent);
|
||||||
|
if (event.quantity_sold_estimate !== null) {
|
||||||
|
entry.quantity += event.quantity_sold_estimate;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const byCategory = Array.from(categoryMap.entries()).map(([category, data]) => ({
|
||||||
|
category,
|
||||||
|
event_count: data.count,
|
||||||
|
avg_discount_percent: data.discounts.length > 0
|
||||||
|
? Math.round((data.discounts.reduce((a, b) => a + b, 0) / data.discounts.length) * 10) / 10
|
||||||
|
: 0,
|
||||||
|
quantity_sold_estimate: data.quantity > 0 ? data.quantity : null,
|
||||||
|
})).sort((a, b) => b.event_count - a.event_count);
|
||||||
|
|
||||||
|
return {
|
||||||
|
brand_name: brandName,
|
||||||
|
window,
|
||||||
|
total_promotional_events: totalEvents,
|
||||||
|
total_products_on_special: uniqueProducts,
|
||||||
|
total_dispensaries_with_specials: uniqueDispensaries,
|
||||||
|
states_with_specials: uniqueStates,
|
||||||
|
avg_discount_percent: Math.round(avgDiscount * 10) / 10,
|
||||||
|
avg_duration_days: avgDuration !== null ? Math.round(avgDuration * 10) / 10 : null,
|
||||||
|
total_quantity_sold_estimate: totalQuantitySold > 0 ? totalQuantitySold : null,
|
||||||
|
promotional_frequency: {
|
||||||
|
weekly_avg: Math.round(weeklyAvg * 10) / 10,
|
||||||
|
monthly_avg: Math.round(monthlyAvg * 10) / 10,
|
||||||
|
},
|
||||||
|
by_category: byCategory,
|
||||||
|
events,
|
||||||
|
};
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
export default BrandPenetrationService;
|
export default BrandPenetrationService;
|
||||||
|
|||||||
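A minimal usage sketch for the new getBrandPromotionalHistory endpoint (an editor illustration, not part of the diff). The pg Pool wiring, the pool-taking constructor, the import path, and the 'Wyld'/'MI' arguments are assumptions; the method name, options object, and summary fields come from the hunk above.

```ts
import { Pool } from 'pg';
// Hypothetical path: the compare view does not show file names.
import BrandPenetrationService from './BrandPenetrationService';

const pool = new Pool({ connectionString: process.env.DATABASE_URL });
const brands = new BrandPenetrationService(pool); // assumes the service wraps a pg Pool

async function demo() {
  // Brand and state code are illustrative values.
  const summary = await brands.getBrandPromotionalHistory('Wyld', {
    window: '90d',
    stateCode: 'MI',
  });
  console.log(summary.total_promotional_events, summary.avg_discount_percent);
  for (const e of summary.events.slice(0, 5)) {
    console.log(e.product_name, e.special_start, `-${e.discount_percent}%`);
  }
}

demo().catch(console.error);
```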
@@ -43,14 +43,14 @@ export class CategoryAnalyticsService {
     // Get current category metrics
     const currentResult = await this.pool.query(`
      SELECT
-       sp.category,
+       sp.category_raw,
        COUNT(*) AS sku_count,
        COUNT(DISTINCT sp.dispensary_id) AS dispensary_count,
        AVG(sp.price_rec) AS avg_price
      FROM store_products sp
-     WHERE sp.category = $1
+     WHERE sp.category_raw = $1
        AND sp.is_in_stock = TRUE
-     GROUP BY sp.category
+     GROUP BY sp.category_raw
     `, [category]);
 
     if (currentResult.rows.length === 0) {
@@ -70,7 +70,7 @@ export class CategoryAnalyticsService {
        COUNT(DISTINCT sps.dispensary_id) AS dispensary_count,
        AVG(sps.price_rec) AS avg_price
      FROM store_product_snapshots sps
-     WHERE sps.category = $1
+     WHERE sps.category_raw = $1
        AND sps.captured_at >= $2
        AND sps.captured_at <= $3
        AND sps.is_in_stock = TRUE
@@ -111,8 +111,9 @@ export class CategoryAnalyticsService {
        COUNT(DISTINCT sp.dispensary_id) AS dispensary_count,
        AVG(sp.price_rec) AS avg_price
      FROM store_products sp
-     JOIN states s ON s.id = sp.state_id
-     WHERE sp.category = $1
+     JOIN dispensaries d ON d.id = sp.dispensary_id
+     JOIN states s ON s.id = d.state_id
+     WHERE sp.category_raw = $1
        AND sp.is_in_stock = TRUE
      GROUP BY s.code, s.name, s.recreational_legal
      ORDER BY sku_count DESC
@@ -154,24 +155,25 @@ export class CategoryAnalyticsService {
 
     const result = await this.pool.query(`
      SELECT
-       sp.category,
+       sp.category_raw,
        COUNT(*) AS sku_count,
        COUNT(DISTINCT sp.dispensary_id) AS dispensary_count,
-       COUNT(DISTINCT sp.brand_name) AS brand_count,
+       COUNT(DISTINCT sp.brand_name_raw) AS brand_count,
        AVG(sp.price_rec) AS avg_price,
        COUNT(DISTINCT s.code) AS state_count
      FROM store_products sp
-     LEFT JOIN states s ON s.id = sp.state_id
-     WHERE sp.category IS NOT NULL
+     LEFT JOIN dispensaries d ON d.id = sp.dispensary_id
+     JOIN states s ON s.id = d.state_id
+     WHERE sp.category_raw IS NOT NULL
        AND sp.is_in_stock = TRUE
        ${stateFilter}
-     GROUP BY sp.category
+     GROUP BY sp.category_raw
      ORDER BY sku_count DESC
      LIMIT $1
     `, params);
 
     return result.rows.map((row: any) => ({
-      category: row.category,
+      category: row.category_raw,
       sku_count: parseInt(row.sku_count),
       dispensary_count: parseInt(row.dispensary_count),
       brand_count: parseInt(row.brand_count),
@@ -188,14 +190,14 @@ export class CategoryAnalyticsService {
     let categoryFilter = '';
 
     if (category) {
-      categoryFilter = 'WHERE sp.category = $1';
+      categoryFilter = 'WHERE sp.category_raw = $1';
       params.push(category);
     }
 
     const result = await this.pool.query(`
      WITH category_stats AS (
        SELECT
-         sp.category,
+         sp.category_raw,
          CASE WHEN s.recreational_legal = TRUE THEN 'recreational' ELSE 'medical_only' END AS legal_type,
          COUNT(DISTINCT s.code) AS state_count,
          COUNT(DISTINCT sp.dispensary_id) AS dispensary_count,
@@ -203,13 +205,14 @@ export class CategoryAnalyticsService {
          AVG(sp.price_rec) AS avg_price,
          PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY sp.price_rec) AS median_price
        FROM store_products sp
-       JOIN states s ON s.id = sp.state_id
+       JOIN dispensaries d ON d.id = sp.dispensary_id
+       JOIN states s ON s.id = d.state_id
        ${categoryFilter}
-       ${category ? 'AND' : 'WHERE'} sp.category IS NOT NULL
+       ${category ? 'AND' : 'WHERE'} sp.category_raw IS NOT NULL
          AND sp.is_in_stock = TRUE
          AND sp.price_rec IS NOT NULL
          AND (s.recreational_legal = TRUE OR s.medical_legal = TRUE)
-       GROUP BY sp.category, CASE WHEN s.recreational_legal = TRUE THEN 'recreational' ELSE 'medical_only' END
+       GROUP BY sp.category_raw, CASE WHEN s.recreational_legal = TRUE THEN 'recreational' ELSE 'medical_only' END
      ),
      rec_stats AS (
        SELECT * FROM category_stats WHERE legal_type = 'recreational'
@@ -218,7 +221,7 @@ export class CategoryAnalyticsService {
        SELECT * FROM category_stats WHERE legal_type = 'medical_only'
      )
      SELECT
-       COALESCE(r.category, m.category) AS category,
+       COALESCE(r.category_raw, m.category_raw) AS category,
        r.state_count AS rec_state_count,
        r.dispensary_count AS rec_dispensary_count,
        r.sku_count AS rec_sku_count,
@@ -235,7 +238,7 @@ export class CategoryAnalyticsService {
        ELSE NULL
        END AS price_diff_percent
      FROM rec_stats r
-     FULL OUTER JOIN med_stats m ON r.category = m.category
+     FULL OUTER JOIN med_stats m ON r.category_raw = m.category_raw
      ORDER BY COALESCE(r.sku_count, 0) + COALESCE(m.sku_count, 0) DESC
     `, params);
 
@@ -282,7 +285,7 @@ export class CategoryAnalyticsService {
        COUNT(*) AS sku_count,
        COUNT(DISTINCT sps.dispensary_id) AS dispensary_count
      FROM store_product_snapshots sps
-     WHERE sps.category = $1
+     WHERE sps.category_raw = $1
        AND sps.captured_at >= $2
        AND sps.captured_at <= $3
        AND sps.is_in_stock = TRUE
@@ -335,31 +338,33 @@ export class CategoryAnalyticsService {
      WITH category_total AS (
        SELECT COUNT(*) AS total
        FROM store_products sp
-       LEFT JOIN states s ON s.id = sp.state_id
-       WHERE sp.category = $1
+       LEFT JOIN dispensaries d ON d.id = sp.dispensary_id
+       JOIN states s ON s.id = d.state_id
+       WHERE sp.category_raw = $1
          AND sp.is_in_stock = TRUE
-         AND sp.brand_name IS NOT NULL
+         AND sp.brand_name_raw IS NOT NULL
          ${stateFilter}
      )
      SELECT
-       sp.brand_name,
+       sp.brand_name_raw,
        COUNT(*) AS sku_count,
        COUNT(DISTINCT sp.dispensary_id) AS dispensary_count,
        AVG(sp.price_rec) AS avg_price,
        ROUND(COUNT(*)::NUMERIC * 100 / NULLIF((SELECT total FROM category_total), 0), 2) AS category_share_percent
      FROM store_products sp
-     LEFT JOIN states s ON s.id = sp.state_id
-     WHERE sp.category = $1
+     LEFT JOIN dispensaries d ON d.id = sp.dispensary_id
+     JOIN states s ON s.id = d.state_id
+     WHERE sp.category_raw = $1
        AND sp.is_in_stock = TRUE
-       AND sp.brand_name IS NOT NULL
+       AND sp.brand_name_raw IS NOT NULL
        ${stateFilter}
-     GROUP BY sp.brand_name
+     GROUP BY sp.brand_name_raw
      ORDER BY sku_count DESC
      LIMIT $2
     `, params);
 
     return result.rows.map((row: any) => ({
-      brand_name: row.brand_name,
+      brand_name: row.brand_name_raw,
       sku_count: parseInt(row.sku_count),
       dispensary_count: parseInt(row.dispensary_count),
       avg_price: row.avg_price ? parseFloat(row.avg_price) : null,
@@ -421,7 +426,7 @@ export class CategoryAnalyticsService {
     `, [start, end, limit]);
 
     return result.rows.map((row: any) => ({
-      category: row.category,
+      category: row.category_raw,
       start_sku_count: parseInt(row.start_sku_count),
       end_sku_count: parseInt(row.end_sku_count),
       growth: parseInt(row.growth),
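The same schema change repeats across all five services: store_products no longer carries a denormalized state_id, so every state-scoped query now resolves the state through the owning dispensary. A minimal before/after sketch of the join shape (editor illustration; tables and columns as named in these hunks):

```ts
// Before: state joined straight off the product row.
const before = `
  FROM store_products sp
  JOIN states s ON s.id = sp.state_id`;

// After: state resolved via the dispensary that owns the product.
const after = `
  FROM store_products sp
  JOIN dispensaries d ON d.id = sp.dispensary_id
  JOIN states s ON s.id = d.state_id`;
```

One side effect visible in the hunks: where a former LEFT JOIN on states is now followed by an inner JOIN through dispensaries, products without a dispensary row drop out of the state-scoped counts.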
@@ -43,9 +43,9 @@ export class PriceAnalyticsService {
     const productResult = await this.pool.query(`
      SELECT
        sp.id,
-       sp.name,
-       sp.brand_name,
-       sp.category,
+       sp.name_raw,
+       sp.brand_name_raw,
+       sp.category_raw,
        sp.dispensary_id,
        sp.price_rec,
        sp.price_med,
@@ -53,7 +53,7 @@ export class PriceAnalyticsService {
        s.code AS state_code
      FROM store_products sp
      JOIN dispensaries d ON d.id = sp.dispensary_id
-     LEFT JOIN states s ON s.id = sp.state_id
+     JOIN states s ON s.id = d.state_id
      WHERE sp.id = $1
     `, [storeProductId]);
 
@@ -133,7 +133,7 @@ export class PriceAnalyticsService {
 
     const result = await this.pool.query(`
      SELECT
-       sp.category,
+       sp.category_raw,
        s.code AS state_code,
        s.name AS state_name,
        CASE
@@ -148,18 +148,18 @@ export class PriceAnalyticsService {
        COUNT(DISTINCT sp.dispensary_id) AS dispensary_count
      FROM store_products sp
      JOIN dispensaries d ON d.id = sp.dispensary_id
-     JOIN states s ON s.id = sp.state_id
-     WHERE sp.category = $1
+     JOIN states s ON s.id = d.state_id
+     WHERE sp.category_raw = $1
        AND sp.price_rec IS NOT NULL
        AND sp.is_in_stock = TRUE
        AND (s.recreational_legal = TRUE OR s.medical_legal = TRUE)
        ${stateFilter}
-     GROUP BY sp.category, s.code, s.name, s.recreational_legal
+     GROUP BY sp.category_raw, s.code, s.name, s.recreational_legal
      ORDER BY state_code
     `, params);
 
     return result.rows.map((row: any) => ({
-      category: row.category,
+      category: row.category_raw,
       state_code: row.state_code,
       state_name: row.state_name,
       legal_type: row.legal_type,
@@ -189,7 +189,7 @@ export class PriceAnalyticsService {
 
     const result = await this.pool.query(`
      SELECT
-       sp.brand_name AS category,
+       sp.brand_name_raw AS category,
        s.code AS state_code,
        s.name AS state_name,
        CASE
@@ -204,18 +204,18 @@ export class PriceAnalyticsService {
        COUNT(DISTINCT sp.dispensary_id) AS dispensary_count
      FROM store_products sp
      JOIN dispensaries d ON d.id = sp.dispensary_id
-     JOIN states s ON s.id = sp.state_id
-     WHERE sp.brand_name = $1
+     JOIN states s ON s.id = d.state_id
+     WHERE sp.brand_name_raw = $1
        AND sp.price_rec IS NOT NULL
        AND sp.is_in_stock = TRUE
        AND (s.recreational_legal = TRUE OR s.medical_legal = TRUE)
        ${stateFilter}
-     GROUP BY sp.brand_name, s.code, s.name, s.recreational_legal
+     GROUP BY sp.brand_name_raw, s.code, s.name, s.recreational_legal
      ORDER BY state_code
     `, params);
 
     return result.rows.map((row: any) => ({
-      category: row.category,
+      category: row.category_raw,
       state_code: row.state_code,
       state_name: row.state_name,
       legal_type: row.legal_type,
@@ -254,7 +254,7 @@ export class PriceAnalyticsService {
     }
 
     if (category) {
-      filters += ` AND sp.category = $${paramIdx}`;
+      filters += ` AND sp.category_raw = $${paramIdx}`;
       params.push(category);
       paramIdx++;
     }
@@ -288,15 +288,16 @@ export class PriceAnalyticsService {
      )
      SELECT
        v.store_product_id,
-       sp.name AS product_name,
-       sp.brand_name,
+       sp.name_raw AS product_name,
+       sp.brand_name_raw,
        v.change_count,
        v.avg_change_pct,
        v.max_change_pct,
        v.last_change_at
      FROM volatility v
      JOIN store_products sp ON sp.id = v.store_product_id
-     LEFT JOIN states s ON s.id = sp.state_id
+     LEFT JOIN dispensaries d ON d.id = sp.dispensary_id
+     JOIN states s ON s.id = d.state_id
      WHERE 1=1 ${filters}
      ORDER BY v.change_count DESC, v.avg_change_pct DESC
      LIMIT $3
@@ -305,7 +306,7 @@ export class PriceAnalyticsService {
     return result.rows.map((row: any) => ({
       store_product_id: row.store_product_id,
       product_name: row.product_name,
-      brand_name: row.brand_name,
+      brand_name: row.brand_name_raw,
       change_count: parseInt(row.change_count),
       avg_change_percent: row.avg_change_pct ? parseFloat(row.avg_change_pct) : 0,
       max_change_percent: row.max_change_pct ? parseFloat(row.max_change_pct) : 0,
@@ -327,13 +328,13 @@ export class PriceAnalyticsService {
     let categoryFilter = '';
 
     if (category) {
-      categoryFilter = 'WHERE sp.category = $1';
+      categoryFilter = 'WHERE sp.category_raw = $1';
       params.push(category);
     }
 
     const result = await this.pool.query(`
      SELECT
-       sp.category,
+       sp.category_raw,
        AVG(sp.price_rec) FILTER (WHERE s.recreational_legal = TRUE) AS rec_avg,
        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY sp.price_rec)
          FILTER (WHERE s.recreational_legal = TRUE) AS rec_median,
@@ -343,17 +344,18 @@ export class PriceAnalyticsService {
        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY sp.price_rec)
          FILTER (WHERE s.medical_legal = TRUE AND (s.recreational_legal = FALSE OR s.recreational_legal IS NULL)) AS med_median
      FROM store_products sp
-     JOIN states s ON s.id = sp.state_id
+     JOIN dispensaries d ON d.id = sp.dispensary_id
+     JOIN states s ON s.id = d.state_id
      ${categoryFilter}
      ${category ? 'AND' : 'WHERE'} sp.price_rec IS NOT NULL
        AND sp.is_in_stock = TRUE
-       AND sp.category IS NOT NULL
-     GROUP BY sp.category
-     ORDER BY sp.category
+       AND sp.category_raw IS NOT NULL
+     GROUP BY sp.category_raw
+     ORDER BY sp.category_raw
     `, params);
 
     return result.rows.map((row: any) => ({
-      category: row.category,
+      category: row.category_raw,
       rec_avg: row.rec_avg ? parseFloat(row.rec_avg) : null,
      rec_median: row.rec_median ? parseFloat(row.rec_median) : null,
      med_avg: row.med_avg ? parseFloat(row.med_avg) : null,
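PriceAnalyticsService splits recreational and medical-only pricing with PostgreSQL's FILTER (WHERE ...) aggregate clause, which computes both segments in one table scan instead of two separate queries. A standalone sketch of that idea (editor illustration; tables and columns as named in the hunks above):

```ts
const recVsMedByCategory = `
  SELECT
    sp.category_raw,
    AVG(sp.price_rec) FILTER (WHERE s.recreational_legal = TRUE) AS rec_avg,
    AVG(sp.price_rec) FILTER (
      WHERE s.medical_legal = TRUE
        AND (s.recreational_legal = FALSE OR s.recreational_legal IS NULL)
    ) AS med_avg
  FROM store_products sp
  JOIN dispensaries d ON d.id = sp.dispensary_id
  JOIN states s ON s.id = d.state_id
  WHERE sp.price_rec IS NOT NULL
    AND sp.is_in_stock = TRUE
    AND sp.category_raw IS NOT NULL
  GROUP BY sp.category_raw`;
```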
@@ -108,14 +108,14 @@ export class StateAnalyticsService {
      SELECT
        COUNT(DISTINCT d.id) AS dispensary_count,
        COUNT(DISTINCT sp.id) AS product_count,
-       COUNT(DISTINCT sp.brand_name) FILTER (WHERE sp.brand_name IS NOT NULL) AS brand_count,
-       COUNT(DISTINCT sp.category) FILTER (WHERE sp.category IS NOT NULL) AS category_count,
+       COUNT(DISTINCT sp.brand_name_raw) FILTER (WHERE sp.brand_name_raw IS NOT NULL) AS brand_count,
+       COUNT(DISTINCT sp.category_raw) FILTER (WHERE sp.category_raw IS NOT NULL) AS category_count,
        COUNT(sps.id) AS snapshot_count,
        MAX(sps.captured_at) AS last_crawl_at
      FROM states s
      LEFT JOIN dispensaries d ON d.state_id = s.id
-     LEFT JOIN store_products sp ON sp.state_id = s.id AND sp.is_in_stock = TRUE
-     LEFT JOIN store_product_snapshots sps ON sps.state_id = s.id
+     LEFT JOIN store_products sp ON sp.dispensary_id = d.id AND sp.is_in_stock = TRUE
+     LEFT JOIN store_product_snapshots sps ON sps.dispensary_id = d.id
      WHERE s.code = $1
     `, [stateCode]);
 
@@ -129,7 +129,8 @@ export class StateAnalyticsService {
        MIN(price_rec) AS min_price,
        MAX(price_rec) AS max_price
      FROM store_products sp
-     JOIN states s ON s.id = sp.state_id
+     JOIN dispensaries d ON d.id = sp.dispensary_id
+     JOIN states s ON s.id = d.state_id
      WHERE s.code = $1
        AND sp.price_rec IS NOT NULL
        AND sp.is_in_stock = TRUE
@@ -140,14 +141,15 @@ export class StateAnalyticsService {
     // Get top categories
     const topCategoriesResult = await this.pool.query(`
      SELECT
-       sp.category,
+       sp.category_raw,
        COUNT(*) AS count
      FROM store_products sp
-     JOIN states s ON s.id = sp.state_id
+     JOIN dispensaries d ON d.id = sp.dispensary_id
+     JOIN states s ON s.id = d.state_id
      WHERE s.code = $1
-       AND sp.category IS NOT NULL
+       AND sp.category_raw IS NOT NULL
        AND sp.is_in_stock = TRUE
-     GROUP BY sp.category
+     GROUP BY sp.category_raw
      ORDER BY count DESC
      LIMIT 10
     `, [stateCode]);
@@ -155,14 +157,15 @@ export class StateAnalyticsService {
     // Get top brands
     const topBrandsResult = await this.pool.query(`
      SELECT
-       sp.brand_name AS brand,
+       sp.brand_name_raw AS brand,
        COUNT(*) AS count
      FROM store_products sp
-     JOIN states s ON s.id = sp.state_id
+     JOIN dispensaries d ON d.id = sp.dispensary_id
+     JOIN states s ON s.id = d.state_id
      WHERE s.code = $1
-       AND sp.brand_name IS NOT NULL
+       AND sp.brand_name_raw IS NOT NULL
        AND sp.is_in_stock = TRUE
-     GROUP BY sp.brand_name
+     GROUP BY sp.brand_name_raw
      ORDER BY count DESC
      LIMIT 10
     `, [stateCode]);
@@ -191,7 +194,7 @@ export class StateAnalyticsService {
       max_price: pricing.max_price ? parseFloat(pricing.max_price) : null,
       },
       top_categories: topCategoriesResult.rows.map((row: any) => ({
-        category: row.category,
+        category: row.category_raw,
         count: parseInt(row.count),
       })),
       top_brands: topBrandsResult.rows.map((row: any) => ({
@@ -215,8 +218,8 @@ export class StateAnalyticsService {
        COUNT(sps.id) AS snapshot_count
      FROM states s
      LEFT JOIN dispensaries d ON d.state_id = s.id
-     LEFT JOIN store_products sp ON sp.state_id = s.id AND sp.is_in_stock = TRUE
-     LEFT JOIN store_product_snapshots sps ON sps.state_id = s.id
+     LEFT JOIN store_products sp ON sp.dispensary_id = d.id AND sp.is_in_stock = TRUE
+     LEFT JOIN store_product_snapshots sps ON sps.dispensary_id = d.id
      WHERE s.recreational_legal = TRUE
      GROUP BY s.code, s.name
      ORDER BY dispensary_count DESC
@@ -232,8 +235,8 @@ export class StateAnalyticsService {
        COUNT(sps.id) AS snapshot_count
      FROM states s
      LEFT JOIN dispensaries d ON d.state_id = s.id
-     LEFT JOIN store_products sp ON sp.state_id = s.id AND sp.is_in_stock = TRUE
-     LEFT JOIN store_product_snapshots sps ON sps.state_id = s.id
+     LEFT JOIN store_products sp ON sp.dispensary_id = d.id AND sp.is_in_stock = TRUE
+     LEFT JOIN store_product_snapshots sps ON sps.dispensary_id = d.id
      WHERE s.medical_legal = TRUE
        AND (s.recreational_legal = FALSE OR s.recreational_legal IS NULL)
      GROUP BY s.code, s.name
@@ -295,46 +298,48 @@ export class StateAnalyticsService {
     let groupBy = 'NULL';
 
     if (category) {
-      categoryFilter = 'AND sp.category = $1';
+      categoryFilter = 'AND sp.category_raw = $1';
       params.push(category);
-      groupBy = 'sp.category';
+      groupBy = 'sp.category_raw';
     } else {
-      groupBy = 'sp.category';
+      groupBy = 'sp.category_raw';
     }
 
     const result = await this.pool.query(`
      WITH rec_prices AS (
        SELECT
-         ${category ? 'sp.category' : 'sp.category'},
+         ${category ? 'sp.category_raw' : 'sp.category_raw'},
          COUNT(DISTINCT s.code) AS state_count,
          COUNT(*) AS product_count,
          AVG(sp.price_rec) AS avg_price,
          PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY sp.price_rec) AS median_price
        FROM store_products sp
-       JOIN states s ON s.id = sp.state_id
+       JOIN dispensaries d ON d.id = sp.dispensary_id
+       JOIN states s ON s.id = d.state_id
        WHERE s.recreational_legal = TRUE
          AND sp.price_rec IS NOT NULL
          AND sp.is_in_stock = TRUE
-         AND sp.category IS NOT NULL
+         AND sp.category_raw IS NOT NULL
          ${categoryFilter}
-       GROUP BY sp.category
+       GROUP BY sp.category_raw
      ),
      med_prices AS (
        SELECT
-         ${category ? 'sp.category' : 'sp.category'},
+         ${category ? 'sp.category_raw' : 'sp.category_raw'},
          COUNT(DISTINCT s.code) AS state_count,
          COUNT(*) AS product_count,
          AVG(sp.price_rec) AS avg_price,
          PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY sp.price_rec) AS median_price
        FROM store_products sp
-       JOIN states s ON s.id = sp.state_id
+       JOIN dispensaries d ON d.id = sp.dispensary_id
+       JOIN states s ON s.id = d.state_id
        WHERE s.medical_legal = TRUE
          AND (s.recreational_legal = FALSE OR s.recreational_legal IS NULL)
          AND sp.price_rec IS NOT NULL
          AND sp.is_in_stock = TRUE
-         AND sp.category IS NOT NULL
+         AND sp.category_raw IS NOT NULL
          ${categoryFilter}
-       GROUP BY sp.category
+       GROUP BY sp.category_raw
      )
      SELECT
        COALESCE(r.category, m.category) AS category,
@@ -357,7 +362,7 @@ export class StateAnalyticsService {
     `, params);
 
     return result.rows.map((row: any) => ({
-      category: row.category,
+      category: row.category_raw,
       recreational: {
         state_count: parseInt(row.rec_state_count) || 0,
         product_count: parseInt(row.rec_product_count) || 0,
@@ -395,12 +400,12 @@ export class StateAnalyticsService {
        COALESCE(s.medical_legal, FALSE) AS medical_legal,
        COUNT(DISTINCT d.id) AS dispensary_count,
        COUNT(DISTINCT sp.id) AS product_count,
-       COUNT(DISTINCT sp.brand_name) FILTER (WHERE sp.brand_name IS NOT NULL) AS brand_count,
+       COUNT(DISTINCT sp.brand_name_raw) FILTER (WHERE sp.brand_name_raw IS NOT NULL) AS brand_count,
        MAX(sps.captured_at) AS last_crawl_at
      FROM states s
      LEFT JOIN dispensaries d ON d.state_id = s.id
-     LEFT JOIN store_products sp ON sp.state_id = s.id AND sp.is_in_stock = TRUE
-     LEFT JOIN store_product_snapshots sps ON sps.state_id = s.id
+     LEFT JOIN store_products sp ON sp.dispensary_id = d.id AND sp.is_in_stock = TRUE
+     LEFT JOIN store_product_snapshots sps ON sps.dispensary_id = d.id
      GROUP BY s.code, s.name, s.recreational_legal, s.medical_legal
      ORDER BY dispensary_count DESC, s.name
     `);
@@ -451,8 +456,8 @@ export class StateAnalyticsService {
        END AS gap_reason
      FROM states s
      LEFT JOIN dispensaries d ON d.state_id = s.id
-     LEFT JOIN store_products sp ON sp.state_id = s.id AND sp.is_in_stock = TRUE
-     LEFT JOIN store_product_snapshots sps ON sps.state_id = s.id
+     LEFT JOIN store_products sp ON sp.dispensary_id = d.id AND sp.is_in_stock = TRUE
+     LEFT JOIN store_product_snapshots sps ON sps.dispensary_id = d.id
      WHERE s.recreational_legal = TRUE OR s.medical_legal = TRUE
      GROUP BY s.code, s.name, s.recreational_legal, s.medical_legal
      HAVING COUNT(DISTINCT d.id) = 0
@@ -499,7 +504,8 @@ export class StateAnalyticsService {
        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY sp.price_rec) AS median_price,
        COUNT(*) AS product_count
      FROM states s
-     JOIN store_products sp ON sp.state_id = s.id
+     JOIN dispensaries d ON d.state_id = s.id
+     JOIN store_products sp ON sp.dispensary_id = d.id
      WHERE sp.price_rec IS NOT NULL
        AND sp.is_in_stock = TRUE
        AND (s.recreational_legal = TRUE OR s.medical_legal = TRUE)
 
@@ -89,22 +89,22 @@ export class StoreAnalyticsService {
|
|||||||
// Get brands added/dropped
|
// Get brands added/dropped
|
||||||
const brandsResult = await this.pool.query(`
|
const brandsResult = await this.pool.query(`
|
||||||
WITH start_brands AS (
|
WITH start_brands AS (
|
||||||
SELECT DISTINCT brand_name
|
SELECT DISTINCT brand_name_raw
|
||||||
FROM store_product_snapshots
|
FROM store_product_snapshots
|
||||||
WHERE dispensary_id = $1
|
WHERE dispensary_id = $1
|
||||||
AND captured_at >= $2 AND captured_at < $2 + INTERVAL '1 day'
|
AND captured_at >= $2::timestamp AND captured_at < $2::timestamp + INTERVAL '1 day'
|
||||||
AND brand_name IS NOT NULL
|
AND brand_name_raw IS NOT NULL
|
||||||
),
|
),
|
||||||
end_brands AS (
|
end_brands AS (
|
||||||
SELECT DISTINCT brand_name
|
SELECT DISTINCT brand_name_raw
|
||||||
FROM store_product_snapshots
|
FROM store_product_snapshots
|
||||||
WHERE dispensary_id = $1
|
WHERE dispensary_id = $1
|
||||||
AND captured_at >= $3 - INTERVAL '1 day' AND captured_at <= $3
|
AND captured_at >= $3::timestamp - INTERVAL '1 day' AND captured_at <= $3::timestamp
|
||||||
AND brand_name IS NOT NULL
|
AND brand_name_raw IS NOT NULL
|
||||||
)
|
)
|
||||||
SELECT
|
SELECT
|
||||||
ARRAY(SELECT brand_name FROM end_brands EXCEPT SELECT brand_name FROM start_brands) AS added,
|
ARRAY(SELECT brand_name_raw FROM end_brands EXCEPT SELECT brand_name_raw FROM start_brands) AS added,
|
||||||
ARRAY(SELECT brand_name FROM start_brands EXCEPT SELECT brand_name FROM end_brands) AS dropped
|
ARRAY(SELECT brand_name_raw FROM start_brands EXCEPT SELECT brand_name_raw FROM end_brands) AS dropped
|
||||||
`, [dispensaryId, start, end]);
|
`, [dispensaryId, start, end]);
|
||||||
|
|
||||||
const brands = brandsResult.rows[0] || { added: [], dropped: [] };
|
const brands = brandsResult.rows[0] || { added: [], dropped: [] };
|
||||||
@@ -184,9 +184,9 @@ export class StoreAnalyticsService {
|
|||||||
-- Products added
|
-- Products added
|
||||||
SELECT
|
SELECT
|
||||||
sp.id AS store_product_id,
|
sp.id AS store_product_id,
|
||||||
sp.name AS product_name,
|
sp.name_raw AS product_name,
|
||||||
sp.brand_name,
|
sp.brand_name_raw,
|
||||||
sp.category,
|
sp.category_raw,
|
||||||
'added' AS event_type,
|
'added' AS event_type,
|
||||||
sp.first_seen_at AS event_date,
|
sp.first_seen_at AS event_date,
|
||||||
NULL::TEXT AS old_value,
|
NULL::TEXT AS old_value,
|
||||||
@@ -201,9 +201,9 @@ export class StoreAnalyticsService {
|
|||||||
-- Stock in/out from snapshots
|
-- Stock in/out from snapshots
|
||||||
SELECT
|
SELECT
|
||||||
sps.store_product_id,
|
sps.store_product_id,
|
||||||
sp.name AS product_name,
|
sp.name_raw AS product_name,
|
||||||
sp.brand_name,
|
sp.brand_name_raw,
|
||||||
sp.category,
|
sp.category_raw,
|
||||||
CASE
|
CASE
|
||||||
WHEN sps.is_in_stock = TRUE AND LAG(sps.is_in_stock) OVER w = FALSE THEN 'stock_in'
|
WHEN sps.is_in_stock = TRUE AND LAG(sps.is_in_stock) OVER w = FALSE THEN 'stock_in'
|
||||||
WHEN sps.is_in_stock = FALSE AND LAG(sps.is_in_stock) OVER w = TRUE THEN 'stock_out'
|
WHEN sps.is_in_stock = FALSE AND LAG(sps.is_in_stock) OVER w = TRUE THEN 'stock_out'
|
||||||
@@ -224,9 +224,9 @@ export class StoreAnalyticsService {
|
|||||||
-- Price changes from snapshots
|
-- Price changes from snapshots
|
||||||
SELECT
|
SELECT
|
||||||
sps.store_product_id,
|
sps.store_product_id,
|
||||||
sp.name AS product_name,
|
sp.name_raw AS product_name,
|
||||||
sp.brand_name,
|
sp.brand_name_raw,
|
||||||
sp.category,
|
sp.category_raw,
|
||||||
'price_change' AS event_type,
|
'price_change' AS event_type,
|
||||||
sps.captured_at AS event_date,
|
sps.captured_at AS event_date,
|
||||||
LAG(sps.price_rec::TEXT) OVER w AS old_value,
|
LAG(sps.price_rec::TEXT) OVER w AS old_value,
|
||||||
@@ -250,8 +250,8 @@ export class StoreAnalyticsService {
|
|||||||
return result.rows.map((row: any) => ({
|
return result.rows.map((row: any) => ({
|
||||||
store_product_id: row.store_product_id,
|
store_product_id: row.store_product_id,
|
||||||
product_name: row.product_name,
|
product_name: row.product_name,
|
||||||
brand_name: row.brand_name,
|
brand_name: row.brand_name_raw,
|
||||||
category: row.category,
|
category: row.category_raw,
|
||||||
event_type: row.event_type,
|
event_type: row.event_type,
|
||||||
event_date: row.event_date ? row.event_date.toISOString() : null,
|
event_date: row.event_date ? row.event_date.toISOString() : null,
|
||||||
old_value: row.old_value,
|
old_value: row.old_value,
|
||||||
@@ -259,6 +259,122 @@ export class StoreAnalyticsService {
|
|||||||
}));
|
}));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get quantity changes for a store (increases/decreases)
|
||||||
|
* Useful for estimating sales (decreases) or restocks (increases)
|
||||||
|
*
|
||||||
|
* @param direction - 'decrease' for likely sales, 'increase' for restocks, 'all' for both
|
||||||
|
*/
|
||||||
|
async getQuantityChanges(
|
||||||
|
dispensaryId: number,
|
||||||
|
options: {
|
||||||
|
window?: TimeWindow;
|
||||||
|
customRange?: DateRange;
|
||||||
|
direction?: 'increase' | 'decrease' | 'all';
|
||||||
|
limit?: number;
|
||||||
|
} = {}
|
||||||
|
): Promise<{
|
||||||
|
dispensary_id: number;
|
||||||
|
window: TimeWindow;
|
||||||
|
direction: string;
|
||||||
|
total_changes: number;
|
||||||
|
total_units_decreased: number;
|
||||||
|
total_units_increased: number;
|
||||||
|
changes: Array<{
|
||||||
|
store_product_id: number;
|
||||||
|
product_name: string;
|
||||||
|
brand_name: string | null;
|
||||||
|
category: string | null;
|
||||||
|
old_quantity: number;
|
||||||
|
new_quantity: number;
|
||||||
|
quantity_delta: number;
|
||||||
|
direction: 'increase' | 'decrease';
|
||||||
|
captured_at: string;
|
||||||
|
}>;
|
||||||
|
}> {
|
||||||
|
const { window = '7d', customRange, direction = 'all', limit = 100 } = options;
|
||||||
|
const { start, end } = getDateRangeFromWindow(window, customRange);
|
||||||
|
|
||||||
|
// Build direction filter
|
||||||
|
let directionFilter = '';
|
||||||
|
if (direction === 'decrease') {
|
||||||
|
directionFilter = 'AND qty_delta < 0';
|
||||||
|
} else if (direction === 'increase') {
|
||||||
|
directionFilter = 'AND qty_delta > 0';
|
||||||
|
}
|
||||||
|
|
||||||
|
const result = await this.pool.query(`
|
||||||
|
WITH qty_changes AS (
|
||||||
|
SELECT
|
||||||
|
sps.store_product_id,
|
||||||
|
sp.name_raw AS product_name,
|
||||||
|
sp.brand_name_raw AS brand_name,
|
||||||
|
sp.category_raw AS category,
|
||||||
|
LAG(sps.stock_quantity) OVER w AS old_quantity,
|
||||||
|
sps.stock_quantity AS new_quantity,
|
||||||
|
sps.stock_quantity - LAG(sps.stock_quantity) OVER w AS qty_delta,
|
||||||
|
sps.captured_at
|
||||||
|
FROM store_product_snapshots sps
|
||||||
|
JOIN store_products sp ON sp.id = sps.store_product_id
|
||||||
|
WHERE sps.dispensary_id = $1
|
||||||
|
AND sps.captured_at >= $2
|
||||||
|
AND sps.captured_at <= $3
|
||||||
|
AND sps.stock_quantity IS NOT NULL
|
||||||
|
WINDOW w AS (PARTITION BY sps.store_product_id ORDER BY sps.captured_at)
|
||||||
|
)
|
||||||
|
SELECT *
|
||||||
|
FROM qty_changes
|
||||||
|
WHERE old_quantity IS NOT NULL
|
||||||
|
AND qty_delta != 0
|
||||||
|
${directionFilter}
|
||||||
|
ORDER BY captured_at DESC
|
||||||
|
LIMIT $4
|
||||||
|
`, [dispensaryId, start, end, limit]);
|
||||||
|
|
||||||
|
// Calculate totals
|
||||||
|
const totalsResult = await this.pool.query(`
|
||||||
|
WITH qty_changes AS (
|
||||||
|
SELECT
|
||||||
|
sps.stock_quantity - LAG(sps.stock_quantity) OVER w AS qty_delta
|
||||||
|
FROM store_product_snapshots sps
|
||||||
|
WHERE sps.dispensary_id = $1
|
||||||
|
AND sps.captured_at >= $2
|
||||||
|
AND sps.captured_at <= $3
|
||||||
|
AND sps.stock_quantity IS NOT NULL
|
||||||
|
AND sps.store_product_id IS NOT NULL
|
||||||
|
WINDOW w AS (PARTITION BY sps.store_product_id ORDER BY sps.captured_at)
|
||||||
|
)
|
||||||
|
SELECT
|
||||||
|
COUNT(*) FILTER (WHERE qty_delta != 0) AS total_changes,
|
||||||
|
COALESCE(SUM(ABS(qty_delta)) FILTER (WHERE qty_delta < 0), 0) AS units_decreased,
|
||||||
|
COALESCE(SUM(qty_delta) FILTER (WHERE qty_delta > 0), 0) AS units_increased
|
||||||
|
FROM qty_changes
|
||||||
|
WHERE qty_delta IS NOT NULL
|
||||||
|
`, [dispensaryId, start, end]);
|
||||||
|
|
||||||
|
const totals = totalsResult.rows[0] || {};
|
||||||
|
|
||||||
|
return {
|
||||||
|
dispensary_id: dispensaryId,
|
||||||
|
window,
|
||||||
|
direction,
|
||||||
|
total_changes: parseInt(totals.total_changes) || 0,
|
||||||
|
total_units_decreased: parseInt(totals.units_decreased) || 0,
|
||||||
|
total_units_increased: parseInt(totals.units_increased) || 0,
|
||||||
|
changes: result.rows.map((row: any) => ({
|
||||||
|
store_product_id: row.store_product_id,
|
||||||
|
product_name: row.product_name,
|
||||||
|
brand_name: row.brand_name_raw,
|
||||||
|
category: row.category_raw,
|
||||||
|
old_quantity: row.old_quantity,
|
||||||
|
new_quantity: row.new_quantity,
|
||||||
|
quantity_delta: row.qty_delta,
|
||||||
|
direction: row.qty_delta > 0 ? 'increase' : 'decrease',
|
||||||
|
captured_at: row.captured_at?.toISOString() || null,
|
||||||
|
})),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
  /**
   * Get store inventory composition (categories and brands breakdown)
   */
@@ -299,14 +415,14 @@ export class StoreAnalyticsService {
     // Get top brands
     const brandsResult = await this.pool.query(`
       SELECT
-        brand_name AS brand,
+        brand_name_raw AS brand,
         COUNT(*) AS count,
         ROUND(COUNT(*)::NUMERIC * 100 / NULLIF($2, 0), 2) AS percent
       FROM store_products
       WHERE dispensary_id = $1
-        AND brand_name IS NOT NULL
+        AND brand_name_raw IS NOT NULL
         AND is_in_stock = TRUE
-      GROUP BY brand_name
+      GROUP BY brand_name_raw
       ORDER BY count DESC
       LIMIT 20
     `, [dispensaryId, totalProducts]);
@@ -316,7 +432,7 @@ export class StoreAnalyticsService {
       in_stock_count: parseInt(totals.in_stock) || 0,
       out_of_stock_count: parseInt(totals.out_of_stock) || 0,
       categories: categoriesResult.rows.map((row: any) => ({
-        category: row.category,
+        category: row.category_raw,
         count: parseInt(row.count),
         percent: parseFloat(row.percent) || 0,
       })),
@@ -458,23 +574,24 @@ export class StoreAnalyticsService {
       ),
       market_prices AS (
         SELECT
-          sp.category,
+          sp.category_raw,
           AVG(sp.price_rec) AS market_avg
         FROM store_products sp
-        WHERE sp.state_id = $2
+        JOIN dispensaries d ON d.id = sp.dispensary_id
+        WHERE d.state_id = $2
           AND sp.price_rec IS NOT NULL
           AND sp.is_in_stock = TRUE
-          AND sp.category IS NOT NULL
-        GROUP BY sp.category
+          AND sp.category_raw IS NOT NULL
+        GROUP BY sp.category_raw
       )
       SELECT
-        sp.category,
+        sp.category_raw,
         sp.store_avg AS store_avg_price,
         mp.market_avg AS market_avg_price,
         ROUND(((sp.store_avg - mp.market_avg) / NULLIF(mp.market_avg, 0) * 100)::NUMERIC, 2) AS price_vs_market_percent,
         sp.product_count
       FROM store_prices sp
-      LEFT JOIN market_prices mp ON mp.category = sp.category
+      LEFT JOIN market_prices mp ON mp.category_raw = sp.category_raw
       ORDER BY sp.product_count DESC
     `, [dispensaryId, dispensary.state_id]);
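A worked instance of the price_vs_market_percent formula above, with made-up averages:

```ts
// Illustrative numbers only: store average $42 vs market average $35.
const storeAvg = 42;
const marketAvg = 35;
// Mirrors ROUND(((store_avg - market_avg) / NULLIF(market_avg, 0) * 100)::NUMERIC, 2)
const pct = marketAvg === 0 ? null : Number((((storeAvg - marketAvg) / marketAvg) * 100).toFixed(2));
console.log(pct); // 20 → this store prices 20% above the state market
```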
@@ -486,9 +603,10 @@ export class StoreAnalyticsService {
         WHERE dispensary_id = $1 AND price_rec IS NOT NULL AND is_in_stock = TRUE
       ),
       market_avg AS (
-        SELECT AVG(price_rec) AS avg
-        FROM store_products
-        WHERE state_id = $2 AND price_rec IS NOT NULL AND is_in_stock = TRUE
+        SELECT AVG(sp.price_rec) AS avg
+        FROM store_products sp
+        JOIN dispensaries d ON d.id = sp.dispensary_id
+        WHERE d.state_id = $2 AND sp.price_rec IS NOT NULL AND sp.is_in_stock = TRUE
       )
       SELECT
         ROUND(((sa.avg - ma.avg) / NULLIF(ma.avg, 0) * 100)::NUMERIC, 2) AS price_vs_market
@@ -499,7 +617,7 @@ export class StoreAnalyticsService {
       dispensary_id: dispensaryId,
       dispensary_name: dispensary.name,
       categories: result.rows.map((row: any) => ({
-        category: row.category,
+        category: row.category_raw,
         store_avg_price: parseFloat(row.store_avg_price),
         market_avg_price: row.market_avg_price ? parseFloat(row.market_avg_price) : 0,
         price_vs_market_percent: row.price_vs_market_percent ? parseFloat(row.price_vs_market_percent) : 0,
@@ -11,3 +11,4 @@ export { BrandPenetrationService } from './BrandPenetrationService';
 export { CategoryAnalyticsService } from './CategoryAnalyticsService';
 export { StoreAnalyticsService } from './StoreAnalyticsService';
 export { StateAnalyticsService } from './StateAnalyticsService';
+export { BrandIntelligenceService } from './BrandIntelligenceService';
@@ -322,3 +322,48 @@ export interface RecVsMedPriceComparison {
   };
   price_diff_percent: number | null;
 }
+
+// ============================================================
+// BRAND PROMOTIONAL ANALYTICS TYPES
+// ============================================================
+
+export interface BrandPromotionalEvent {
+  product_name: string;
+  store_product_id: number;
+  dispensary_id: number;
+  dispensary_name: string;
+  state_code: string;
+  category: string | null;
+  special_start: string; // ISO date when special started
+  special_end: string | null; // ISO date when special ended (null if ongoing)
+  duration_days: number | null;
+  regular_price: number;
+  special_price: number;
+  discount_percent: number;
+  quantity_at_start: number | null;
+  quantity_at_end: number | null;
+  quantity_sold_estimate: number | null; // quantity_at_start - quantity_at_end
+}
+
+export interface BrandPromotionalSummary {
+  brand_name: string;
+  window: TimeWindow;
+  total_promotional_events: number;
+  total_products_on_special: number;
+  total_dispensaries_with_specials: number;
+  states_with_specials: string[];
+  avg_discount_percent: number;
+  avg_duration_days: number | null;
+  total_quantity_sold_estimate: number | null;
+  promotional_frequency: {
+    weekly_avg: number;
+    monthly_avg: number;
+  };
+  by_category: Array<{
+    category: string;
+    event_count: number;
+    avg_discount_percent: number;
+    quantity_sold_estimate: number | null;
+  }>;
+  events: BrandPromotionalEvent[];
+}
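To make the derived fields concrete, a sample event literal (all values invented) showing how discount_percent and quantity_sold_estimate relate to the raw fields:

```ts
// Every value here is illustrative, not real data.
const event: BrandPromotionalEvent = {
  product_name: 'Example Gummies 100mg',
  store_product_id: 1,
  dispensary_id: 1,
  dispensary_name: 'Example Dispensary',
  state_code: 'MI',
  category: 'Edibles',
  special_start: '2025-12-01',
  special_end: '2025-12-08',
  duration_days: 7,
  regular_price: 20,
  special_price: 15,
  discount_percent: 25, // (20 - 15) / 20 * 100
  quantity_at_start: 120,
  quantity_at_end: 80,
  quantity_sold_estimate: 40, // quantity_at_start - quantity_at_end
};
```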
@@ -1,49 +1,53 @@
 /**
  * Crawl Rotator - Proxy & User Agent Rotation for Crawlers
  *
- * Manages rotation of proxies and user agents to avoid blocks.
- * Used by platform-specific crawlers (Dutchie, Jane, etc.)
+ * Updated: 2025-12-10 per workflow-12102025.md
+ *
+ * KEY BEHAVIORS (per workflow-12102025.md):
+ * 1. Task determines WHAT work to do, proxy determines SESSION IDENTITY
+ * 2. Proxy location (timezone) sets Accept-Language headers (always English)
+ * 3. On 403: immediately get new IP, new fingerprint, retry
+ * 4. After 3 consecutive 403s on same proxy with different fingerprints → disable proxy
+ *
+ * USER-AGENT GENERATION (per workflow-12102025.md):
+ * - Device distribution: Mobile 62%, Desktop 36%, Tablet 2%
+ * - Browser whitelist: Chrome, Safari, Edge, Firefox only
+ * - UA sticks until IP rotates
+ * - Failure = alert admin + stop crawl (no fallback)
+ *
+ * Uses intoli/user-agents for realistic UA generation with daily-updated data.
  *
  * Canonical location: src/services/crawl-rotator.ts
  */

 import { Pool } from 'pg';
+import UserAgent from 'user-agents';
+import {
+  HTTPFingerprint,
+  generateHTTPFingerprint,
+  BrowserType,
+} from './http-fingerprint';

 // ============================================================
-// USER AGENT CONFIGURATION
+// UA CONSTANTS (per workflow-12102025.md)
 // ============================================================

 /**
- * Modern browser user agents (Chrome, Firefox, Safari, Edge on various platforms)
- * Updated: 2024
+ * Per workflow-12102025.md: Device category distribution (hardcoded)
+ * Mobile: 62%, Desktop: 36%, Tablet: 2%
  */
-export const USER_AGENTS = [
-  // Chrome on Windows
-  'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
-  'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
-  'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36',
-
-  // Chrome on macOS
-  'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
-  'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
-
-  // Firefox on Windows
-  'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0',
-  'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:120.0) Gecko/20100101 Firefox/120.0',
-
-  // Firefox on macOS
-  'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:121.0) Gecko/20100101 Firefox/121.0',
-
-  // Safari on macOS
-  'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Safari/605.1.15',
-  'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.1 Safari/605.1.15',
-
-  // Edge on Windows
-  'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0',
-
-  // Chrome on Linux
-  'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
-];
+const DEVICE_WEIGHTS = {
+  mobile: 62,
+  desktop: 36,
+  tablet: 2,
+} as const;
+
+/**
+ * Per workflow-12102025.md: Browser whitelist
+ * Only Chrome (67%), Safari (20%), Edge (6%), Firefox (3%)
+ * Samsung Internet, Opera, and other niche browsers are filtered out
+ */
+const ALLOWED_BROWSERS = ['Chrome', 'Safari', 'Edge', 'Firefox'] as const;

 // ============================================================
 // PROXY TYPES
@@ -61,6 +65,18 @@ export interface Proxy {
   failureCount: number;
   successCount: number;
   avgResponseTimeMs: number | null;
+  maxConnections: number;
+  /**
+   * Per workflow-12102025.md: Track consecutive 403s with different fingerprints.
+   * After 3 consecutive 403s → disable proxy (it's burned).
+   */
+  consecutive403Count: number;
+  // Location info - determines session headers per workflow-12102025.md
+  city?: string;
+  state?: string;
+  country?: string;
+  countryCode?: string;
+  timezone?: string;
 }

 export interface ProxyStats {
@@ -70,6 +86,40 @@ export interface ProxyStats {
   avgSuccessRate: number;
 }
+
+// ============================================================
+// FINGERPRINT TYPE
+// Per workflow-12102025.md: Full browser fingerprint from user-agents
+// ============================================================
+
+export interface BrowserFingerprint {
+  userAgent: string;
+  platform: string;
+  screenWidth: number;
+  screenHeight: number;
+  viewportWidth: number;
+  viewportHeight: number;
+  deviceCategory: string;
+  browserName: string; // Per workflow-12102025.md: for session logging
+  // Derived headers for anti-detect
+  acceptLanguage: string;
+  secChUa?: string;
+  secChUaPlatform?: string;
+  secChUaMobile?: string;
+  // Per workflow-12102025.md: HTTP Fingerprinting section
+  httpFingerprint: HTTPFingerprint;
+}
+
+/**
+ * Per workflow-12102025.md: Session log entry for debugging blocked sessions
+ */
+export interface UASessionLog {
+  deviceCategory: string;
+  browserName: string;
+  userAgent: string;
+  proxyIp: string | null;
+  sessionStartedAt: Date;
+}

 // ============================================================
 // PROXY ROTATOR CLASS
 // ============================================================
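As a shape check, a minimal UASessionLog literal (all values invented):

```ts
// Example values are invented; this only illustrates the intended shape.
const sessionLog: UASessionLog = {
  deviceCategory: 'mobile',
  browserName: 'Chrome',
  userAgent: 'Mozilla/5.0 (Linux; Android 14; Pixel 8) ... Chrome/131.0.0.0 Mobile',
  proxyIp: '198.51.100.4', // TEST-NET-2 documentation address
  sessionStartedAt: new Date(),
};
```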
@@ -84,9 +134,6 @@ export class ProxyRotator {
     this.pool = pool || null;
   }

-  /**
-   * Initialize with database pool
-   */
   setPool(pool: Pool): void {
     this.pool = pool;
   }
@@ -109,20 +156,28 @@ export class ProxyRotator {
         username,
         password,
         protocol,
-        is_active as "isActive",
-        last_used_at as "lastUsedAt",
+        active as "isActive",
+        last_tested_at as "lastUsedAt",
         failure_count as "failureCount",
-        success_count as "successCount",
-        avg_response_time_ms as "avgResponseTimeMs"
+        0 as "successCount",
+        response_time_ms as "avgResponseTimeMs",
+        COALESCE(max_connections, 1) as "maxConnections",
+        COALESCE(consecutive_403_count, 0) as "consecutive403Count",
+        city,
+        state,
+        country,
+        country_code as "countryCode",
+        timezone
       FROM proxies
-      WHERE is_active = true
-      ORDER BY failure_count ASC, last_used_at ASC NULLS FIRST
+      WHERE active = true
+      ORDER BY failure_count ASC, last_tested_at ASC NULLS FIRST
     `);

     this.proxies = result.rows;
-    console.log(`[ProxyRotator] Loaded ${this.proxies.length} active proxies`);
+
+    const totalCapacity = this.proxies.reduce((sum, p) => sum + p.maxConnections, 0);
+    console.log(`[ProxyRotator] Loaded ${this.proxies.length} active proxies (${totalCapacity} max concurrent connections)`);
   } catch (error) {
-    // Table might not exist - that's okay
     console.warn(`[ProxyRotator] Could not load proxies: ${error}`);
     this.proxies = [];
   }
@@ -134,7 +189,6 @@ export class ProxyRotator {
   getNext(): Proxy | null {
     if (this.proxies.length === 0) return null;

-    // Round-robin rotation
     this.currentIndex = (this.currentIndex + 1) % this.proxies.length;
     this.lastRotation = new Date();
@@ -169,19 +223,27 @@ export class ProxyRotator {
   }

   /**
-   * Mark proxy as failed (temporarily remove from rotation)
+   * Mark proxy as blocked (403 received)
+   * Per workflow-12102025.md:
+   * - Increment consecutive_403_count
+   * - After 3 consecutive 403s with different fingerprints → disable proxy
+   * - This is separate from general failures (timeouts, etc.)
    */
-  async markFailed(proxyId: number, error?: string): Promise<void> {
-    // Update in-memory
+  async markBlocked(proxyId: number): Promise<boolean> {
     const proxy = this.proxies.find(p => p.id === proxyId);
+    let shouldDisable = false;
+
     if (proxy) {
-      proxy.failureCount++;
+      proxy.consecutive403Count++;

-      // Deactivate if too many failures
-      if (proxy.failureCount >= 5) {
+      // Per workflow-12102025.md: 3 consecutive 403s → proxy is burned
+      if (proxy.consecutive403Count >= 3) {
         proxy.isActive = false;
         this.proxies = this.proxies.filter(p => p.id !== proxyId);
-        console.log(`[ProxyRotator] Proxy ${proxyId} deactivated after ${proxy.failureCount} failures`);
+        console.log(`[ProxyRotator] Proxy ${proxyId} DISABLED after ${proxy.consecutive403Count} consecutive 403s (burned)`);
+        shouldDisable = true;
+      } else {
+        console.log(`[ProxyRotator] Proxy ${proxyId} blocked (403 #${proxy.consecutive403Count}/3)`);
       }
     }

@@ -191,12 +253,49 @@ export class ProxyRotator {
       await this.pool.query(`
         UPDATE proxies
         SET
-          failure_count = failure_count + 1,
+          consecutive_403_count = COALESCE(consecutive_403_count, 0) + 1,
           last_failure_at = NOW(),
-          last_error = $2,
-          is_active = CASE WHEN failure_count >= 4 THEN false ELSE is_active END
+          test_result = '403 Forbidden',
+          active = CASE WHEN COALESCE(consecutive_403_count, 0) >= 2 THEN false ELSE active END,
+          updated_at = NOW()
         WHERE id = $1
-      `, [proxyId, error || null]);
+      `, [proxyId]);
+    } catch (err) {
+      console.error(`[ProxyRotator] Failed to update proxy ${proxyId}:`, err);
+    }
+
+    return shouldDisable;
+  }
+
+  /**
+   * Mark proxy as failed (general error - timeout, connection error, etc.)
+   * Separate from 403 blocking per workflow-12102025.md
+   */
+  async markFailed(proxyId: number, error?: string): Promise<void> {
+    const proxy = this.proxies.find(p => p.id === proxyId);
+    if (proxy) {
+      proxy.failureCount++;
+
+      // Deactivate if too many general failures
+      if (proxy.failureCount >= 5) {
+        proxy.isActive = false;
+        this.proxies = this.proxies.filter(p => p.id !== proxyId);
+        console.log(`[ProxyRotator] Proxy ${proxyId} deactivated after ${proxy.failureCount} general failures`);
+      }
+    }
+
+    if (this.pool) {
+      try {
+        await this.pool.query(`
+          UPDATE proxies
+          SET
+            failure_count = failure_count + 1,
+            updated_at = NOW(),
+            test_result = $2,
+            active = CASE WHEN failure_count >= 4 THEN false ELSE active END
+          WHERE id = $1
+        `, [proxyId, error || 'failed']);
       } catch (err) {
         console.error(`[ProxyRotator] Failed to update proxy ${proxyId}:`, err);
       }
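To illustrate the split between the two failure paths, a hypothetical error handler (the function name and status handling are illustrative, not part of this change):

```ts
// Hypothetical per-request error handler showing when each path applies.
async function handleProxyError(rotator: ProxyRotator, proxyId: number, status?: number) {
  if (status === 403) {
    // Block path: 3 consecutive 403s burn the proxy; a success resets the counter.
    const disabled = await rotator.markBlocked(proxyId);
    if (disabled) console.warn(`proxy ${proxyId} burned, rotating pool`);
  } else {
    // General path: timeouts/connect errors; 5 strikes deactivate.
    await rotator.markFailed(proxyId, status ? `HTTP ${status}` : 'timeout');
  }
}
```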
@@ -204,34 +303,35 @@ export class ProxyRotator {
   }

   /**
-   * Mark proxy as successful
+   * Mark proxy as successful - resets consecutive 403 count
+   * Per workflow-12102025.md: successful request clears the 403 counter
    */
   async markSuccess(proxyId: number, responseTimeMs?: number): Promise<void> {
-    // Update in-memory
     const proxy = this.proxies.find(p => p.id === proxyId);
     if (proxy) {
       proxy.successCount++;
+      proxy.consecutive403Count = 0; // Reset on success per workflow-12102025.md
       proxy.lastUsedAt = new Date();
       if (responseTimeMs !== undefined) {
-        // Rolling average
         proxy.avgResponseTimeMs = proxy.avgResponseTimeMs
           ? (proxy.avgResponseTimeMs * 0.8) + (responseTimeMs * 0.2)
           : responseTimeMs;
       }
     }

-    // Update database
     if (this.pool) {
       try {
         await this.pool.query(`
           UPDATE proxies
           SET
-            success_count = success_count + 1,
-            last_used_at = NOW(),
-            avg_response_time_ms = CASE
-              WHEN avg_response_time_ms IS NULL THEN $2
-              ELSE (avg_response_time_ms * 0.8) + ($2 * 0.2)
-            END
+            last_tested_at = NOW(),
+            test_result = 'success',
+            consecutive_403_count = 0,
+            response_time_ms = CASE
+              WHEN response_time_ms IS NULL THEN $2
+              ELSE (response_time_ms * 0.8 + $2 * 0.2)::integer
+            END,
+            updated_at = NOW()
           WHERE id = $1
         `, [proxyId, responseTimeMs || null]);
       } catch (err) {
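The 0.8/0.2 blend above is an exponential moving average; a quick sketch of how it behaves (illustrative samples only):

```ts
// Illustrative only: feeding response times through avg = avg*0.8 + sample*0.2.
let avg: number | null = null;
for (const sample of [500, 300, 300, 300]) {
  avg = avg === null ? sample : avg * 0.8 + sample * 0.2;
}
console.log(avg?.toFixed(1)); // 402.4 — recent samples dominate, old spikes decay quickly
```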
@@ -255,8 +355,8 @@ export class ProxyRotator {
    */
   getStats(): ProxyStats {
     const totalProxies = this.proxies.length;
-    const activeProxies = this.proxies.filter(p => p.isActive).length;
-    const blockedProxies = this.proxies.filter(p => p.failureCount >= 5).length;
+    const activeProxies = this.proxies.reduce((sum, p) => sum + p.maxConnections, 0);
+    const blockedProxies = this.proxies.filter(p => p.failureCount >= 5 || p.consecutive403Count >= 3).length;

     const successRates = this.proxies
       .filter(p => p.successCount + p.failureCount > 0)
@@ -274,9 +374,6 @@ export class ProxyRotator {
     };
   }

-  /**
-   * Check if proxy pool has available proxies
-   */
   hasAvailableProxies(): boolean {
     return this.proxies.length > 0;
   }
@@ -284,53 +381,194 @@ export class ProxyRotator {

 // ============================================================
 // USER AGENT ROTATOR CLASS
+// Per workflow-12102025.md: Uses intoli/user-agents for realistic fingerprints
 // ============================================================

 export class UserAgentRotator {
-  private userAgents: string[];
-  private currentIndex: number = 0;
-  private lastRotation: Date = new Date();
+  private currentFingerprint: BrowserFingerprint | null = null;
+  private sessionLog: UASessionLog | null = null;

-  constructor(userAgents: string[] = USER_AGENTS) {
-    this.userAgents = userAgents;
-    // Start at random index to avoid patterns
-    this.currentIndex = Math.floor(Math.random() * userAgents.length);
+  constructor() {
+    // Per workflow-12102025.md: Initialize with first fingerprint
+    this.rotate();
   }

   /**
-   * Get next user agent in rotation
+   * Per workflow-12102025.md: Roll device category based on distribution
+   * Mobile: 62%, Desktop: 36%, Tablet: 2%
    */
-  getNext(): string {
-    this.currentIndex = (this.currentIndex + 1) % this.userAgents.length;
-    this.lastRotation = new Date();
-    return this.userAgents[this.currentIndex];
+  private rollDeviceCategory(): 'mobile' | 'desktop' | 'tablet' {
+    const roll = Math.random() * 100;
+    if (roll < DEVICE_WEIGHTS.mobile) {
+      return 'mobile';
+    } else if (roll < DEVICE_WEIGHTS.mobile + DEVICE_WEIGHTS.desktop) {
+      return 'desktop';
+    } else {
+      return 'tablet';
+    }
   }

   /**
-   * Get current user agent without rotating
+   * Per workflow-12102025.md: Extract browser name from UA string
    */
-  getCurrent(): string {
-    return this.userAgents[this.currentIndex];
+  private extractBrowserName(userAgent: string): string {
+    if (userAgent.includes('Edg/')) return 'Edge';
+    if (userAgent.includes('Firefox/')) return 'Firefox';
+    if (userAgent.includes('Safari/') && !userAgent.includes('Chrome/')) return 'Safari';
+    if (userAgent.includes('Chrome/')) return 'Chrome';
+    return 'Unknown';
   }

   /**
-   * Get a random user agent
+   * Per workflow-12102025.md: Check if browser is in whitelist
    */
-  getRandom(): string {
-    const index = Math.floor(Math.random() * this.userAgents.length);
-    return this.userAgents[index];
+  private isAllowedBrowser(userAgent: string): boolean {
+    const browserName = this.extractBrowserName(userAgent);
+    return ALLOWED_BROWSERS.includes(browserName as typeof ALLOWED_BROWSERS[number]);
   }

   /**
-   * Get total available user agents
+   * Generate a new random fingerprint
+   * Per workflow-12102025.md:
+   * - Roll device category (62/36/2)
+   * - Filter to top 4 browsers only
+   * - Failure = alert admin + stop (no fallback)
    */
+  rotate(proxyIp?: string): BrowserFingerprint {
+    // Per workflow-12102025.md: Roll device category
+    const deviceCategory = this.rollDeviceCategory();
+
+    // Per workflow-12102025.md: Generate UA filtered to device category
+    const generator = new UserAgent({ deviceCategory });
+
+    // Per workflow-12102025.md: Try to get an allowed browser (max 50 attempts)
+    let ua: ReturnType<typeof generator>;
+    let attempts = 0;
+    const maxAttempts = 50;
+
+    do {
+      ua = generator();
+      attempts++;
+    } while (!this.isAllowedBrowser(ua.data.userAgent) && attempts < maxAttempts);
+
+    // Per workflow-12102025.md: If we can't get allowed browser, this is a failure
+    if (!this.isAllowedBrowser(ua.data.userAgent)) {
+      const errorMsg = `[UserAgentRotator] CRITICAL: Failed to generate allowed browser after ${maxAttempts} attempts. Device: ${deviceCategory}. Last UA: ${ua.data.userAgent}`;
+      console.error(errorMsg);
+      // Per workflow-12102025.md: Alert admin + stop crawl
+      // TODO: Post alert to admin dashboard
+      throw new Error(errorMsg);
+    }
+
+    const data = ua.data;
+    const browserName = this.extractBrowserName(data.userAgent);
+
+    // Build sec-ch-ua headers from user agent string
+    const secChUa = this.buildSecChUa(data.userAgent, deviceCategory);
+
+    // Per workflow-12102025.md: HTTP Fingerprinting - generate full HTTP fingerprint
+    const httpFingerprint = generateHTTPFingerprint(browserName as BrowserType);
+
+    this.currentFingerprint = {
+      userAgent: data.userAgent,
+      platform: data.platform,
+      screenWidth: data.screenWidth,
+      screenHeight: data.screenHeight,
+      viewportWidth: data.viewportWidth,
+      viewportHeight: data.viewportHeight,
+      deviceCategory: data.deviceCategory,
+      browserName, // Per workflow-12102025.md: for session logging
+      // Per workflow-12102025.md: always English
+      acceptLanguage: 'en-US,en;q=0.9',
+      ...secChUa,
+      // Per workflow-12102025.md: HTTP Fingerprinting section
+      httpFingerprint,
+    };
+
+    // Per workflow-12102025.md: Log session data
+    this.sessionLog = {
+      deviceCategory,
+      browserName,
+      userAgent: data.userAgent,
+      proxyIp: proxyIp || null,
+      sessionStartedAt: new Date(),
+    };
+
+    console.log(`[UserAgentRotator] New fingerprint: device=${deviceCategory}, browser=${browserName}, UA=${data.userAgent.slice(0, 50)}...`);
+    return this.currentFingerprint;
+  }
+
+  /**
+   * Get current fingerprint without rotating
+   */
+  getCurrent(): BrowserFingerprint {
+    if (!this.currentFingerprint) {
+      return this.rotate();
+    }
+    return this.currentFingerprint;
+  }
+
+  /**
+   * Get a random fingerprint (rotates and returns)
+   */
+  getRandom(proxyIp?: string): BrowserFingerprint {
+    return this.rotate(proxyIp);
+  }
+
+  /**
+   * Per workflow-12102025.md: Get session log for debugging
+   */
+  getSessionLog(): UASessionLog | null {
+    return this.sessionLog;
+  }
+
+  /**
+   * Build sec-ch-ua headers from user agent string
+   * Per workflow-12102025.md: Include mobile indicator based on device category
+   */
+  private buildSecChUa(userAgent: string, deviceCategory: string): { secChUa?: string; secChUaPlatform?: string; secChUaMobile?: string } {
+    const isMobile = deviceCategory === 'mobile' || deviceCategory === 'tablet';
+
+    // Extract Chrome version if present
+    const chromeMatch = userAgent.match(/Chrome\/(\d+)/);
+    const edgeMatch = userAgent.match(/Edg\/(\d+)/);
+
+    if (edgeMatch) {
+      const version = edgeMatch[1];
+      return {
+        secChUa: `"Microsoft Edge";v="${version}", "Chromium";v="${version}", "Not_A Brand";v="24"`,
+        secChUaPlatform: userAgent.includes('Windows') ? '"Windows"' : userAgent.includes('Android') ? '"Android"' : '"macOS"',
+        secChUaMobile: isMobile ? '?1' : '?0',
+      };
+    }
+
+    if (chromeMatch) {
+      const version = chromeMatch[1];
+      let platform = '"Linux"';
+      if (userAgent.includes('Windows')) platform = '"Windows"';
+      else if (userAgent.includes('Mac')) platform = '"macOS"';
+      else if (userAgent.includes('Android')) platform = '"Android"';
+      else if (userAgent.includes('iPhone') || userAgent.includes('iPad')) platform = '"iOS"';
+
+      return {
+        secChUa: `"Google Chrome";v="${version}", "Chromium";v="${version}", "Not_A Brand";v="24"`,
+        secChUaPlatform: platform,
+        secChUaMobile: isMobile ? '?1' : '?0',
+      };
+    }
+
+    // Firefox/Safari don't send sec-ch-ua
+    return {};
+  }
+
   getCount(): number {
-    return this.userAgents.length;
+    return 1; // user-agents generates dynamically
   }
 }

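A minimal sketch of how a crawler might consume the rotator above (the variable names and proxy IP are hypothetical; `rotate()` and `getSessionLog()` are from the class as shown):

```ts
// Hypothetical consumer of UserAgentRotator (names and IP are illustrative).
const uaRotator = new UserAgentRotator();
const fp = uaRotator.rotate('203.0.113.7'); // TEST-NET documentation address
console.log(fp.browserName, fp.deviceCategory, fp.userAgent);
// If a session later gets blocked, the log ties UA + proxy together for debugging:
console.log(uaRotator.getSessionLog());
```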
 // ============================================================
-// COMBINED ROTATOR (for convenience)
+// COMBINED ROTATOR
+// Per workflow-12102025.md: Coordinates proxy + fingerprint rotation
 // ============================================================

 export class CrawlRotator {
@@ -342,49 +580,51 @@ export class CrawlRotator {
     this.userAgent = new UserAgentRotator();
   }

-  /**
-   * Initialize rotator (load proxies from DB)
-   */
   async initialize(): Promise<void> {
     await this.proxy.loadProxies();
   }

   /**
-   * Rotate proxy only
+   * Rotate proxy only (get new IP)
    */
   rotateProxy(): Proxy | null {
     return this.proxy.getNext();
   }

   /**
-   * Rotate user agent only
+   * Rotate fingerprint only (new UA, screen size, etc.)
    */
-  rotateUserAgent(): string {
-    return this.userAgent.getNext();
+  rotateFingerprint(): BrowserFingerprint {
+    return this.userAgent.rotate();
   }

   /**
-   * Rotate both proxy and user agent
+   * Rotate both proxy and fingerprint
+   * Per workflow-12102025.md: called on 403 for fresh identity
+   * Passes proxy IP to UA rotation for session logging
    */
-  rotateBoth(): { proxy: Proxy | null; userAgent: string } {
+  rotateBoth(): { proxy: Proxy | null; fingerprint: BrowserFingerprint } {
+    const proxy = this.proxy.getNext();
+    const proxyIp = proxy ? proxy.host : undefined;
     return {
-      proxy: this.proxy.getNext(),
-      userAgent: this.userAgent.getNext(),
+      proxy,
+      fingerprint: this.userAgent.rotate(proxyIp),
     };
   }

   /**
-   * Get current proxy and user agent without rotating
+   * Get current proxy and fingerprint without rotating
    */
-  getCurrent(): { proxy: Proxy | null; userAgent: string } {
+  getCurrent(): { proxy: Proxy | null; fingerprint: BrowserFingerprint } {
     return {
       proxy: this.proxy.getCurrent(),
-      userAgent: this.userAgent.getCurrent(),
+      fingerprint: this.userAgent.getCurrent(),
     };
   }

   /**
    * Record success for current proxy
+   * Per workflow-12102025.md: resets consecutive 403 count
    */
   async recordSuccess(responseTimeMs?: number): Promise<void> {
     const current = this.proxy.getCurrent();
@@ -394,7 +634,20 @@ export class CrawlRotator {
   }

   /**
-   * Record failure for current proxy
+   * Record 403 block for current proxy
+   * Per workflow-12102025.md: increments consecutive_403_count, disables after 3
+   * Returns true if proxy was disabled
+   */
+  async recordBlock(): Promise<boolean> {
+    const current = this.proxy.getCurrent();
+    if (current) {
+      return await this.proxy.markBlocked(current.id);
+    }
+    return false;
+  }
+
+  /**
+   * Record general failure (not 403)
    */
   async recordFailure(error?: string): Promise<void> {
     const current = this.proxy.getCurrent();
@@ -402,6 +655,34 @@ export class CrawlRotator {
       await this.proxy.markFailed(current.id, error);
     }
   }
+
+  /**
+   * Get current proxy location info
+   * Per workflow-12102025.md: proxy location determines session headers
+   */
+  getProxyLocation(): { city?: string; state?: string; country?: string; timezone?: string; isRotating: boolean } | null {
+    const current = this.proxy.getCurrent();
+    if (!current) return null;
+
+    const isRotating = current.maxConnections > 1;
+
+    return {
+      city: current.city,
+      state: current.state,
+      country: current.country,
+      timezone: current.timezone,
+      isRotating
+    };
+  }
+
+  /**
+   * Get timezone from current proxy
+   * Per workflow-12102025.md: used for Accept-Language header
+   */
+  getProxyTimezone(): string | undefined {
+    const current = this.proxy.getCurrent();
+    return current?.timezone;
+  }
 }

 // ============================================================
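To make the 403 flow concrete, a sketch of a fetch wrapper built on CrawlRotator. The helper `fetchMenuPage` and the retry bound of 3 are hypothetical, not part of this change:

```ts
// Assumed helper, declared only so the sketch type-checks; a real crawler
// would issue the request through the proxy with the fingerprint's headers.
declare function fetchMenuPage(
  url: string,
  proxy: Proxy | null,
  fp: BrowserFingerprint
): Promise<{ status: number; body: string }>;

// Hypothetical retry loop implementing "on 403: new IP, new fingerprint, retry".
async function fetchWithRotation(rotator: CrawlRotator, url: string): Promise<string> {
  for (let attempt = 0; attempt < 3; attempt++) {
    const { proxy, fingerprint } = rotator.rotateBoth(); // fresh identity per attempt
    const started = Date.now();
    const res = await fetchMenuPage(url, proxy, fingerprint);
    if (res.status === 403) {
      await rotator.recordBlock(); // counts toward the 3-strike burn rule
      continue; // next loop iteration gets a new IP + new fingerprint
    }
    await rotator.recordSuccess(Date.now() - started); // resets the 403 counter
    return res.body;
  }
  throw new Error(`Blocked on all attempts: ${url}`);
}
```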
backend/src/services/http-fingerprint.ts (315 lines, new file)
@@ -0,0 +1,315 @@
/**
 * HTTP Fingerprinting Service
 *
 * Per workflow-12102025.md - HTTP Fingerprinting section:
 * - Full header set per browser type
 * - Browser-specific header ordering
 * - Natural randomization (DNT, Accept quality)
 * - Dynamic Referer per dispensary
 *
 * Canonical location: src/services/http-fingerprint.ts
 */

// ============================================================
// TYPES
// ============================================================

export type BrowserType = 'Chrome' | 'Firefox' | 'Safari' | 'Edge';

/**
 * Per workflow-12102025.md: Full HTTP fingerprint for a session
 */
export interface HTTPFingerprint {
  browserType: BrowserType;
  headers: Record<string, string>;
  headerOrder: string[];
  curlImpersonateBinary: string;
  hasDNT: boolean;
}

/**
 * Per workflow-12102025.md: Context for building headers
 */
export interface HeaderContext {
  userAgent: string;
  secChUa?: string;
  secChUaPlatform?: string;
  secChUaMobile?: string;
  referer: string;
  isPost: boolean;
  contentLength?: number;
}

// ============================================================
// CONSTANTS (per workflow-12102025.md)
// ============================================================

/**
 * Per workflow-12102025.md: DNT header distribution (~30% of users)
 */
const DNT_PROBABILITY = 0.30;

/**
 * Per workflow-12102025.md: Accept header variations for natural traffic
 */
const ACCEPT_VARIATIONS = [
  'application/json, text/plain, */*',
  'application/json,text/plain,*/*',
  '*/*',
];

/**
 * Per workflow-12102025.md: Accept-Language variations
 */
const ACCEPT_LANGUAGE_VARIATIONS = [
  'en-US,en;q=0.9',
  'en-US,en;q=0.8',
  'en-US;q=0.9,en;q=0.8',
];

/**
 * Per workflow-12102025.md: curl-impersonate binaries per browser
 */
const CURL_IMPERSONATE_BINARIES: Record<BrowserType, string> = {
  Chrome: 'curl_chrome131',
  Edge: 'curl_chrome131', // Edge uses Chromium
  Firefox: 'curl_ff133',
  Safari: 'curl_safari17',
};

// ============================================================
// HEADER ORDERING (per workflow-12102025.md)
// ============================================================

/**
 * Per workflow-12102025.md: Chrome header order for GraphQL requests
 */
const CHROME_HEADER_ORDER = [
  'Host',
  'Connection',
  'Content-Length',
  'sec-ch-ua',
  'DNT',
  'sec-ch-ua-mobile',
  'User-Agent',
  'sec-ch-ua-platform',
  'Content-Type',
  'Accept',
  'Origin',
  'sec-fetch-site',
  'sec-fetch-mode',
  'sec-fetch-dest',
  'Referer',
  'Accept-Encoding',
  'Accept-Language',
];

/**
 * Per workflow-12102025.md: Firefox header order for GraphQL requests
 */
const FIREFOX_HEADER_ORDER = [
  'Host',
  'User-Agent',
  'Accept',
  'Accept-Language',
  'Accept-Encoding',
  'Content-Type',
  'Content-Length',
  'Origin',
  'DNT',
  'Connection',
  'Referer',
  'sec-fetch-dest',
  'sec-fetch-mode',
  'sec-fetch-site',
];

/**
 * Per workflow-12102025.md: Safari header order for GraphQL requests
 */
const SAFARI_HEADER_ORDER = [
  'Host',
  'Connection',
  'Content-Length',
  'Accept',
  'User-Agent',
  'Content-Type',
  'Origin',
  'Referer',
  'Accept-Encoding',
  'Accept-Language',
];

/**
 * Per workflow-12102025.md: Edge uses Chrome order (Chromium-based)
 */
const HEADER_ORDERS: Record<BrowserType, string[]> = {
  Chrome: CHROME_HEADER_ORDER,
  Edge: CHROME_HEADER_ORDER,
  Firefox: FIREFOX_HEADER_ORDER,
  Safari: SAFARI_HEADER_ORDER,
};

// ============================================================
// FINGERPRINT GENERATION
// ============================================================

/**
 * Per workflow-12102025.md: Generate HTTP fingerprint for a session
 * Randomization is done once per session for consistency
 */
export function generateHTTPFingerprint(browserType: BrowserType): HTTPFingerprint {
  // Per workflow-12102025.md: DNT randomized per session (~30%)
  const hasDNT = Math.random() < DNT_PROBABILITY;

  return {
    browserType,
    headers: {}, // Built dynamically per request
    headerOrder: HEADER_ORDERS[browserType],
    curlImpersonateBinary: CURL_IMPERSONATE_BINARIES[browserType],
    hasDNT,
  };
}

/**
 * Per workflow-12102025.md: Build complete headers for a request
 * Returns headers in browser-specific order
 */
export function buildOrderedHeaders(
  fingerprint: HTTPFingerprint,
  context: HeaderContext
): { headers: Record<string, string>; orderedHeaders: string[] } {
  const { browserType, hasDNT, headerOrder } = fingerprint;
  const { userAgent, secChUa, secChUaPlatform, secChUaMobile, referer, isPost, contentLength } = context;

  // Per workflow-12102025.md: Natural randomization for Accept
  const accept = ACCEPT_VARIATIONS[Math.floor(Math.random() * ACCEPT_VARIATIONS.length)];
  const acceptLanguage = ACCEPT_LANGUAGE_VARIATIONS[Math.floor(Math.random() * ACCEPT_LANGUAGE_VARIATIONS.length)];

  // Build all possible headers
  const allHeaders: Record<string, string> = {
    'Connection': 'keep-alive',
    'User-Agent': userAgent,
    'Accept': accept,
    'Accept-Language': acceptLanguage,
    'Accept-Encoding': 'gzip, deflate, br',
  };

  // Per workflow-12102025.md: POST-only headers
  if (isPost) {
    allHeaders['Content-Type'] = 'application/json';
    allHeaders['Origin'] = 'https://dutchie.com';
    if (contentLength !== undefined) {
      allHeaders['Content-Length'] = String(contentLength);
    }
  }

  // Per workflow-12102025.md: Dynamic Referer per dispensary
  allHeaders['Referer'] = referer;

  // Per workflow-12102025.md: DNT randomized per session
  if (hasDNT) {
    allHeaders['DNT'] = '1';
  }

  // Per workflow-12102025.md: Chromium-only headers (Chrome, Edge)
  if (browserType === 'Chrome' || browserType === 'Edge') {
    if (secChUa) allHeaders['sec-ch-ua'] = secChUa;
    if (secChUaMobile) allHeaders['sec-ch-ua-mobile'] = secChUaMobile;
    if (secChUaPlatform) allHeaders['sec-ch-ua-platform'] = secChUaPlatform;
    allHeaders['sec-fetch-site'] = 'same-origin';
    allHeaders['sec-fetch-mode'] = 'cors';
    allHeaders['sec-fetch-dest'] = 'empty';
  }

  // Per workflow-12102025.md: Firefox has sec-fetch but no sec-ch
  if (browserType === 'Firefox') {
    allHeaders['sec-fetch-site'] = 'same-origin';
    allHeaders['sec-fetch-mode'] = 'cors';
    allHeaders['sec-fetch-dest'] = 'empty';
  }

  // Per workflow-12102025.md: Safari has no sec-* headers

  // Filter to only headers that exist and order them
  const orderedHeaders: string[] = [];
  const headers: Record<string, string> = {};

  for (const headerName of headerOrder) {
    if (allHeaders[headerName]) {
      orderedHeaders.push(headerName);
      headers[headerName] = allHeaders[headerName];
    }
  }

  return { headers, orderedHeaders };
}

/**
 * Per workflow-12102025.md: Build curl command arguments for headers
 * Headers are added in browser-specific order
 */
export function buildCurlHeaderArgs(
  fingerprint: HTTPFingerprint,
  context: HeaderContext
): string[] {
  const { headers, orderedHeaders } = buildOrderedHeaders(fingerprint, context);

  const args: string[] = [];
  for (const headerName of orderedHeaders) {
    // Skip Host and Content-Length - curl handles these
    if (headerName === 'Host' || headerName === 'Content-Length') continue;
    args.push('-H', `${headerName}: ${headers[headerName]}`);
  }

  return args;
}

/**
 * Per workflow-12102025.md: Extract Referer from dispensary menu_url
 */
export function buildRefererFromMenuUrl(menuUrl: string | null | undefined): string {
  if (!menuUrl) {
    return 'https://dutchie.com/';
  }

  // Extract slug from menu_url
  // Formats: /embedded-menu/<slug> or /dispensary/<slug> or full URL
  let slug: string | null = null;

  const embeddedMatch = menuUrl.match(/\/embedded-menu\/([^/?]+)/);
  const dispensaryMatch = menuUrl.match(/\/dispensary\/([^/?]+)/);

  if (embeddedMatch) {
    slug = embeddedMatch[1];
  } else if (dispensaryMatch) {
    slug = dispensaryMatch[1];
  }

  if (slug) {
    return `https://dutchie.com/dispensary/${slug}`;
  }

  return 'https://dutchie.com/';
}

/**
 * Per workflow-12102025.md: Get curl-impersonate binary for browser
 */
export function getCurlBinary(browserType: BrowserType): string {
  return CURL_IMPERSONATE_BINARIES[browserType];
}

/**
 * Per workflow-12102025.md: Check if curl-impersonate is available
 */
export function isCurlImpersonateAvailable(browserType: BrowserType): boolean {
  const binary = CURL_IMPERSONATE_BINARIES[browserType];
  try {
    const { execSync } = require('child_process');
    execSync(`which ${binary}`, { stdio: 'ignore' });
    return true;
  } catch {
    return false;
  }
}
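A quick sketch of wiring these pieces together. The menu URL, UA string, and GraphQL payload are made-up examples; the functions are the ones defined in the new file above:

```ts
// Hypothetical call site (URL, UA, and body are illustrative).
const fp = generateHTTPFingerprint('Chrome');
const body = JSON.stringify({ operationName: 'FilteredProducts', variables: {} });
const { headers, orderedHeaders } = buildOrderedHeaders(fp, {
  userAgent: 'Mozilla/5.0 ...', // comes from UserAgentRotator in practice
  referer: buildRefererFromMenuUrl('https://dutchie.com/embedded-menu/example-store'),
  isPost: true,
  contentLength: body.length,
});
console.log(orderedHeaders); // Chrome ordering, e.g. Connection, sec-ch-ua, DNT?, ..., Referer
```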
backend/src/services/ip2location.ts (134 lines, new file)
@@ -0,0 +1,134 @@
/**
 * IP2Location Service
 *
 * Uses local IP2Location LITE database for IP geolocation.
 * No external API calls, no rate limits.
 *
 * Database: IP2Location LITE DB5 (free, monthly updates; default path below)
 * Fields: country, region, city, latitude, longitude
 */

import path from 'path';
import fs from 'fs';

// @ts-ignore - no types for ip2location-nodejs
const { IP2Location } = require('ip2location-nodejs');

const DB_PATH = process.env.IP2LOCATION_DB_PATH ||
  path.join(__dirname, '../../data/ip2location/IP2LOCATION-LITE-DB5.BIN');

let ip2location: any = null;
let dbLoaded = false;

/**
 * Initialize IP2Location database
 */
export function initIP2Location(): boolean {
  if (dbLoaded) return true;

  try {
    if (!fs.existsSync(DB_PATH)) {
      console.warn(`IP2Location database not found at: ${DB_PATH}`);
      console.warn('Run: ./scripts/download-ip2location.sh to download');
      return false;
    }

    ip2location = new IP2Location();
    ip2location.open(DB_PATH);
    dbLoaded = true;
    console.log('IP2Location database loaded successfully');
    return true;
  } catch (err) {
    console.error('Failed to load IP2Location database:', err);
    return false;
  }
}

/**
 * Close IP2Location database
 */
export function closeIP2Location(): void {
  if (ip2location) {
    ip2location.close();
    ip2location = null;
    dbLoaded = false;
  }
}

export interface GeoLocation {
  city: string | null;
  state: string | null;
  stateCode: string | null;
  country: string | null;
  countryCode: string | null;
  lat: number | null;
  lng: number | null;
}

/**
 * Lookup IP address location
 *
 * @param ip - IPv4 or IPv6 address
 * @returns Location data or null if not found
 */
export function lookupIP(ip: string): GeoLocation | null {
  // Skip private/localhost IPs
  if (!ip || ip === '127.0.0.1' || ip === '::1' ||
      ip.startsWith('192.168.') || ip.startsWith('10.') ||
      ip.startsWith('172.16.') || ip.startsWith('172.17.') ||
      ip.startsWith('::ffff:127.') || ip.startsWith('::ffff:192.168.') ||
      ip.startsWith('::ffff:10.')) {
    return null;
  }

  // Strip IPv6 prefix if present
  const cleanIP = ip.replace(/^::ffff:/, '');

  // Initialize on first use if not already loaded
  if (!dbLoaded) {
    if (!initIP2Location()) {
      return null;
    }
  }

  try {
    const result = ip2location.getAll(cleanIP);

    if (!result || result.ip === '?' || result.countryShort === '-') {
      return null;
    }

    // DB3 LITE doesn't include lat/lng - would need DB5+ for that
    const lat = typeof result.latitude === 'number' && result.latitude !== 0 ? result.latitude : null;
    const lng = typeof result.longitude === 'number' && result.longitude !== 0 ? result.longitude : null;

    return {
      city: result.city !== '-' ? result.city : null,
      state: result.region !== '-' ? result.region : null,
      stateCode: null, // LITE databases don't include state codes
      country: result.countryLong !== '-' ? result.countryLong : null,
      countryCode: result.countryShort !== '-' ? result.countryShort : null,
      lat,
      lng,
    };
  } catch (err) {
    console.error('IP2Location lookup error:', err);
    return null;
  }
}

/**
 * Check if IP2Location database is available
 */
export function isIP2LocationAvailable(): boolean {
  if (dbLoaded) return true;
  return fs.existsSync(DB_PATH);
}

// Export singleton-style interface
export default {
  init: initIP2Location,
  close: closeIP2Location,
  lookup: lookupIP,
  isAvailable: isIP2LocationAvailable,
};
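An illustrative lookup using the default export above (8.8.8.8 is a well-known public resolver, used here only as an example input):

```ts
import geo from './ip2location';

const loc = geo.lookup('8.8.8.8');
if (loc) {
  console.log(`${loc.city ?? 'unknown city'}, ${loc.state ?? '?'}, ${loc.countryCode ?? '?'}`);
} else {
  console.log('No geolocation (private IP or database missing)');
}
```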
@@ -276,7 +276,6 @@ export async function addProxiesFromList(proxies: Array<{
       await pool.query(`
         INSERT INTO proxies (host, port, protocol, username, password, active)
         VALUES ($1, $2, $3, $4, $5, false)
-        ON CONFLICT (host, port, protocol) DO NOTHING
       `, [
         proxy.host,
         proxy.port,
@@ -285,28 +284,10 @@ export async function addProxiesFromList(proxies: Array<{
         proxy.password
       ]);
 
-      // Check if it was actually inserted
-      const result = await pool.query(`
-        SELECT id FROM proxies
-        WHERE host = $1 AND port = $2 AND protocol = $3
-      `, [proxy.host, proxy.port, proxy.protocol]);
-
-      if (result.rows.length > 0) {
-        // Check if it was just inserted (no last_tested_at means new)
-        const checkResult = await pool.query(`
-          SELECT last_tested_at FROM proxies
-          WHERE host = $1 AND port = $2 AND protocol = $3
-        `, [proxy.host, proxy.port, proxy.protocol]);
-
-        if (checkResult.rows[0].last_tested_at === null) {
       added++;
       if (added % 100 === 0) {
         console.log(`📥 Imported ${added} proxies...`);
       }
-        } else {
-          duplicates++;
-        }
-      }
     } catch (error: any) {
       failed++;
       const errorMsg = `${proxy.host}:${proxy.port} - ${error.message}`;
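Note the behavioral shift in this hunk: with ON CONFLICT (host, port, protocol) DO NOTHING removed, re-importing an existing proxy now raises a unique-violation error (assuming the unique constraint itself still exists) and lands in the catch block, so duplicates are folded into the failed count instead of being tracked separately. If that distinction is ever wanted back, a small classifier over the node-postgres error is enough; this is a sketch, not part of the diff:

// Hedged sketch: node-postgres surfaces the Postgres SQLSTATE on err.code,
// and '23505' is unique_violation.
function isUniqueViolation(err: unknown): boolean {
  return typeof err === 'object' && err !== null &&
    (err as { code?: string }).code === '23505';
}

Inside the catch above, `if (isUniqueViolation(error)) duplicates++; else failed++;` would restore the old duplicate accounting without the extra SELECTs.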
@@ -8,8 +8,12 @@ interface ProxyTestJob {
   tested_proxies: number;
   passed_proxies: number;
   failed_proxies: number;
+  mode?: string; // 'all' | 'failed' | 'inactive'
 }
 
+// Concurrency settings
+const DEFAULT_CONCURRENCY = 10; // Test 10 proxies at a time
+
 // Simple in-memory queue - could be replaced with Bull/Bee-Queue for production
 const activeJobs = new Map<number, { cancelled: boolean }>();
 
@@ -33,18 +37,40 @@ export async function cleanupOrphanedJobs(): Promise<void> {
   }
 }
 
-export async function createProxyTestJob(): Promise<number> {
+export type ProxyTestMode = 'all' | 'failed' | 'inactive';
+
+export interface CreateJobResult {
+  jobId: number;
+  totalProxies: number;
+}
+
+export async function createProxyTestJob(mode: ProxyTestMode = 'all', concurrency: number = DEFAULT_CONCURRENCY): Promise<CreateJobResult> {
   // Check for existing running jobs first
   const existingJob = await getActiveProxyTestJob();
   if (existingJob) {
     throw new Error('A proxy test job is already running. Please cancel it first.');
   }
-  const result = await pool.query(`
-    SELECT COUNT(*) as count FROM proxies
-  `);
+
+  // Get count based on mode
+  let countQuery: string;
+  switch (mode) {
+    case 'failed':
+      countQuery = `SELECT COUNT(*) as count FROM proxies WHERE test_result = 'failed' OR active = false`;
+      break;
+    case 'inactive':
+      countQuery = `SELECT COUNT(*) as count FROM proxies WHERE active = false`;
+      break;
+    default:
+      countQuery = `SELECT COUNT(*) as count FROM proxies`;
+  }
+
+  const result = await pool.query(countQuery);
   const totalProxies = parseInt(result.rows[0].count);
+
+  if (totalProxies === 0) {
+    throw new Error(`No proxies to test with mode '${mode}'`);
+  }
+
   const jobResult = await pool.query(`
     INSERT INTO proxy_test_jobs (status, total_proxies)
     VALUES ('pending', $1)
@@ -53,12 +79,12 @@ export async function createProxyTestJob(): Promise<number> {
 
   const jobId = jobResult.rows[0].id;
 
-  // Start job in background
-  runProxyTestJob(jobId).catch(err => {
+  // Start job in background with mode and concurrency
+  runProxyTestJob(jobId, mode, concurrency).catch(err => {
     console.error(`❌ Proxy test job ${jobId} failed:`, err);
   });
 
-  return jobId;
+  return { jobId, totalProxies };
 }
 
 export async function getProxyTestJob(jobId: number): Promise<ProxyTestJob | null> {
@@ -111,7 +137,7 @@ export async function cancelProxyTestJob(jobId: number): Promise<boolean> {
   return result.rows.length > 0;
 }
 
-async function runProxyTestJob(jobId: number): Promise<void> {
+async function runProxyTestJob(jobId: number, mode: ProxyTestMode = 'all', concurrency: number = DEFAULT_CONCURRENCY): Promise<void> {
   // Register job as active
   activeJobs.set(jobId, { cancelled: false });
 
@@ -125,20 +151,30 @@ async function runProxyTestJob(jobId: number): Promise<void> {
     WHERE id = $1
   `, [jobId]);
 
-  console.log(`🔍 Starting proxy test job ${jobId}...`);
+  console.log(`🔍 Starting proxy test job ${jobId} (mode: ${mode}, concurrency: ${concurrency})...`);
 
-  // Get all proxies
-  const result = await pool.query(`
-    SELECT id, host, port, protocol, username, password
-    FROM proxies
-    ORDER BY id
-  `);
+  // Get proxies based on mode
+  let query: string;
+  switch (mode) {
+    case 'failed':
+      query = `SELECT id, host, port, protocol, username, password FROM proxies WHERE test_result = 'failed' OR active = false ORDER BY id`;
+      break;
+    case 'inactive':
+      query = `SELECT id, host, port, protocol, username, password FROM proxies WHERE active = false ORDER BY id`;
+      break;
+    default:
+      query = `SELECT id, host, port, protocol, username, password FROM proxies ORDER BY id`;
+  }
+
+  const result = await pool.query(query);
+  const proxies = result.rows;
 
   let tested = 0;
   let passed = 0;
   let failed = 0;
 
-  for (const proxy of result.rows) {
+  // Process proxies in batches for parallel testing
+  for (let i = 0; i < proxies.length; i += concurrency) {
     // Check if job was cancelled
     const jobControl = activeJobs.get(jobId);
     if (jobControl?.cancelled) {
@@ -146,7 +182,11 @@ async function runProxyTestJob(jobId: number): Promise<void> {
       break;
     }
 
-    // Test the proxy
+    const batch = proxies.slice(i, i + concurrency);
+
+    // Test batch in parallel
+    const batchResults = await Promise.all(
+      batch.map(async (proxy) => {
     const testResult = await testProxy(
       proxy.host,
       proxy.port,
@@ -158,12 +198,19 @@ async function runProxyTestJob(jobId: number): Promise<void> {
         // Save result
        await saveProxyTestResult(proxy.id, testResult);
 
+        return testResult.success;
+      })
+    );
+
+    // Count results
+    for (const success of batchResults) {
       tested++;
-    if (testResult.success) {
+      if (success) {
        passed++;
       } else {
        failed++;
       }
+    }
 
     // Update job progress
     await pool.query(`
@@ -175,10 +222,8 @@ async function runProxyTestJob(jobId: number): Promise<void> {
       WHERE id = $4
     `, [tested, passed, failed, jobId]);
 
-    // Log progress every 10 proxies
-    if (tested % 10 === 0) {
-      console.log(`📊 Job ${jobId}: ${tested}/${result.rows.length} proxies tested (${passed} passed, ${failed} failed)`);
-    }
+    // Log progress
+    console.log(`📊 Job ${jobId}: ${tested}/${proxies.length} proxies tested (${passed} passed, ${failed} failed)`);
   }
 
   // Mark job as completed
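The batched Promise.all loop introduced above is a fixed-batch concurrency pattern: each batch waits for its slowest proxy before the next batch starts, which keeps the code simple at the cost of some head-of-line blocking. A generic helper expressing the same strategy, shown only to make the control flow explicit (names are illustrative, not part of the diff):

// Illustrative helper: run an async worker over items in fixed-size batches,
// awaiting each whole batch before starting the next.
async function mapInBatches<T, R>(
  items: T[],
  batchSize: number,
  worker: (item: T) => Promise<R>
): Promise<R[]> {
  const results: R[] = [];
  for (let i = 0; i < items.length; i += batchSize) {
    const batch = items.slice(i, i + batchSize);
    results.push(...await Promise.all(batch.map(worker)));
  }
  return results;
}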
@@ -1,116 +1,38 @@
-import cron from 'node-cron';
-import { pool } from '../db/pool';
-import { scrapeStore, scrapeCategory } from '../scraper-v2';
-
-let scheduledJobs: cron.ScheduledTask[] = [];
-
-async function getSettings(): Promise<{
-  scrapeIntervalHours: number;
-  scrapeSpecialsTime: string;
-}> {
-  const result = await pool.query(`
-    SELECT key, value FROM settings
-    WHERE key IN ('scrape_interval_hours', 'scrape_specials_time')
-  `);
-
-  const settings: Record<string, string> = {};
-  result.rows.forEach((row: { key: string; value: string }) => {
-    settings[row.key] = row.value;
-  });
-
-  return {
-    scrapeIntervalHours: parseInt(settings.scrape_interval_hours || '4'),
-    scrapeSpecialsTime: settings.scrape_specials_time || '00:01'
-  };
-}
-
-async function scrapeAllStores(): Promise<void> {
-  console.log('🔄 Starting scheduled scrape for all stores...');
-
-  const result = await pool.query(`
-    SELECT id, name FROM stores WHERE active = true AND scrape_enabled = true
-  `);
-
-  for (const store of result.rows) {
-    try {
-      console.log(`Scraping store: ${store.name}`);
-      await scrapeStore(store.id);
-    } catch (error) {
-      console.error(`Failed to scrape store ${store.name}:`, error);
-    }
-  }
-
-  console.log('✅ Scheduled scrape completed');
-}
-
-async function scrapeSpecials(): Promise<void> {
-  console.log('🌟 Starting scheduled specials scrape...');
-
-  const result = await pool.query(`
-    SELECT s.id, s.name, c.id as category_id
-    FROM stores s
-    JOIN categories c ON c.store_id = s.id
-    WHERE s.active = true AND s.scrape_enabled = true
-      AND c.slug = 'specials' AND c.scrape_enabled = true
-  `);
-
-  for (const row of result.rows) {
-    try {
-      console.log(`Scraping specials for: ${row.name}`);
-      await scrapeCategory(row.id, row.category_id);
-    } catch (error) {
-      console.error(`Failed to scrape specials for ${row.name}:`, error);
-    }
-  }
-
-  console.log('✅ Specials scrape completed');
-}
-
+/**
+ * LEGACY SCHEDULER - DEPRECATED 2024-12-10
+ *
+ * DO NOT USE THIS FILE.
+ *
+ * Per TASK_WORKFLOW_2024-12-10.md:
+ * This node-cron scheduler has been replaced by the database-driven
+ * task scheduler in src/services/task-scheduler.ts
+ *
+ * The new scheduler:
+ * - Stores schedules in PostgreSQL (survives restarts)
+ * - Uses SELECT FOR UPDATE SKIP LOCKED (multi-replica safe)
+ * - Creates tasks in worker_tasks table (processed by task-worker.ts)
+ *
+ * This file is kept for reference only. All exports are no-ops.
+ * Legacy code has been removed - see git history for original implementation.
+ */
+
+// 2024-12-10: All functions are now no-ops
 export async function startScheduler(): Promise<void> {
-  // Stop any existing jobs
-  stopScheduler();
-
-  const settings = await getSettings();
-
-  // Schedule regular store scrapes (every N hours)
-  const scrapeIntervalCron = `0 */${settings.scrapeIntervalHours} * * *`;
-  const storeJob = cron.schedule(scrapeIntervalCron, scrapeAllStores);
-  scheduledJobs.push(storeJob);
-  console.log(`📅 Scheduled store scraping: every ${settings.scrapeIntervalHours} hours`);
-
-  // Schedule specials scraping (daily at specified time)
-  const [hours, minutes] = settings.scrapeSpecialsTime.split(':');
-  const specialsCron = `${minutes} ${hours} * * *`;
-  const specialsJob = cron.schedule(specialsCron, scrapeSpecials);
-  scheduledJobs.push(specialsJob);
-  console.log(`📅 Scheduled specials scraping: daily at ${settings.scrapeSpecialsTime}`);
-
-  // Initial scrape on startup (after 10 seconds)
-  setTimeout(() => {
-    console.log('🚀 Running initial scrape...');
-    scrapeAllStores().catch(console.error);
-  }, 10000);
+  console.warn('[DEPRECATED] startScheduler() called - use taskScheduler from task-scheduler.ts instead');
 }
 
 export function stopScheduler(): void {
-  scheduledJobs.forEach(job => job.stop());
-  scheduledJobs = [];
-  console.log('🛑 Scheduler stopped');
+  console.warn('[DEPRECATED] stopScheduler() called - use taskScheduler from task-scheduler.ts instead');
 }
 
 export async function restartScheduler(): Promise<void> {
-  console.log('🔄 Restarting scheduler...');
-  stopScheduler();
-  await startScheduler();
+  console.warn('[DEPRECATED] restartScheduler() called - use taskScheduler from task-scheduler.ts instead');
 }
 
-// Manual trigger functions for admin
-export async function triggerStoreScrape(storeId: number): Promise<void> {
-  console.log(`🔧 Manual scrape triggered for store ID: ${storeId}`);
-  await scrapeStore(storeId);
+export async function triggerStoreScrape(_storeId: number): Promise<void> {
+  console.warn('[DEPRECATED] triggerStoreScrape() called - use taskService.createTask() instead');
 }
 
 export async function triggerAllStoresScrape(): Promise<void> {
-  console.log('🔧 Manual scrape triggered for all stores');
-  await scrapeAllStores();
+  console.warn('[DEPRECATED] triggerAllStoresScrape() called - use taskScheduler.triggerSchedule() instead');
 }
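For call sites still importing this legacy module, the migration the warnings point at looks roughly like this. taskScheduler.start() and taskService.createTask() are taken from files in this diff; the import paths, the wrapper function, and the choice of 'payload_fetch' as the replacement for a manual store scrape are assumptions:

// Hedged migration sketch - call-site shape and role choice are assumptions.
import { taskScheduler } from './services/task-scheduler';
import { taskService } from './tasks/task-service';

async function migrateCallSite(storeId: number): Promise<void> {
  // Replaces startScheduler(): idempotent, logs and returns if already running.
  await taskScheduler.start();

  // Replaces triggerStoreScrape(storeId): enqueue an explicit task instead.
  await taskService.createTask({ role: 'payload_fetch', dispensary_id: storeId, priority: 5 });
}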
@@ -3,7 +3,7 @@ import StealthPlugin from 'puppeteer-extra-plugin-stealth';
 import { Browser, Page } from 'puppeteer';
 import { SocksProxyAgent } from 'socks-proxy-agent';
 import { pool } from '../db/pool';
-import { uploadImageFromUrl, getImageUrl } from '../utils/minio';
+import { downloadProductImageLegacy } from '../utils/image-storage';
 import { logger } from './logger';
 import { registerScraper, updateScraperStats, completeScraper } from '../routes/scraper-monitor';
 import { incrementProxyFailure, getActiveProxy, isBotDetectionError, putProxyInTimeout } from './proxy';
@@ -767,7 +767,8 @@ export async function saveProducts(storeId: number, categoryId: number, products
 
       if (product.imageUrl && !localImagePath) {
         try {
-          localImagePath = await uploadImageFromUrl(product.imageUrl, productId);
+          const result = await downloadProductImageLegacy(product.imageUrl, 0, productId);
+          localImagePath = result.urls?.original || null;
           await client.query(`
             UPDATE products
             SET local_image_path = $1
backend/src/services/task-scheduler.ts (new file, 375 lines)
@@ -0,0 +1,375 @@
/**
 * Database-Driven Task Scheduler
 *
 * Per TASK_WORKFLOW_2024-12-10.md:
 * - Schedules stored in DB (survives restarts)
 * - Uses SELECT FOR UPDATE to prevent duplicate execution across replicas
 * - Polls every 60s to check if schedules are due
 * - Generates tasks into worker_tasks table for task-worker.ts to process
 *
 * 2024-12-10: Created to replace legacy node-cron scheduler
 */

import { pool } from '../db/pool';
import { taskService, TaskRole } from '../tasks/task-service';

// Per TASK_WORKFLOW_2024-12-10.md: Poll interval for checking schedules
const POLL_INTERVAL_MS = 60_000; // 60 seconds

interface TaskSchedule {
  id: number;
  name: string;
  role: TaskRole;
  enabled: boolean;
  interval_hours: number;
  last_run_at: Date | null;
  next_run_at: Date | null;
  state_code: string | null;
  priority: number;
}

class TaskScheduler {
  private pollTimer: NodeJS.Timeout | null = null;
  private isRunning = false;

  /**
   * Start the scheduler
   * Per TASK_WORKFLOW_2024-12-10.md: Called on API server startup
   */
  async start(): Promise<void> {
    if (this.isRunning) {
      console.log('[TaskScheduler] Already running');
      return;
    }

    console.log('[TaskScheduler] Starting database-driven scheduler...');
    this.isRunning = true;

    // Per TASK_WORKFLOW_2024-12-10.md: On startup, recover stale tasks
    try {
      const recovered = await taskService.recoverStaleTasks(10);
      if (recovered > 0) {
        console.log(`[TaskScheduler] Recovered ${recovered} stale tasks from dead workers`);
      }
    } catch (err: any) {
      console.error('[TaskScheduler] Failed to recover stale tasks:', err.message);
    }

    // Per TASK_WORKFLOW_2024-12-10.md: Ensure default schedules exist
    await this.ensureDefaultSchedules();

    // Per TASK_WORKFLOW_2024-12-10.md: Check immediately on startup
    await this.checkAndRunDueSchedules();

    // Per TASK_WORKFLOW_2024-12-10.md: Then poll every 60 seconds
    this.pollTimer = setInterval(async () => {
      await this.checkAndRunDueSchedules();
    }, POLL_INTERVAL_MS);

    console.log('[TaskScheduler] Started - polling every 60s');
  }

  /**
   * Stop the scheduler
   */
  stop(): void {
    if (this.pollTimer) {
      clearInterval(this.pollTimer);
      this.pollTimer = null;
    }
    this.isRunning = false;
    console.log('[TaskScheduler] Stopped');
  }

  /**
   * Ensure default schedules exist in the database
   * Per TASK_WORKFLOW_2024-12-10.md: Creates schedules if they don't exist
   */
  private async ensureDefaultSchedules(): Promise<void> {
    // Per TASK_WORKFLOW_2024-12-10.md: Default schedules for task generation
    // NOTE: payload_fetch replaces direct product_refresh - it chains to product_refresh
    const defaults = [
      {
        name: 'payload_fetch_all',
        role: 'payload_fetch' as TaskRole,
        interval_hours: 4,
        priority: 0,
        description: 'Fetch payloads from Dutchie API for all crawl-enabled stores every 4 hours. Chains to product_refresh.',
      },
      {
        name: 'store_discovery_dutchie',
        role: 'store_discovery' as TaskRole,
        interval_hours: 24,
        priority: 5,
        description: 'Discover new Dutchie stores daily',
      },
      {
        name: 'analytics_refresh',
        role: 'analytics_refresh' as TaskRole,
        interval_hours: 6,
        priority: 0,
        description: 'Refresh analytics materialized views every 6 hours',
      },
    ];

    for (const sched of defaults) {
      try {
        await pool.query(`
          INSERT INTO task_schedules (name, role, interval_hours, priority, description, enabled, next_run_at)
          VALUES ($1, $2, $3, $4, $5, true, NOW())
          ON CONFLICT (name) DO NOTHING
        `, [sched.name, sched.role, sched.interval_hours, sched.priority, sched.description]);
      } catch (err: any) {
        // Table may not exist yet - will be created by migration
        if (!err.message.includes('does not exist')) {
          console.error(`[TaskScheduler] Failed to create default schedule ${sched.name}:`, err.message);
        }
      }
    }
  }

  /**
   * Check for and run any due schedules
   * Per TASK_WORKFLOW_2024-12-10.md: Uses SELECT FOR UPDATE SKIP LOCKED to prevent duplicates
   */
  private async checkAndRunDueSchedules(): Promise<void> {
    const client = await pool.connect();

    try {
      await client.query('BEGIN');

      // Per TASK_WORKFLOW_2024-12-10.md: Atomic claim of due schedules
      const result = await client.query<TaskSchedule>(`
        SELECT *
        FROM task_schedules
        WHERE enabled = true
          AND (next_run_at IS NULL OR next_run_at <= NOW())
        FOR UPDATE SKIP LOCKED
      `);

      for (const schedule of result.rows) {
        console.log(`[TaskScheduler] Running schedule: ${schedule.name} (${schedule.role})`);

        try {
          const tasksCreated = await this.executeSchedule(schedule);
          console.log(`[TaskScheduler] Schedule ${schedule.name} created ${tasksCreated} tasks`);

          // Per TASK_WORKFLOW_2024-12-10.md: Update last_run_at and calculate next_run_at
          await client.query(`
            UPDATE task_schedules
            SET
              last_run_at = NOW(),
              next_run_at = NOW() + ($1 || ' hours')::interval,
              last_task_count = $2,
              updated_at = NOW()
            WHERE id = $3
          `, [schedule.interval_hours, tasksCreated, schedule.id]);

        } catch (err: any) {
          console.error(`[TaskScheduler] Schedule ${schedule.name} failed:`, err.message);

          // Still update next_run_at to prevent infinite retry loop
          await client.query(`
            UPDATE task_schedules
            SET
              next_run_at = NOW() + ($1 || ' hours')::interval,
              last_error = $2,
              updated_at = NOW()
            WHERE id = $3
          `, [schedule.interval_hours, err.message, schedule.id]);
        }
      }

      await client.query('COMMIT');
    } catch (err: any) {
      await client.query('ROLLBACK');
      console.error('[TaskScheduler] Failed to check schedules:', err.message);
    } finally {
      client.release();
    }
  }

  /**
   * Execute a schedule and create tasks
   * Per TASK_WORKFLOW_2024-12-10.md: Different logic per role
   */
  private async executeSchedule(schedule: TaskSchedule): Promise<number> {
    switch (schedule.role) {
      case 'payload_fetch':
        // Per TASK_WORKFLOW_2024-12-10.md: payload_fetch replaces direct product_refresh
        return this.generatePayloadFetchTasks(schedule);

      case 'product_refresh':
        // Legacy - kept for manual triggers, but scheduled crawls use payload_fetch
        return this.generatePayloadFetchTasks(schedule);

      case 'store_discovery':
        return this.generateStoreDiscoveryTasks(schedule);

      case 'analytics_refresh':
        return this.generateAnalyticsRefreshTasks(schedule);

      default:
        console.warn(`[TaskScheduler] Unknown role: ${schedule.role}`);
        return 0;
    }
  }

  /**
   * Generate payload_fetch tasks for stores that need crawling
   * Per TASK_WORKFLOW_2024-12-10.md: payload_fetch hits API, saves to disk, chains to product_refresh
   */
  private async generatePayloadFetchTasks(schedule: TaskSchedule): Promise<number> {
    // Per TASK_WORKFLOW_2024-12-10.md: Find stores needing refresh
    const result = await pool.query(`
      SELECT d.id
      FROM dispensaries d
      WHERE d.crawl_enabled = true
        AND d.platform_dispensary_id IS NOT NULL
        -- No pending/running payload_fetch or product_refresh task already
        AND NOT EXISTS (
          SELECT 1 FROM worker_tasks t
          WHERE t.dispensary_id = d.id
            AND t.role IN ('payload_fetch', 'product_refresh')
            AND t.status IN ('pending', 'claimed', 'running')
        )
        -- Never fetched OR last fetch > interval ago
        AND (
          d.last_fetch_at IS NULL
          OR d.last_fetch_at < NOW() - ($1 || ' hours')::interval
        )
        ${schedule.state_code ? 'AND d.state_id = (SELECT id FROM states WHERE code = $2)' : ''}
    `, schedule.state_code ? [schedule.interval_hours, schedule.state_code] : [schedule.interval_hours]);

    const dispensaryIds = result.rows.map((r: { id: number }) => r.id);

    if (dispensaryIds.length === 0) {
      return 0;
    }

    // Per TASK_WORKFLOW_2024-12-10.md: Create payload_fetch tasks (they chain to product_refresh)
    const tasks = dispensaryIds.map((id: number) => ({
      role: 'payload_fetch' as TaskRole,
      dispensary_id: id,
      priority: schedule.priority,
    }));

    return taskService.createTasks(tasks);
  }

  /**
   * Generate store_discovery tasks
   * Per TASK_WORKFLOW_2024-12-10.md: One task per platform
   */
  private async generateStoreDiscoveryTasks(schedule: TaskSchedule): Promise<number> {
    // Check if discovery task already pending
    const existing = await taskService.listTasks({
      role: 'store_discovery',
      status: ['pending', 'claimed', 'running'],
      limit: 1,
    });

    if (existing.length > 0) {
      console.log('[TaskScheduler] Store discovery task already pending, skipping');
      return 0;
    }

    await taskService.createTask({
      role: 'store_discovery',
      platform: 'dutchie',
      priority: schedule.priority,
    });

    return 1;
  }

  /**
   * Generate analytics_refresh tasks
   * Per TASK_WORKFLOW_2024-12-10.md: Single task to refresh all MVs
   */
  private async generateAnalyticsRefreshTasks(schedule: TaskSchedule): Promise<number> {
    // Check if analytics task already pending
    const existing = await taskService.listTasks({
      role: 'analytics_refresh',
      status: ['pending', 'claimed', 'running'],
      limit: 1,
    });

    if (existing.length > 0) {
      console.log('[TaskScheduler] Analytics refresh task already pending, skipping');
      return 0;
    }

    await taskService.createTask({
      role: 'analytics_refresh',
      priority: schedule.priority,
    });

    return 1;
  }

  /**
   * Get all schedules for dashboard display
   */
  async getSchedules(): Promise<TaskSchedule[]> {
    try {
      const result = await pool.query(`
        SELECT * FROM task_schedules ORDER BY name
      `);
      return result.rows as TaskSchedule[];
    } catch {
      return [];
    }
  }

  /**
   * Update a schedule
   */
  async updateSchedule(id: number, updates: Partial<TaskSchedule>): Promise<void> {
    const setClauses: string[] = [];
    const values: any[] = [];
    let paramIndex = 1;

    if (updates.enabled !== undefined) {
      setClauses.push(`enabled = $${paramIndex++}`);
      values.push(updates.enabled);
    }
    if (updates.interval_hours !== undefined) {
      setClauses.push(`interval_hours = $${paramIndex++}`);
      values.push(updates.interval_hours);
    }
    if (updates.priority !== undefined) {
      setClauses.push(`priority = $${paramIndex++}`);
      values.push(updates.priority);
    }

    if (setClauses.length === 0) return;

    setClauses.push('updated_at = NOW()');
    values.push(id);

    await pool.query(`
      UPDATE task_schedules
      SET ${setClauses.join(', ')}
      WHERE id = $${paramIndex}
    `, values);
  }

  /**
   * Trigger a schedule to run immediately
   */
  async triggerSchedule(id: number): Promise<number> {
    const result = await pool.query(`
      SELECT * FROM task_schedules WHERE id = $1
    `, [id]);

    if (result.rows.length === 0) {
      throw new Error(`Schedule ${id} not found`);
    }

    return this.executeSchedule(result.rows[0] as TaskSchedule);
  }
}

// Per TASK_WORKFLOW_2024-12-10.md: Singleton instance
export const taskScheduler = new TaskScheduler();
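A sketch of wiring the singleton into server startup and shutdown. start() and stop() are the methods defined above; the entry-point file and signal handling are assumptions, since the server bootstrap is not part of this diff:

// Hypothetical startup wiring (e.g. in src/index.ts).
import { taskScheduler } from './services/task-scheduler';

async function main(): Promise<void> {
  // Recovers stale tasks, seeds default schedules, then polls every 60s.
  await taskScheduler.start();

  // Stop polling on shutdown so the PG pool can drain cleanly.
  process.on('SIGTERM', () => {
    taskScheduler.stop();
    process.exit(0);
  });
}

main().catch(err => {
  console.error('Failed to start task scheduler:', err);
  process.exit(1);
});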
backend/src/tasks/handlers/analytics-refresh.ts (new file, 92 lines)
@@ -0,0 +1,92 @@
/**
 * Analytics Refresh Handler
 *
 * Refreshes materialized views and pre-computed analytics tables.
 * Should run daily or on-demand after major data changes.
 */

import { TaskContext, TaskResult } from '../task-worker';

export async function handleAnalyticsRefresh(ctx: TaskContext): Promise<TaskResult> {
  const { pool } = ctx;

  console.log(`[AnalyticsRefresh] Starting analytics refresh...`);

  const refreshed: string[] = [];
  const failed: string[] = [];

  // List of materialized views to refresh
  const materializedViews = [
    'mv_state_metrics',
    'mv_brand_metrics',
    'mv_category_metrics',
    'v_brand_summary',
    'v_dashboard_stats',
  ];

  for (const viewName of materializedViews) {
    try {
      // Heartbeat before each refresh
      await ctx.heartbeat();

      // Check if view exists
      const existsResult = await pool.query(`
        SELECT EXISTS (
          SELECT 1 FROM pg_matviews WHERE matviewname = $1
          UNION
          SELECT 1 FROM pg_views WHERE viewname = $1
        ) as exists
      `, [viewName]);

      if (!existsResult.rows[0].exists) {
        console.log(`[AnalyticsRefresh] View ${viewName} does not exist, skipping`);
        continue;
      }

      // Try to refresh (only works for materialized views)
      try {
        await pool.query(`REFRESH MATERIALIZED VIEW CONCURRENTLY ${viewName}`);
        refreshed.push(viewName);
        console.log(`[AnalyticsRefresh] Refreshed ${viewName}`);
      } catch (refreshError: any) {
        // Try non-concurrent refresh
        try {
          await pool.query(`REFRESH MATERIALIZED VIEW ${viewName}`);
          refreshed.push(viewName);
          console.log(`[AnalyticsRefresh] Refreshed ${viewName} (non-concurrent)`);
        } catch (nonConcurrentError: any) {
          // Not a materialized view or other error
          console.log(`[AnalyticsRefresh] ${viewName} is not a materialized view or refresh failed`);
        }
      }
    } catch (error: any) {
      console.error(`[AnalyticsRefresh] Error refreshing ${viewName}:`, error.message);
      failed.push(viewName);
    }
  }

  // Run analytics capture functions if they exist
  const captureFunctions = [
    'capture_brand_snapshots',
    'capture_category_snapshots',
  ];

  for (const funcName of captureFunctions) {
    try {
      await pool.query(`SELECT ${funcName}()`);
      console.log(`[AnalyticsRefresh] Executed ${funcName}()`);
    } catch (error: any) {
      // Function might not exist
      console.log(`[AnalyticsRefresh] ${funcName}() not available`);
    }
  }

  console.log(`[AnalyticsRefresh] Complete: ${refreshed.length} refreshed, ${failed.length} failed`);

  return {
    success: failed.length === 0,
    refreshed,
    failed,
    error: failed.length > 0 ? `Failed to refresh: ${failed.join(', ')}` : undefined,
  };
}
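One operational detail behind the concurrent-then-blocking fallback above: Postgres only allows REFRESH MATERIALIZED VIEW CONCURRENTLY when the view has a unique index, otherwise the command errors and this handler drops to the blocking form. A sketch of the index one of these views would need for the concurrent path; the column is an assumption, since the view definitions are not in this diff:

// Hedged migration sketch - column list assumed; a unique index is what
// CONCURRENTLY requires to diff old and new rows without locking readers.
import { pool } from '../../db/pool';

export async function ensureConcurrentRefreshIndex(): Promise<void> {
  await pool.query(`
    CREATE UNIQUE INDEX IF NOT EXISTS mv_brand_metrics_brand_id_idx
    ON mv_brand_metrics (brand_id)
  `);
}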
backend/src/tasks/handlers/entry-point-discovery.ts (new file, 189 lines)
@@ -0,0 +1,189 @@
/**
 * Entry Point Discovery Handler
 *
 * Resolves platform IDs for a discovered store using Dutchie GraphQL.
 * This is the step between store_discovery and product_discovery.
 *
 * Flow:
 * 1. Load dispensary info from database
 * 2. Extract slug from menu_url
 * 3. Start stealth session (fingerprint + optional proxy)
 * 4. Query Dutchie GraphQL to resolve slug → platform_dispensary_id
 * 5. Update dispensary record with resolved ID
 * 6. Queue product_discovery task if successful
 */

import { TaskContext, TaskResult } from '../task-worker';
import { startSession, endSession } from '../../platforms/dutchie';
import { resolveDispensaryIdWithDetails } from '../../platforms/dutchie/queries';

export async function handleEntryPointDiscovery(ctx: TaskContext): Promise<TaskResult> {
  const { pool, task } = ctx;
  const dispensaryId = task.dispensary_id;

  if (!dispensaryId) {
    return { success: false, error: 'No dispensary_id specified for entry_point_discovery task' };
  }

  try {
    // ============================================================
    // STEP 1: Load dispensary info
    // ============================================================
    const dispResult = await pool.query(`
      SELECT id, name, menu_url, platform_dispensary_id, menu_type, state
      FROM dispensaries
      WHERE id = $1
    `, [dispensaryId]);

    if (dispResult.rows.length === 0) {
      return { success: false, error: `Dispensary ${dispensaryId} not found` };
    }

    const dispensary = dispResult.rows[0];

    // If already has platform_dispensary_id, we're done
    if (dispensary.platform_dispensary_id) {
      console.log(`[EntryPointDiscovery] Dispensary ${dispensaryId} already has platform ID: ${dispensary.platform_dispensary_id}`);
      return {
        success: true,
        alreadyResolved: true,
        platformId: dispensary.platform_dispensary_id,
      };
    }

    const menuUrl = dispensary.menu_url;
    if (!menuUrl) {
      return { success: false, error: `Dispensary ${dispensaryId} has no menu_url` };
    }

    console.log(`[EntryPointDiscovery] Resolving platform ID for ${dispensary.name}`);
    console.log(`[EntryPointDiscovery] Menu URL: ${menuUrl}`);

    // ============================================================
    // STEP 2: Extract slug from menu URL
    // ============================================================
    let slug: string | null = null;

    const embeddedMatch = menuUrl.match(/\/embedded-menu\/([^/?]+)/);
    const dispensaryMatch = menuUrl.match(/\/dispensary\/([^/?]+)/);

    if (embeddedMatch) {
      slug = embeddedMatch[1];
    } else if (dispensaryMatch) {
      slug = dispensaryMatch[1];
    }

    if (!slug) {
      // Mark as non-dutchie menu type
      await pool.query(`
        UPDATE dispensaries
        SET menu_type = 'unknown', updated_at = NOW()
        WHERE id = $1
      `, [dispensaryId]);

      return {
        success: false,
        error: `Could not extract slug from menu_url: ${menuUrl}`,
      };
    }

    console.log(`[EntryPointDiscovery] Extracted slug: ${slug}`);

    await ctx.heartbeat();

    // ============================================================
    // STEP 3: Start stealth session
    // ============================================================
    // Per workflow-12102025.md: session identity comes from proxy location, not task params
    const session = startSession();
    console.log(`[EntryPointDiscovery] Session started: ${session.sessionId}`);

    try {
      // ============================================================
      // STEP 4: Resolve platform ID via GraphQL
      // ============================================================
      console.log(`[EntryPointDiscovery] Querying Dutchie GraphQL for slug: ${slug}`);

      const result = await resolveDispensaryIdWithDetails(slug);

      if (!result.dispensaryId) {
        // Resolution failed - could be 403, 404, or invalid response
        const reason = result.httpStatus
          ? `HTTP ${result.httpStatus}`
          : result.error || 'Unknown error';

        console.log(`[EntryPointDiscovery] Failed to resolve ${slug}: ${reason}`);

        // Mark as failed resolution but keep menu_type as dutchie
        await pool.query(`
          UPDATE dispensaries
          SET
            menu_type = CASE
              WHEN $2 = 404 THEN 'removed'
              WHEN $2 = 403 THEN 'blocked'
              ELSE 'dutchie'
            END,
            updated_at = NOW()
          WHERE id = $1
        `, [dispensaryId, result.httpStatus || 0]);

        return {
          success: false,
          error: `Could not resolve platform ID: ${reason}`,
          slug,
          httpStatus: result.httpStatus,
        };
      }

      const platformId = result.dispensaryId;
      console.log(`[EntryPointDiscovery] Resolved ${slug} -> ${platformId}`);

      await ctx.heartbeat();

      // ============================================================
      // STEP 5: Update dispensary with resolved ID
      // ============================================================
      await pool.query(`
        UPDATE dispensaries
        SET
          platform_dispensary_id = $2,
          menu_type = 'dutchie',
          crawl_enabled = true,
          updated_at = NOW()
        WHERE id = $1
      `, [dispensaryId, platformId]);

      console.log(`[EntryPointDiscovery] Updated dispensary ${dispensaryId} with platform ID`);

      // ============================================================
      // STEP 6: Queue product_discovery task
      // ============================================================
      await pool.query(`
        INSERT INTO worker_tasks (role, dispensary_id, priority, scheduled_for)
        VALUES ('product_discovery', $1, 5, NOW())
        ON CONFLICT DO NOTHING
      `, [dispensaryId]);

      console.log(`[EntryPointDiscovery] Queued product_discovery task for dispensary ${dispensaryId}`);

      return {
        success: true,
        platformId,
        slug,
        queuedProductDiscovery: true,
      };

    } finally {
      // Always end session
      endSession();
    }

  } catch (error: unknown) {
    const errorMessage = error instanceof Error ? error.message : 'Unknown error';
    console.error(`[EntryPointDiscovery] Error for dispensary ${dispensaryId}:`, errorMessage);
    return {
      success: false,
      error: errorMessage,
    };
  }
}
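The two regexes in STEP 2 cover the URL shapes Dutchie menus use; a combined form is equivalent and easy to sanity-check standalone (the URLs below are made-up examples):

// Standalone check of the slug extraction logic above.
function extractSlug(menuUrl: string): string | null {
  const m = menuUrl.match(/\/(?:embedded-menu|dispensary)\/([^/?]+)/);
  return m ? m[1] : null;
}

console.log(extractSlug('https://dutchie.com/embedded-menu/green-leaf?menuType=rec')); // 'green-leaf'
console.log(extractSlug('https://dutchie.com/dispensary/green-leaf/products'));        // 'green-leaf'
console.log(extractSlug('https://example.com/menu'));                                  // null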
backend/src/tasks/handlers/index.ts (new file, 11 lines)
@@ -0,0 +1,11 @@
/**
 * Task Handlers Index
 *
 * Exports all task handlers for the task worker.
 */

export { handleProductRefresh } from './product-refresh';
export { handleProductDiscovery } from './product-discovery';
export { handleStoreDiscovery } from './store-discovery';
export { handleEntryPointDiscovery } from './entry-point-discovery';
export { handleAnalyticsRefresh } from './analytics-refresh';
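These exports are presumably consumed by task-worker.ts when dispatching on task.role. task-worker.ts itself is not in this diff, so the map below is a hedged sketch of what that dispatch could look like, not the actual implementation (note that handlePayloadFetch is not re-exported here and would be wired separately):

// Hedged sketch of role dispatch; the real task-worker.ts is not shown in this diff.
import { TaskContext, TaskResult } from '../task-worker';
import {
  handleProductRefresh,
  handleProductDiscovery,
  handleStoreDiscovery,
  handleEntryPointDiscovery,
  handleAnalyticsRefresh,
} from './index';

const handlers: Record<string, (ctx: TaskContext) => Promise<TaskResult>> = {
  product_refresh: handleProductRefresh,
  product_discovery: handleProductDiscovery,
  store_discovery: handleStoreDiscovery,
  entry_point_discovery: handleEntryPointDiscovery,
  analytics_refresh: handleAnalyticsRefresh,
};

export async function dispatch(role: string, ctx: TaskContext): Promise<TaskResult> {
  const handler = handlers[role];
  if (!handler) {
    return { success: false, error: `No handler registered for role '${role}'` };
  }
  return handler(ctx);
}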
backend/src/tasks/handlers/payload-fetch.ts (new file, 221 lines)
@@ -0,0 +1,221 @@
/**
 * Payload Fetch Handler
 *
 * Per TASK_WORKFLOW_2024-12-10.md: Separates API fetch from data processing.
 *
 * This handler ONLY:
 * 1. Hits Dutchie GraphQL API
 * 2. Saves raw payload to filesystem (gzipped)
 * 3. Records metadata in raw_crawl_payloads table
 * 4. Queues a product_refresh task to process the payload
 *
 * Benefits of separation:
 * - Retry-friendly: If normalize fails, re-run refresh without re-crawling
 * - Faster refreshes: Local file read vs network call
 * - Replay-able: Run refresh against any historical payload
 * - Less API pressure: Only this role hits Dutchie
 */

import { TaskContext, TaskResult } from '../task-worker';
import {
  executeGraphQL,
  startSession,
  endSession,
  GRAPHQL_HASHES,
  DUTCHIE_CONFIG,
} from '../../platforms/dutchie';
import { saveRawPayload } from '../../utils/payload-storage';
import { taskService } from '../task-service';

export async function handlePayloadFetch(ctx: TaskContext): Promise<TaskResult> {
  const { pool, task } = ctx;
  const dispensaryId = task.dispensary_id;

  if (!dispensaryId) {
    return { success: false, error: 'No dispensary_id specified for payload_fetch task' };
  }

  try {
    // ============================================================
    // STEP 1: Load dispensary info
    // ============================================================
    const dispResult = await pool.query(`
      SELECT
        id, name, platform_dispensary_id, menu_url, menu_type, city, state
      FROM dispensaries
      WHERE id = $1 AND crawl_enabled = true
    `, [dispensaryId]);

    if (dispResult.rows.length === 0) {
      return { success: false, error: `Dispensary ${dispensaryId} not found or not crawl_enabled` };
    }

    const dispensary = dispResult.rows[0];
    const platformId = dispensary.platform_dispensary_id;

    if (!platformId) {
      return { success: false, error: `Dispensary ${dispensaryId} has no platform_dispensary_id` };
    }

    // Extract cName from menu_url
    const cNameMatch = dispensary.menu_url?.match(/\/(?:embedded-menu|dispensary)\/([^/?]+)/);
    const cName = cNameMatch ? cNameMatch[1] : 'dispensary';

    console.log(`[PayloadFetch] Starting fetch for ${dispensary.name} (ID: ${dispensaryId})`);
    console.log(`[PayloadFetch] Platform ID: ${platformId}, cName: ${cName}`);

    // ============================================================
    // STEP 2: Start stealth session
    // ============================================================
    const session = startSession();
    console.log(`[PayloadFetch] Session started: ${session.sessionId}`);

    await ctx.heartbeat();

    // ============================================================
    // STEP 3: Fetch products via GraphQL (Status: 'All')
    // ============================================================
    const allProducts: any[] = [];
    let page = 0;
    let totalCount = 0;
    const perPage = DUTCHIE_CONFIG.perPage;
    const maxPages = DUTCHIE_CONFIG.maxPages;

    try {
      while (page < maxPages) {
        const variables = {
          includeEnterpriseSpecials: false,
          productsFilter: {
            dispensaryId: platformId,
            pricingType: 'rec',
            Status: 'All',
            types: [],
            useCache: false,
            isDefaultSort: true,
            sortBy: 'popularSortIdx',
            sortDirection: 1,
            bypassOnlineThresholds: true,
            isKioskMenu: false,
            removeProductsBelowOptionThresholds: false,
          },
          page,
          perPage,
        };

        console.log(`[PayloadFetch] Fetching page ${page + 1}...`);

        const result = await executeGraphQL(
          'FilteredProducts',
          variables,
          GRAPHQL_HASHES.FilteredProducts,
          { cName, maxRetries: 3 }
        );

        const data = result?.data?.filteredProducts;
        if (!data || !data.products) {
          if (page === 0) {
            throw new Error('No product data returned from GraphQL');
          }
          break;
        }

        const products = data.products;
        allProducts.push(...products);

        if (page === 0) {
          totalCount = data.queryInfo?.totalCount || products.length;
          console.log(`[PayloadFetch] Total products reported: ${totalCount}`);
        }

        if (allProducts.length >= totalCount || products.length < perPage) {
          break;
        }

        page++;

        if (page < maxPages) {
          await new Promise(r => setTimeout(r, DUTCHIE_CONFIG.pageDelayMs));
        }

        if (page % 5 === 0) {
          await ctx.heartbeat();
        }
      }

      console.log(`[PayloadFetch] Fetched ${allProducts.length} products in ${page + 1} pages`);

    } finally {
      endSession();
    }

    if (allProducts.length === 0) {
      return {
        success: false,
        error: 'No products returned from GraphQL',
        productsProcessed: 0,
      };
    }

    await ctx.heartbeat();

    // ============================================================
    // STEP 4: Save raw payload to filesystem
    // Per TASK_WORKFLOW_2024-12-10.md: Metadata/Payload separation
    // ============================================================
    const rawPayload = {
      dispensaryId,
      platformId,
      cName,
      fetchedAt: new Date().toISOString(),
      productCount: allProducts.length,
      products: allProducts,
    };

    const payloadResult = await saveRawPayload(
      pool,
      dispensaryId,
      rawPayload,
      null, // crawl_run_id - not using crawl_runs in new system
      allProducts.length
    );

    console.log(`[PayloadFetch] Saved payload #${payloadResult.id} (${(payloadResult.sizeBytes / 1024).toFixed(1)}KB)`);

    // ============================================================
    // STEP 5: Update dispensary last_fetch_at
    // ============================================================
    await pool.query(`
      UPDATE dispensaries
      SET last_fetch_at = NOW()
      WHERE id = $1
    `, [dispensaryId]);

    // ============================================================
    // STEP 6: Queue product_refresh task to process the payload
    // Per TASK_WORKFLOW_2024-12-10.md: Task chaining
    // ============================================================
    await taskService.createTask({
      role: 'product_refresh',
      dispensary_id: dispensaryId,
      priority: task.priority || 0,
      payload: { payload_id: payloadResult.id },
    });

    console.log(`[PayloadFetch] Queued product_refresh task for payload #${payloadResult.id}`);

    return {
      success: true,
      payloadId: payloadResult.id,
      productCount: allProducts.length,
      sizeBytes: payloadResult.sizeBytes,
    };

  } catch (error: unknown) {
    const errorMessage = error instanceof Error ? error.message : 'Unknown error';
    console.error(`[PayloadFetch] Error for dispensary ${dispensaryId}:`, errorMessage);
    return {
      success: false,
      error: errorMessage,
    };
  }
}
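saveRawPayload itself is not shown in this diff; per the header comment it gzips the JSON to disk and records a row in raw_crawl_payloads. A self-contained sketch of the write half using Node's zlib, with the directory layout and return shape as assumptions:

// Hedged sketch of a gzipped payload write; the real implementation lives in
// ../../utils/payload-storage and also records metadata in Postgres.
import { promises as fs } from 'fs';
import path from 'path';
import { gzipSync } from 'zlib';

export async function writePayloadToDisk(
  baseDir: string,
  dispensaryId: number,
  payload: unknown
): Promise<{ filePath: string; sizeBytes: number }> {
  const dir = path.join(baseDir, String(dispensaryId));
  await fs.mkdir(dir, { recursive: true });

  const filePath = path.join(dir, `${Date.now()}.json.gz`);
  const gz = gzipSync(Buffer.from(JSON.stringify(payload), 'utf8'));
  await fs.writeFile(filePath, gz);

  return { filePath, sizeBytes: gz.length };
}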
backend/src/tasks/handlers/product-discovery.ts (new file, 31 lines)
@@ -0,0 +1,31 @@
/**
 * Product Discovery Handler
 *
 * Per TASK_WORKFLOW_2024-12-10.md: Initial product fetch for newly discovered stores.
 *
 * Flow:
 * 1. Triggered after store_discovery promotes a new dispensary
 * 2. Chains to payload_fetch to get initial product data
 * 3. payload_fetch chains to product_refresh for DB upsert
 *
 * Chaining:
 * store_discovery → (newStoreIds) → product_discovery → payload_fetch → product_refresh
 */

import { TaskContext, TaskResult } from '../task-worker';
import { handlePayloadFetch } from './payload-fetch';

export async function handleProductDiscovery(ctx: TaskContext): Promise<TaskResult> {
  const { task } = ctx;
  const dispensaryId = task.dispensary_id;

  if (!dispensaryId) {
    return { success: false, error: 'No dispensary_id provided' };
  }

  console.log(`[ProductDiscovery] Starting initial product discovery for dispensary ${dispensaryId}`);

  // Per TASK_WORKFLOW_2024-12-10.md: Chain to payload_fetch for API → disk
  // payload_fetch will then chain to product_refresh for disk → DB
  return handlePayloadFetch(ctx);
}
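Because handleProductDiscovery simply delegates, the full chain for a new store is store_discovery → entry_point_discovery → product_discovery → payload_fetch → product_refresh, each hop a queued task rather than a direct call. A one-off local invocation for debugging might look like the sketch below; the TaskContext shape is inferred from how the handlers above use it and is an assumption:

// Hypothetical one-off invocation for local debugging, not part of this diff.
import { pool } from '../../db/pool';
import { handleProductDiscovery } from './product-discovery';

async function runOnce(dispensaryId: number): Promise<void> {
  const ctx = {
    pool,
    task: { role: 'product_discovery', dispensary_id: dispensaryId, priority: 0, payload: null },
    heartbeat: async () => {}, // no-op outside the worker loop
  } as any; // TaskContext shape assumed from handler usage above

  const result = await handleProductDiscovery(ctx);
  console.log(result);
}

runOnce(123).catch(console.error);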
backend/src/tasks/handlers/product-refresh.ts (new file, 301 lines)
@@ -0,0 +1,301 @@
/**
 * Product Refresh Handler
 *
 * Per TASK_WORKFLOW_2024-12-10.md: Processes a locally-stored payload.
 *
 * This handler reads from the filesystem (NOT the Dutchie API).
 * The payload_fetch handler is responsible for API calls.
 *
 * Flow:
 * 1. Load payload from filesystem (by payload_id or latest for dispensary)
 * 2. Normalize data via DutchieNormalizer
 * 3. Upsert to store_products and store_product_snapshots
 * 4. Track missing products (increment consecutive_misses, mark OOS at 3)
 * 5. Download new product images
 *
 * Benefits of separation:
 * - Retry-friendly: If this fails, re-run without re-crawling
 * - Replay-able: Run against any historical payload
 * - Faster: Local file read vs network call
 */

import { TaskContext, TaskResult } from '../task-worker';
import { DutchieNormalizer } from '../../hydration/normalizers/dutchie';
import {
  upsertStoreProducts,
  createStoreProductSnapshots,
  downloadProductImages,
} from '../../hydration/canonical-upsert';
import { loadRawPayloadById, getLatestPayload } from '../../utils/payload-storage';

const normalizer = new DutchieNormalizer();

export async function handleProductRefresh(ctx: TaskContext): Promise<TaskResult> {
  const { pool, task } = ctx;
  const dispensaryId = task.dispensary_id;

  if (!dispensaryId) {
    return { success: false, error: 'No dispensary_id specified for product_refresh task' };
  }

  try {
    // ============================================================
    // STEP 1: Load dispensary info
    // ============================================================
    const dispResult = await pool.query(`
      SELECT
        id, name, platform_dispensary_id, menu_url, menu_type, city, state
      FROM dispensaries
      WHERE id = $1
    `, [dispensaryId]);

    if (dispResult.rows.length === 0) {
      return { success: false, error: `Dispensary ${dispensaryId} not found` };
    }

    const dispensary = dispResult.rows[0];

    // Extract cName from menu_url for image storage context
    const cNameMatch = dispensary.menu_url?.match(/\/(?:embedded-menu|dispensary)\/([^/?]+)/);
    const cName = cNameMatch ? cNameMatch[1] : 'dispensary';

    console.log(`[ProductRefresh] Starting refresh for ${dispensary.name} (ID: ${dispensaryId})`);

    await ctx.heartbeat();

    // ============================================================
    // STEP 2: Load payload from filesystem
    // Per TASK_WORKFLOW_2024-12-10.md: Read local payload, not API
    // ============================================================
    let payloadData: any;
    let payloadId: number;

    // Check if specific payload_id was provided (from task chaining)
    const taskPayload = task.payload as { payload_id?: number } | null;

    if (taskPayload?.payload_id) {
      // Load specific payload (from payload_fetch chaining)
      const result = await loadRawPayloadById(pool, taskPayload.payload_id);
      if (!result) {
        return { success: false, error: `Payload ${taskPayload.payload_id} not found` };
      }
      payloadData = result.payload;
      payloadId = result.metadata.id;
      console.log(`[ProductRefresh] Loaded specific payload #${payloadId}`);
    } else {
      // Load latest payload for this dispensary
      const result = await getLatestPayload(pool, dispensaryId);
      if (!result) {
        return { success: false, error: `No payload found for dispensary ${dispensaryId}` };
      }
      payloadData = result.payload;
      payloadId = result.metadata.id;
      console.log(`[ProductRefresh] Loaded latest payload #${payloadId} (${result.metadata.fetchedAt})`);
    }

    const allProducts = payloadData.products || [];

    if (allProducts.length === 0) {
      return {
        success: false,
        error: 'Payload contains no products',
        payloadId,
        productsProcessed: 0,
      };
    }

    console.log(`[ProductRefresh] Processing ${allProducts.length} products from payload #${payloadId}`);

    await ctx.heartbeat();

    // ============================================================
    // STEP 3: Normalize data
    // ============================================================
    console.log(`[ProductRefresh] Normalizing ${allProducts.length} products...`);

    // Build RawPayload for the normalizer
    const rawPayload = {
      id: `refresh-${dispensaryId}-${Date.now()}`,
      dispensary_id: dispensaryId,
      crawl_run_id: null,
      platform: 'dutchie',
      payload_version: 1,
      raw_json: { data: { filteredProducts: { products: allProducts } } },
      product_count: allProducts.length,
      pricing_type: 'dual',
      crawl_mode: 'dual_mode',
      fetched_at: new Date(),
      processed: false,
      normalized_at: null,
      hydration_error: null,
      hydration_attempts: 0,
      created_at: new Date(),
    };

    const normalizationResult = normalizer.normalize(rawPayload);

    if (normalizationResult.errors.length > 0) {
      console.warn(`[ProductRefresh] Normalization warnings: ${normalizationResult.errors.map(e => e.message).join(', ')}`);
    }

    if (normalizationResult.products.length === 0) {
      return {
        success: false,
        error: 'Normalization produced no products',
        payloadId,
        productsProcessed: 0,
      };
    }

    console.log(`[ProductRefresh] Normalized ${normalizationResult.products.length} products`);

    await ctx.heartbeat();

    // ============================================================
    // STEP 4: Upsert to canonical tables
    // ============================================================
    console.log(`[ProductRefresh] Upserting to store_products...`);

    const upsertResult = await upsertStoreProducts(
      pool,
      normalizationResult.products,
      normalizationResult.pricing,
      normalizationResult.availability
    );

    console.log(`[ProductRefresh] Upserted: ${upsertResult.upserted} (${upsertResult.new} new, ${upsertResult.updated} updated)`);

    await ctx.heartbeat();

    // Create snapshots
    console.log(`[ProductRefresh] Creating snapshots...`);

    const snapshotsResult = await createStoreProductSnapshots(
      pool,
      dispensaryId,
      normalizationResult.products,
|
normalizationResult.pricing,
|
||||||
|
normalizationResult.availability,
|
||||||
|
null // No crawl_run_id in new system
|
||||||
|
);
|
||||||
|
|
||||||
|
console.log(`[ProductRefresh] Created ${snapshotsResult.created} snapshots`);
|
||||||
|
|
||||||
|
await ctx.heartbeat();
|
||||||
|
|
||||||
|
// ============================================================
|
||||||
|
// STEP 5: Track missing products (consecutive_misses logic)
|
||||||
|
// - Products in feed: reset consecutive_misses to 0
|
||||||
|
// - Products not in feed: increment consecutive_misses
|
||||||
|
// - At 3 consecutive misses: mark as OOS
|
||||||
|
// ============================================================
|
||||||
|
const currentProductIds = allProducts
|
||||||
|
.map((p: any) => p._id || p.id)
|
||||||
|
.filter(Boolean);
|
||||||
|
|
||||||
|
// Reset consecutive_misses for products that ARE in the feed
|
||||||
|
if (currentProductIds.length > 0) {
|
||||||
|
await pool.query(`
|
||||||
|
UPDATE store_products
|
||||||
|
SET consecutive_misses = 0, last_seen_at = NOW()
|
||||||
|
WHERE dispensary_id = $1
|
||||||
|
AND provider = 'dutchie'
|
||||||
|
AND provider_product_id = ANY($2)
|
||||||
|
`, [dispensaryId, currentProductIds]);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Increment consecutive_misses for products NOT in the feed
|
||||||
|
const incrementResult = await pool.query(`
|
||||||
|
UPDATE store_products
|
||||||
|
SET consecutive_misses = consecutive_misses + 1
|
||||||
|
WHERE dispensary_id = $1
|
||||||
|
AND provider = 'dutchie'
|
||||||
|
AND provider_product_id NOT IN (SELECT unnest($2::text[]))
|
||||||
|
AND consecutive_misses < 3
|
||||||
|
RETURNING id
|
||||||
|
`, [dispensaryId, currentProductIds]);
|
||||||
|
|
||||||
|
const incrementedCount = incrementResult.rowCount || 0;
|
||||||
|
if (incrementedCount > 0) {
|
||||||
|
console.log(`[ProductRefresh] Incremented consecutive_misses for ${incrementedCount} products`);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Mark as OOS any products that hit 3 consecutive misses
|
||||||
|
const oosResult = await pool.query(`
|
||||||
|
UPDATE store_products
|
||||||
|
SET stock_status = 'oos', is_in_stock = false
|
||||||
|
WHERE dispensary_id = $1
|
||||||
|
AND provider = 'dutchie'
|
||||||
|
AND consecutive_misses >= 3
|
||||||
|
AND stock_status != 'oos'
|
||||||
|
RETURNING id
|
||||||
|
`, [dispensaryId]);
|
||||||
|
|
||||||
|
const markedOosCount = oosResult.rowCount || 0;
|
||||||
|
if (markedOosCount > 0) {
|
||||||
|
console.log(`[ProductRefresh] Marked ${markedOosCount} products as OOS (3+ consecutive misses)`);
|
||||||
|
}
|
||||||
|
|
||||||
|
await ctx.heartbeat();
|
||||||
|
|
||||||
|
// ============================================================
|
||||||
|
// STEP 6: Download images for new products
|
||||||
|
// ============================================================
|
||||||
|
if (upsertResult.productsNeedingImages.length > 0) {
|
||||||
|
console.log(`[ProductRefresh] Downloading images for ${upsertResult.productsNeedingImages.length} products...`);
|
||||||
|
|
||||||
|
try {
|
||||||
|
const dispensaryContext = {
|
||||||
|
stateCode: dispensary.state || 'AZ',
|
||||||
|
storeSlug: cName,
|
||||||
|
};
|
||||||
|
await downloadProductImages(
|
||||||
|
pool,
|
||||||
|
upsertResult.productsNeedingImages,
|
||||||
|
dispensaryContext
|
||||||
|
);
|
||||||
|
} catch (imgError: any) {
|
||||||
|
// Image download errors shouldn't fail the whole task
|
||||||
|
console.warn(`[ProductRefresh] Image download error (non-fatal): ${imgError.message}`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ============================================================
|
||||||
|
// STEP 7: Update dispensary last_crawl_at
|
||||||
|
// ============================================================
|
||||||
|
await pool.query(`
|
||||||
|
UPDATE dispensaries
|
||||||
|
SET last_crawl_at = NOW()
|
||||||
|
WHERE id = $1
|
||||||
|
`, [dispensaryId]);
|
||||||
|
|
||||||
|
// ============================================================
|
||||||
|
// STEP 8: Mark payload as processed
|
||||||
|
// ============================================================
|
||||||
|
await pool.query(`
|
||||||
|
UPDATE raw_crawl_payloads
|
||||||
|
SET processed_at = NOW()
|
||||||
|
WHERE id = $1
|
||||||
|
`, [payloadId]);
|
||||||
|
|
||||||
|
console.log(`[ProductRefresh] Completed ${dispensary.name}`);
|
||||||
|
|
||||||
|
return {
|
||||||
|
success: true,
|
||||||
|
payloadId,
|
||||||
|
productsProcessed: normalizationResult.products.length,
|
||||||
|
snapshotsCreated: snapshotsResult.created,
|
||||||
|
newProducts: upsertResult.new,
|
||||||
|
updatedProducts: upsertResult.updated,
|
||||||
|
markedOos: markedOosCount,
|
||||||
|
};
|
||||||
|
|
||||||
|
} catch (error: unknown) {
|
||||||
|
const errorMessage = error instanceof Error ? error.message : 'Unknown error';
|
||||||
|
console.error(`[ProductRefresh] Error for dispensary ${dispensaryId}:`, errorMessage);
|
||||||
|
return {
|
||||||
|
success: false,
|
||||||
|
error: errorMessage,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
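The miss-tracking in STEP 5 is effectively a small per-product state machine applied in SQL. A minimal sketch of the rule as a pure function (illustrative only; the helper and its types are hypothetical, not part of the handler above):

type MissState = { consecutiveMisses: number; stockStatus: 'in_stock' | 'oos' };

function applyFeedObservation(state: MissState, seenInFeed: boolean): MissState {
  if (seenInFeed) {
    // Seen in feed: misses reset; stock status itself comes from the upserted availability data
    return { consecutiveMisses: 0, stockStatus: state.stockStatus };
  }
  const misses = state.consecutiveMisses + 1;
  // Third consecutive miss flips the product to out-of-stock
  return { consecutiveMisses: misses, stockStatus: misses >= 3 ? 'oos' : state.stockStatus };
}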
86
backend/src/tasks/handlers/store-discovery.ts
Normal file
@@ -0,0 +1,86 @@
/**
 * Store Discovery Handler
 *
 * Per TASK_WORKFLOW_2024-12-10.md: Discovers new stores and returns their IDs for task chaining.
 *
 * Flow:
 * 1. For each active state, run Dutchie discovery
 * 2. Discover locations via GraphQL
 * 3. Auto-promote valid locations to dispensaries table
 * 4. Return newStoreIds[] for chaining to payload_fetch
 *
 * Chaining:
 * store_discovery → (returns newStoreIds) → payload_fetch → product_refresh
 */

import { TaskContext, TaskResult } from '../task-worker';
import { discoverState } from '../../discovery';

export async function handleStoreDiscovery(ctx: TaskContext): Promise<TaskResult> {
  const { pool, task } = ctx;
  const platform = task.platform || 'dutchie';

  console.log(`[StoreDiscovery] Starting discovery for platform: ${platform}`);

  try {
    // Get states to discover
    const statesResult = await pool.query(`
      SELECT code FROM states WHERE is_active = true ORDER BY code
    `);
    const stateCodes = statesResult.rows.map(r => r.code);

    if (stateCodes.length === 0) {
      return { success: true, storesDiscovered: 0, newStoreIds: [], message: 'No active states to discover' };
    }

    let totalDiscovered = 0;
    let totalPromoted = 0;
    // Per TASK_WORKFLOW_2024-12-10.md: Collect all new store IDs for task chaining
    const allNewStoreIds: number[] = [];

    // Run discovery for each state
    for (const stateCode of stateCodes) {
      // Heartbeat before each state
      await ctx.heartbeat();

      console.log(`[StoreDiscovery] Discovering stores in ${stateCode}...`);

      try {
        const result = await discoverState(pool, stateCode);
        totalDiscovered += result.totalLocationsFound || 0;
        totalPromoted += result.totalLocationsUpserted || 0;

        // Per TASK_WORKFLOW_2024-12-10.md: Collect new IDs for chaining
        if (result.newDispensaryIds && result.newDispensaryIds.length > 0) {
          allNewStoreIds.push(...result.newDispensaryIds);
          console.log(`[StoreDiscovery] ${stateCode}: ${result.newDispensaryIds.length} new stores`);
        }

        console.log(`[StoreDiscovery] ${stateCode}: found ${result.totalLocationsFound}, upserted ${result.totalLocationsUpserted}`);
      } catch (error: unknown) {
        const errorMessage = error instanceof Error ? error.message : 'Unknown error';
        console.error(`[StoreDiscovery] Error discovering ${stateCode}:`, errorMessage);
        // Continue with other states
      }
    }

    console.log(`[StoreDiscovery] Complete: ${totalDiscovered} discovered, ${totalPromoted} promoted, ${allNewStoreIds.length} new stores`);

    return {
      success: true,
      storesDiscovered: totalDiscovered,
      storesPromoted: totalPromoted,
      statesProcessed: stateCodes.length,
      // Per TASK_WORKFLOW_2024-12-10.md: Return new IDs for task chaining
      newStoreIds: allNewStoreIds,
    };
  } catch (error: unknown) {
    const errorMessage = error instanceof Error ? error.message : 'Unknown error';
    console.error(`[StoreDiscovery] Error:`, errorMessage);
    return {
      success: false,
      error: errorMessage,
      newStoreIds: [],
    };
  }
}
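The newStoreIds returned here are consumed by taskService.chainNextTask (in task-service.ts later in this diff), which fans them out into high-priority per-store follow-up tasks. A rough sketch of that consumption, using only the taskService API from this changeset:

// Sketch: fan newStoreIds out into per-store tasks after a discovery run.
const result = await handleStoreDiscovery(ctx);
for (const storeId of (result.newStoreIds as number[]) ?? []) {
  await taskService.createTask({
    role: 'product_discovery',
    dispensary_id: storeId,
    platform: 'dutchie',
    priority: 10, // new stores jump the queue
  });
}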
25
backend/src/tasks/index.ts
Normal file
@@ -0,0 +1,25 @@
/**
 * Task Queue Module
 *
 * Exports task service, worker, and types for use throughout the application.
 */

export {
  taskService,
  TaskRole,
  TaskStatus,
  WorkerTask,
  CreateTaskParams,
  CapacityMetrics,
  TaskFilter,
} from './task-service';

export { TaskWorker, TaskContext, TaskResult } from './task-worker';

export {
  handleProductRefresh,
  handleProductDiscovery,
  handleStoreDiscovery,
  handleEntryPointDiscovery,
  handleAnalyticsRefresh,
} from './handlers';
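Callers elsewhere in the backend can then go through this barrel, e.g. (illustrative; the importing module path is hypothetical):

import { taskService } from '../tasks';

// Queue a discovery run from an API route or script:
await taskService.createTask({ role: 'store_discovery', platform: 'dutchie' });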
93
backend/src/tasks/start-pod.ts
Normal file
@@ -0,0 +1,93 @@
#!/usr/bin/env npx tsx
/**
 * Start Pod - Simulates a Kubernetes pod locally
 *
 * Starts 5 workers with a pod name from the predefined list.
 *
 * Usage:
 *   npx tsx src/tasks/start-pod.ts <pod-index>
 *   npx tsx src/tasks/start-pod.ts 0   # Starts pod "Aethelgard" with 5 workers
 *   npx tsx src/tasks/start-pod.ts 1   # Starts pod "Xylos" with 5 workers
 */

import { spawn } from 'child_process';
import path from 'path';

const POD_NAMES = [
  'Aethelgard',
  'Xylos',
  'Kryll',
  'Coriolis',
  'Dimidium',
  'Veridia',
  'Zetani',
  'Talos IV',
  'Onyx',
  'Celestia',
  'Gormand',
  'Betha',
  'Ragnar',
  'Syphon',
  'Axiom',
  'Nadir',
  'Terra Nova',
  'Acheron',
  'Nexus',
  'Vespera',
  'Helios Prime',
  'Oasis',
  'Mordina',
  'Cygnus',
  'Umbra',
];

const WORKERS_PER_POD = 5;

async function main() {
  const podIndex = parseInt(process.argv[2] ?? '0', 10);

  // Guard against non-numeric input as well as out-of-range indexes
  if (Number.isNaN(podIndex) || podIndex < 0 || podIndex >= POD_NAMES.length) {
    console.error(`Invalid pod index: ${podIndex}. Must be 0-${POD_NAMES.length - 1}`);
    process.exit(1);
  }

  const podName = POD_NAMES[podIndex];
  console.log(`[Pod] Starting pod "${podName}" with ${WORKERS_PER_POD} workers...`);

  const workerScript = path.join(__dirname, 'task-worker.ts');
  const workers: ReturnType<typeof spawn>[] = [];

  for (let i = 1; i <= WORKERS_PER_POD; i++) {
    const workerId = `${podName}-worker-${i}`;

    const worker = spawn('npx', ['tsx', workerScript], {
      env: {
        ...process.env,
        WORKER_ID: workerId,
        POD_NAME: podName,
      },
      stdio: 'inherit',
    });

    workers.push(worker);
    console.log(`[Pod] Started worker ${i}/${WORKERS_PER_POD}: ${workerId}`);
  }

  // Handle shutdown
  const shutdown = () => {
    console.log(`\n[Pod] Shutting down pod "${podName}"...`);
    workers.forEach(w => w.kill('SIGTERM'));
    setTimeout(() => process.exit(0), 2000);
  };

  process.on('SIGTERM', shutdown);
  process.on('SIGINT', shutdown);

  // Keep the process alive
  await new Promise(() => {});
}

main().catch(err => {
  console.error('[Pod] Fatal error:', err);
  process.exit(1);
});
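The spawned processes pick these variables up inside task-worker.ts. Given the TaskWorker constructor signature shown later in this diff and the env vars documented in its header, the worker-side wiring is roughly (a sketch; the worker's actual entry point is not fully shown here):

// Sketch of the worker-side wiring for WORKER_ROLE / WORKER_ID:
const role = (process.env.WORKER_ROLE as TaskRole | undefined) ?? null; // null = any task
const worker = new TaskWorker(role, process.env.WORKER_ID);             // ID auto-generated if unset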
37
backend/src/tasks/task-pool-state.ts
Normal file
@@ -0,0 +1,37 @@
/**
 * Task Pool State
 *
 * Shared state for task pool pause/resume functionality.
 * This is kept separate to avoid circular dependencies between
 * task-service.ts and routes/tasks.ts.
 *
 * State is in-memory and resets on server restart.
 * By default, the pool is PAUSED (closed) - admin must explicitly start it.
 * This prevents workers from immediately grabbing tasks on deploy before
 * the system is ready.
 */

let taskPoolPaused = true;

export function isTaskPoolPaused(): boolean {
  return taskPoolPaused;
}

export function pauseTaskPool(): void {
  taskPoolPaused = true;
  console.log('[TaskPool] Task pool PAUSED - workers will not pick up new tasks');
}

export function resumeTaskPool(): void {
  taskPoolPaused = false;
  console.log('[TaskPool] Task pool RESUMED - workers can pick up tasks');
}

export function getTaskPoolStatus(): { paused: boolean; message: string } {
  return {
    paused: taskPoolPaused,
    message: taskPoolPaused
      ? 'Task pool is paused - workers will not pick up new tasks'
      : 'Task pool is open - workers are picking up tasks',
  };
}
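routes/tasks.ts (not part of this diff) is what flips this flag over HTTP. A minimal sketch of that wiring, assuming Express and hypothetical route paths:

import { Router } from 'express';
import { pauseTaskPool, resumeTaskPool, getTaskPoolStatus } from '../tasks/task-pool-state';

const router = Router();
// Hypothetical admin endpoints; the real paths live in routes/tasks.ts.
router.post('/tasks/pool/pause', (_req, res) => { pauseTaskPool(); res.json(getTaskPoolStatus()); });
router.post('/tasks/pool/resume', (_req, res) => { resumeTaskPool(); res.json(getTaskPoolStatus()); });
router.get('/tasks/pool/status', (_req, res) => res.json(getTaskPoolStatus()));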
624
backend/src/tasks/task-service.ts
Normal file
@@ -0,0 +1,624 @@
/**
 * Task Service
 *
 * Central service for managing worker tasks with:
 * - Atomic task claiming (per-store locking)
 * - Task lifecycle management
 * - Auto-chaining of related tasks
 * - Capacity planning metrics
 */

import { pool } from '../db/pool';
import { isTaskPoolPaused } from './task-pool-state';

// Helper to check if a table exists
async function tableExists(tableName: string): Promise<boolean> {
  const result = await pool.query(`
    SELECT EXISTS (
      SELECT FROM information_schema.tables
      WHERE table_name = $1
    ) as exists
  `, [tableName]);
  return result.rows[0].exists;
}

// Per TASK_WORKFLOW_2024-12-10.md: Task roles
// payload_fetch: Hits Dutchie API, saves raw payload to filesystem
// product_refresh: Reads local payload, normalizes, upserts to DB
export type TaskRole =
  | 'store_discovery'
  | 'entry_point_discovery'
  | 'product_discovery'
  | 'payload_fetch'      // NEW: Fetches from API, saves to disk
  | 'product_refresh'    // CHANGED: Now reads from local payload
  | 'analytics_refresh';

export type TaskStatus =
  | 'pending'
  | 'claimed'
  | 'running'
  | 'completed'
  | 'failed'
  | 'stale';

export interface WorkerTask {
  id: number;
  role: TaskRole;
  dispensary_id: number | null;
  dispensary_name?: string;  // JOINed from dispensaries
  dispensary_slug?: string;  // JOINed from dispensaries
  platform: string | null;
  status: TaskStatus;
  priority: number;
  scheduled_for: Date | null;
  worker_id: string | null;
  claimed_at: Date | null;
  started_at: Date | null;
  completed_at: Date | null;
  last_heartbeat_at: Date | null;
  result: Record<string, unknown> | null;
  error_message: string | null;
  retry_count: number;
  max_retries: number;
  payload: Record<string, unknown> | null;  // Per TASK_WORKFLOW_2024-12-10.md: Task chaining data
  created_at: Date;
  updated_at: Date;
}

export interface CreateTaskParams {
  role: TaskRole;
  dispensary_id?: number;
  platform?: string;
  priority?: number;
  scheduled_for?: Date;
  payload?: Record<string, unknown>;  // Per TASK_WORKFLOW_2024-12-10.md: For task chaining data
}

export interface CapacityMetrics {
  role: string;
  pending_tasks: number;
  ready_tasks: number;
  claimed_tasks: number;
  running_tasks: number;
  completed_last_hour: number;
  failed_last_hour: number;
  active_workers: number;
  avg_duration_sec: number | null;
  tasks_per_worker_hour: number | null;
  estimated_hours_to_drain: number | null;
}

export interface TaskFilter {
  role?: TaskRole;
  status?: TaskStatus | TaskStatus[];
  dispensary_id?: number;
  worker_id?: string;
  limit?: number;
  offset?: number;
}
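// Example (commentary, not part of this file): payload_fetch chains to
// product_refresh by threading the saved payload's ID through `payload`,
// which handleProductRefresh reads back as { payload_id?: number }.
// `savedPayloadId` below is a hypothetical variable from the fetch step:
//
//   await taskService.createTask({
//     role: 'product_refresh',
//     dispensary_id: task.dispensary_id!,
//     platform: 'dutchie',
//     payload: { payload_id: savedPayloadId },
//   });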
class TaskService {
  /**
   * Create a new task
   */
  async createTask(params: CreateTaskParams): Promise<WorkerTask> {
    const result = await pool.query(
      `INSERT INTO worker_tasks (role, dispensary_id, platform, priority, scheduled_for, payload)
       VALUES ($1, $2, $3, $4, $5, $6)
       RETURNING *`,
      [
        params.role,
        params.dispensary_id ?? null,
        params.platform ?? null,
        params.priority ?? 0,
        params.scheduled_for ?? null,
        params.payload ? JSON.stringify(params.payload) : null,
      ]
    );
    return result.rows[0] as WorkerTask;
  }

  /**
   * Create multiple tasks in a batch.
   * Note: batch creation does not carry payloads; use createTask for chained tasks.
   */
  async createTasks(tasks: CreateTaskParams[]): Promise<number> {
    if (tasks.length === 0) return 0;

    const values = tasks.map((t, i) => {
      const base = i * 5;
      return `($${base + 1}, $${base + 2}, $${base + 3}, $${base + 4}, $${base + 5})`;
    });

    const params = tasks.flatMap((t) => [
      t.role,
      t.dispensary_id ?? null,
      t.platform ?? null,
      t.priority ?? 0,
      t.scheduled_for ?? null,
    ]);

    const result = await pool.query(
      `INSERT INTO worker_tasks (role, dispensary_id, platform, priority, scheduled_for)
       VALUES ${values.join(', ')}
       ON CONFLICT DO NOTHING`,
      params
    );

    return result.rowCount ?? 0;
  }

  /**
   * Claim a task atomically for a worker.
   * If role is null, claims ANY available task (role-agnostic worker).
   * Returns null if task pool is paused.
   */
  async claimTask(role: TaskRole | null, workerId: string): Promise<WorkerTask | null> {
    // Check if task pool is paused - don't claim any tasks
    if (isTaskPoolPaused()) {
      return null;
    }

    if (role) {
      // Role-specific claiming - use the SQL function
      const result = await pool.query(
        `SELECT * FROM claim_task($1, $2)`,
        [role, workerId]
      );
      return (result.rows[0] as WorkerTask) || null;
    }

    // Role-agnostic claiming - claim ANY pending task
    const result = await pool.query(`
      UPDATE worker_tasks
      SET
        status = 'claimed',
        worker_id = $1,
        claimed_at = NOW()
      WHERE id = (
        SELECT id FROM worker_tasks
        WHERE status = 'pending'
          AND (scheduled_for IS NULL OR scheduled_for <= NOW())
          -- Exclude stores that already have an active task
          AND (dispensary_id IS NULL OR dispensary_id NOT IN (
            SELECT dispensary_id FROM worker_tasks
            WHERE status IN ('claimed', 'running')
              AND dispensary_id IS NOT NULL
          ))
        ORDER BY priority DESC, created_at ASC
        LIMIT 1
        FOR UPDATE SKIP LOCKED
      )
      RETURNING *
    `, [workerId]);

    return (result.rows[0] as WorkerTask) || null;
  }
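// Note on the claim above (commentary, not part of this file): the inner
// SELECT ... FOR UPDATE SKIP LOCKED lets many workers poll concurrently
// without blocking on, or double-claiming, the same row. A worker that loses
// the race simply skips the locked row and takes the next candidate. The
// NOT IN subquery adds the per-store lock: a dispensary that already has a
// task in 'claimed' or 'running' is invisible to other claimers until that
// task finishes.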
  /**
   * Mark a task as running (worker started processing)
   */
  async startTask(taskId: number): Promise<void> {
    await pool.query(
      `UPDATE worker_tasks
       SET status = 'running', started_at = NOW(), last_heartbeat_at = NOW()
       WHERE id = $1`,
      [taskId]
    );
  }

  /**
   * Update heartbeat to prevent stale detection
   */
  async heartbeat(taskId: number): Promise<void> {
    await pool.query(
      `UPDATE worker_tasks
       SET last_heartbeat_at = NOW()
       WHERE id = $1 AND status = 'running'`,
      [taskId]
    );
  }

  /**
   * Mark a task as completed
   */
  async completeTask(taskId: number, result?: Record<string, unknown>): Promise<void> {
    await pool.query(
      `UPDATE worker_tasks
       SET status = 'completed', completed_at = NOW(), result = $2
       WHERE id = $1`,
      [taskId, result ? JSON.stringify(result) : null]
    );
  }

  /**
   * Mark a task as failed, with auto-retry if under max_retries.
   * Returns true if task was re-queued for retry, false if permanently failed.
   */
  async failTask(taskId: number, errorMessage: string): Promise<boolean> {
    // Get current retry state
    const result = await pool.query(
      `SELECT retry_count, max_retries FROM worker_tasks WHERE id = $1`,
      [taskId]
    );

    if (result.rows.length === 0) {
      return false;
    }

    const { retry_count, max_retries } = result.rows[0];
    const newRetryCount = (retry_count || 0) + 1;

    if (newRetryCount < (max_retries || 3)) {
      // Re-queue for retry - reset to pending with incremented retry_count
      await pool.query(
        `UPDATE worker_tasks
         SET status = 'pending',
             worker_id = NULL,
             claimed_at = NULL,
             started_at = NULL,
             retry_count = $2,
             error_message = $3,
             updated_at = NOW()
         WHERE id = $1`,
        [taskId, newRetryCount, `Retry ${newRetryCount}: ${errorMessage}`]
      );
      console.log(`[TaskService] Task ${taskId} queued for retry ${newRetryCount}/${max_retries || 3}`);
      return true;
    }

    // Max retries exceeded - mark as permanently failed
    await pool.query(
      `UPDATE worker_tasks
       SET status = 'failed',
           completed_at = NOW(),
           retry_count = $2,
           error_message = $3
       WHERE id = $1`,
      [taskId, newRetryCount, `Failed after ${newRetryCount} attempts: ${errorMessage}`]
    );
    console.log(`[TaskService] Task ${taskId} permanently failed after ${newRetryCount} attempts`);
    return false;
  }
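// Worked example of the retry arithmetic above (commentary, not part of this
// file), with the default max_retries = 3:
//   attempt 1 fails -> newRetryCount = 1, 1 < 3 -> re-queued as 'pending'
//   attempt 2 fails -> newRetryCount = 2, 2 < 3 -> re-queued as 'pending'
//   attempt 3 fails -> newRetryCount = 3, not < 3 -> status = 'failed'
// So max_retries bounds the TOTAL number of attempts, not the number of
// retries after the first failure.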
  /**
   * Get a task by ID
   */
  async getTask(taskId: number): Promise<WorkerTask | null> {
    const result = await pool.query(
      `SELECT * FROM worker_tasks WHERE id = $1`,
      [taskId]
    );
    return (result.rows[0] as WorkerTask) || null;
  }

  /**
   * List tasks with filters
   */
  async listTasks(filter: TaskFilter = {}): Promise<WorkerTask[]> {
    // Return empty list if table doesn't exist
    if (!await tableExists('worker_tasks')) {
      return [];
    }

    const conditions: string[] = [];
    const params: (string | number | string[])[] = [];
    let paramIndex = 1;

    if (filter.role) {
      conditions.push(`t.role = $${paramIndex++}`);
      params.push(filter.role);
    }

    if (filter.status) {
      if (Array.isArray(filter.status)) {
        conditions.push(`t.status = ANY($${paramIndex++})`);
        params.push(filter.status);
      } else {
        conditions.push(`t.status = $${paramIndex++}`);
        params.push(filter.status);
      }
    }

    if (filter.dispensary_id) {
      conditions.push(`t.dispensary_id = $${paramIndex++}`);
      params.push(filter.dispensary_id);
    }

    if (filter.worker_id) {
      conditions.push(`t.worker_id = $${paramIndex++}`);
      params.push(filter.worker_id);
    }

    const whereClause = conditions.length > 0 ? `WHERE ${conditions.join(' AND ')}` : '';
    const limit = filter.limit ?? 100;
    const offset = filter.offset ?? 0;

    const result = await pool.query(
      `SELECT
         t.*,
         d.name as dispensary_name,
         d.slug as dispensary_slug
       FROM worker_tasks t
       LEFT JOIN dispensaries d ON d.id = t.dispensary_id
       ${whereClause}
       ORDER BY t.created_at DESC
       LIMIT ${limit} OFFSET ${offset}`,
      params
    );

    return result.rows as WorkerTask[];
  }

  /**
   * Get capacity metrics for all roles
   */
  async getCapacityMetrics(): Promise<CapacityMetrics[]> {
    // Return empty metrics if worker_tasks table doesn't exist
    if (!await tableExists('worker_tasks')) {
      return [];
    }

    try {
      const result = await pool.query(
        `SELECT * FROM v_worker_capacity`
      );
      return result.rows as CapacityMetrics[];
    } catch {
      // View may not exist
      return [];
    }
  }

  /**
   * Get capacity metrics for a specific role
   */
  async getRoleCapacity(role: TaskRole): Promise<CapacityMetrics | null> {
    // Return null if worker_tasks table doesn't exist
    if (!await tableExists('worker_tasks')) {
      return null;
    }

    try {
      const result = await pool.query(
        `SELECT * FROM v_worker_capacity WHERE role = $1`,
        [role]
      );
      return (result.rows[0] as CapacityMetrics) || null;
    } catch {
      // View may not exist
      return null;
    }
  }

  /**
   * Recover stale tasks from dead workers
   */
  async recoverStaleTasks(staleThresholdMinutes = 10): Promise<number> {
    const result = await pool.query(
      `SELECT recover_stale_tasks($1)`,
      [staleThresholdMinutes]
    );
    return (result.rows[0] as { recover_stale_tasks: number })?.recover_stale_tasks ?? 0;
  }

  /**
   * Generate daily resync tasks for all active stores
   */
  async generateDailyResyncTasks(batchesPerDay = 6, date?: Date): Promise<number> {
    const result = await pool.query(
      `SELECT generate_resync_tasks($1, $2)`,
      [batchesPerDay, date ?? new Date()]
    );
    return (result.rows[0] as { generate_resync_tasks: number })?.generate_resync_tasks ?? 0;
  }

  /**
   * Chain next task after completion.
   * Called automatically when a task completes successfully.
   *
   * Per TASK_WORKFLOW_2024-12-10.md: Task chaining flow:
   *
   * Discovery flow (new stores):
   *   store_discovery → product_discovery → payload_fetch → product_refresh
   *
   * Scheduled flow (existing stores):
   *   payload_fetch → product_refresh
   *
   * Note: entry_point_discovery is deprecated since platform_dispensary_id
   * is now resolved during store promotion.
   */
  async chainNextTask(completedTask: WorkerTask): Promise<WorkerTask | null> {
    if (completedTask.status !== 'completed') {
      return null;
    }

    switch (completedTask.role) {
      case 'store_discovery': {
        // Per TASK_WORKFLOW_2024-12-10.md: New stores discovered -> create product_discovery tasks
        // Skip entry_point_discovery since platform_dispensary_id is set during promotion
        const newStoreIds = (completedTask.result as { newStoreIds?: number[] })?.newStoreIds;
        if (newStoreIds && newStoreIds.length > 0) {
          console.log(`[TaskService] Chaining ${newStoreIds.length} product_discovery tasks for new stores`);
          for (const storeId of newStoreIds) {
            await this.createTask({
              role: 'product_discovery',
              dispensary_id: storeId,
              platform: completedTask.platform ?? undefined,
              priority: 10, // High priority for new stores
            });
          }
        }
        break;
      }

      case 'entry_point_discovery': {
        // DEPRECATED: Entry point resolution now happens during store promotion
        // Kept for backward compatibility with any in-flight tasks
        const success = (completedTask.result as { success?: boolean })?.success;
        if (success && completedTask.dispensary_id) {
          return this.createTask({
            role: 'product_discovery',
            dispensary_id: completedTask.dispensary_id,
            platform: completedTask.platform ?? undefined,
            priority: 10,
          });
        }
        break;
      }

      case 'product_discovery': {
        // Per TASK_WORKFLOW_2024-12-10.md: Product discovery chains internally to payload_fetch
        // No external chaining needed - handleProductDiscovery calls handlePayloadFetch directly
        break;
      }

      case 'payload_fetch': {
        // Per TASK_WORKFLOW_2024-12-10.md: payload_fetch chains to product_refresh
        // This is handled internally by the payload_fetch handler via taskService.createTask
        // No external chaining needed here
        break;
      }
    }

    return null;
  }
  /**
   * Create store discovery task for a platform/state
   */
  async createStoreDiscoveryTask(
    platform: string,
    stateCode?: string, // NOTE: currently unused - discovery iterates all active states
    priority = 0
  ): Promise<WorkerTask> {
    return this.createTask({
      role: 'store_discovery',
      platform,
      priority,
    });
  }

  /**
   * Create entry point discovery task for a specific store
   */
  async createEntryPointTask(
    dispensaryId: number,
    platform: string,
    priority = 10
  ): Promise<WorkerTask> {
    return this.createTask({
      role: 'entry_point_discovery',
      dispensary_id: dispensaryId,
      platform,
      priority,
    });
  }

  /**
   * Create product discovery task for a specific store
   */
  async createProductDiscoveryTask(
    dispensaryId: number,
    platform: string,
    priority = 10
  ): Promise<WorkerTask> {
    return this.createTask({
      role: 'product_discovery',
      dispensary_id: dispensaryId,
      platform,
      priority,
    });
  }

  /**
   * Get task counts by status for dashboard
   */
  async getTaskCounts(): Promise<Record<TaskStatus, number>> {
    const counts: Record<TaskStatus, number> = {
      pending: 0,
      claimed: 0,
      running: 0,
      completed: 0,
      failed: 0,
      stale: 0,
    };

    // Return empty counts if table doesn't exist
    if (!await tableExists('worker_tasks')) {
      return counts;
    }

    const result = await pool.query(
      `SELECT status, COUNT(*) as count
       FROM worker_tasks
       GROUP BY status`
    );

    for (const row of result.rows) {
      const typedRow = row as { status: TaskStatus; count: string };
      counts[typedRow.status] = parseInt(typedRow.count, 10);
    }

    return counts;
  }

  /**
   * Get recent task completions for a role
   */
  async getRecentCompletions(role: TaskRole, limit = 10): Promise<WorkerTask[]> {
    const result = await pool.query(
      `SELECT * FROM worker_tasks
       WHERE role = $1 AND status = 'completed'
       ORDER BY completed_at DESC
       LIMIT $2`,
      [role, limit]
    );
    return result.rows as WorkerTask[];
  }

  /**
   * Check if a store has any active tasks
   */
  async hasActiveTask(dispensaryId: number): Promise<boolean> {
    const result = await pool.query(
      `SELECT EXISTS(
         SELECT 1 FROM worker_tasks
         WHERE dispensary_id = $1
           AND status IN ('claimed', 'running')
       ) as exists`,
      [dispensaryId]
    );
    return (result.rows[0] as { exists: boolean })?.exists ?? false;
  }

  /**
   * Get the last completion time for a role
   */
  async getLastCompletion(role: TaskRole): Promise<Date | null> {
    const result = await pool.query(
      `SELECT MAX(completed_at) as completed_at
       FROM worker_tasks
       WHERE role = $1 AND status = 'completed'`,
      [role]
    );
    return (result.rows[0] as { completed_at: Date | null })?.completed_at ?? null;
  }

  /**
   * Calculate workers needed to complete tasks within SLA
   */
  async calculateWorkersNeeded(role: TaskRole, slaHours: number): Promise<number> {
    const capacity = await this.getRoleCapacity(role);
    if (!capacity || !capacity.tasks_per_worker_hour) {
      return 1; // Default to 1 worker if no data
    }

    const pendingTasks = capacity.pending_tasks;
    const tasksPerWorkerHour = capacity.tasks_per_worker_hour;
    const totalTaskCapacityNeeded = pendingTasks / slaHours;

    return Math.ceil(totalTaskCapacityNeeded / tasksPerWorkerHour);
  }
}

export const taskService = new TaskService();
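Put together, a single iteration of a worker's claim/execute cycle against this service looks roughly like the following (a sketch; the real loop, with concurrency and resource backoff, is in task-worker.ts below):

// Sketch of one claim/execute iteration; assumes taskService is imported from this module.
async function runOneTask(workerId: string): Promise<void> {
  const task = await taskService.claimTask(null, workerId); // null = any role
  if (!task) return; // pool paused or nothing ready

  await taskService.startTask(task.id);
  try {
    // ... dispatch to the role's handler here ...
    await taskService.completeTask(task.id, { success: true });
    const done = await taskService.getTask(task.id);
    if (done) await taskService.chainNextTask(done); // fan out follow-up tasks
  } catch (err) {
    const msg = err instanceof Error ? err.message : 'Unknown error';
    await taskService.failTask(task.id, msg); // re-queues until max_retries
  }
}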
764
backend/src/tasks/task-worker.ts
Normal file
@@ -0,0 +1,764 @@
/**
 * Task Worker
 *
 * A unified worker that pulls tasks from the worker_tasks queue.
 * Workers register on startup, get a friendly name, and pull tasks.
 *
 * Architecture:
 * - Tasks are generated on schedule (by scheduler or API)
 * - Workers PULL tasks from the pool (not assigned to them)
 * - Tasks are claimed in order of priority (DESC) then creation time (ASC)
 * - Workers report heartbeats to worker_registry
 * - Workers are ROLE-AGNOSTIC by default (can handle any task type)
 *
 * Stealth & Anti-Detection:
 * PROXIES ARE REQUIRED - workers wait for proxies on startup and fail to
 * start if none become available (see initializeStealth).
 *
 * On startup, workers initialize the CrawlRotator which provides:
 * - Proxy rotation: Loads proxies from `proxies` table, ALL requests use proxy
 * - User-Agent rotation: Cycles through realistic browser fingerprints
 * - Fingerprint rotation: Changes browser profile on blocks
 * - Locale/timezone: Matches Accept-Language to target state
 *
 * The CrawlRotator is wired to the Dutchie client via setCrawlRotator().
 * Task handlers call startSession() which picks a random fingerprint.
 * On 403 errors, the client automatically:
 * 1. Records failure on current proxy
 * 2. Rotates to next proxy
 * 3. Rotates fingerprint
 * 4. Retries the request
 *
 * Usage:
 *   npx tsx src/tasks/task-worker.ts                              # Role-agnostic (any task)
 *   WORKER_ROLE=product_refresh npx tsx src/tasks/task-worker.ts  # Role-specific
 *
 * Environment:
 *   WORKER_ROLE           - Which task role to process (optional, null = any task)
 *   WORKER_ID             - Optional custom worker ID (auto-generated if not provided)
 *   POD_NAME              - Kubernetes pod name (optional)
 *   POLL_INTERVAL_MS      - How often to check for tasks (default: 5000)
 *   HEARTBEAT_INTERVAL_MS - How often to update heartbeat (default: 30000)
 *   API_BASE_URL          - Backend API URL for registration (default: http://localhost:3010)
 */

import { Pool } from 'pg';
import { v4 as uuidv4 } from 'uuid';
import { taskService, TaskRole, WorkerTask } from './task-service';
import { getPool } from '../db/pool';
import os from 'os';

// Stealth/rotation support
import { CrawlRotator } from '../services/crawl-rotator';
import { setCrawlRotator } from '../platforms/dutchie';

// Task handlers by role
// Per TASK_WORKFLOW_2024-12-10.md: payload_fetch and product_refresh are now separate
import { handlePayloadFetch } from './handlers/payload-fetch';
import { handleProductRefresh } from './handlers/product-refresh';
import { handleProductDiscovery } from './handlers/product-discovery';
import { handleStoreDiscovery } from './handlers/store-discovery';
import { handleEntryPointDiscovery } from './handlers/entry-point-discovery';
import { handleAnalyticsRefresh } from './handlers/analytics-refresh';

const POLL_INTERVAL_MS = parseInt(process.env.POLL_INTERVAL_MS || '5000', 10);
const HEARTBEAT_INTERVAL_MS = parseInt(process.env.HEARTBEAT_INTERVAL_MS || '30000', 10);
const API_BASE_URL = process.env.API_BASE_URL || 'http://localhost:3010';

// =============================================================================
// CONCURRENT TASK PROCESSING SETTINGS
// =============================================================================
// Workers can process multiple tasks simultaneously using async I/O.
// This improves throughput for I/O-bound tasks (network calls, DB queries).
//
// Resource thresholds trigger "backoff" - the worker stops claiming new tasks
// but continues processing existing ones until resources return to normal.
//
// See: docs/WORKER_TASK_ARCHITECTURE.md#concurrent-task-processing
// =============================================================================

// Maximum number of tasks this worker will run concurrently
// Tune based on workload: I/O-bound tasks benefit from higher concurrency
const MAX_CONCURRENT_TASKS = parseInt(process.env.MAX_CONCURRENT_TASKS || '3', 10);

// When heap memory usage exceeds this threshold (as decimal 0.0-1.0), stop claiming new tasks
// Default 85% - gives headroom before OOM
const MEMORY_BACKOFF_THRESHOLD = parseFloat(process.env.MEMORY_BACKOFF_THRESHOLD || '0.85');

// When CPU usage exceeds this threshold (as decimal 0.0-1.0), stop claiming new tasks
// Default 90% - allows some burst capacity
const CPU_BACKOFF_THRESHOLD = parseFloat(process.env.CPU_BACKOFF_THRESHOLD || '0.90');

// How long to wait (ms) when in backoff state before rechecking resources
const BACKOFF_DURATION_MS = parseInt(process.env.BACKOFF_DURATION_MS || '10000', 10);
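// Example (illustrative): tuning a single beefy worker via environment:
//
//   MAX_CONCURRENT_TASKS=8 \
//   MEMORY_BACKOFF_THRESHOLD=0.80 \
//   CPU_BACKOFF_THRESHOLD=0.95 \
//   npx tsx src/tasks/task-worker.ts
//
// Higher concurrency suits I/O-bound roles; the lower memory threshold makes
// the worker stop claiming earlier, trading throughput for OOM headroom.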
export interface TaskContext {
  pool: Pool;
  workerId: string;
  task: WorkerTask;
  heartbeat: () => Promise<void>;
}

export interface TaskResult {
  success: boolean;
  productsProcessed?: number;
  snapshotsCreated?: number;
  storesDiscovered?: number;
  error?: string;
  [key: string]: unknown;
}

type TaskHandler = (ctx: TaskContext) => Promise<TaskResult>;

// Per TASK_WORKFLOW_2024-12-10.md: Handler registry
// payload_fetch: Fetches from Dutchie API, saves to disk, chains to product_refresh
// product_refresh: Reads local payload, normalizes, upserts to DB
const TASK_HANDLERS: Record<TaskRole, TaskHandler> = {
  payload_fetch: handlePayloadFetch,        // NEW: API fetch -> disk
  product_refresh: handleProductRefresh,    // CHANGED: disk -> DB
  product_discovery: handleProductDiscovery,
  store_discovery: handleStoreDiscovery,
  entry_point_discovery: handleEntryPointDiscovery,
  analytics_refresh: handleAnalyticsRefresh,
};

/**
 * Resource usage stats reported to the registry and used for backoff decisions.
 * These values are included in worker heartbeats and displayed in the UI.
 */
interface ResourceStats {
  /** Current heap memory usage as decimal (0.0 to 1.0) */
  memoryPercent: number;
  /** Current heap used in MB */
  memoryMb: number;
  /** Total heap available in MB */
  memoryTotalMb: number;
  /** CPU usage percentage since last check (0 to 100) */
  cpuPercent: number;
  /** True if worker is currently in backoff state */
  isBackingOff: boolean;
  /** Reason for backoff (e.g., "Memory at 87.3% (threshold: 85%)") */
  backoffReason: string | null;
}
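// A handler is just an async function over TaskContext (illustrative sketch,
// not part of this file):
//
//   const handleNoop: TaskHandler = async (ctx) => {
//     await ctx.heartbeat();        // keep the task from being marked stale
//     return { success: true };     // extra keys ride along via [key: string]
//   };
//
// Adding a new role means adding a TaskRole variant, a handler like the one
// above, and an entry in TASK_HANDLERS.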
export class TaskWorker {
  private pool: Pool;
  private workerId: string;
  private role: TaskRole | null; // null = role-agnostic (any task)
  private friendlyName: string = '';
  private isRunning: boolean = false;
  private heartbeatInterval: NodeJS.Timeout | null = null;
  private registryHeartbeatInterval: NodeJS.Timeout | null = null;
  private crawlRotator: CrawlRotator;

  // ==========================================================================
  // CONCURRENT TASK TRACKING
  // ==========================================================================
  // activeTasks: Map of task ID -> task object for all currently running tasks
  // taskPromises: Map of task ID -> Promise for cleanup when task completes
  // maxConcurrentTasks: How many tasks this worker will run in parallel
  // ==========================================================================
  private activeTasks: Map<number, WorkerTask> = new Map();
  private taskPromises: Map<number, Promise<void>> = new Map();
  private maxConcurrentTasks: number = MAX_CONCURRENT_TASKS;

  // ==========================================================================
  // RESOURCE MONITORING FOR BACKOFF
  // ==========================================================================
  // CPU tracking uses differential measurement - we track last values and
  // calculate percentage based on elapsed time since last check.
  // ==========================================================================
  private lastCpuUsage: { user: number; system: number } = { user: 0, system: 0 };
  private lastCpuCheck: number = Date.now();
  private isBackingOff: boolean = false;
  private backoffReason: string | null = null;

  constructor(role: TaskRole | null = null, workerId?: string) {
    this.pool = getPool();
    this.role = role;
    this.workerId = workerId || `worker-${uuidv4().slice(0, 8)}`;
    this.crawlRotator = new CrawlRotator(this.pool);

    // Initialize CPU tracking
    const cpuUsage = process.cpuUsage();
    this.lastCpuUsage = { user: cpuUsage.user, system: cpuUsage.system };
    this.lastCpuCheck = Date.now();
  }

  /**
   * Get current resource usage
   */
  private getResourceStats(): ResourceStats {
    const memUsage = process.memoryUsage();
    const heapUsedMb = memUsage.heapUsed / 1024 / 1024;
    const heapTotalMb = memUsage.heapTotal / 1024 / 1024;
    const memoryPercent = heapUsedMb / heapTotalMb;

    // Calculate CPU usage since last check
    const cpuUsage = process.cpuUsage();
    const now = Date.now();
    const elapsed = now - this.lastCpuCheck;

    let cpuPercent = 0;
    if (elapsed > 0) {
      const userDiff = (cpuUsage.user - this.lastCpuUsage.user) / 1000; // microseconds to ms
      const systemDiff = (cpuUsage.system - this.lastCpuUsage.system) / 1000;
      cpuPercent = ((userDiff + systemDiff) / elapsed) * 100;
    }

    // Update last values
    this.lastCpuUsage = { user: cpuUsage.user, system: cpuUsage.system };
    this.lastCpuCheck = now;

    return {
      memoryPercent,
      memoryMb: Math.round(heapUsedMb),
      memoryTotalMb: Math.round(heapTotalMb),
      cpuPercent: Math.min(100, cpuPercent), // Cap at 100%
      isBackingOff: this.isBackingOff,
      backoffReason: this.backoffReason,
    };
  }

  /**
   * Check if we should back off from taking new tasks
   */
  private shouldBackOff(): { backoff: boolean; reason: string | null } {
    const stats = this.getResourceStats();

    if (stats.memoryPercent > MEMORY_BACKOFF_THRESHOLD) {
      return { backoff: true, reason: `Memory at ${(stats.memoryPercent * 100).toFixed(1)}% (threshold: ${MEMORY_BACKOFF_THRESHOLD * 100}%)` };
    }

    if (stats.cpuPercent > CPU_BACKOFF_THRESHOLD * 100) {
      return { backoff: true, reason: `CPU at ${stats.cpuPercent.toFixed(1)}% (threshold: ${CPU_BACKOFF_THRESHOLD * 100}%)` };
    }

    return { backoff: false, reason: null };
  }
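// Worked example for getResourceStats() above (commentary): process.cpuUsage()
// reports microseconds, so userDiff/systemDiff are converted to ms. If the
// process burned 400ms user + 100ms system over 1000ms of wall clock:
//   cpuPercent = ((400 + 100) / 1000) * 100 = 50%
// On multi-core machines the raw value can exceed 100%, hence the Math.min cap.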
  /**
   * Get count of currently running tasks
   */
  get activeTaskCount(): number {
    return this.activeTasks.size;
  }

  /**
   * Check if we can accept more tasks
   */
  private canAcceptMoreTasks(): boolean {
    return this.activeTasks.size < this.maxConcurrentTasks;
  }

  /**
   * Initialize stealth systems (proxy rotation, fingerprints).
   * Called once on worker startup before processing any tasks.
   *
   * IMPORTANT: Proxies are REQUIRED. Workers will wait until proxies are available.
   * Workers listen for PostgreSQL NOTIFY 'proxy_added' to wake up immediately when proxies are added.
   */
  private async initializeStealth(): Promise<void> {
    const MAX_WAIT_MINUTES = 60;
    const POLL_INTERVAL_MS = 30000; // 30 seconds fallback polling
    const maxAttempts = (MAX_WAIT_MINUTES * 60 * 1000) / POLL_INTERVAL_MS;
    let attempts = 0;
    let notifyClient: any = null;

    // Set up PostgreSQL LISTEN for proxy notifications
    try {
      notifyClient = await this.pool.connect();
      await notifyClient.query('LISTEN proxy_added');
      console.log(`[TaskWorker] Listening for proxy_added notifications...`);
    } catch (err: any) {
      console.log(`[TaskWorker] Could not set up LISTEN (will poll): ${err.message}`);
    }

    // Create a promise that resolves when notified
    let notifyResolve: (() => void) | null = null;
    if (notifyClient) {
      notifyClient.on('notification', (msg: any) => {
        if (msg.channel === 'proxy_added') {
          console.log(`[TaskWorker] Received proxy_added notification!`);
          if (notifyResolve) notifyResolve();
        }
      });
    }

    try {
      while (attempts < maxAttempts) {
        try {
          // Load proxies from database
          await this.crawlRotator.initialize();

          const stats = this.crawlRotator.proxy.getStats();
          if (stats.activeProxies > 0) {
            console.log(`[TaskWorker] Loaded ${stats.activeProxies} proxies (${stats.avgSuccessRate.toFixed(1)}% avg success rate)`);

            // Wire rotator to Dutchie client - proxies will be used for ALL requests
            setCrawlRotator(this.crawlRotator);

            console.log(`[TaskWorker] Stealth initialized: ${this.crawlRotator.userAgent.getCount()} fingerprints, proxy REQUIRED for all requests`);
            return;
          }

          attempts++;
          console.log(`[TaskWorker] No active proxies available (attempt ${attempts}). Waiting for proxies...`);

          // Wait for either notification or timeout
          await new Promise<void>((resolve) => {
            notifyResolve = resolve;
            setTimeout(resolve, POLL_INTERVAL_MS);
          });
        } catch (error: any) {
          attempts++;
          console.log(`[TaskWorker] Error loading proxies (attempt ${attempts}): ${error.message}. Retrying...`);
          await this.sleep(POLL_INTERVAL_MS);
        }
      }

      throw new Error(`No active proxies available after waiting ${MAX_WAIT_MINUTES} minutes. Add proxies to the database.`);
    } finally {
      // Clean up LISTEN connection
      if (notifyClient) {
        try {
          await notifyClient.query('UNLISTEN proxy_added');
          notifyClient.release();
        } catch {
          // Ignore cleanup errors
        }
      }
    }
  }
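// The proxy_added NOTIFY consumed above has to be emitted by whatever inserts
// proxies. A minimal sketch of the producing side (illustrative - the actual
// insert path is not in this diff, and the column names are assumptions):
//
//   await pool.query(`INSERT INTO proxies (host, port) VALUES ($1, $2)`, [host, port]);
//   await pool.query(`SELECT pg_notify('proxy_added', '')`); // wakes waiting workers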
  /**
   * Register worker with the registry (get friendly name)
   */
  private async register(): Promise<void> {
    try {
      const response = await fetch(`${API_BASE_URL}/api/worker-registry/register`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({
          role: this.role,
          worker_id: this.workerId,
          pod_name: process.env.POD_NAME || process.env.HOSTNAME,
          hostname: os.hostname(),
          metadata: {
            pid: process.pid,
            node_version: process.version,
            started_at: new Date().toISOString()
          }
        })
      });

      const data = await response.json();
      if (data.success) {
        this.friendlyName = data.friendly_name;
        console.log(`[TaskWorker] ${data.message}`);
      } else {
        console.warn(`[TaskWorker] Registration warning: ${data.error}`);
        this.friendlyName = this.workerId.slice(0, 12);
      }
    } catch (error: any) {
      // Registration is optional - worker can still function without it
      console.warn(`[TaskWorker] Could not register with API (will continue): ${error.message}`);
      this.friendlyName = this.workerId.slice(0, 12);
    }
  }

  /**
   * Deregister worker from the registry
   */
  private async deregister(): Promise<void> {
    try {
      await fetch(`${API_BASE_URL}/api/worker-registry/deregister`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ worker_id: this.workerId })
      });
      console.log(`[TaskWorker] ${this.friendlyName} signed off`);
    } catch {
      // Ignore deregistration errors
    }
  }

  /**
   * Send heartbeat to registry with resource usage and proxy location
   */
  private async sendRegistryHeartbeat(): Promise<void> {
    try {
      const memUsage = process.memoryUsage();
      const cpuUsage = process.cpuUsage();
      const proxyLocation = this.crawlRotator.getProxyLocation();
      const resourceStats = this.getResourceStats();

      // Get array of active task IDs
      const activeTaskIds = Array.from(this.activeTasks.keys());

      await fetch(`${API_BASE_URL}/api/worker-registry/heartbeat`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({
          worker_id: this.workerId,
          current_task_id: activeTaskIds[0] || null, // Primary task for backwards compat
          current_task_ids: activeTaskIds, // All active tasks
          active_task_count: this.activeTasks.size,
          max_concurrent_tasks: this.maxConcurrentTasks,
          status: this.activeTasks.size > 0 ? 'active' : 'idle',
          resources: {
            memory_mb: Math.round(memUsage.heapUsed / 1024 / 1024),
            memory_total_mb: Math.round(memUsage.heapTotal / 1024 / 1024),
            memory_rss_mb: Math.round(memUsage.rss / 1024 / 1024),
            memory_percent: Math.round(resourceStats.memoryPercent * 100),
            cpu_user_ms: Math.round(cpuUsage.user / 1000),
            cpu_system_ms: Math.round(cpuUsage.system / 1000),
            cpu_percent: Math.round(resourceStats.cpuPercent),
            proxy_location: proxyLocation,
            is_backing_off: this.isBackingOff,
            backoff_reason: this.backoffReason,
          }
        })
      });
    } catch {
      // Ignore heartbeat errors
    }
  }

  /**
   * Report task completion to registry
   */
  private async reportTaskCompletion(success: boolean): Promise<void> {
    try {
      await fetch(`${API_BASE_URL}/api/worker-registry/task-completed`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({
          worker_id: this.workerId,
          success
        })
      });
    } catch {
      // Ignore errors
    }
  }

  /**
   * Start registry heartbeat interval
   */
  private startRegistryHeartbeat(): void {
    this.registryHeartbeatInterval = setInterval(async () => {
      await this.sendRegistryHeartbeat();
    }, HEARTBEAT_INTERVAL_MS);
  }

  /**
   * Stop registry heartbeat interval
   */
  private stopRegistryHeartbeat(): void {
    if (this.registryHeartbeatInterval) {
      clearInterval(this.registryHeartbeatInterval);
      this.registryHeartbeatInterval = null;
    }
  }

  /**
   * Start the worker loop
   */
  async start(): Promise<void> {
    this.isRunning = true;

    // Initialize stealth systems (proxy rotation, fingerprints)
    await this.initializeStealth();

    // Register with the API to get a friendly name
    await this.register();

    // Start registry heartbeat
    this.startRegistryHeartbeat();

    const roleMsg = this.role ? `for role: ${this.role}` : '(role-agnostic - any task)';
    console.log(`[TaskWorker] ${this.friendlyName} starting ${roleMsg} (max ${this.maxConcurrentTasks} concurrent tasks)`);

    while (this.isRunning) {
      try {
        await this.mainLoop();
      } catch (error: any) {
        console.error(`[TaskWorker] Loop error:`, error.message);
        await this.sleep(POLL_INTERVAL_MS);
      }
    }

    // Wait for any remaining tasks to complete
    if (this.taskPromises.size > 0) {
      console.log(`[TaskWorker] Waiting for ${this.taskPromises.size} active tasks to complete...`);
      await Promise.allSettled(this.taskPromises.values());
    }

    console.log(`[TaskWorker] Worker ${this.workerId} stopped`);
  }

  /**
   * Main loop - tries to fill up to maxConcurrentTasks
   */
  private async mainLoop(): Promise<void> {
    // Check resource usage and backoff if needed
    const { backoff, reason } = this.shouldBackOff();
    if (backoff) {
      if (!this.isBackingOff) {
        console.log(`[TaskWorker] ${this.friendlyName} backing off: ${reason}`);
      }
      this.isBackingOff = true;
      this.backoffReason = reason;
      await this.sleep(BACKOFF_DURATION_MS);
      return;
    }

    // Clear backoff state
    if (this.isBackingOff) {
      console.log(`[TaskWorker] ${this.friendlyName} resuming normal operation`);
      this.isBackingOff = false;
      this.backoffReason = null;
    }

    // Check for decommission signal
    const shouldDecommission = await this.checkDecommission();
    if (shouldDecommission) {
      console.log(`[TaskWorker] ${this.friendlyName} received decommission signal - waiting for ${this.activeTasks.size} tasks to complete`);
      // Stop accepting new tasks, wait for current to finish
      this.isRunning = false;
      return;
    }

    // Try to claim more tasks if we have capacity
    if (this.canAcceptMoreTasks()) {
      const task = await taskService.claimTask(this.role, this.workerId);

      if (task) {
        console.log(`[TaskWorker] ${this.friendlyName} claimed task ${task.id} (${task.role}) [${this.activeTasks.size + 1}/${this.maxConcurrentTasks}]`);
        this.activeTasks.set(task.id, task);

        // Start task in background (don't await)
        const taskPromise = this.executeTask(task);
        this.taskPromises.set(task.id, taskPromise);

        // Clean up when done
        taskPromise.finally(() => {
          this.activeTasks.delete(task.id);
          this.taskPromises.delete(task.id);
        });

        // Immediately try to claim more tasks (don't wait for poll interval)
        return;
      }
    }

    // No task claimed or at capacity - wait before next poll
    await this.sleep(POLL_INTERVAL_MS);
  }

  /**
   * Stop the worker
   */
  async stop(): Promise<void> {
    this.isRunning = false;
    this.stopHeartbeat();
    this.stopRegistryHeartbeat();
    await this.deregister();
    console.log(`[TaskWorker] ${this.friendlyName} stopped`);
  }

  /**
   * Execute a single task (runs concurrently with other tasks)
   */
  private async executeTask(task: WorkerTask): Promise<void> {
    console.log(`[TaskWorker] ${this.friendlyName} starting task ${task.id} (${task.role}) for dispensary ${task.dispensary_id || 'N/A'}`);

    try {
      // Mark as running
      await taskService.startTask(task.id);

      // Get handler for this role
      const handler = TASK_HANDLERS[task.role];
      if (!handler) {
        throw new Error(`No handler registered for role: ${task.role}`);
      }

      // Create context
      const ctx: TaskContext = {
        pool: this.pool,
        workerId: this.workerId,
        task,
        heartbeat: async () => {
          await taskService.heartbeat(task.id);
        },
      };

      // Execute the task
      const result = await handler(ctx);

      if (result.success) {
        // Mark as completed
        await taskService.completeTask(task.id, result);
        await this.reportTaskCompletion(true);
        console.log(`[TaskWorker] ${this.friendlyName} completed task ${task.id} [${this.activeTasks.size}/${this.maxConcurrentTasks} active]`);

        // Chain next task if applicable
        const chainedTask = await taskService.chainNextTask({
          ...task,
          status: 'completed',
          result,
        });
        if (chainedTask) {
          console.log(`[TaskWorker] Chained new task ${chainedTask.id} (${chainedTask.role})`);
        }
      } else {
        // Mark as failed
        await taskService.failTask(task.id, result.error || 'Unknown error');
        await this.reportTaskCompletion(false);
        console.log(`[TaskWorker] ${this.friendlyName} failed task ${task.id}: ${result.error}`);
      }
    } catch (error: any) {
      // Mark as failed
      await taskService.failTask(task.id, error.message);
      await this.reportTaskCompletion(false);
      console.error(`[TaskWorker] ${this.friendlyName} task ${task.id} error:`, error.message);
    }
    // Note: cleanup (removing from activeTasks) is handled in mainLoop's finally block
  }

  /**
   * Check if this worker has been flagged for decommission
   * Returns true if worker should stop after current task
   */
  private async checkDecommission(): Promise<boolean> {
    try {
      // Check worker_registry for decommission flag
      const result = await this.pool.query(
        `SELECT decommission_requested, decommission_reason
         FROM worker_registry
         WHERE worker_id = $1`,
        [this.workerId]
      );

      if (result.rows.length > 0 && result.rows[0].decommission_requested) {
        const reason = result.rows[0].decommission_reason || 'No reason provided';
        console.log(`[TaskWorker] Decommission requested: ${reason}`);
        return true;
      }

      return false;
    } catch (error: any) {
      // If we can't check, continue running
      console.warn(`[TaskWorker] Could not check decommission status: ${error.message}`);
      return false;
    }
  }

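  // Usage note: a worker is drained remotely by setting the flag this method
  // polls; a minimal sketch of the requesting side (the UPDATE shape is an
  // assumption - only the worker_registry columns read above appear in this file):
  //
  //   await pool.query(
  //     `UPDATE worker_registry
  //         SET decommission_requested = true,
  //             decommission_reason = $2
  //       WHERE worker_id = $1`,
  //     ['worker-abc123', 'scaling down']
  //   );
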
  /**
   * Start heartbeat interval
   */
  private startHeartbeat(taskId: number): void {
    this.heartbeatInterval = setInterval(async () => {
      try {
        await taskService.heartbeat(taskId);
      } catch (error: any) {
        console.warn(`[TaskWorker] Heartbeat failed:`, error.message);
      }
    }, HEARTBEAT_INTERVAL_MS);
  }

  /**
   * Stop heartbeat interval
   */
  private stopHeartbeat(): void {
    if (this.heartbeatInterval) {
      clearInterval(this.heartbeatInterval);
      this.heartbeatInterval = null;
    }
  }

  /**
   * Sleep helper
   */
  private sleep(ms: number): Promise<void> {
    return new Promise((resolve) => setTimeout(resolve, ms));
  }

  /**
   * Get worker info
   */
  getInfo(): {
    workerId: string;
    role: TaskRole | null;
    isRunning: boolean;
    activeTaskIds: number[];
    activeTaskCount: number;
    maxConcurrentTasks: number;
    isBackingOff: boolean;
    backoffReason: string | null;
  } {
    return {
      workerId: this.workerId,
      role: this.role,
      isRunning: this.isRunning,
      activeTaskIds: Array.from(this.activeTasks.keys()),
      activeTaskCount: this.activeTasks.size,
      maxConcurrentTasks: this.maxConcurrentTasks,
      isBackingOff: this.isBackingOff,
      backoffReason: this.backoffReason,
    };
  }
}

// ============================================================
// CLI ENTRY POINT
// ============================================================

async function main(): Promise<void> {
  const role = process.env.WORKER_ROLE as TaskRole | undefined;

  // Per TASK_WORKFLOW_2024-12-10.md: Valid task roles
  const validRoles: TaskRole[] = [
    'store_discovery',
    'entry_point_discovery',
    'product_discovery',
    'payload_fetch',   // NEW: Fetches from API, saves to disk
    'product_refresh', // CHANGED: Reads from disk, processes to DB
    'analytics_refresh',
  ];

  // If role specified, validate it
  if (role && !validRoles.includes(role)) {
    console.error(`Error: Invalid WORKER_ROLE: ${role}`);
    console.error(`Valid roles: ${validRoles.join(', ')}`);
    console.error('Or omit WORKER_ROLE for role-agnostic worker (any task)');
    process.exit(1);
  }

  const workerId = process.env.WORKER_ID;
  // Pass null for role-agnostic, or the specific role
  const worker = new TaskWorker(role || null, workerId);

  // Handle graceful shutdown
  process.on('SIGTERM', () => {
    console.log('[TaskWorker] Received SIGTERM, shutting down...');
    worker.stop();
  });

  process.on('SIGINT', () => {
    console.log('[TaskWorker] Received SIGINT, shutting down...');
    worker.stop();
  });

  await worker.start();
}

// Run if this is the main module
if (require.main === module) {
  main().catch((error) => {
    console.error('[TaskWorker] Fatal error:', error);
    process.exit(1);
  });
}

export { main };
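
// Usage sketch: main() is driven entirely by the environment, so a supervisor
// can launch a role-specific worker like this (the relative import path is an
// assumption):
//
//   process.env.WORKER_ROLE = 'payload_fetch';
//   process.env.WORKER_ID = 'payload-fetch-01';
//   import('./task-worker').then(({ main }) => main());
//
// Omitting WORKER_ROLE yields a role-agnostic worker that claims any task.
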
backend/src/types/user-agents.d.ts (new file, vendored, 49 lines)
@@ -0,0 +1,49 @@
/**
 * Type declarations for user-agents npm package
 * Per workflow-12102025.md: Used for realistic UA generation with market-share weighting
 */

declare module 'user-agents' {
  interface UserAgentData {
    userAgent: string;
    platform: string;
    screenWidth: number;
    screenHeight: number;
    viewportWidth: number;
    viewportHeight: number;
    deviceCategory: 'desktop' | 'mobile' | 'tablet';
    appName: string;
    connection?: {
      downlink: number;
      effectiveType: string;
      rtt: number;
    };
  }

  interface UserAgentOptions {
    deviceCategory?: 'desktop' | 'mobile' | 'tablet';
    platform?: RegExp | string;
    screenWidth?: RegExp | { min?: number; max?: number };
    screenHeight?: RegExp | { min?: number; max?: number };
  }

  interface UserAgentInstance {
    data: UserAgentData;
    toString(): string;
    random(): UserAgentInstance;
  }

  class UserAgent {
    constructor(options?: UserAgentOptions | UserAgentOptions[]);
    data: UserAgentData;
    toString(): string;
    random(): UserAgentInstance;
  }

  // Make it callable
  interface UserAgent {
    (): UserAgentInstance;
  }

  export default UserAgent;
}
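
// Usage sketch: with these declarations the package is fully typed, e.g.
//
//   import UserAgent from 'user-agents';
//
//   // Weighted-random desktop fingerprint
//   const ua = new UserAgent({ deviceCategory: 'desktop' });
//   console.log(ua.toString());          // full User-Agent string
//   console.log(ua.data.viewportWidth);  // matching viewport metrics
//
//   // Draw another fingerprint from the same filtered distribution
//   const next = ua.random();
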
@@ -1,26 +1,29 @@
 /**
  * Local Image Storage Utility
  *
- * Downloads and stores product images to local filesystem.
- * Replaces MinIO-based storage with simple local file storage.
+ * Downloads and stores product images to local filesystem with proper hierarchy.
  *
  * Directory structure:
- * /images/products/<dispensary_id>/<product_id>.webp
- * /images/products/<dispensary_id>/<product_id>-thumb.webp
- * /images/products/<dispensary_id>/<product_id>-medium.webp
- * /images/brands/<brand_slug>.webp
+ * /images/products/<state>/<store_slug>/<brand_slug>/<product_id>/image.webp
+ * /images/products/<state>/<store_slug>/<brand_slug>/<product_id>/image-medium.webp
+ * /images/products/<state>/<store_slug>/<brand_slug>/<product_id>/image-thumb.webp
+ * /images/brands/<brand_slug>/logo.webp
+ *
+ * This structure allows:
+ * - Easy migration to MinIO/S3 (bucket per state)
+ * - Browsing by state/store/brand
+ * - Multiple images per product (future: gallery)
  */

 import axios from 'axios';
-import sharp from 'sharp';
+// @ts-ignore - sharp module typing quirk
+const sharp = require('sharp');
 import * as fs from 'fs/promises';
 import * as path from 'path';
 import { createHash } from 'crypto';

 // Base path for image storage - configurable via env
-// Uses project-relative paths by default, NOT /app or other privileged paths
 function getImagesBasePath(): string {
-  // Priority: IMAGES_PATH > STORAGE_BASE_PATH/images > ./storage/images
   if (process.env.IMAGES_PATH) {
     return process.env.IMAGES_PATH;
   }
@@ -35,16 +38,28 @@ const IMAGES_BASE_PATH = getImagesBasePath();
 const IMAGES_PUBLIC_URL = process.env.IMAGES_PUBLIC_URL || '/images';

 export interface LocalImageSizes {
-  full: string;   // URL path: /images/products/123/456.webp
-  medium: string; // URL path: /images/products/123/456-medium.webp
-  thumb: string;  // URL path: /images/products/123/456-thumb.webp
+  original: string; // URL path to original image
+  // Legacy compatibility - all point to original until we add image proxy
+  full: string;
+  medium: string;
+  thumb: string;
 }

 export interface DownloadResult {
   success: boolean;
   urls?: LocalImageSizes;
+  localPaths?: LocalImageSizes;
   error?: string;
   bytesDownloaded?: number;
+  skipped?: boolean; // True if image already exists
+}
+
+export interface ProductImageContext {
+  stateCode: string;     // e.g., "AZ", "CA"
+  storeSlug: string;     // e.g., "deeply-rooted"
+  brandSlug: string;     // e.g., "high-west-farms"
+  productId: string;     // External product ID
+  dispensaryId?: number; // For backwards compat
 }

 /**
@@ -58,6 +73,17 @@ async function ensureDir(dirPath: string): Promise<void> {
   }
 }

+/**
+ * Sanitize a string for use in file paths
+ */
+function slugify(str: string): string {
+  return str
+    .toLowerCase()
+    .replace(/[^a-z0-9]+/g, '-')
+    .replace(/^-+|-+$/g, '')
+    .substring(0, 50) || 'unknown';
+}
+
 /**
  * Generate a short hash from a URL for deduplication
  */
@@ -81,53 +107,30 @@ async function downloadImage(imageUrl: string): Promise<Buffer> {
 }

 /**
- * Process and save image in multiple sizes
- * Returns the file paths relative to IMAGES_BASE_PATH
+ * Process and save original image (convert to webp for consistency)
+ *
+ * We store only the original - resizing will be done on-demand via
+ * an image proxy service (imgproxy, thumbor, or similar) in the future.
  */
 async function processAndSaveImage(
   buffer: Buffer,
   outputDir: string,
   baseFilename: string
-): Promise<{ full: string; medium: string; thumb: string; totalBytes: number }> {
+): Promise<{ original: string; totalBytes: number }> {
   await ensureDir(outputDir);

-  const fullPath = path.join(outputDir, `${baseFilename}.webp`);
-  const mediumPath = path.join(outputDir, `${baseFilename}-medium.webp`);
-  const thumbPath = path.join(outputDir, `${baseFilename}-thumb.webp`);
-
-  // Process images in parallel
-  const [fullBuffer, mediumBuffer, thumbBuffer] = await Promise.all([
-    // Full: max 1200x1200, high quality
-    sharp(buffer)
-      .resize(1200, 1200, { fit: 'inside', withoutEnlargement: true })
-      .webp({ quality: 85 })
-      .toBuffer(),
-    // Medium: 600x600
-    sharp(buffer)
-      .resize(600, 600, { fit: 'inside', withoutEnlargement: true })
-      .webp({ quality: 80 })
-      .toBuffer(),
-    // Thumb: 200x200
-    sharp(buffer)
-      .resize(200, 200, { fit: 'inside', withoutEnlargement: true })
-      .webp({ quality: 75 })
-      .toBuffer(),
-  ]);
-
-  // Save all sizes
-  await Promise.all([
-    fs.writeFile(fullPath, fullBuffer),
-    fs.writeFile(mediumPath, mediumBuffer),
-    fs.writeFile(thumbPath, thumbBuffer),
-  ]);
-
-  const totalBytes = fullBuffer.length + mediumBuffer.length + thumbBuffer.length;
+  const originalPath = path.join(outputDir, `${baseFilename}.webp`);
+
+  // Convert to webp, preserve original dimensions, high quality
+  const originalBuffer = await sharp(buffer)
+    .webp({ quality: 90 })
+    .toBuffer();
+
+  await fs.writeFile(originalPath, originalBuffer);

   return {
-    full: fullPath,
-    medium: mediumPath,
-    thumb: thumbPath,
-    totalBytes,
+    original: originalPath,
+    totalBytes: originalBuffer.length,
   };
 }

@@ -135,47 +138,107 @@ async function processAndSaveImage(
  * Convert a file path to a public URL
  */
 function pathToUrl(filePath: string): string {
+  // Find /products/ or /brands/ in the path and extract from there
+  const productsMatch = filePath.match(/(\/products\/.*)/);
+  const brandsMatch = filePath.match(/(\/brands\/.*)/);
+
+  if (productsMatch) {
+    return `${IMAGES_PUBLIC_URL}${productsMatch[1]}`;
+  }
+  if (brandsMatch) {
+    return `${IMAGES_PUBLIC_URL}${brandsMatch[1]}`;
+  }
+
+  // Fallback: try to replace base path (works if paths match exactly)
   const relativePath = filePath.replace(IMAGES_BASE_PATH, '');
   return `${IMAGES_PUBLIC_URL}${relativePath}`;
 }

 /**
- * Download and store a product image locally
+ * Build the directory path for a product image
+ * Structure: /images/products/<state>/<store>/<brand>/<product>/
+ */
+function buildProductImagePath(ctx: ProductImageContext): string {
+  const state = slugify(ctx.stateCode || 'unknown');
+  const store = slugify(ctx.storeSlug || 'unknown');
+  const brand = slugify(ctx.brandSlug || 'unknown');
+  const product = slugify(ctx.productId || 'unknown');
+
+  return path.join(IMAGES_BASE_PATH, 'products', state, store, brand, product);
+}
+
+/**
+ * Download and store a product image with proper hierarchy
  *
  * @param imageUrl - The third-party image URL to download
- * @param dispensaryId - The dispensary ID (for directory organization)
- * @param productId - The product ID or external ID (for filename)
+ * @param ctx - Product context (state, store, brand, product)
+ * @param options - Download options
  * @returns Download result with local URLs
  */
 export async function downloadProductImage(
   imageUrl: string,
-  dispensaryId: number,
-  productId: string | number
+  ctx: ProductImageContext,
+  options: { skipIfExists?: boolean } = {}
 ): Promise<DownloadResult> {
+  const { skipIfExists = true } = options;
+
   try {
     if (!imageUrl) {
       return { success: false, error: 'No image URL provided' };
     }
+
+    const outputDir = buildProductImagePath(ctx);
+    const urlHash = hashUrl(imageUrl);
+    const baseFilename = `image-${urlHash}`;
+
+    // Check if image already exists
+    if (skipIfExists) {
+      const existingPath = path.join(outputDir, `${baseFilename}.webp`);
+      try {
+        await fs.access(existingPath);
+        // Image exists, return existing URL
+        const url = pathToUrl(existingPath);
+        return {
+          success: true,
+          skipped: true,
+          urls: {
+            original: url,
+            full: url,
+            medium: url,
+            thumb: url,
+          },
+          localPaths: {
+            original: existingPath,
+            full: existingPath,
+            medium: existingPath,
+            thumb: existingPath,
+          },
        };
+      } catch {
+        // Image doesn't exist, continue to download
+      }
+    }

     // Download the image
     const buffer = await downloadImage(imageUrl);

-    // Organize by dispensary ID
-    const outputDir = path.join(IMAGES_BASE_PATH, 'products', String(dispensaryId));
-
-    // Use product ID + URL hash for uniqueness
-    const urlHash = hashUrl(imageUrl);
-    const baseFilename = `${productId}-${urlHash}`;
-
-    // Process and save
+    // Process and save (original only)
     const result = await processAndSaveImage(buffer, outputDir, baseFilename);
+    const url = pathToUrl(result.original);

     return {
       success: true,
       urls: {
-        full: pathToUrl(result.full),
-        medium: pathToUrl(result.medium),
-        thumb: pathToUrl(result.thumb),
+        original: url,
+        full: url,
+        medium: url,
+        thumb: url,
       },
+      localPaths: {
+        original: result.original,
+        full: result.original,
+        medium: result.original,
+        thumb: result.original,
+      },
       bytesDownloaded: result.totalBytes,
     };
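// Usage sketch for the new signature (context values are illustrative, echoing
// the examples in ProductImageContext):
//
//   const result = await downloadProductImage('https://example.com/p/123.png', {
//     stateCode: 'AZ',
//     storeSlug: 'deeply-rooted',
//     brandSlug: 'high-west-farms',
//     productId: 'abc123',
//   });
//   if (result.success && !result.skipped) {
//     console.log(`Saved ${result.bytesDownloaded} bytes at ${result.urls!.original}`);
//   }
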
@@ -188,33 +251,71 @@ export async function downloadProductImage(
 }

 /**
- * Download and store a brand logo locally
+ * Legacy function - backwards compatible with old signature
+ * Maps to new hierarchy using dispensary_id as store identifier
+ */
+export async function downloadProductImageLegacy(
+  imageUrl: string,
+  dispensaryId: number,
+  productId: string | number
+): Promise<DownloadResult> {
+  return downloadProductImage(imageUrl, {
+    stateCode: 'unknown',
+    storeSlug: `store-${dispensaryId}`,
+    brandSlug: 'unknown',
+    productId: String(productId),
+    dispensaryId,
+  });
+}
+
+/**
+ * Download and store a brand logo
  *
  * @param logoUrl - The brand logo URL
- * @param brandId - The brand ID or slug
+ * @param brandSlug - The brand slug/ID
  * @returns Download result with local URL
 */
 export async function downloadBrandLogo(
   logoUrl: string,
-  brandId: string
+  brandSlug: string,
+  options: { skipIfExists?: boolean } = {}
 ): Promise<DownloadResult> {
+  const { skipIfExists = true } = options;
+
   try {
     if (!logoUrl) {
       return { success: false, error: 'No logo URL provided' };
     }
+
+    const safeBrandSlug = slugify(brandSlug);
+    const outputDir = path.join(IMAGES_BASE_PATH, 'brands', safeBrandSlug);
+    const urlHash = hashUrl(logoUrl);
+    const baseFilename = `logo-${urlHash}`;
+
+    // Check if logo already exists
+    if (skipIfExists) {
+      const existingPath = path.join(outputDir, `${baseFilename}.webp`);
+      try {
+        await fs.access(existingPath);
+        return {
+          success: true,
+          skipped: true,
+          urls: {
+            original: pathToUrl(existingPath),
+            full: pathToUrl(existingPath),
+            medium: pathToUrl(existingPath),
+            thumb: pathToUrl(existingPath),
+          },
+        };
+      } catch {
+        // Logo doesn't exist, continue
+      }
+    }

     // Download the image
     const buffer = await downloadImage(logoUrl);

-    // Brand logos go in /images/brands/
-    const outputDir = path.join(IMAGES_BASE_PATH, 'brands');
-
-    // Sanitize brand ID for filename
-    const safeBrandId = brandId.replace(/[^a-zA-Z0-9-_]/g, '_');
-    const urlHash = hashUrl(logoUrl);
-    const baseFilename = `${safeBrandId}-${urlHash}`;
-
-    // Process and save (single size for logos)
+    // Brand logos in their own directory
     await ensureDir(outputDir);
     const logoPath = path.join(outputDir, `${baseFilename}.webp`);

@@ -228,6 +329,7 @@ export async function downloadBrandLogo(
     return {
       success: true,
       urls: {
+        original: pathToUrl(logoPath),
         full: pathToUrl(logoPath),
         medium: pathToUrl(logoPath),
         thumb: pathToUrl(logoPath),
@@ -243,20 +345,16 @@ export async function downloadBrandLogo(
 }

 /**
- * Check if a local image already exists
+ * Check if a product image already exists
 */
-export async function imageExists(
-  dispensaryId: number,
-  productId: string | number,
+export async function productImageExists(
+  ctx: ProductImageContext,
   imageUrl: string
 ): Promise<boolean> {
+  const outputDir = buildProductImagePath(ctx);
   const urlHash = hashUrl(imageUrl);
-  const imagePath = path.join(
-    IMAGES_BASE_PATH,
-    'products',
-    String(dispensaryId),
-    `${productId}-${urlHash}.webp`
-  );
+  const imagePath = path.join(outputDir, `image-${urlHash}.webp`);
   try {
     await fs.access(imagePath);
     return true;
@@ -266,24 +364,27 @@ export async function imageExists(
 }

 /**
- * Delete a product's local images
+ * Get the local image URL for a product (if exists)
 */
-export async function deleteProductImages(
-  dispensaryId: number,
-  productId: string | number,
-  imageUrl?: string
-): Promise<void> {
-  const productDir = path.join(IMAGES_BASE_PATH, 'products', String(dispensaryId));
-  const prefix = imageUrl
-    ? `${productId}-${hashUrl(imageUrl)}`
-    : String(productId);
-
+export async function getProductImageUrl(
+  ctx: ProductImageContext,
+  imageUrl: string
+): Promise<LocalImageSizes | null> {
+  const outputDir = buildProductImagePath(ctx);
+  const urlHash = hashUrl(imageUrl);
+  const imagePath = path.join(outputDir, `image-${urlHash}.webp`);
+
   try {
-    const files = await fs.readdir(productDir);
-    const toDelete = files.filter(f => f.startsWith(prefix));
-    await Promise.all(toDelete.map(f => fs.unlink(path.join(productDir, f))));
+    await fs.access(imagePath);
+    const url = pathToUrl(imagePath);
+    return {
+      original: url,
+      full: url,
+      medium: url,
+      thumb: url,
+    };
   } catch {
-    // Directory might not exist, that's fine
+    return null;
   }
 }

@@ -296,19 +397,17 @@ export function isImageStorageReady(): boolean {

 /**
  * Initialize the image storage directories
- * Does NOT throw on failure - logs warning and continues
 */
 export async function initializeImageStorage(): Promise<void> {
   try {
     await ensureDir(path.join(IMAGES_BASE_PATH, 'products'));
     await ensureDir(path.join(IMAGES_BASE_PATH, 'brands'));
-    console.log(`✅ Image storage initialized at ${IMAGES_BASE_PATH}`);
+    console.log(`[ImageStorage] Initialized at ${IMAGES_BASE_PATH}`);
     imageStorageReady = true;
   } catch (error: any) {
-    console.warn(`⚠️ WARNING: Could not initialize image storage at ${IMAGES_BASE_PATH}: ${error.message}`);
-    console.warn('   Image upload/processing is disabled. Server will continue without image features.');
+    console.warn(`[ImageStorage] WARNING: Could not initialize at ${IMAGES_BASE_PATH}: ${error.message}`);
+    console.warn('   Image features disabled. Server will continue without image downloads.');
     imageStorageReady = false;
-    // Do NOT throw - server should still start
   }
 }

@@ -316,34 +415,43 @@ export async function initializeImageStorage(): Promise<void> {
 * Get storage stats
 */
 export async function getStorageStats(): Promise<{
-  productsDir: string;
-  brandsDir: string;
+  basePath: string;
   productCount: number;
   brandCount: number;
+  totalSizeBytes: number;
 }> {
-  const productsDir = path.join(IMAGES_BASE_PATH, 'products');
-  const brandsDir = path.join(IMAGES_BASE_PATH, 'brands');
-
   let productCount = 0;
   let brandCount = 0;
+  let totalSizeBytes = 0;
+
+  async function countDir(dirPath: string): Promise<{ count: number; size: number }> {
+    let count = 0;
+    let size = 0;
+    try {
+      const entries = await fs.readdir(dirPath, { withFileTypes: true });
+      for (const entry of entries) {
+        const fullPath = path.join(dirPath, entry.name);
+        if (entry.isDirectory()) {
+          const sub = await countDir(fullPath);
+          count += sub.count;
+          size += sub.size;
+        } else if (entry.name.endsWith('.webp') && !entry.name.includes('-')) {
+          count++;
+          const stat = await fs.stat(fullPath);
+          size += stat.size;
+        }
+      }
+    } catch { /* ignore */ }
+    return { count, size };
+  }

-  try {
-    const productDirs = await fs.readdir(productsDir);
-    for (const dir of productDirs) {
-      const files = await fs.readdir(path.join(productsDir, dir));
-      productCount += files.filter(f => f.endsWith('.webp') && !f.includes('-')).length;
-    }
-  } catch { /* ignore */ }
-
-  try {
-    const brandFiles = await fs.readdir(brandsDir);
-    brandCount = brandFiles.filter(f => f.endsWith('.webp')).length;
-  } catch { /* ignore */ }
+  const products = await countDir(path.join(IMAGES_BASE_PATH, 'products'));
+  const brands = await countDir(path.join(IMAGES_BASE_PATH, 'brands'));

   return {
-    productsDir,
-    brandsDir,
-    productCount,
-    brandCount,
+    basePath: IMAGES_BASE_PATH,
+    productCount: products.count,
+    brandCount: brands.count,
+    totalSizeBytes: products.size + brands.size,
   };
 }
backend/src/utils/payload-storage.ts (new file, 406 lines)
@@ -0,0 +1,406 @@
/**
 * Payload Storage Utility
 *
 * Per TASK_WORKFLOW_2024-12-10.md: Store raw GraphQL payloads for historical analysis.
 *
 * Design Pattern: Metadata/Payload Separation
 * - Metadata in PostgreSQL (raw_crawl_payloads table): Small, indexed, queryable
 * - Payload on filesystem: Gzipped JSON at storage_path
 *
 * Storage structure:
 * /storage/payloads/{year}/{month}/{day}/store_{dispensary_id}_{timestamp}.json.gz
 *
 * Benefits:
 * - Compare any two crawls to see what changed
 * - Replay/re-normalize historical data if logic changes
 * - Debug issues by seeing exactly what the API returned
 * - DB stays small, backups stay fast
 * - ~90% compression (1.5MB -> 150KB per crawl)
 */

import * as fs from 'fs';
import * as path from 'path';
import * as zlib from 'zlib';
import { promisify } from 'util';
import { Pool } from 'pg';
import * as crypto from 'crypto';

const gzip = promisify(zlib.gzip);
const gunzip = promisify(zlib.gunzip);

// Base path for payload storage (matches image storage pattern)
const PAYLOAD_BASE_PATH = process.env.PAYLOAD_STORAGE_PATH || './storage/payloads';

/**
 * Result from saving a payload
 */
export interface SavePayloadResult {
  id: number;
  storagePath: string;
  sizeBytes: number;
  sizeBytesRaw: number;
  checksum: string;
}

/**
 * Result from loading a payload
 */
export interface LoadPayloadResult {
  payload: any;
  metadata: {
    id: number;
    dispensaryId: number;
    crawlRunId: number | null;
    productCount: number;
    fetchedAt: Date;
    storagePath: string;
  };
}

/**
 * Generate storage path for a payload
 *
 * Format: /storage/payloads/{year}/{month}/{day}/store_{dispensary_id}_{timestamp}.json.gz
 */
function generateStoragePath(dispensaryId: number, timestamp: Date): string {
  const year = timestamp.getFullYear();
  const month = String(timestamp.getMonth() + 1).padStart(2, '0');
  const day = String(timestamp.getDate()).padStart(2, '0');
  const ts = timestamp.getTime();

  return path.join(
    PAYLOAD_BASE_PATH,
    String(year),
    month,
    day,
    `store_${dispensaryId}_${ts}.json.gz`
  );
}

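// Example: for dispensary 42 fetched on 2024-12-10 at 15:30 UTC this yields a
// path like the following (timestamp illustrative; year/month/day come from
// local time, so the folder can differ near midnight in non-UTC zones):
//
//   generateStoragePath(42, new Date('2024-12-10T15:30:00Z'));
//   // => ./storage/payloads/2024/12/10/store_42_1733844600000.json.gz
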
/**
 * Ensure directory exists for a file path
 */
async function ensureDir(filePath: string): Promise<void> {
  const dir = path.dirname(filePath);
  await fs.promises.mkdir(dir, { recursive: true });
}

/**
 * Calculate SHA256 checksum of data
 */
function calculateChecksum(data: Buffer): string {
  return crypto.createHash('sha256').update(data).digest('hex');
}

/**
 * Save a raw crawl payload to filesystem and record metadata in DB
 *
 * @param pool - Database connection pool
 * @param dispensaryId - ID of the dispensary
 * @param payload - Raw JSON payload from GraphQL
 * @param crawlRunId - Optional crawl_run ID for linking
 * @param productCount - Number of products in payload
 * @returns SavePayloadResult with file info and DB record ID
 */
export async function saveRawPayload(
  pool: Pool,
  dispensaryId: number,
  payload: any,
  crawlRunId: number | null = null,
  productCount: number = 0
): Promise<SavePayloadResult> {
  const timestamp = new Date();
  const storagePath = generateStoragePath(dispensaryId, timestamp);

  // Serialize and compress
  const jsonStr = JSON.stringify(payload);
  const rawSize = Buffer.byteLength(jsonStr, 'utf8');
  const compressed = await gzip(Buffer.from(jsonStr, 'utf8'));
  const compressedSize = compressed.length;
  const checksum = calculateChecksum(compressed);

  // Write to filesystem
  await ensureDir(storagePath);
  await fs.promises.writeFile(storagePath, compressed);

  // Record metadata in DB
  const result = await pool.query(`
    INSERT INTO raw_crawl_payloads (
      crawl_run_id,
      dispensary_id,
      storage_path,
      product_count,
      size_bytes,
      size_bytes_raw,
      fetched_at,
      checksum_sha256
    ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
    RETURNING id
  `, [
    crawlRunId,
    dispensaryId,
    storagePath,
    productCount,
    compressedSize,
    rawSize,
    timestamp,
    checksum
  ]);

  console.log(`[PayloadStorage] Saved payload for store ${dispensaryId}: ${storagePath} (${(compressedSize / 1024).toFixed(1)}KB compressed, ${(rawSize / 1024).toFixed(1)}KB raw)`);

  return {
    id: result.rows[0].id,
    storagePath,
    sizeBytes: compressedSize,
    sizeBytesRaw: rawSize,
    checksum
  };
}

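// Usage sketch: in a payload_fetch handler this is the final step after the
// GraphQL call (fetchMenuPayload is a hypothetical stand-in for the actual
// Dutchie client call; only saveRawPayload's signature is from this file):
//
//   const payload = await fetchMenuPayload(dispensary); // hypothetical
//   const saved = await saveRawPayload(
//     pool, dispensary.id, payload, crawlRunId, payload.products?.length ?? 0
//   );
//   console.log(`Payload ${saved.id} at ${saved.storagePath} (${saved.sizeBytes} bytes gzipped)`);
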
/**
 * Load a raw payload from filesystem by metadata ID
 *
 * @param pool - Database connection pool
 * @param payloadId - ID from raw_crawl_payloads table
 * @returns LoadPayloadResult with parsed payload and metadata
 */
export async function loadRawPayloadById(
  pool: Pool,
  payloadId: number
): Promise<LoadPayloadResult | null> {
  const result = await pool.query(`
    SELECT id, dispensary_id, crawl_run_id, storage_path, product_count, fetched_at
    FROM raw_crawl_payloads
    WHERE id = $1
  `, [payloadId]);

  if (result.rows.length === 0) {
    return null;
  }

  const row = result.rows[0];
  const payload = await loadPayloadFromPath(row.storage_path);

  return {
    payload,
    metadata: {
      id: row.id,
      dispensaryId: row.dispensary_id,
      crawlRunId: row.crawl_run_id,
      productCount: row.product_count,
      fetchedAt: row.fetched_at,
      storagePath: row.storage_path
    }
  };
}

/**
 * Load a raw payload directly from filesystem path
 *
 * @param storagePath - Path to gzipped JSON file
 * @returns Parsed JSON payload
 */
export async function loadPayloadFromPath(storagePath: string): Promise<any> {
  const compressed = await fs.promises.readFile(storagePath);
  const decompressed = await gunzip(compressed);
  return JSON.parse(decompressed.toString('utf8'));
}

/**
 * Get the latest payload for a dispensary
 *
 * @param pool - Database connection pool
 * @param dispensaryId - ID of the dispensary
 * @returns LoadPayloadResult or null if none exists
 */
export async function getLatestPayload(
  pool: Pool,
  dispensaryId: number
): Promise<LoadPayloadResult | null> {
  const result = await pool.query(`
    SELECT id, dispensary_id, crawl_run_id, storage_path, product_count, fetched_at
    FROM raw_crawl_payloads
    WHERE dispensary_id = $1
    ORDER BY fetched_at DESC
    LIMIT 1
  `, [dispensaryId]);

  if (result.rows.length === 0) {
    return null;
  }

  const row = result.rows[0];
  const payload = await loadPayloadFromPath(row.storage_path);

  return {
    payload,
    metadata: {
      id: row.id,
      dispensaryId: row.dispensary_id,
      crawlRunId: row.crawl_run_id,
      productCount: row.product_count,
      fetchedAt: row.fetched_at,
      storagePath: row.storage_path
    }
  };
}

/**
 * Get two payloads for comparison (latest and previous, or by IDs)
 *
 * @param pool - Database connection pool
 * @param dispensaryId - ID of the dispensary
 * @param limit - Number of recent payloads to retrieve (default 2)
 * @returns Array of LoadPayloadResult, most recent first
 */
export async function getRecentPayloads(
  pool: Pool,
  dispensaryId: number,
  limit: number = 2
): Promise<LoadPayloadResult[]> {
  const result = await pool.query(`
    SELECT id, dispensary_id, crawl_run_id, storage_path, product_count, fetched_at
    FROM raw_crawl_payloads
    WHERE dispensary_id = $1
    ORDER BY fetched_at DESC
    LIMIT $2
  `, [dispensaryId, limit]);

  const payloads: LoadPayloadResult[] = [];

  for (const row of result.rows) {
    const payload = await loadPayloadFromPath(row.storage_path);
    payloads.push({
      payload,
      metadata: {
        id: row.id,
        dispensaryId: row.dispensary_id,
        crawlRunId: row.crawl_run_id,
        productCount: row.product_count,
        fetchedAt: row.fetched_at,
        storagePath: row.storage_path
      }
    });
  }

  return payloads;
}

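// Usage sketch: getRecentPayloads is enough to diff the two latest crawls for
// a store (the products/id fields are assumptions about the payload shape):
//
//   const [latest, previous] = await getRecentPayloads(pool, 42, 2);
//   if (latest && previous) {
//     const ids = (p: any) => new Set<string>(p.products?.map((x: any) => x.id) ?? []);
//     const before = ids(previous.payload);
//     const added = [...ids(latest.payload)].filter(id => !before.has(id));
//     console.log(`${added.length} products added since ${previous.metadata.fetchedAt.toISOString()}`);
//   }
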
/**
 * List payload metadata without loading files (for browsing/pagination)
 *
 * @param pool - Database connection pool
 * @param options - Query options
 * @returns Array of metadata rows
 */
export async function listPayloadMetadata(
  pool: Pool,
  options: {
    dispensaryId?: number;
    startDate?: Date;
    endDate?: Date;
    limit?: number;
    offset?: number;
  } = {}
): Promise<Array<{
  id: number;
  dispensaryId: number;
  crawlRunId: number | null;
  storagePath: string;
  productCount: number;
  sizeBytes: number;
  sizeBytesRaw: number;
  fetchedAt: Date;
}>> {
  const conditions: string[] = [];
  const params: any[] = [];
  let paramIndex = 1;

  if (options.dispensaryId) {
    conditions.push(`dispensary_id = $${paramIndex++}`);
    params.push(options.dispensaryId);
  }

  if (options.startDate) {
    conditions.push(`fetched_at >= $${paramIndex++}`);
    params.push(options.startDate);
  }

  if (options.endDate) {
    conditions.push(`fetched_at <= $${paramIndex++}`);
    params.push(options.endDate);
  }

  const whereClause = conditions.length > 0 ? `WHERE ${conditions.join(' AND ')}` : '';
  const limit = options.limit || 50;
  const offset = options.offset || 0;

  params.push(limit, offset);

  const result = await pool.query(`
    SELECT
      id,
      dispensary_id,
      crawl_run_id,
      storage_path,
      product_count,
      size_bytes,
      size_bytes_raw,
      fetched_at
    FROM raw_crawl_payloads
    ${whereClause}
    ORDER BY fetched_at DESC
    LIMIT $${paramIndex++} OFFSET $${paramIndex}
  `, params);

  return result.rows.map(row => ({
    id: row.id,
    dispensaryId: row.dispensary_id,
    crawlRunId: row.crawl_run_id,
    storagePath: row.storage_path,
    productCount: row.product_count,
    sizeBytes: row.size_bytes,
    sizeBytesRaw: row.size_bytes_raw,
    fetchedAt: row.fetched_at
  }));
}

/**
 * Delete old payloads (for retention policy)
 *
 * @param pool - Database connection pool
 * @param olderThan - Delete payloads older than this date
 * @returns Number of payloads deleted
 */
export async function deleteOldPayloads(
  pool: Pool,
  olderThan: Date
): Promise<number> {
  // Get paths first
  const result = await pool.query(`
    SELECT id, storage_path FROM raw_crawl_payloads
    WHERE fetched_at < $1
  `, [olderThan]);

  // Delete files
  for (const row of result.rows) {
    try {
      await fs.promises.unlink(row.storage_path);
    } catch (err: any) {
      if (err.code !== 'ENOENT') {
        console.warn(`[PayloadStorage] Failed to delete ${row.storage_path}: ${err.message}`);
      }
    }
  }

  // Delete DB records
  await pool.query(`
    DELETE FROM raw_crawl_payloads
    WHERE fetched_at < $1
  `, [olderThan]);

  console.log(`[PayloadStorage] Deleted ${result.rows.length} payloads older than ${olderThan.toISOString()}`);

  return result.rows.length;
}
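
// Usage sketch: a retention job only needs a cutoff date (the 90-day window is
// an assumption - no retention period is specified here):
//
//   const cutoff = new Date(Date.now() - 90 * 24 * 60 * 60 * 1000);
//   const deleted = await deleteOldPayloads(pool, cutoff);
//   console.log(`Retention pass removed ${deleted} payloads`);
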
@@ -1,13 +1,13 @@
 # Build stage
-FROM node:20-slim AS builder
+FROM code.cannabrands.app/creationshop/node:20-slim AS builder

 WORKDIR /app

 # Copy package files
 COPY package*.json ./

-# Install dependencies
-RUN npm ci
+# Install dependencies (npm install is more forgiving than npm ci)
+RUN npm install

 # Copy source files
 COPY . .
@@ -20,7 +20,7 @@ COPY . .
 RUN npm run build

 # Production stage
-FROM nginx:alpine
+FROM code.cannabrands.app/creationshop/nginx:alpine

 # Copy built assets from builder stage
 COPY --from=builder /app/dist /usr/share/nginx/html
Some files were not shown because too many files have changed in this diff.