# Compare commits

167 commits: `feature/ca...` → `feat/task-...`
| Author | SHA1 | Date |
|---|---|---|
|  | 6490df9faf |  |
|  | a077f81c65 |  |
|  | 6bcadd9e71 |  |
|  | a77bf8611a |  |
|  | 33feca3138 |  |
|  | 7d85a97b63 |  |
|  | ce081effd4 |  |
|  | 2ed088b4d8 |  |
|  | d3c49fa246 |  |
|  | 52cb5014fd |  |
|  | 50654be910 |  |
|  | cdab71a1ee |  |
|  | a35976b9e9 |  |
|  | c68210c485 |  |
|  | f2864bd2ad |  |
|  | eca9e85242 |  |
|  | 3f958fbff3 |  |
|  | c84ef0396b |  |
|  | e1c67dcee5 |  |
|  | 34c8a8cc67 |  |
|  | 6cd1f55119 |  |
|  | e918234928 |  |
|  | 888a608485 |  |
|  | b5c3b05246 |  |
|  | fdce5e0302 |  |
|  | 4679b245de |  |
|  | a837070f54 |  |
|  | 5a929e9803 |  |
|  | 52b0fad410 |  |
|  | 9944031eea |  |
|  | 2babaa7136 |  |
|  | 90567511dd |  |
|  | beb16ad0cb |  |
|  | fc7fc5ea85 |  |
|  | ab8956b14b |  |
|  | 1d9c90641f |  |
|  | 6126b907f2 |  |
|  | cc93d2d483 |  |
|  | 7642c17ec0 |  |
|  | cb60dcf352 |  |
|  | 5ffe05d519 |  |
|  | 8e2f07c941 |  |
|  | 0b6e615075 |  |
|  | be251c6fb3 |  |
|  | efb1e89e33 |  |
|  | 529c447413 |  |
|  | 1eaf95c06b |  |
|  | 138ed17d8b |  |
|  | a880c41d89 |  |
|  | 2a9ae61dce |  |
|  | 1f21911fa1 |  |
|  | 6f0a58f5d2 |  |
|  | 8206dce821 |  |
|  | ced1afaa8a |  |
|  | d6c602c567 |  |
|  | a252a7fefd |  |
|  | 83b06c21cc |  |
|  | f5214da54c |  |
|  | e3d4dd0127 |  |
|  | d0ee0d72f5 |  |
|  | 521f0550cd |  |
|  | 8a09691e91 |  |
|  | 459ad7d9c9 |  |
|  | d102d27731 |  |
|  | 01810c40a1 |  |
|  | b7d33e1cbf |  |
|  | 5b34b5a78c |  |
|  | c091d2316b |  |
|  | e8862b8a8b |  |
|  | 1b46ab699d |  |
|  | ac1995f63f |  |
|  | de93669652 |  |
|  | dffc124920 |  |
|  | 932ceb0287 |  |
|  | 824d48fd85 |  |
|  | 47fdab0382 |  |
|  | ed7ddc6375 |  |
|  | cf06f4a8c0 |  |
|  | a2fa21f65c |  |
|  | 61e915968f |  |
|  | 4949b22457 |  |
|  | 1fb0eb94c2 |  |
|  | 9aefb554bc |  |
|  | a4338669a9 |  |
|  | 1fa9ea496c |  |
|  | 31756a2233 |  |
|  | 166583621b |  |
|  | ca952c4674 |  |
|  | 4054778b6c |  |
|  | 56a5f00015 |  |
|  | a96d50c481 |  |
|  | 4806212f46 |  |
|  | 2486f3c6b2 |  |
|  | f25bebf6ee |  |
|  | 22dad6d0fc |  |
|  | 03eab66d35 |  |
|  | 97b1ab23d8 |  |
|  | 9fff0ba430 |  |
|  | 7d3e91b2e6 |  |
|  | 74957a9ec5 |  |
|  | 2d035c46cf |  |
|  | 53445fe72a |  |
|  | 37cc8956c5 |  |
|  | 197c82f921 |  |
|  | 2c52493a9c |  |
|  | 2ee2ba6b8c |  |
|  | bafcf1694a |  |
|  | 95792aab15 |  |
|  | 38ae2c3a3e |  |
|  | 249d3c1b7f |  |
|  | 9647f94f89 |  |
|  | afc288d2cf |  |
|  | df01ce6aad |  |
|  | aea93bc96b |  |
|  | 4e84f30f8b |  |
|  | b20a0a4fa5 |  |
|  | 6eb1babc86 |  |
|  | 9a9c2f76a2 |  |
|  | 56cc171287 |  |
|  | 0295637ed6 |  |
|  | 9c6dd37316 |  |
|  | 524d13209a |  |
|  | 9199db3927 |  |
|  | a0652c7c73 |  |
|  | 89c262ee20 |  |
|  | 7f9cf559cf |  |
|  | bbe039c868 |  |
|  | 4e5c09a2a5 |  |
|  | 7f65598332 |  |
|  | 75315ed91e |  |
|  | 7fe7d17b43 |  |
|  | 7e517b5801 |  |
|  | 38ba9021d1 |  |
|  | ddebad48d3 |  |
|  | 1cebf2e296 |  |
|  | 1d6e67d837 |  |
|  | cfb4b6e4ce |  |
|  | f418c403d6 |  |
|  | be4221af46 |  |
|  | ca07606b05 |  |
|  | baf1bf2eb7 |  |
|  | 4ef3a8d72b |  |
|  | 09dd756eff |  |
|  | ec8ef6210c |  |
|  | a9b7a4d7a9 |  |
|  | 5119d5ccf9 |  |
|  | 91efd1d03d |  |
|  | aa776226b0 |  |
|  | e9435150e9 |  |
|  | d399b966e6 |  |
|  | f5f0e25384 |  |
|  | 04de33e5f7 |  |
|  | 37dfea25e1 |  |
|  | e2166bc25f |  |
|  | b5e8f039bf |  |
|  | 346e6d1cd8 |  |
|  | be434d25e3 |  |
|  | ecc201e9d4 |  |
|  | 67bfdf47a5 |  |
|  | 3fa22a6ba1 |  |
|  | 9f898f68db |  |
|  | f78b05360a |  |
|  | 2f483b3084 |  |
|  | 9711d594db |  |
|  | 39aebfcb82 |  |
|  | 5415cac2f3 |  |
|  | 70d2364a6f |  |
```diff
@@ -1,38 +1,76 @@
-when:
-  - event: [push, pull_request]
-
 steps:
-  # Build checks
+  # ===========================================
+  # PR VALIDATION: Parallel type checks (PRs only)
+  # ===========================================
   typecheck-backend:
-    image: node:20
+    image: code.cannabrands.app/creationshop/node:20
     commands:
       - cd backend
-      - npm ci
-      - npx tsc --noEmit || true
+      - npm ci --prefer-offline
+      - npx tsc --noEmit
+    depends_on: []
+    when:
+      event: pull_request
 
-  build-cannaiq:
-    image: node:20
+  typecheck-cannaiq:
+    image: code.cannabrands.app/creationshop/node:20
     commands:
       - cd cannaiq
-      - npm ci
+      - npm ci --prefer-offline
       - npx tsc --noEmit
-      - npm run build
+    depends_on: []
+    when:
+      event: pull_request
 
-  build-findadispo:
-    image: node:20
+  typecheck-findadispo:
+    image: code.cannabrands.app/creationshop/node:20
     commands:
       - cd findadispo/frontend
-      - npm ci
-      - npm run build
+      - npm ci --prefer-offline
+      - npx tsc --noEmit 2>/dev/null || true
+    depends_on: []
+    when:
+      event: pull_request
 
-  build-findagram:
-    image: node:20
+  typecheck-findagram:
+    image: code.cannabrands.app/creationshop/node:20
     commands:
       - cd findagram/frontend
-      - npm ci
-      - npm run build
+      - npm ci --prefer-offline
+      - npx tsc --noEmit 2>/dev/null || true
+    depends_on: []
+    when:
+      event: pull_request
 
-  # Docker builds - only on master
+  # ===========================================
+  # AUTO-MERGE: Merge PR after all checks pass
+  # ===========================================
+  auto-merge:
+    image: alpine:latest
+    environment:
+      GITEA_TOKEN:
+        from_secret: gitea_token
+    commands:
+      - apk add --no-cache curl
+      - |
+        echo "Merging PR #${CI_COMMIT_PULL_REQUEST}..."
+        curl -s -X POST \
+          -H "Authorization: token $GITEA_TOKEN" \
+          -H "Content-Type: application/json" \
+          -d '{"Do":"merge"}' \
+          "https://code.cannabrands.app/api/v1/repos/Creationshop/dispensary-scraper/pulls/${CI_COMMIT_PULL_REQUEST}/merge"
+    depends_on:
+      - typecheck-backend
+      - typecheck-cannaiq
+      - typecheck-findadispo
+      - typecheck-findagram
+    when:
+      event: pull_request
+
+  # ===========================================
+  # MASTER DEPLOY: Parallel Docker builds
+  # NOTE: cache_from/cache_to removed due to plugin bug splitting on commas
+  # ===========================================
   docker-backend:
     image: woodpeckerci/plugin-docker-buildx
     settings:
@@ -49,6 +87,12 @@ steps:
         from_secret: registry_password
       platforms: linux/amd64
       provenance: false
+      build_args:
+        APP_BUILD_VERSION: ${CI_COMMIT_SHA:0:8}
+        APP_GIT_SHA: ${CI_COMMIT_SHA}
+        APP_BUILD_TIME: ${CI_PIPELINE_CREATED}
+        CONTAINER_IMAGE_TAG: ${CI_COMMIT_SHA:0:8}
+    depends_on: []
     when:
       branch: master
       event: push
@@ -69,6 +113,7 @@ steps:
         from_secret: registry_password
       platforms: linux/amd64
       provenance: false
+    depends_on: []
     when:
       branch: master
       event: push
@@ -89,6 +134,7 @@ steps:
         from_secret: registry_password
       platforms: linux/amd64
       provenance: false
+    depends_on: []
     when:
       branch: master
       event: push
@@ -109,32 +155,40 @@ steps:
         from_secret: registry_password
       platforms: linux/amd64
       provenance: false
+    depends_on: []
     when:
       branch: master
       event: push
 
-  # Deploy to Kubernetes
+  # ===========================================
+  # STAGE 3: Deploy and Run Migrations
+  # ===========================================
   deploy:
     image: bitnami/kubectl:latest
     environment:
       KUBECONFIG_CONTENT:
         from_secret: kubeconfig_data
     commands:
-      - echo "Deploying to Kubernetes..."
       - mkdir -p ~/.kube
       - echo "$KUBECONFIG_CONTENT" | tr -d '[:space:]' | base64 -d > ~/.kube/config
       - chmod 600 ~/.kube/config
+      # Deploy backend first
       - kubectl set image deployment/scraper scraper=code.cannabrands.app/creationshop/dispensary-scraper:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
-      - kubectl set image deployment/scraper-worker scraper-worker=code.cannabrands.app/creationshop/dispensary-scraper:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
+      - kubectl rollout status deployment/scraper -n dispensary-scraper --timeout=300s
+      # Note: Migrations run automatically at startup via auto-migrate
+      # Deploy remaining services
+      # Resilience: ensure workers are scaled up if at 0
+      - REPLICAS=$(kubectl get deployment scraper-worker -n dispensary-scraper -o jsonpath='{.spec.replicas}'); if [ "$REPLICAS" = "0" ]; then echo "Scaling workers from 0 to 5"; kubectl scale deployment/scraper-worker --replicas=5 -n dispensary-scraper; fi
+      - kubectl set image deployment/scraper-worker worker=code.cannabrands.app/creationshop/dispensary-scraper:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
       - kubectl set image deployment/cannaiq-frontend cannaiq-frontend=code.cannabrands.app/creationshop/cannaiq-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
       - kubectl set image deployment/findadispo-frontend findadispo-frontend=code.cannabrands.app/creationshop/findadispo-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
       - kubectl set image deployment/findagram-frontend findagram-frontend=code.cannabrands.app/creationshop/findagram-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
-      - kubectl rollout status deployment/scraper -n dispensary-scraper --timeout=300s
-      - kubectl rollout status deployment/scraper-worker -n dispensary-scraper --timeout=300s
       - kubectl rollout status deployment/cannaiq-frontend -n dispensary-scraper --timeout=120s
-      - kubectl rollout status deployment/findadispo-frontend -n dispensary-scraper --timeout=120s
-      - kubectl rollout status deployment/findagram-frontend -n dispensary-scraper --timeout=120s
-      - echo "All deployments complete!"
+    depends_on:
+      - docker-backend
+      - docker-cannaiq
+      - docker-findadispo
+      - docker-findagram
     when:
       branch: master
       event: push
```
**.woodpecker/ci.yml** — new file (191 lines):

```yaml
steps:
  # ===========================================
  # PR VALIDATION: Only typecheck changed projects
  # ===========================================
  typecheck-backend:
    image: code.cannabrands.app/creationshop/node:20
    commands:
      - npm config set cache /npm-cache/backend --global
      - cd backend
      - npm ci --prefer-offline
      - npx tsc --noEmit
    volumes:
      - npm-cache:/npm-cache
    depends_on: []
    when:
      event: pull_request
      path:
        include: ['backend/**']

  typecheck-cannaiq:
    image: code.cannabrands.app/creationshop/node:20
    commands:
      - npm config set cache /npm-cache/cannaiq --global
      - cd cannaiq
      - npm ci --prefer-offline
      - npx tsc --noEmit
    volumes:
      - npm-cache:/npm-cache
    depends_on: []
    when:
      event: pull_request
      path:
        include: ['cannaiq/**']

  # findadispo/findagram typechecks skipped - they have || true anyway

  # ===========================================
  # AUTO-MERGE: Merge PR after all checks pass
  # ===========================================
  auto-merge:
    image: alpine:latest
    environment:
      GITEA_TOKEN:
        from_secret: gitea_token
    commands:
      - apk add --no-cache curl
      - |
        echo "Merging PR #${CI_COMMIT_PULL_REQUEST}..."
        curl -s -X POST \
          -H "Authorization: token $GITEA_TOKEN" \
          -H "Content-Type: application/json" \
          -d '{"Do":"merge"}' \
          "https://code.cannabrands.app/api/v1/repos/Creationshop/dispensary-scraper/pulls/${CI_COMMIT_PULL_REQUEST}/merge"
    depends_on:
      - typecheck-backend
      - typecheck-cannaiq
    when:
      event: pull_request

  # ===========================================
  # MASTER DEPLOY: Parallel Docker builds
  # ===========================================
  docker-backend:
    image: woodpeckerci/plugin-docker-buildx
    settings:
      registry: code.cannabrands.app
      repo: code.cannabrands.app/creationshop/dispensary-scraper
      tags:
        - latest
        - ${CI_COMMIT_SHA:0:8}
      dockerfile: backend/Dockerfile
      context: backend
      username:
        from_secret: registry_username
      password:
        from_secret: registry_password
      platforms: linux/amd64
      provenance: false
      cache_from: type=registry,ref=code.cannabrands.app/creationshop/dispensary-scraper:cache
      cache_to: type=registry,ref=code.cannabrands.app/creationshop/dispensary-scraper:cache,mode=max
      build_args:
        APP_BUILD_VERSION: ${CI_COMMIT_SHA:0:8}
        APP_GIT_SHA: ${CI_COMMIT_SHA}
        APP_BUILD_TIME: ${CI_PIPELINE_CREATED}
        CONTAINER_IMAGE_TAG: ${CI_COMMIT_SHA:0:8}
    depends_on: []
    when:
      branch: master
      event: push

  docker-cannaiq:
    image: woodpeckerci/plugin-docker-buildx
    settings:
      registry: code.cannabrands.app
      repo: code.cannabrands.app/creationshop/cannaiq-frontend
      tags:
        - latest
        - ${CI_COMMIT_SHA:0:8}
      dockerfile: cannaiq/Dockerfile
      context: cannaiq
      username:
        from_secret: registry_username
      password:
        from_secret: registry_password
      platforms: linux/amd64
      provenance: false
      cache_from: type=registry,ref=code.cannabrands.app/creationshop/cannaiq-frontend:cache
      cache_to: type=registry,ref=code.cannabrands.app/creationshop/cannaiq-frontend:cache,mode=max
    depends_on: []
    when:
      branch: master
      event: push

  docker-findadispo:
    image: woodpeckerci/plugin-docker-buildx
    settings:
      registry: code.cannabrands.app
      repo: code.cannabrands.app/creationshop/findadispo-frontend
      tags:
        - latest
        - ${CI_COMMIT_SHA:0:8}
      dockerfile: findadispo/frontend/Dockerfile
      context: findadispo/frontend
      username:
        from_secret: registry_username
      password:
        from_secret: registry_password
      platforms: linux/amd64
      provenance: false
      cache_from: type=registry,ref=code.cannabrands.app/creationshop/findadispo-frontend:cache
      cache_to: type=registry,ref=code.cannabrands.app/creationshop/findadispo-frontend:cache,mode=max
    depends_on: []
    when:
      branch: master
      event: push

  docker-findagram:
    image: woodpeckerci/plugin-docker-buildx
    settings:
      registry: code.cannabrands.app
      repo: code.cannabrands.app/creationshop/findagram-frontend
      tags:
        - latest
        - ${CI_COMMIT_SHA:0:8}
      dockerfile: findagram/frontend/Dockerfile
      context: findagram/frontend
      username:
        from_secret: registry_username
      password:
        from_secret: registry_password
      platforms: linux/amd64
      provenance: false
      cache_from: type=registry,ref=code.cannabrands.app/creationshop/findagram-frontend:cache
      cache_to: type=registry,ref=code.cannabrands.app/creationshop/findagram-frontend:cache,mode=max
    depends_on: []
    when:
      branch: master
      event: push

  # ===========================================
  # STAGE 3: Deploy and Run Migrations
  # ===========================================
  deploy:
    image: bitnami/kubectl:latest
    environment:
      KUBECONFIG_CONTENT:
        from_secret: kubeconfig_data
    commands:
      - mkdir -p ~/.kube
      - echo "$KUBECONFIG_CONTENT" | tr -d '[:space:]' | base64 -d > ~/.kube/config
      - chmod 600 ~/.kube/config
      # Deploy backend first
      - kubectl set image deployment/scraper scraper=code.cannabrands.app/creationshop/dispensary-scraper:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
      - kubectl rollout status deployment/scraper -n dispensary-scraper --timeout=300s
      # Note: Migrations run automatically at startup via auto-migrate
      # Deploy remaining services
      # Resilience: ensure workers are scaled up if at 0
      - REPLICAS=$(kubectl get deployment scraper-worker -n dispensary-scraper -o jsonpath='{.spec.replicas}'); if [ "$REPLICAS" = "0" ]; then echo "Scaling workers from 0 to 5"; kubectl scale deployment/scraper-worker --replicas=5 -n dispensary-scraper; fi
      - kubectl set image deployment/scraper-worker worker=code.cannabrands.app/creationshop/dispensary-scraper:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
      - kubectl set image deployment/cannaiq-frontend cannaiq-frontend=code.cannabrands.app/creationshop/cannaiq-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
      - kubectl set image deployment/findadispo-frontend findadispo-frontend=code.cannabrands.app/creationshop/findadispo-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
      - kubectl set image deployment/findagram-frontend findagram-frontend=code.cannabrands.app/creationshop/findagram-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
      - kubectl rollout status deployment/cannaiq-frontend -n dispensary-scraper --timeout=120s
    depends_on:
      - docker-backend
      - docker-cannaiq
      - docker-findadispo
      - docker-findagram
    when:
      branch: master
      event: push
```
**backend/.gitignore** — new file (3 lines):

```
# IP2Location database (downloaded separately)
data/ip2location/
```
**backend/Dockerfile**:

```diff
@@ -1,17 +1,17 @@
 # Build stage
 # Image: code.cannabrands.app/creationshop/dispensary-scraper
-FROM node:20-slim AS builder
+FROM code.cannabrands.app/creationshop/node:20-slim AS builder
 
 WORKDIR /app
 
 COPY package*.json ./
-RUN npm ci
+RUN npm install
 
 COPY . .
 RUN npm run build
 
 # Production stage
-FROM node:20-slim
+FROM code.cannabrands.app/creationshop/node:20-slim
 
 # Build arguments for version info
 ARG APP_BUILD_VERSION=dev
@@ -25,8 +25,9 @@ ENV APP_GIT_SHA=${APP_GIT_SHA}
 ENV APP_BUILD_TIME=${APP_BUILD_TIME}
 ENV CONTAINER_IMAGE_TAG=${CONTAINER_IMAGE_TAG}
 
-# Install Chromium dependencies
+# Install Chromium dependencies and curl for HTTP requests
 RUN apt-get update && apt-get install -y \
+    curl \
     chromium \
     fonts-liberation \
     libnss3 \
@@ -43,10 +44,13 @@ ENV PUPPETEER_EXECUTABLE_PATH=/usr/bin/chromium
 WORKDIR /app
 
 COPY package*.json ./
-RUN npm ci --omit=dev
+RUN npm install --omit=dev
 
 COPY --from=builder /app/dist ./dist
 
+# Copy migrations for auto-migrate on startup
+COPY migrations ./migrations
+
 # Create local images directory for when MinIO is not configured
 RUN mkdir -p /app/public/images/products
```
**backend/docs/CODEBASE_MAP.md** — new file (218 lines):

# CannaiQ Backend Codebase Map

**Last Updated:** 2025-12-12
**Purpose:** Help Claude and developers understand which code is current vs deprecated

---

## Quick Reference: What to Use

### For Crawling/Scraping

| Task | Use This | NOT This |
|------|----------|----------|
| Fetch products | `src/tasks/handlers/payload-fetch.ts` | `src/hydration/*` |
| Process products | `src/tasks/handlers/product-refresh.ts` | `src/scraper-v2/*` |
| GraphQL client | `src/platforms/dutchie/client.ts` | `src/dutchie-az/services/graphql-client.ts` |
| Worker system | `src/tasks/task-worker.ts` | `src/dutchie-az/services/worker.ts` |

### For Database

| Task | Use This | NOT This |
|------|----------|----------|
| Get DB pool | `src/db/pool.ts` | `src/dutchie-az/db/connection.ts` |
| Run migrations | `src/db/migrate.ts` (CLI only) | Never import at runtime |
| Query products | `store_products` table | `products`, `dutchie_products` |
| Query stores | `dispensaries` table | `stores` table |
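A minimal usage sketch of the canonical pool (assumptions: `src/db/pool.ts` exposes a named `pool` export, and the `inStockCount` helper is hypothetical; the `store_products` columns come from the crawl-pipeline doc):

```typescript
// Sketch only — assumes src/db/pool.ts exports a pg Pool as `pool`.
import { pool } from './db/pool';

// Query the canonical product table (store_products), never the empty `products` table.
export async function inStockCount(dispensaryId: number): Promise<number> {
  const { rows } = await pool.query(
    'SELECT COUNT(*) AS n FROM store_products WHERE dispensary_id = $1 AND is_in_stock = true',
    [dispensaryId]
  );
  return Number(rows[0].n);
}
```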
### For Discovery

| Task | Use This |
|------|----------|
| Discover stores | `src/discovery/*.ts` |
| Run discovery | `npx tsx src/scripts/run-discovery.ts` |

---

## Directory Status

### ACTIVE DIRECTORIES (Use These)

```
src/
├── auth/            # JWT/session auth, middleware
├── db/              # Database pool, migrations
├── discovery/       # Dutchie store discovery pipeline
├── middleware/      # Express middleware
├── multi-state/     # Multi-state query support
├── platforms/       # Platform-specific clients (Dutchie, Jane, etc)
│   └── dutchie/     # THE Dutchie client - use this one
├── routes/          # Express API routes
├── services/        # Core services (logger, scheduler, etc)
├── tasks/           # Task system (workers, handlers, scheduler)
│   └── handlers/    # Task handlers (payload_fetch, product_refresh, etc)
├── types/           # TypeScript types
└── utils/           # Utilities (storage, image processing)
```

### DEPRECATED DIRECTORIES (DO NOT USE)

```
src/
├── hydration/           # DEPRECATED - Old pipeline approach
├── scraper-v2/          # DEPRECATED - Old scraper engine
├── canonical-hydration/ # DEPRECATED - Merged into tasks/handlers
├── dutchie-az/          # PARTIAL - Some parts deprecated, some active
│   ├── db/              # DEPRECATED - Use src/db/pool.ts
│   └── services/        # PARTIAL - worker.ts still runs, graphql-client.ts deprecated
├── portals/             # FUTURE - Not yet implemented
├── seo/                 # PARTIAL - Settings work, templates WIP
└── system/              # DEPRECATED - Old orchestration system
```

### DEPRECATED FILES (DO NOT USE)

```
src/dutchie-az/db/connection.ts            # Use src/db/pool.ts instead
src/dutchie-az/services/graphql-client.ts  # Use src/platforms/dutchie/client.ts
src/hydration/*.ts                         # Entire directory deprecated
src/scraper-v2/*.ts                        # Entire directory deprecated
```

---

## Key Files Reference

### Entry Points

| File | Purpose | Status |
|------|---------|--------|
| `src/index.ts` | Main Express server | ACTIVE |
| `src/dutchie-az/services/worker.ts` | Worker process entry | ACTIVE |
| `src/tasks/task-worker.ts` | Task worker (new system) | ACTIVE |

### Dutchie Integration

| File | Purpose | Status |
|------|---------|--------|
| `src/platforms/dutchie/client.ts` | GraphQL client, hashes, curl | **PRIMARY** |
| `src/platforms/dutchie/queries.ts` | High-level query functions | ACTIVE |
| `src/platforms/dutchie/index.ts` | Re-exports | ACTIVE |

### Task Handlers

| File | Purpose | Status |
|------|---------|--------|
| `src/tasks/handlers/payload-fetch.ts` | Fetch products from Dutchie | **PRIMARY** |
| `src/tasks/handlers/product-refresh.ts` | Process payload into DB | **PRIMARY** |
| `src/tasks/handlers/menu-detection.ts` | Detect menu type | ACTIVE |
| `src/tasks/handlers/id-resolution.ts` | Resolve platform IDs | ACTIVE |
| `src/tasks/handlers/image-download.ts` | Download product images | ACTIVE |

### Database

| File | Purpose | Status |
|------|---------|--------|
| `src/db/pool.ts` | Canonical DB pool | **PRIMARY** |
| `src/db/migrate.ts` | Migration runner (CLI only) | CLI ONLY |
| `src/db/auto-migrate.ts` | Auto-run migrations on startup | ACTIVE |

### Configuration

| File | Purpose | Status |
|------|---------|--------|
| `.env` | Environment variables | ACTIVE |
| `package.json` | Dependencies | ACTIVE |
| `tsconfig.json` | TypeScript config | ACTIVE |

---

## GraphQL Hashes (CRITICAL)

The correct hashes are in `src/platforms/dutchie/client.ts`:

```typescript
export const GRAPHQL_HASHES = {
  FilteredProducts: 'ee29c060826dc41c527e470e9ae502c9b2c169720faa0a9f5d25e1b9a530a4a0',
  GetAddressBasedDispensaryData: '13461f73abf7268770dfd05fe7e10c523084b2bb916a929c08efe3d87531977b',
  ConsumerDispensaries: '0a5bfa6ca1d64ae47bcccb7c8077c87147cbc4e6982c17ceec97a2a4948b311b',
  GetAllCitiesByState: 'ae547a0466ace5a48f91e55bf6699eacd87e3a42841560f0c0eabed5a0a920e6',
};
```

**ALWAYS** use `Status: 'Active'` for FilteredProducts (not `null` or `'All'`) — see the request sketch below.
---
|
||||||
|
|
||||||
|
## Scripts Reference
|
||||||
|
|
||||||
|
### Useful Scripts (in `src/scripts/`)
|
||||||
|
| Script | Purpose |
|
||||||
|
|--------|---------|
|
||||||
|
| `run-discovery.ts` | Run Dutchie discovery |
|
||||||
|
| `crawl-single-store.ts` | Test crawl a single store |
|
||||||
|
| `test-dutchie-graphql.ts` | Test GraphQL queries |
|
||||||
|
|
||||||
|
### One-Off Scripts (probably don't need)
|
||||||
|
| Script | Purpose |
|
||||||
|
|--------|---------|
|
||||||
|
| `harmonize-az-dispensaries.ts` | One-time data cleanup |
|
||||||
|
| `bootstrap-stores-for-dispensaries.ts` | One-time migration |
|
||||||
|
| `backfill-*.ts` | Historical backfill scripts |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## API Routes
|
||||||
|
|
||||||
|
### Active Routes (in `src/routes/`)
|
||||||
|
| Route File | Mount Point | Purpose |
|
||||||
|
|------------|-------------|---------|
|
||||||
|
| `auth.ts` | `/api/auth` | Login/logout/session |
|
||||||
|
| `stores.ts` | `/api/stores` | Store CRUD |
|
||||||
|
| `dashboard.ts` | `/api/dashboard` | Dashboard stats |
|
||||||
|
| `workers.ts` | `/api/workers` | Worker monitoring |
|
||||||
|
| `pipeline.ts` | `/api/pipeline` | Crawl triggers |
|
||||||
|
| `discovery.ts` | `/api/discovery` | Discovery management |
|
||||||
|
| `analytics.ts` | `/api/analytics` | Analytics queries |
|
||||||
|
| `wordpress.ts` | `/api/v1/wordpress` | WordPress plugin API |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Documentation Files
|
||||||
|
|
||||||
|
### Current Docs (in `backend/docs/`)
|
||||||
|
| Doc | Purpose | Currency |
|
||||||
|
|-----|---------|----------|
|
||||||
|
| `TASK_WORKFLOW_2024-12-10.md` | Task system architecture | CURRENT |
|
||||||
|
| `WORKER_TASK_ARCHITECTURE.md` | Worker/task design | CURRENT |
|
||||||
|
| `CRAWL_PIPELINE.md` | Crawl pipeline overview | CURRENT |
|
||||||
|
| `ORGANIC_SCRAPING_GUIDE.md` | Browser-based scraping | CURRENT |
|
||||||
|
| `CODEBASE_MAP.md` | This file | CURRENT |
|
||||||
|
| `ANALYTICS_V2_EXAMPLES.md` | Analytics API examples | CURRENT |
|
||||||
|
| `BRAND_INTELLIGENCE_API.md` | Brand API docs | CURRENT |
|
||||||
|
|
||||||
|
### Root Docs
|
||||||
|
| Doc | Purpose | Currency |
|
||||||
|
|-----|---------|----------|
|
||||||
|
| `CLAUDE.md` | Claude instructions | **PRIMARY** |
|
||||||
|
| `README.md` | Project overview | NEEDS UPDATE |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Common Mistakes to Avoid
|
||||||
|
|
||||||
|
1. **Don't use `src/hydration/`** - It's an old approach that was superseded by the task system
|
||||||
|
|
||||||
|
2. **Don't use `src/dutchie-az/db/connection.ts`** - Use `src/db/pool.ts` instead
|
||||||
|
|
||||||
|
3. **Don't import `src/db/migrate.ts` at runtime** - It will crash. Only use for CLI migrations.
|
||||||
|
|
||||||
|
4. **Don't query `stores` table** - It's empty. Use `dispensaries`.
|
||||||
|
|
||||||
|
5. **Don't query `products` table** - It's empty. Use `store_products`.
|
||||||
|
|
||||||
|
6. **Don't use wrong GraphQL hash** - Always get hash from `GRAPHQL_HASHES` in client.ts
|
||||||
|
|
||||||
|
7. **Don't use `Status: null`** - It returns 0 products. Use `Status: 'Active'`.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## When in Doubt
|
||||||
|
|
||||||
|
1. Check if the file is imported in `src/index.ts` - if not, it may be deprecated
|
||||||
|
2. Check the last modified date - older files may be stale
|
||||||
|
3. Look for `DEPRECATED` comments in the code
|
||||||
|
4. Ask: "Is there a newer version of this in `src/tasks/` or `src/platforms/`?"
|
||||||
|
5. Read the relevant doc in `docs/` before modifying code
|
||||||
**backend/docs/_archive/BRAND_INTELLIGENCE_API.md** — new file (394 lines):

# Brand Intelligence API

## Endpoint

```
GET /api/analytics/v2/brand/:name/intelligence
```

## Query Parameters

| Param | Type | Default | Description |
|-------|------|---------|-------------|
| `window` | `7d\|30d\|90d` | `30d` | Time window for trend calculations |
| `state` | string | - | Filter by state code (e.g., `AZ`) |
| `category` | string | - | Filter by category (e.g., `Flower`) |

## Response Payload Schema

```typescript
interface BrandIntelligenceResult {
  brand_name: string;
  window: '7d' | '30d' | '90d';
  generated_at: string; // ISO timestamp when data was computed

  performance_snapshot: PerformanceSnapshot;
  alerts: Alerts;
  sku_performance: SkuPerformance[];
  retail_footprint: RetailFootprint;
  competitive_landscape: CompetitiveLandscape;
  inventory_health: InventoryHealth;
  promo_performance: PromoPerformance;
}
```

---

## Section 1: Performance Snapshot

Summary cards with key brand metrics.

```typescript
interface PerformanceSnapshot {
  active_skus: number;              // Total products in catalog
  total_revenue_30d: number | null; // Estimated from qty × price
  total_stores: number;             // Active retail partners
  new_stores_30d: number;           // New distribution in window
  market_share: number | null;      // % of category SKUs
  avg_wholesale_price: number | null;
  price_position: 'premium' | 'value' | 'competitive';
}
```

**UI Label Mapping:**

| Field | User-Facing Label | Helper Text |
|-------|-------------------|-------------|
| `active_skus` | Active Products | X total in catalog |
| `total_revenue_30d` | Monthly Revenue | Estimated from sales |
| `total_stores` | Retail Distribution | Active retail partners |
| `new_stores_30d` | New Opportunities | X new in last 30 days |
| `market_share` | Category Position | % of category |
| `avg_wholesale_price` | Avg Wholesale | Per unit |
| `price_position` | Pricing Tier | Premium/Value/Market Rate |

---

## Section 2: Alerts

Issues requiring attention.

```typescript
interface Alerts {
  lost_stores_30d_count: number;
  lost_skus_30d_count: number;
  competitor_takeover_count: number;
  avg_oos_duration_days: number | null;
  avg_reorder_lag_days: number | null;
  items: AlertItem[];
}

interface AlertItem {
  type: 'lost_store' | 'delisted_sku' | 'shelf_loss' | 'extended_oos';
  severity: 'critical' | 'warning';
  store_name?: string;
  product_name?: string;
  competitor_brand?: string;
  days_since?: number;
  state_code?: string;
}
```

**UI Label Mapping:**

| Field | User-Facing Label |
|-------|-------------------|
| `lost_stores_30d_count` | Accounts at Risk |
| `lost_skus_30d_count` | Delisted SKUs |
| `competitor_takeover_count` | Shelf Losses |
| `avg_oos_duration_days` | Avg Stockout Length |
| `avg_reorder_lag_days` | Avg Restock Time |
| `severity: critical` | Urgent |
| `severity: warning` | Watch |

---

## Section 3: SKU Performance (Product Velocity)

How fast each SKU sells.

```typescript
interface SkuPerformance {
  store_product_id: number;
  product_name: string;
  category: string | null;
  daily_velocity: number; // Units/day estimate
  velocity_status: 'hot' | 'steady' | 'slow' | 'stale';
  retail_price: number | null;
  on_sale: boolean;
  stores_carrying: number;
  stock_status: 'in_stock' | 'low_stock' | 'out_of_stock';
}
```

**UI Label Mapping:**

| Field | User-Facing Label |
|-------|-------------------|
| `daily_velocity` | Daily Rate |
| `velocity_status` | Momentum |
| `velocity_status: hot` | Hot |
| `velocity_status: steady` | Steady |
| `velocity_status: slow` | Slow |
| `velocity_status: stale` | Stale |
| `retail_price` | Retail Price |
| `on_sale` | Promo (badge) |

**Velocity Thresholds** (see the classifier sketch below):
- `hot`: >= 5 units/day
- `steady`: >= 1 unit/day
- `slow`: >= 0.1 units/day
- `stale`: < 0.1 units/day
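A small sketch of the threshold mapping, derived directly from the list above (the function name is hypothetical):

```typescript
type VelocityStatus = 'hot' | 'steady' | 'slow' | 'stale';

// Hypothetical helper: maps daily_velocity to velocity_status per the documented thresholds.
function classifyVelocity(unitsPerDay: number): VelocityStatus {
  if (unitsPerDay >= 5) return 'hot';
  if (unitsPerDay >= 1) return 'steady';
  if (unitsPerDay >= 0.1) return 'slow';
  return 'stale';
}
```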
---

## Section 4: Retail Footprint

Store placement and coverage.

```typescript
interface RetailFootprint {
  total_stores: number;
  in_stock_count: number;
  out_of_stock_count: number;
  penetration_by_region: RegionPenetration[];
  whitespace_stores: WhitespaceStore[];
}

interface RegionPenetration {
  state_code: string;
  store_count: number;
  percent_reached: number; // % of state's dispensaries
  in_stock: number;
  out_of_stock: number;
}

interface WhitespaceStore {
  store_id: number;
  store_name: string;
  state_code: string;
  city: string | null;
  category_fit: number; // How many competing brands they carry
  competitor_brands: string[];
}
```

**UI Label Mapping:**

| Field | User-Facing Label |
|-------|-------------------|
| `penetration_by_region` | Market Coverage by Region |
| `percent_reached` | X% reached |
| `in_stock` | X stocked |
| `out_of_stock` | X out |
| `whitespace_stores` | Expansion Opportunities |
| `category_fit` | X fit |

---

## Section 5: Competitive Landscape

Market positioning vs competitors.

```typescript
interface CompetitiveLandscape {
  brand_price_position: 'premium' | 'value' | 'competitive';
  market_share_trend: MarketSharePoint[];
  competitors: Competitor[];
  head_to_head_skus: HeadToHead[];
}

interface MarketSharePoint {
  date: string;
  share_percent: number;
}

interface Competitor {
  brand_name: string;
  store_overlap_percent: number;
  price_position: 'premium' | 'value' | 'competitive';
  avg_price: number | null;
  sku_count: number;
}

interface HeadToHead {
  product_name: string;
  brand_price: number;
  competitor_brand: string;
  competitor_price: number;
  price_diff_percent: number;
}
```

**UI Label Mapping:**

| Field | User-Facing Label |
|-------|-------------------|
| `price_position: premium` | Premium Tier |
| `price_position: value` | Value Leader |
| `price_position: competitive` | Market Rate |
| `market_share_trend` | Share of Shelf Trend |
| `head_to_head_skus` | Price Comparison |
| `store_overlap_percent` | X% store overlap |

---

## Section 6: Inventory Health

Stock projections and risk levels.

```typescript
interface InventoryHealth {
  critical_count: number;    // <7 days stock
  warning_count: number;     // 7-14 days stock
  healthy_count: number;     // 14-90 days stock
  overstocked_count: number; // >90 days stock
  skus: InventorySku[];
  overstock_alert: OverstockItem[];
}

interface InventorySku {
  store_product_id: number;
  product_name: string;
  store_name: string;
  days_of_stock: number | null;
  risk_level: 'critical' | 'elevated' | 'moderate' | 'healthy';
  current_quantity: number | null;
  daily_sell_rate: number | null;
}

interface OverstockItem {
  product_name: string;
  store_name: string;
  excess_units: number;
  days_of_stock: number;
}
```

**UI Label Mapping:**

| Field | User-Facing Label |
|-------|-------------------|
| `risk_level: critical` | Reorder Now |
| `risk_level: elevated` | Low Stock |
| `risk_level: moderate` | Monitor |
| `risk_level: healthy` | Healthy |
| `critical_count` | Urgent (<7 days) |
| `warning_count` | Low (7-14 days) |
| `overstocked_count` | Excess (>90 days) |
| `days_of_stock` | X days remaining |
| `overstock_alert` | Overstock Alert |
| `excess_units` | X excess units |
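The stock buckets follow directly from the interface comments above; a minimal sketch (the bucketing function itself is hypothetical):

```typescript
// Hypothetical helper: buckets days_of_stock per the documented ranges
// (<7 critical, 7-14 warning, 14-90 healthy, >90 overstocked).
type StockBucket = 'critical' | 'warning' | 'healthy' | 'overstocked';

function bucketDaysOfStock(days: number): StockBucket {
  if (days < 7) return 'critical';
  if (days < 14) return 'warning';
  if (days <= 90) return 'healthy';
  return 'overstocked';
}
```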
---

## Section 7: Promotion Effectiveness

How promotions impact sales.

```typescript
interface PromoPerformance {
  avg_baseline_velocity: number | null;
  avg_promo_velocity: number | null;
  avg_velocity_lift: number | null;    // % increase during promo
  avg_efficiency_score: number | null; // ROI proxy
  promotions: Promotion[];
}

interface Promotion {
  product_name: string;
  store_name: string;
  status: 'active' | 'scheduled' | 'ended';
  start_date: string;
  end_date: string | null;
  regular_price: number;
  promo_price: number;
  discount_percent: number;
  baseline_velocity: number | null;
  promo_velocity: number | null;
  velocity_lift: number | null;
  efficiency_score: number | null;
}
```

**UI Label Mapping:**

| Field | User-Facing Label |
|-------|-------------------|
| `avg_baseline_velocity` | Normal Rate |
| `avg_promo_velocity` | During Promos |
| `avg_velocity_lift` | Avg Sales Lift |
| `avg_efficiency_score` | ROI Score |
| `velocity_lift` | Sales Lift |
| `efficiency_score` | ROI Score |
| `status: active` | Live |
| `status: scheduled` | Scheduled |
| `status: ended` | Ended |

---

## Example Queries

### Get full payload
```javascript
const response = await fetch('/api/analytics/v2/brand/Wyld/intelligence?window=30d');
const data = await response.json();
```

### Extract summary cards (flattened)
```javascript
const { performance_snapshot: ps, alerts } = data;

const summaryCards = {
  activeProducts: ps.active_skus,
  monthlyRevenue: ps.total_revenue_30d,
  retailDistribution: ps.total_stores,
  newOpportunities: ps.new_stores_30d,
  categoryPosition: ps.market_share,
  avgWholesale: ps.avg_wholesale_price,
  pricingTier: ps.price_position,
  accountsAtRisk: alerts.lost_stores_30d_count,
  delistedSkus: alerts.lost_skus_30d_count,
  shelfLosses: alerts.competitor_takeover_count,
};
```

### Get top 10 fastest-selling SKUs
```javascript
const topSkus = data.sku_performance
  .filter(sku => sku.velocity_status === 'hot' || sku.velocity_status === 'steady')
  .sort((a, b) => b.daily_velocity - a.daily_velocity)
  .slice(0, 10);
```

### Get critical inventory alerts only
```javascript
const criticalInventory = data.inventory_health.skus
  .filter(sku => sku.risk_level === 'critical');
```

### Get states with <50% penetration
```javascript
const underPenetrated = data.retail_footprint.penetration_by_region
  .filter(region => region.percent_reached < 50)
  .sort((a, b) => a.percent_reached - b.percent_reached);
```

### Get active promotions with positive lift
```javascript
const effectivePromos = data.promo_performance.promotions
  .filter(p => p.status === 'active' && p.velocity_lift > 0)
  .sort((a, b) => b.velocity_lift - a.velocity_lift);
```

### Build chart data for market share trend
```javascript
const chartData = data.competitive_landscape.market_share_trend.map(point => ({
  x: new Date(point.date),
  y: point.share_percent,
}));
```

---

## Notes for Frontend Implementation

1. **All fields are snake_case** - transform to camelCase if needed (see the sketch below)
2. **Null values are possible** - handle them gracefully in the UI
3. **Arrays may be empty** - show appropriate empty states
4. **Timestamps are ISO format** - parse with `new Date()`
5. **Percentages are already computed** - no need to multiply by 100
6. **The `window` parameter affects trend calculations** - 7d/30d/90d
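A minimal sketch of the snake_case → camelCase transform mentioned in note 1 (recursive; the function name is hypothetical):

```typescript
// Hypothetical helper: recursively renames snake_case keys to camelCase,
// e.g. total_revenue_30d → totalRevenue30d. Arrays and primitives pass through.
function toCamel(value: unknown): unknown {
  if (Array.isArray(value)) return value.map(toCamel);
  if (value !== null && typeof value === 'object') {
    return Object.fromEntries(
      Object.entries(value as Record<string, unknown>).map(([k, v]) => [
        k.replace(/_([a-z0-9])/g, (_, c) => c.toUpperCase()),
        toCamel(v),
      ])
    );
  }
  return value;
}
```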
**backend/docs/_archive/CRAWL_PIPELINE.md** — new file (539 lines):

# Crawl Pipeline Documentation

## Overview

The crawl pipeline fetches product data from Dutchie dispensary menus and stores it in the canonical database. This document covers the complete flow from task scheduling to data storage.

---

## Pipeline Stages

```
┌───────────────────────┐
│    store_discovery    │  Find new dispensaries
└──────────┬────────────┘
           │
           ▼
┌───────────────────────┐
│ entry_point_discovery │  Resolve slug → platform_dispensary_id
└──────────┬────────────┘
           │
           ▼
┌───────────────────────┐
│   product_discovery   │  Initial product crawl
└──────────┬────────────┘
           │
           ▼
┌───────────────────────┐
│    product_resync     │  Recurring crawl (every 4 hours)
└───────────────────────┘
```

---

## Stage Details

### 1. Store Discovery
**Purpose:** Find new dispensaries to crawl

**Handler:** `src/tasks/handlers/store-discovery.ts`

**Flow:**
1. Query Dutchie `ConsumerDispensaries` GraphQL for cities/states
2. Extract dispensary info (name, address, menu_url)
3. Insert into `dutchie_discovery_locations`
4. Queue `entry_point_discovery` for each new location

---

### 2. Entry Point Discovery
**Purpose:** Resolve the menu URL slug to a platform_dispensary_id (MongoDB ObjectId)

**Handler:** `src/tasks/handlers/entry-point-discovery.ts`

**Flow:**
1. Load the dispensary from the database
2. Extract the slug from `menu_url`:
   - `/embedded-menu/<slug>` or `/dispensary/<slug>` (see the extraction sketch after the example below)
3. Start a stealth session (fingerprint + proxy)
4. Query `resolveDispensaryIdWithDetails(slug)` via GraphQL
5. Update the dispensary with `platform_dispensary_id`
6. Queue a `product_discovery` task

**Example:**
```
menu_url:                https://dutchie.com/embedded-menu/deeply-rooted
slug:                    deeply-rooted
platform_dispensary_id:  6405ef617056e8014d79101b
```
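A minimal sketch of slug extraction for the two documented URL shapes (the helper name is hypothetical):

```typescript
// Hypothetical helper: pulls the slug out of /embedded-menu/<slug> or /dispensary/<slug>.
function extractSlug(menuUrl: string): string | null {
  const match = new URL(menuUrl).pathname.match(/\/(?:embedded-menu|dispensary)\/([^/]+)/);
  return match ? match[1] : null;
}

// extractSlug('https://dutchie.com/embedded-menu/deeply-rooted') === 'deeply-rooted'
```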
---

### 3. Product Discovery
**Purpose:** Initial crawl of a new dispensary

**Handler:** `src/tasks/handlers/product-discovery.ts`

Same as product_resync, but for first-time crawls.

---

### 4. Product Resync
**Purpose:** Recurring crawl to capture price/stock changes

**Handler:** `src/tasks/handlers/product-resync.ts`

**Flow:**

#### Step 1: Load Dispensary Info
```sql
SELECT id, name, platform_dispensary_id, menu_url, state
FROM dispensaries
WHERE id = $1 AND crawl_enabled = true
```

#### Step 2: Start Stealth Session
- Generate a random browser fingerprint
- Set locale/timezone matching the state
- Optional proxy rotation

#### Step 3: Fetch Products via GraphQL
**Endpoint:** `https://dutchie.com/api-3/graphql`

**Variables:**
```javascript
{
  includeEnterpriseSpecials: false,
  productsFilter: {
    dispensaryId: "<platform_dispensary_id>",
    pricingType: "rec",
    Status: "All",
    types: [],
    useCache: false,
    isDefaultSort: true,
    sortBy: "popularSortIdx",
    sortDirection: 1,
    bypassOnlineThresholds: true,
    isKioskMenu: false,
    removeProductsBelowOptionThresholds: false
  },
  page: 0,
  perPage: 100
}
```

**Key Notes:**
- `Status: "All"` returns all products (Active returns the same count)
- `Status: null` returns 0 products (broken)
- `pricingType: "rec"` returns BOTH rec and med prices
- Paginate until `products.length < perPage` or `allProducts.length >= totalCount` (see the loop sketch below)
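A minimal pagination sketch implementing the stop conditions above (the `fetchPage` callback and its `{ products, totalCount }` return shape are assumptions, not the confirmed handler API):

```typescript
// Sketch only: fetchPage() stands in for one FilteredProducts request;
// the { products, totalCount } shape is an assumption about its result.
async function fetchAllProducts(
  fetchPage: (page: number, perPage: number) => Promise<{ products: unknown[]; totalCount: number }>,
  perPage = 100
): Promise<unknown[]> {
  const allProducts: unknown[] = [];
  for (let page = 0; ; page++) {
    const { products, totalCount } = await fetchPage(page, perPage);
    allProducts.push(...products);
    // Stop on a short page, or once the reported total has been collected.
    if (products.length < perPage || allProducts.length >= totalCount) break;
  }
  return allProducts;
}
```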
#### Step 4: Normalize Data
Transform the raw Dutchie payload to the canonical format via `DutchieNormalizer`.

#### Step 5: Upsert Products
Insert/update the `store_products` table with the normalized data.

#### Step 6: Create Snapshots
Insert a point-in-time record into `store_product_snapshots`.

#### Step 7: Track Missing Products (OOS Detection)
```sql
-- Reset consecutive_misses for products IN the feed
UPDATE store_products
SET consecutive_misses = 0, last_seen_at = NOW()
WHERE dispensary_id = $1
  AND provider = 'dutchie'
  AND provider_product_id = ANY($2)

-- Increment for products NOT in the feed
UPDATE store_products
SET consecutive_misses = consecutive_misses + 1
WHERE dispensary_id = $1
  AND provider = 'dutchie'
  AND provider_product_id NOT IN (...)
  AND consecutive_misses < 3

-- Mark OOS at 3 consecutive misses
UPDATE store_products
SET stock_status = 'oos', is_in_stock = false
WHERE dispensary_id = $1
  AND consecutive_misses >= 3
  AND stock_status != 'oos'
```

#### Step 8: Download Images
For new products, download and store images locally.

#### Step 9: Update Dispensary
```sql
UPDATE dispensaries SET last_crawl_at = NOW() WHERE id = $1
```

---

## GraphQL Payload Structure

### Product Fields (from filteredProducts.products[])

| Field | Type | Description |
|-------|------|-------------|
| `_id` / `id` | string | MongoDB ObjectId (24 hex chars) |
| `Name` | string | Product display name |
| `brandName` | string | Brand name |
| `brand.name` | string | Brand name (nested) |
| `brand.description` | string | Brand description |
| `type` | string | Category (Flower, Edible, Concentrate, etc.) |
| `subcategory` | string | Subcategory |
| `strainType` | string | Hybrid, Indica, Sativa, N/A |
| `Status` | string | Always "Active" in feed |
| `Image` | string | Primary image URL |
| `images[]` | array | All product images |

### Pricing Fields

| Field | Type | Description |
|-------|------|-------------|
| `Prices[]` | number[] | Rec prices per option |
| `recPrices[]` | number[] | Rec prices |
| `medicalPrices[]` | number[] | Medical prices |
| `recSpecialPrices[]` | number[] | Rec sale prices |
| `medicalSpecialPrices[]` | number[] | Medical sale prices |
| `Options[]` | string[] | Size options ("1/8oz", "1g", etc.) |
| `rawOptions[]` | string[] | Raw weight options ("3.5g") |
||||||
|
|
||||||
|
### Inventory Fields (POSMetaData.children[])
|
||||||
|
|
||||||
|
| Field | Type | Description |
|
||||||
|
|-------|------|-------------|
|
||||||
|
| `quantity` | number | Total inventory count |
|
||||||
|
| `quantityAvailable` | number | Available for online orders |
|
||||||
|
| `kioskQuantityAvailable` | number | Available for kiosk orders |
|
||||||
|
| `option` | string | Which size option this is for |
|
||||||
|
|
||||||
|
### Potency Fields
|
||||||
|
|
||||||
|
| Field | Type | Description |
|
||||||
|
|-------|------|-------------|
|
||||||
|
| `THCContent.range[]` | number[] | THC percentage |
|
||||||
|
| `CBDContent.range[]` | number[] | CBD percentage |
|
||||||
|
| `cannabinoidsV2[]` | array | Detailed cannabinoid breakdown |
|
||||||
|
|
||||||
|
### Specials (specialData.bogoSpecials[])
|
||||||
|
|
||||||
|
| Field | Type | Description |
|
||||||
|
|-------|------|-------------|
|
||||||
|
| `specialName` | string | Deal name |
|
||||||
|
| `specialType` | string | "bogo", "sale", etc. |
|
||||||
|
| `itemsForAPrice.value` | string | Bundle price |
|
||||||
|
| `bogoRewards[].totalQuantity.quantity` | number | Required quantity |
|
||||||
|
|
||||||
|
---

## OOS Detection Logic

Products disappear from the Dutchie feed when they go out of stock. We track this via `consecutive_misses`:

| Scenario | Action |
|----------|--------|
| Product in feed | `consecutive_misses = 0` |
| Product missing 1st time | `consecutive_misses = 1` |
| Product missing 2nd time | `consecutive_misses = 2` |
| Product missing 3rd time | `consecutive_misses = 3`, mark `stock_status = 'oos'` |
| Product returns to feed | `consecutive_misses = 0`, update stock_status |

**Why 3 misses?**

- Protects against false positives from crawl failures
- Single bad crawl doesn't trigger mass OOS alerts
- Balances detection speed vs accuracy
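
A minimal sketch of this bookkeeping follows. Table and column names match the docs; the two-statement shape is illustrative, not the exact handler code:

```typescript
import { Pool } from 'pg';

const db = new Pool(); // connection settings from PG* env vars

async function trackFeedPresence(dispensaryId: number, seenIds: string[]): Promise<void> {
  // Present in this crawl: reset the miss counter.
  await db.query(
    `UPDATE store_products
        SET consecutive_misses = 0, last_seen_at = NOW()
      WHERE dispensary_id = $1 AND provider_product_id = ANY($2)`,
    [dispensaryId, seenIds],
  );

  // Absent from this crawl: bump the counter; flip to OOS on the 3rd miss.
  await db.query(
    `UPDATE store_products
        SET consecutive_misses = consecutive_misses + 1,
            stock_status = CASE WHEN consecutive_misses + 1 >= 3 THEN 'oos' ELSE stock_status END,
            is_in_stock = (consecutive_misses + 1 < 3)
      WHERE dispensary_id = $1 AND provider_product_id <> ALL($2)`,
    [dispensaryId, seenIds],
  );
}
```
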
---

## Database Tables

### store_products

Current state of each product:

- `provider_product_id` - Dutchie's MongoDB ObjectId
- `name_raw`, `brand_name_raw` - Raw values from feed
- `price_rec`, `price_med` - Current prices
- `is_in_stock`, `stock_status` - Availability
- `consecutive_misses` - OOS detection counter
- `last_seen_at` - Last time product was in feed

### store_product_snapshots

Point-in-time records for historical analysis:

- One row per product per crawl
- Captures price, stock, potency at that moment
- Used for price history, analytics

### dispensaries

Store metadata:

- `platform_dispensary_id` - MongoDB ObjectId for GraphQL
- `menu_url` - Source URL
- `last_crawl_at` - Last successful crawl
- `crawl_enabled` - Whether to crawl

---

## Worker Roles

Workers pull tasks from the `worker_tasks` queue based on their assigned role.

| Role | Name | Description | Handler |
|------|------|-------------|---------|
| `product_resync` | Product Resync | Re-crawl dispensary products for price/stock changes | `handleProductResync` |
| `product_discovery` | Product Discovery | Initial product discovery for new dispensaries | `handleProductDiscovery` |
| `store_discovery` | Store Discovery | Discover new dispensary locations | `handleStoreDiscovery` |
| `entry_point_discovery` | Entry Point Discovery | Resolve platform IDs from menu URLs | `handleEntryPointDiscovery` |
| `analytics_refresh` | Analytics Refresh | Refresh materialized views and analytics | `handleAnalyticsRefresh` |

**API Endpoint:** `GET /api/worker-registry/roles`

---

## Scheduling

Crawls are scheduled via the `worker_tasks` table:

| Role | Frequency | Description |
|------|-----------|-------------|
| `product_resync` | Every 4 hours | Regular product refresh |
| `product_discovery` | On-demand | First crawl for new stores |
| `entry_point_discovery` | On-demand | New store setup |
| `store_discovery` | Daily | Find new stores |
| `analytics_refresh` | Daily | Refresh analytics materialized views |

---

## Priority & On-Demand Tasks

Tasks are claimed by workers in order of **priority DESC, created_at ASC**.

### Priority Levels

| Priority | Use Case | Example |
|----------|----------|---------|
| 0 | Scheduled/batch tasks | Daily product_resync generation |
| 10 | On-demand/chained tasks | entry_point → product_discovery |
| Higher | Urgent/manual triggers | Admin-triggered immediate crawl |

### Task Chaining

When a task completes, the system automatically creates follow-up tasks:

```
store_discovery (completed)
  └─► entry_point_discovery (priority: 10) for each new store

entry_point_discovery (completed, success)
  └─► product_discovery (priority: 10) for that store

product_discovery (completed)
  └─► [no chain] Store enters regular resync schedule
```

### On-Demand Task Creation

Use the task service to create high-priority tasks:

```typescript
// Create immediate product resync for a store
await taskService.createTask({
  role: 'product_resync',
  dispensary_id: 123,
  platform: 'dutchie',
  priority: 20, // Higher than batch tasks
});

// Convenience methods with default high priority (10)
await taskService.createEntryPointTask(dispensaryId, 'dutchie');
await taskService.createProductDiscoveryTask(dispensaryId, 'dutchie');
await taskService.createStoreDiscoveryTask('dutchie', 'AZ');
```

### Claim Function

The `claim_task()` SQL function atomically claims tasks; a sketch of the equivalent query follows this list.

- Respects priority ordering (higher = first)
- Uses `FOR UPDATE SKIP LOCKED` for concurrency
- Prevents multiple active tasks per store
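
The real logic is a SQL function in the migrations, so treat this as an illustration of the ordering and locking only (per-store exclusivity is enforced separately by a partial unique index):

```typescript
import { Pool } from 'pg';

const db = new Pool();

async function claimNextTask(workerId: string, role: string) {
  const { rows } = await db.query(
    `UPDATE worker_tasks
        SET status = 'claimed', worker_id = $1, claimed_at = NOW()
      WHERE id = (
        SELECT id
          FROM worker_tasks
         WHERE status = 'pending'
           AND role = $2
           AND (scheduled_for IS NULL OR scheduled_for <= NOW())
         ORDER BY priority DESC, created_at ASC
         FOR UPDATE SKIP LOCKED   -- concurrent workers skip rows already claimed
         LIMIT 1
      )
      RETURNING *`,
    [workerId, role],
  );
  return rows[0] ?? null; // null when the queue is empty for this role
}
```
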
---

## Image Storage

Images are downloaded from Dutchie's AWS S3 and stored locally with on-demand resizing.

### Storage Path

```
/storage/images/products/<state>/<store>/<brand>/<product_id>/image-<hash>.webp
/storage/images/brands/<brand>/logo-<hash>.webp
```

**Example:**

```
/storage/images/products/az/az-deeply-rooted/bud-bros/6913e3cd444eac3935e928b9/image-ae38b1f9.webp
```

### Image Proxy API

Served via `/img/*` with on-demand resizing using **sharp**:

```
GET /img/products/az/az-deeply-rooted/bud-bros/6913e3cd444eac3935e928b9/image-ae38b1f9.webp?w=200
```

| Param | Description |
|-------|-------------|
| `w` | Width in pixels (max 4000) |
| `h` | Height in pixels (max 4000) |
| `q` | Quality 1-100 (default 80) |
| `fit` | cover, contain, fill, inside, outside |
| `blur` | Blur sigma (0.3-1000) |
| `gray` | Grayscale (1 = enabled) |
| `format` | webp, jpeg, png, avif (default webp) |
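
A condensed sketch of such a handler, assuming express + sharp and the parameter semantics above (the real route lives in `src/routes/image-proxy.ts` and may differ):

```typescript
import express from 'express';
import path from 'path';
import sharp from 'sharp';

const STORAGE_ROOT = '/storage/images';
const router = express.Router();

router.get('/img/*', async (req, res) => {
  // NOTE: the real handler should also reject path traversal; omitted here.
  const relPath = (req.params as Record<string, string>)[0];
  const w = req.query.w ? Math.min(Number(req.query.w), 4000) : undefined;
  const h = req.query.h ? Math.min(Number(req.query.h), 4000) : undefined;
  const q = req.query.q ? Number(req.query.q) : 80;
  const format = (req.query.format as string) || 'webp';

  try {
    let img = sharp(path.join(STORAGE_ROOT, relPath));
    if (w || h) img = img.resize({ width: w, height: h, fit: (req.query.fit as any) || 'cover' });
    if (req.query.gray === '1') img = img.grayscale();
    if (req.query.blur) img = img.blur(Number(req.query.blur)); // sigma 0.3-1000
    const buf = await img.toFormat(format as any, { quality: q }).toBuffer();
    res.type(`image/${format}`).send(buf);
  } catch {
    res.sendStatus(404);
  }
});
```
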
### Key Files

| File | Purpose |
|------|---------|
| `src/utils/image-storage.ts` | Download & save images to local filesystem |
| `src/routes/image-proxy.ts` | On-demand resize/transform at `/img/*` |

### Download Rules

| Scenario | Image Action |
|----------|--------------|
| **New product (first crawl)** | Download if `primaryImageUrl` exists |
| **Existing product (refresh)** | Download only if `local_image_path` is NULL (backfill) |
| **Product already has local image** | Skip download entirely |

**Logic** (condensed in the sketch after this list):

- Images are downloaded **once** and never re-downloaded on subsequent crawls
- `skipIfExists: true` - filesystem check prevents re-download even if queued
- First crawl: all products get images
- Refresh crawl: only new products or products missing local images
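
In TypeScript the decision reduces to a couple of guards; this is an illustration, with `localImagePath` standing in for the `local_image_path` column:

```typescript
interface ImageCandidate {
  primaryImageUrl?: string | null; // URL from the Dutchie feed
  localImagePath?: string | null;  // store_products.local_image_path
}

function shouldDownloadImage(product: ImageCandidate): boolean {
  if (!product.primaryImageUrl) return false; // nothing to fetch
  if (product.localImagePath) return false;   // already stored locally; skip
  return true; // new product, or backfill for a missing local image
}
```
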
### Storage Rules

- **NO MinIO** - local filesystem only (`STORAGE_DRIVER=local`)
- Store full resolution, resize on-demand via `/img` proxy
- Convert to webp for consistency using **sharp**
- Preserve original Dutchie URL as fallback in `image_url` column
- Local path stored in `local_image_path` column

---

## Stealth & Anti-Detection

**PROXIES ARE REQUIRED** - Workers will fail to start if no active proxies are available in the database. All HTTP requests to Dutchie go through a proxy.

Workers automatically initialize anti-detection systems on startup.

### Components

| Component | Purpose | Source |
|-----------|---------|--------|
| **CrawlRotator** | Coordinates proxy + UA rotation | `src/services/crawl-rotator.ts` |
| **ProxyRotator** | Round-robin proxy selection, health tracking | `src/services/crawl-rotator.ts` |
| **UserAgentRotator** | Cycles through realistic browser fingerprints | `src/services/crawl-rotator.ts` |
| **Dutchie Client** | Curl-based HTTP with auto-retry on 403 | `src/platforms/dutchie/client.ts` |

### Initialization Flow

```
Worker Start
  │
  ├─► initializeStealth()
  │     │
  │     ├─► CrawlRotator.initialize()
  │     │     └─► Load proxies from `proxies` table
  │     │
  │     └─► setCrawlRotator(rotator)
  │           └─► Wire to Dutchie client
  │
  └─► Process tasks...
```

### Stealth Session (per task)

Each crawl task starts a stealth session:

```typescript
// In product-refresh.ts, entry-point-discovery.ts
const session = startSession(dispensary.state || 'AZ', 'America/Phoenix');
```

This creates a new identity with:

- **Random fingerprint:** Chrome/Firefox/Safari/Edge on Win/Mac/Linux
- **Accept-Language:** Matches timezone (e.g., `America/Phoenix` → `en-US,en;q=0.9`)
- **sec-ch-ua headers:** Proper Client Hints for the browser profile

### On 403 Block

When Dutchie returns a 403, the client automatically does the following (sketched after this list):

1. Records failure on the current proxy (increments `failure_count`)
2. If the proxy has 5+ failures, deactivates it
3. Rotates to the next healthy proxy
4. Rotates the fingerprint
5. Retries the request
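
A retry loop in that spirit might look like this. The rotator interface is assumed for illustration; the real methods live on `CrawlRotator` in `src/services/crawl-rotator.ts` and may differ:

```typescript
// Assumed interface, not the actual CrawlRotator API.
interface Rotator {
  recordFailure(): void;     // increments failure_count; deactivates at 5+ failures
  rotateProxy(): void;       // move to the next healthy proxy
  rotateFingerprint(): void; // new UA + matching client-hint headers
}

async function fetchWithRotation(
  doRequest: () => Promise<Response>, // issues the request via the current proxy + fingerprint
  rotator: Rotator,
  maxRetries = 3,
): Promise<Response> {
  for (let attempt = 0; attempt <= maxRetries; attempt++) {
    const res = await doRequest();
    if (res.status !== 403) return res;
    rotator.recordFailure();
    rotator.rotateProxy();
    rotator.rotateFingerprint();
  }
  throw new Error('Still blocked after retries');
}
```
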
### Proxy Table Schema

```sql
CREATE TABLE proxies (
  id SERIAL PRIMARY KEY,
  host VARCHAR(255) NOT NULL,
  port INTEGER NOT NULL,
  username VARCHAR(100),
  password VARCHAR(100),
  protocol VARCHAR(10) DEFAULT 'http', -- http, https, socks5
  is_active BOOLEAN DEFAULT true,
  last_used_at TIMESTAMPTZ,
  failure_count INTEGER DEFAULT 0,
  success_count INTEGER DEFAULT 0,
  avg_response_time_ms INTEGER,
  last_failure_at TIMESTAMPTZ,
  last_error TEXT
);
```

### Configuration

Proxies are mandatory. There is no environment variable to disable them, and workers will refuse to start without active proxies in the database.

### User-Agent Generation

See `workflow-12102025.md` for the full specification.

**Summary** (see the sketch after this list):

- Uses `intoli/user-agents` library (daily-updated market share data)
- Device distribution: Mobile 62%, Desktop 36%, Tablet 2%
- Browser whitelist: Chrome, Safari, Edge, Firefox only
- UA sticks until IP rotates (403 or manual rotation)
- Failure = alert admin + stop crawl (no fallback)
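
A weighted selection sketch using the `user-agents` package; the device weights and whitelist come from the summary above, while the resampling loop is illustrative rather than the rotator's exact code:

```typescript
import UserAgent from 'user-agents';

function pickUserAgent(): string {
  const roll = Math.random();
  const deviceCategory = roll < 0.62 ? 'mobile' : roll < 0.98 ? 'desktop' : 'tablet';
  // Resample until the UA belongs to a whitelisted browser family.
  for (let i = 0; i < 50; i++) {
    const ua = new UserAgent({ deviceCategory }).toString();
    if (/(Chrome|Safari|Edg|Firefox)\//.test(ua)) return ua;
  }
  // No fallback by design: alert the admin and stop the crawl.
  throw new Error('Failed to generate an acceptable user agent');
}
```
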
Each fingerprint includes proper `sec-ch-ua`, `sec-ch-ua-platform`, and `sec-ch-ua-mobile` headers.

---

## Error Handling

- **GraphQL errors:** Logged, task marked failed, retried later
- **Normalization errors:** Logged as warnings, continue with valid products
- **Image download errors:** Non-fatal, logged, continue
- **Database errors:** Task fails, will be retried
- **403 blocks:** Auto-rotate proxy + fingerprint, retry (up to 3 retries)

---

## Files

| File | Purpose |
|------|---------|
| `src/tasks/handlers/product-resync.ts` | Main crawl handler |
| `src/tasks/handlers/entry-point-discovery.ts` | Slug → ID resolution |
| `src/platforms/dutchie/index.ts` | GraphQL client, session management |
| `src/hydration/normalizers/dutchie.ts` | Payload normalization |
| `src/hydration/canonical-upsert.ts` | Database upsert logic |
| `src/utils/image-storage.ts` | Image download and local storage |
| `src/routes/image-proxy.ts` | On-demand image resizing |
| `migrations/075_consecutive_misses.sql` | OOS tracking column |

297 backend/docs/_archive/ORGANIC_SCRAPING_GUIDE.md (Normal file)
@@ -0,0 +1,297 @@

# Organic Browser-Based Scraping Guide

**Last Updated:** 2025-12-12
**Status:** Production-ready proof of concept

---

## Overview

This document describes the "organic" browser-based approach to scraping Dutchie dispensary menus. Unlike direct curl/axios requests, this method uses a real browser session to make API calls, making requests appear natural and reducing detection risk.

---

## Why Organic Scraping?

| Approach | Detection Risk | Speed | Complexity |
|----------|---------------|-------|------------|
| Direct curl | Higher | Fast | Low |
| curl-impersonate | Medium | Fast | Medium |
| **Browser-based (organic)** | **Lowest** | Slower | Higher |

Direct curl requests can be fingerprinted via:

- TLS fingerprint (cipher suites, extensions)
- Header order and values
- Missing cookies/session data
- Request patterns

Browser-based requests inherit:

- Real Chrome TLS fingerprint
- Session cookies from the page visit
- Natural header order
- JavaScript execution environment

---

## Implementation

### Dependencies

```bash
npm install puppeteer puppeteer-extra puppeteer-extra-plugin-stealth
```

### Core Script: `test-intercept.js`

Located at: `backend/test-intercept.js`

```javascript
const puppeteer = require('puppeteer-extra');
const StealthPlugin = require('puppeteer-extra-plugin-stealth');
const fs = require('fs');

puppeteer.use(StealthPlugin());

async function capturePayload(config) {
  const { dispensaryId, platformId, cName, outputPath } = config;

  const browser = await puppeteer.launch({
    headless: 'new',
    args: ['--no-sandbox', '--disable-setuid-sandbox']
  });

  const page = await browser.newPage();

  // STEP 1: Establish session by visiting the menu
  const embedUrl = `https://dutchie.com/embedded-menu/${cName}?menuType=rec`;
  await page.goto(embedUrl, { waitUntil: 'networkidle2', timeout: 60000 });

  // STEP 2: Fetch ALL products using GraphQL from browser context
  const result = await page.evaluate(async (platformId) => {
    const allProducts = [];
    let pageNum = 0;
    const perPage = 100;
    let totalCount = 0;
    const sessionId = 'browser-session-' + Date.now();

    while (pageNum < 30) {
      const variables = {
        includeEnterpriseSpecials: false,
        productsFilter: {
          dispensaryId: platformId,
          pricingType: 'rec',
          Status: 'Active', // CRITICAL: Must be 'Active', not null
          types: [],
          useCache: true,
          isDefaultSort: true,
          sortBy: 'popularSortIdx',
          sortDirection: 1,
          bypassOnlineThresholds: true,
          isKioskMenu: false,
          removeProductsBelowOptionThresholds: false,
        },
        page: pageNum,
        perPage: perPage,
      };

      const extensions = {
        persistedQuery: {
          version: 1,
          sha256Hash: 'ee29c060826dc41c527e470e9ae502c9b2c169720faa0a9f5d25e1b9a530a4a0'
        }
      };

      const qs = new URLSearchParams({
        operationName: 'FilteredProducts',
        variables: JSON.stringify(variables),
        extensions: JSON.stringify(extensions)
      });

      const response = await fetch(`https://dutchie.com/api-3/graphql?${qs}`, {
        method: 'GET',
        headers: {
          'Accept': 'application/json',
          'content-type': 'application/json',
          'x-dutchie-session': sessionId,
          'apollographql-client-name': 'Marketplace (production)',
        },
        credentials: 'include'
      });

      const json = await response.json();
      const data = json?.data?.filteredProducts;
      if (!data?.products) break;

      allProducts.push(...data.products);
      if (pageNum === 0) totalCount = data.queryInfo?.totalCount || 0;
      if (allProducts.length >= totalCount) break;

      pageNum++;
      await new Promise(r => setTimeout(r, 200)); // Polite delay
    }

    return { products: allProducts, totalCount };
  }, platformId);

  await browser.close();

  // STEP 3: Save payload
  const payload = {
    dispensaryId,
    platformId,
    cName,
    fetchedAt: new Date().toISOString(),
    productCount: result.products.length,
    products: result.products,
  };

  fs.writeFileSync(outputPath, JSON.stringify(payload, null, 2));
  return payload;
}
```
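
A minimal invocation of `capturePayload` might look like this (the IDs and output path are illustrative, taken from the examples elsewhere in this guide):

```typescript
// Illustrative values; real IDs come from the dispensaries table.
capturePayload({
  dispensaryId: 123,
  platformId: '6405ef617056e8014d79101b',
  cName: 'AZ-Deeply-Rooted',
  outputPath: '/tmp/az-deeply-rooted.json',
}).then((payload) => console.log(`Captured ${payload.productCount} products`));
```
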
---

## Critical Parameters

### GraphQL Hash (FilteredProducts)

```
ee29c060826dc41c527e470e9ae502c9b2c169720faa0a9f5d25e1b9a530a4a0
```

**WARNING:** Using the wrong hash returns HTTP 400.

### Status Parameter

| Value | Result |
|-------|--------|
| `'Active'` | Returns in-stock products (1019 in test) |
| `null` | Returns 0 products |
| `'All'` | Returns HTTP 400 |

**ALWAYS use `Status: 'Active'`.**

### Required Headers

```javascript
{
  'Accept': 'application/json',
  'content-type': 'application/json',
  'x-dutchie-session': 'unique-session-id',
  'apollographql-client-name': 'Marketplace (production)',
}
```

### Endpoint

```
https://dutchie.com/api-3/graphql
```

---

## Performance Benchmarks

Test store: AZ-Deeply-Rooted (1019 products)

| Metric | Value |
|--------|-------|
| Total products | 1019 |
| Time | 18.5 seconds |
| Payload size | 11.8 MB |
| Pages fetched | 11 (100 per page) |
| Success rate | 100% |

---

## Payload Format

The output matches the existing `payload-fetch.ts` handler format:

```json
{
  "dispensaryId": 123,
  "platformId": "6405ef617056e8014d79101b",
  "cName": "AZ-Deeply-Rooted",
  "fetchedAt": "2025-12-12T05:05:19.837Z",
  "productCount": 1019,
  "products": [
    {
      "id": "6927508db4851262f629a869",
      "Name": "Product Name",
      "brand": { "name": "Brand Name", ... },
      "type": "Flower",
      "THC": "25%",
      "Prices": [...],
      "Options": [...],
      ...
    }
  ]
}
```

---

## Integration Points

### As a Task Handler

The organic approach can be integrated as an alternative to curl-based fetching:

```typescript
// In src/tasks/handlers/organic-payload-fetch.ts (sketch)
export async function handleOrganicPayloadFetch(ctx: TaskContext): Promise<TaskResult> {
  // Use puppeteer-based capture
  // Save to the same payload storage
  // Queue a product_refresh task
}
```

### Worker Configuration

Add to `job_schedules`:

```sql
INSERT INTO job_schedules (name, role, cron_expression)
VALUES ('organic_product_crawl', 'organic_payload_fetch', '0 */6 * * *');
```

---

## Troubleshooting

### HTTP 400 Bad Request

- Check the hash is correct: `ee29c060...`
- Verify Status is `'Active'` (string, not null)

### 0 Products Returned

- Status was likely `null` or `'All'` - use `'Active'`
- Check platformId is a valid MongoDB ObjectId

### Session Not Established

- Increase the timeout on the initial `page.goto()`
- Check cName is valid (matches the embedded-menu URL)

### Detection/Blocking

- StealthPlugin should handle most cases
- Add random delays between pages
- Use `headless: 'new'` (not true/false)

---

## Files Reference

| File | Purpose |
|------|---------|
| `backend/test-intercept.js` | Proof of concept script |
| `backend/src/platforms/dutchie/client.ts` | GraphQL hashes, curl implementation |
| `backend/src/tasks/handlers/payload-fetch.ts` | Current curl-based handler |
| `backend/src/utils/payload-storage.ts` | Payload save/load utilities |

---

## See Also

- `DUTCHIE_CRAWL_WORKFLOW.md` - Full crawl pipeline documentation
- `TASK_WORKFLOW_2024-12-10.md` - Task system architecture
- `CLAUDE.md` - Project rules and constraints

25 backend/docs/_archive/README.md (Normal file)
@@ -0,0 +1,25 @@

# ARCHIVED DOCUMENTATION

**WARNING: These docs may be outdated or inaccurate.**

The code has evolved significantly. These docs are kept for historical reference only.

## What to Use Instead

**The single source of truth is:**

- `CLAUDE.md` (root) - Essential rules and quick reference
- `docs/CODEBASE_MAP.md` - Current file/directory reference

## Why Archive?

These docs were written during development iterations and may reference:

- Old file paths that no longer exist
- Deprecated approaches (hydration, scraper-v2)
- APIs that have changed
- Database schemas that evolved

## If You Need Details

1. First check CODEBASE_MAP.md for current file locations
2. Then read the actual source code
3. Only use archive docs as a last resort for historical context

584 backend/docs/_archive/TASK_WORKFLOW_2024-12-10.md (Normal file)
@@ -0,0 +1,584 @@

# Task Workflow Documentation

**Date: 2024-12-10**

This document describes the complete task/job processing architecture after the 2024-12-10 rewrite.

---

## Complete Architecture

```
┌──────────────────────────────────────────────────────────────────────────────────┐
│                               KUBERNETES CLUSTER                                  │
├──────────────────────────────────────────────────────────────────────────────────┤
│                                                                                   │
│   ┌──────────────────────────────────────────────────────────────────────────┐   │
│   │                         API SERVER POD (scraper)                          │   │
│   │                                                                           │   │
│   │   ┌──────────────────┐      ┌────────────────────────────────────────┐   │   │
│   │   │   Express API    │      │             TaskScheduler              │   │   │
│   │   │                  │      │   (src/services/task-scheduler.ts)     │   │   │
│   │   │  /api/job-queue  │      │                                        │   │   │
│   │   │  /api/tasks      │      │   • Polls every 60s                    │   │   │
│   │   │  /api/schedules  │      │   • Checks task_schedules table        │   │   │
│   │   └────────┬─────────┘      │   • SELECT FOR UPDATE SKIP LOCKED      │   │   │
│   │            │                │   • Generates tasks when due           │   │   │
│   │            │                └──────────────────┬─────────────────────┘   │   │
│   │            │                                   │                         │   │
│   └────────────┼───────────────────────────────────┼─────────────────────────┘   │
│                │                                   │                             │
│                │           ┌───────────────────────┘                             │
│                │           │                                                     │
│                ▼           ▼                                                     │
│   ┌──────────────────────────────────────────────────────────────────────────┐  │
│   │                          POSTGRESQL DATABASE                              │  │
│   │                                                                           │  │
│   │   ┌─────────────────────┐          ┌─────────────────────┐                │  │
│   │   │   task_schedules    │          │    worker_tasks     │                │  │
│   │   │                     │          │                     │                │  │
│   │   │ • product_refresh   │─────────►│ • pending tasks     │                │  │
│   │   │ • store_discovery   │  create  │ • claimed tasks     │                │  │
│   │   │ • analytics_refresh │  tasks   │ • running tasks     │                │  │
│   │   │                     │          │ • completed tasks   │                │  │
│   │   │ next_run_at         │          │                     │                │  │
│   │   │ last_run_at         │          │ role, dispensary_id │                │  │
│   │   │ interval_hours      │          │ priority, status    │                │  │
│   │   └─────────────────────┘          └──────────┬──────────┘                │  │
│   │                                               │                           │  │
│   └───────────────────────────────────────────────┼───────────────────────────┘  │
│                                                   │                              │
│                            ┌──────────────────────┘                              │
│                            │  Workers poll for tasks                             │
│                            │  (SELECT FOR UPDATE SKIP LOCKED)                    │
│                            ▼                                                     │
│   ┌──────────────────────────────────────────────────────────────────────────┐  │
│   │                WORKER PODS (StatefulSet: scraper-worker)                  │  │
│   │                                                                           │  │
│   │   ┌─────────────┐  ┌─────────────┐  ┌─────────────┐  ┌─────────────┐      │  │
│   │   │  Worker 0   │  │  Worker 1   │  │  Worker 2   │  │  Worker N   │      │  │
│   │   │             │  │             │  │             │  │             │      │  │
│   │   │ task-worker │  │ task-worker │  │ task-worker │  │ task-worker │      │  │
│   │   │     .ts     │  │     .ts     │  │     .ts     │  │     .ts     │      │  │
│   │   └─────────────┘  └─────────────┘  └─────────────┘  └─────────────┘      │  │
│   │                                                                           │  │
│   └───────────────────────────────────────────────────────────────────────────┘  │
│                                                                                   │
└───────────────────────────────────────────────────────────────────────────────────┘
```

---

## Startup Sequence

```
┌─────────────────────────────────────────────────────────────────────────────┐
│                             API SERVER STARTUP                              │
├─────────────────────────────────────────────────────────────────────────────┤
│                                                                             │
│  1. Express app initializes                                                 │
│        │                                                                    │
│        ▼                                                                    │
│  2. runAutoMigrations()                                                     │
│     • Runs pending migrations (including 079_task_schedules.sql)            │
│        │                                                                    │
│        ▼                                                                    │
│  3. initializeMinio() / initializeImageStorage()                            │
│        │                                                                    │
│        ▼                                                                    │
│  4. cleanupOrphanedJobs()                                                   │
│        │                                                                    │
│        ▼                                                                    │
│  5. taskScheduler.start()  ◄─── NEW (per TASK_WORKFLOW_2024-12-10.md)       │
│        │                                                                    │
│        ├── Recover stale tasks (workers that died)                          │
│        ├── Ensure default schedules exist in task_schedules                 │
│        ├── Check and run any due schedules immediately                      │
│        └── Start 60-second poll interval                                    │
│        │                                                                    │
│        ▼                                                                    │
│  6. app.listen(PORT)                                                        │
│                                                                             │
└─────────────────────────────────────────────────────────────────────────────┘

┌─────────────────────────────────────────────────────────────────────────────┐
│                             WORKER POD STARTUP                              │
├─────────────────────────────────────────────────────────────────────────────┤
│                                                                             │
│  1. K8s starts pod from StatefulSet                                         │
│        │                                                                    │
│        ▼                                                                    │
│  2. TaskWorker.constructor()                                                │
│     • Create DB pool                                                        │
│     • Create CrawlRotator                                                   │
│        │                                                                    │
│        ▼                                                                    │
│  3. initializeStealth()                                                     │
│     • Load proxies from DB (REQUIRED - fails if none)                       │
│     • Wire rotator to Dutchie client                                        │
│        │                                                                    │
│        ▼                                                                    │
│  4. register() with API                                                     │
│     • Optional - continues if fails                                         │
│        │                                                                    │
│        ▼                                                                    │
│  5. startRegistryHeartbeat() every 30s                                      │
│        │                                                                    │
│        ▼                                                                    │
│  6. processNextTask() loop                                                  │
│        │                                                                    │
│        ├── Poll for pending task (FOR UPDATE SKIP LOCKED)                   │
│        ├── Claim task atomically                                            │
│        ├── Execute handler (product_refresh, store_discovery, etc.)         │
│        ├── Mark complete/failed                                             │
│        ├── Chain next task if applicable                                    │
│        └── Loop                                                             │
│                                                                             │
└─────────────────────────────────────────────────────────────────────────────┘

---

## Schedule Flow

```
┌─────────────────────────────────────────────────────────────────────────────┐
│                       SCHEDULER POLL (every 60 seconds)                     │
├─────────────────────────────────────────────────────────────────────────────┤
│                                                                             │
│  BEGIN TRANSACTION                                                          │
│        │                                                                    │
│        ▼                                                                    │
│  SELECT * FROM task_schedules                                               │
│  WHERE enabled = true AND next_run_at <= NOW()                              │
│  FOR UPDATE SKIP LOCKED  ◄─── Prevents duplicate execution across replicas  │
│        │                                                                    │
│        ▼                                                                    │
│  For each due schedule:                                                     │
│        │                                                                    │
│        ├── product_refresh_all                                              │
│        │     └─► Query dispensaries needing crawl                           │
│        │          └─► Create product_refresh tasks in worker_tasks          │
│        │                                                                    │
│        ├── store_discovery_dutchie                                          │
│        │     └─► Create single store_discovery task                         │
│        │                                                                    │
│        └── analytics_refresh                                                │
│              └─► Create single analytics_refresh task                       │
│        │                                                                    │
│        ▼                                                                    │
│  UPDATE task_schedules SET                                                  │
│    last_run_at = NOW(),                                                     │
│    next_run_at = NOW() + interval_hours                                     │
│        │                                                                    │
│        ▼                                                                    │
│  COMMIT                                                                     │
│                                                                             │
└─────────────────────────────────────────────────────────────────────────────┘
```
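
In code, one poll iteration reduces to roughly the following. `generateTasksForSchedule` is an assumed stand-in for the per-role task generation in the flow above; the real implementation lives in `src/services/task-scheduler.ts`:

```typescript
import { Pool } from 'pg';

const db = new Pool();

// Assumed helper standing in for the per-role branches in the flow above.
declare function generateTasksForSchedule(client: unknown, schedule: any): Promise<number>;

async function pollSchedules(): Promise<void> {
  const client = await db.connect();
  try {
    await client.query('BEGIN');
    // FOR UPDATE SKIP LOCKED: a replica polling at the same moment simply
    // skips the rows we hold, so each due schedule runs exactly once.
    const { rows: due } = await client.query(
      `SELECT * FROM task_schedules
        WHERE enabled = true AND next_run_at <= NOW()
        FOR UPDATE SKIP LOCKED`,
    );
    for (const schedule of due) {
      const count = await generateTasksForSchedule(client, schedule);
      await client.query(
        `UPDATE task_schedules
            SET last_run_at = NOW(),
                next_run_at = NOW() + make_interval(hours => interval_hours),
                last_task_count = $2
          WHERE id = $1`,
        [schedule.id, count],
      );
    }
    await client.query('COMMIT');
  } catch (err) {
    await client.query('ROLLBACK');
    throw err;
  } finally {
    client.release();
  }
}
```
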
---

## Task Lifecycle

```
┌──────────┐
│ SCHEDULE │
│   DUE    │
└────┬─────┘
     │
     ▼
┌──────────────┐    claim    ┌──────────────┐    start    ┌──────────────┐
│   PENDING    │────────────►│   CLAIMED    │────────────►│   RUNNING    │
└──────────────┘             └──────────────┘             └──────┬───────┘
     ▲                                                           │
     │                                            ┌──────────────┼──────────────┐
     │ retry                                      │              │              │
     │ (if retries < max)                         ▼              ▼              ▼
     │                                      ┌──────────┐  ┌──────────┐  ┌──────────┐
     └──────────────────────────────────────│  FAILED  │  │ COMPLETED│  │  STALE   │
                                            └──────────┘  └──────────┘  └────┬─────┘
                                                                             │
                                                                 recover_stale_tasks()
                                                                             │
                                                                             ▼
                                                                       ┌──────────┐
                                                                       │ PENDING  │
                                                                       └──────────┘
```

---

## Database Tables

### task_schedules (NEW - migration 079)

Stores schedule definitions. Survives restarts.

```sql
CREATE TABLE task_schedules (
  id SERIAL PRIMARY KEY,
  name VARCHAR(100) NOT NULL UNIQUE,
  role VARCHAR(50) NOT NULL,        -- product_refresh, store_discovery, etc.
  enabled BOOLEAN DEFAULT TRUE,
  interval_hours INTEGER NOT NULL,  -- How often to run
  priority INTEGER DEFAULT 0,       -- Task priority when created
  state_code VARCHAR(2),            -- Optional filter
  last_run_at TIMESTAMPTZ,          -- When it last ran
  next_run_at TIMESTAMPTZ,          -- When it's due next
  last_task_count INTEGER,          -- Tasks created last run
  last_error TEXT                   -- Error message if failed
);
```

### worker_tasks (migration 074)

The task queue. Workers pull from here.

```sql
CREATE TABLE worker_tasks (
  id SERIAL PRIMARY KEY,
  role task_role NOT NULL,          -- What type of work
  dispensary_id INTEGER,            -- Which store (if applicable)
  platform VARCHAR(50),             -- Which platform
  status task_status DEFAULT 'pending',
  priority INTEGER DEFAULT 0,       -- Higher = process first
  scheduled_for TIMESTAMP,          -- Don't process before this time
  worker_id VARCHAR(100),           -- Which worker claimed it
  claimed_at TIMESTAMP,
  started_at TIMESTAMP,
  completed_at TIMESTAMP,
  last_heartbeat_at TIMESTAMP,      -- For stale detection
  result JSONB,
  error_message TEXT,
  retry_count INTEGER DEFAULT 0,
  max_retries INTEGER DEFAULT 3
);
```

---

## Default Schedules

| Name | Role | Interval | Priority | Description |
|------|------|----------|----------|-------------|
| `payload_fetch_all` | payload_fetch | 4 hours | 0 | Fetch payloads from Dutchie API (chains to product_refresh) |
| `store_discovery_dutchie` | store_discovery | 24 hours | 5 | Find new Dutchie stores |
| `analytics_refresh` | analytics_refresh | 6 hours | 0 | Refresh MVs |

---

## Task Roles

| Role | Description | Creates Tasks For |
|------|-------------|-------------------|
| `payload_fetch` | **NEW** - Fetch from Dutchie API, save to disk | Each dispensary needing crawl |
| `product_refresh` | **CHANGED** - Read local payload, normalize, upsert to DB | Chained from payload_fetch |
| `store_discovery` | Find new dispensaries, returns newStoreIds[] | Single task per platform |
| `entry_point_discovery` | **DEPRECATED** - Resolve platform IDs | No longer used |
| `product_discovery` | Initial product fetch for new stores | Chained from store_discovery |
| `analytics_refresh` | Refresh MVs | Single global task |

### Payload/Refresh Separation (2024-12-10)

The crawl workflow is now split into two phases (a sketch of the save step follows the benefits list):

```
payload_fetch (scheduled every 4h)
  └─► Hit Dutchie GraphQL API
       └─► Save raw JSON to /storage/payloads/{year}/{month}/{day}/store_{id}_{ts}.json.gz
            └─► Record metadata in raw_crawl_payloads table
                 └─► Queue product_refresh task with payload_id

product_refresh (chained from payload_fetch)
  └─► Load payload from filesystem (NOT from API)
       └─► Normalize via DutchieNormalizer
            └─► Upsert to store_products
                 └─► Create snapshots
                      └─► Track missing products
                           └─► Download images
```

**Benefits:**

- **Retry-friendly**: If normalize fails, re-run product_refresh without re-crawling
- **Replay-able**: Run product_refresh against any historical payload
- **Faster refreshes**: Local file read vs network call
- **Historical diffs**: Compare payloads to see what changed between crawls
- **Less API pressure**: Only payload_fetch hits Dutchie
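
A sketch of the save step, following the path convention in the flow above; the real helpers live in `src/utils/payload-storage.ts` and may differ in detail:

```typescript
import { promises as fs } from 'fs';
import path from 'path';
import { gzipSync } from 'zlib';

async function savePayload(dispensaryId: number, payload: unknown): Promise<string> {
  const now = new Date();
  const dir = path.join(
    '/storage/payloads',
    String(now.getUTCFullYear()),
    String(now.getUTCMonth() + 1).padStart(2, '0'),
    String(now.getUTCDate()).padStart(2, '0'),
  );
  await fs.mkdir(dir, { recursive: true });
  const file = path.join(dir, `store_${dispensaryId}_${now.getTime()}.json.gz`);
  await fs.writeFile(file, gzipSync(JSON.stringify(payload)));
  return file; // recorded in raw_crawl_payloads; product_refresh is queued with payload_id
}
```
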
---

## Task Chaining

Tasks automatically queue follow-up tasks upon successful completion. This creates two main flows:

### Discovery Flow (New Stores)

When `store_discovery` finds new dispensaries, they automatically get their initial product data:

```
store_discovery
  └─► Discovers new locations via Dutchie GraphQL
       └─► Auto-promotes valid locations to dispensaries table
            └─► Collects newDispensaryIds[] from promotions
                 └─► Returns { newStoreIds: [...] } in result

chainNextTask() detects newStoreIds
  └─► Creates product_discovery task for each new store

product_discovery
  └─► Calls handlePayloadFetch() internally
       └─► payload_fetch hits Dutchie API
            └─► Saves raw JSON to /storage/payloads/
                 └─► Queues product_refresh task with payload_id

product_refresh
  └─► Loads payload from filesystem
       └─► Normalizes and upserts to store_products
            └─► Creates snapshots, downloads images
```

**Complete Discovery Chain:**

```
store_discovery → product_discovery → payload_fetch → product_refresh
                                      (internal call)  (queues next)
```

### Scheduled Flow (Existing Stores)

For existing stores, the `payload_fetch_all` schedule runs every 4 hours:

```
TaskScheduler (every 60s)
  └─► Checks task_schedules for due schedules
       └─► payload_fetch_all is due
            └─► Generates payload_fetch task for each dispensary

payload_fetch
  └─► Hits Dutchie GraphQL API
       └─► Saves raw JSON to /storage/payloads/
            └─► Queues product_refresh task with payload_id

product_refresh
  └─► Loads payload from filesystem (NOT API)
       └─► Normalizes via DutchieNormalizer
            └─► Upserts to store_products
                 └─► Creates snapshots
```

**Complete Scheduled Chain:**

```
payload_fetch → product_refresh
   (queues)       (reads local)
```

### Chaining Implementation

Task chaining is handled in three places:

1. **Internal chaining (handler calls handler):**
   - `product_discovery` calls `handlePayloadFetch()` directly

2. **External chaining (chainNextTask() in task-service.ts):**
   - Called after task completion
   - `store_discovery` → queues `product_discovery` for each newStoreId

3. **Queue-based chaining (taskService.createTask):**
   - `payload_fetch` queues `product_refresh` with `payload: { payload_id }`

---

## Payload API Endpoints

Raw crawl payloads can be accessed via the Payloads API:

| Endpoint | Method | Description |
|----------|--------|-------------|
| `/api/payloads` | GET | List payload metadata (paginated) |
| `/api/payloads/:id` | GET | Get payload metadata by ID |
| `/api/payloads/:id/data` | GET | Get full payload JSON (decompressed) |
| `/api/payloads/store/:dispensaryId` | GET | List payloads for a store |
| `/api/payloads/store/:dispensaryId/latest` | GET | Get latest payload for a store |
| `/api/payloads/store/:dispensaryId/diff` | GET | Diff two payloads for changes |

### Payload Diff Response

The diff endpoint returns:

```json
{
  "success": true,
  "from": { "id": 123, "fetchedAt": "...", "productCount": 100 },
  "to": { "id": 456, "fetchedAt": "...", "productCount": 105 },
  "diff": {
    "added": 10,
    "removed": 5,
    "priceChanges": 8,
    "stockChanges": 12
  },
  "details": {
    "added": [...],
    "removed": [...],
    "priceChanges": [...],
    "stockChanges": [...]
  }
}
```

---

## API Endpoints

### Schedules (NEW)

| Endpoint | Method | Description |
|----------|--------|-------------|
| `/api/schedules` | GET | List all schedules |
| `/api/schedules/:id` | PUT | Update schedule |
| `/api/schedules/:id/trigger` | POST | Run schedule immediately |

### Task Creation (rewired 2024-12-10)

| Endpoint | Method | Description |
|----------|--------|-------------|
| `/api/job-queue/enqueue` | POST | Create single task |
| `/api/job-queue/enqueue-batch` | POST | Create batch tasks |
| `/api/job-queue/enqueue-state` | POST | Create tasks for a state |
| `/api/tasks` | POST | Direct task creation |

### Task Management

| Endpoint | Method | Description |
|----------|--------|-------------|
| `/api/tasks` | GET | List tasks |
| `/api/tasks/:id` | GET | Get single task |
| `/api/tasks/counts` | GET | Task counts by status |
| `/api/tasks/recover-stale` | POST | Recover stale tasks |

---

## Key Files

| File | Purpose |
|------|---------|
| `src/services/task-scheduler.ts` | **NEW** - DB-driven scheduler |
| `src/tasks/task-worker.ts` | Worker that processes tasks |
| `src/tasks/task-service.ts` | Task CRUD operations |
| `src/tasks/handlers/payload-fetch.ts` | **NEW** - Fetches from API, saves to disk |
| `src/tasks/handlers/product-refresh.ts` | **CHANGED** - Reads from disk, processes to DB |
| `src/utils/payload-storage.ts` | **NEW** - Payload save/load utilities |
| `src/routes/tasks.ts` | Task API endpoints |
| `src/routes/job-queue.ts` | Job Queue UI endpoints (rewired) |
| `migrations/079_task_schedules.sql` | Schedule table |
| `migrations/080_raw_crawl_payloads.sql` | Payload metadata table |
| `migrations/081_payload_fetch_columns.sql` | payload, last_fetch_at columns |
| `migrations/074_worker_task_queue.sql` | Task queue table |

---

## Legacy Code (DEPRECATED)

| File | Status | Replacement |
|------|--------|-------------|
| `src/services/scheduler.ts` | DEPRECATED | `task-scheduler.ts` |
| `dispensary_crawl_jobs` table | ORPHANED | `worker_tasks` |
| `job_schedules` table | LEGACY | `task_schedules` |

---

## Dashboard Integration

Both pages remain wired to the dashboard:

| Page | Data Source | Actions |
|------|-------------|---------|
| **Job Queue** | `worker_tasks`, `task_schedules` | Create tasks, view schedules |
| **Task Queue** | `worker_tasks` | View tasks, recover stale |

---

## Multi-Replica Safety

The scheduler uses `SELECT FOR UPDATE SKIP LOCKED` to ensure:

1. **Only one replica** executes a schedule at a time
2. **No duplicate tasks** created
3. **Survives pod restarts** - state in DB, not memory
4. **Self-healing** - recovers stale tasks on startup

```sql
-- This query is atomic across all API server replicas
SELECT * FROM task_schedules
WHERE enabled = true AND next_run_at <= NOW()
FOR UPDATE SKIP LOCKED
```

---

## Worker Scaling (K8s)

Workers run as a StatefulSet in Kubernetes. You can scale from the admin UI or CLI.

### From Admin UI

The Workers page (`/admin/workers`) provides:

- Current replica count display
- Scale up/down buttons
- Target replica input

### API Endpoints

| Endpoint | Method | Description |
|----------|--------|-------------|
| `/api/workers/k8s/replicas` | GET | Get current/desired replica counts |
| `/api/workers/k8s/scale` | POST | Scale to N replicas (body: `{ replicas: N }`) |

### From CLI

```bash
# View current replicas
kubectl get statefulset scraper-worker -n dispensary-scraper

# Scale to 10 workers
kubectl scale statefulset scraper-worker -n dispensary-scraper --replicas=10

# Scale down to 3 workers
kubectl scale statefulset scraper-worker -n dispensary-scraper --replicas=3
```

### Configuration

Environment variables for the API server:

| Variable | Default | Description |
|----------|---------|-------------|
| `K8S_NAMESPACE` | `dispensary-scraper` | Kubernetes namespace |
| `K8S_WORKER_STATEFULSET` | `scraper-worker` | StatefulSet name |

### RBAC Requirements

The API server pod needs these K8s permissions:

```yaml
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: worker-scaler
  namespace: dispensary-scraper
rules:
  - apiGroups: ["apps"]
    resources: ["statefulsets"]
    verbs: ["get", "patch"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: scraper-worker-scaler
  namespace: dispensary-scraper
subjects:
  - kind: ServiceAccount
    name: default
    namespace: dispensary-scraper
roleRef:
  kind: Role
  name: worker-scaler
  apiGroup: rbac.authorization.k8s.io
```

542 backend/docs/_archive/WORKER_TASK_ARCHITECTURE.md (Normal file)
@@ -0,0 +1,542 @@

# Worker Task Architecture

This document describes the unified task-based worker system that replaces the legacy fragmented job systems.

## Overview

The task worker architecture provides a single, unified system for managing all background work in CannaiQ:

- **Store discovery** - Find new dispensaries on platforms
- **Entry point discovery** - Resolve platform IDs from menu URLs
- **Product discovery** - Initial product fetch for new stores
- **Product resync** - Regular price/stock updates for existing stores
- **Analytics refresh** - Refresh materialized views and analytics

## Architecture

### Database Tables

**`worker_tasks`** - Central task queue

```sql
CREATE TABLE worker_tasks (
  id SERIAL PRIMARY KEY,
  role task_role NOT NULL,          -- What type of work
  dispensary_id INTEGER,            -- Which store (if applicable)
  platform VARCHAR(50),             -- Which platform (dutchie, etc.)
  status task_status DEFAULT 'pending',
  priority INTEGER DEFAULT 0,       -- Higher = process first
  scheduled_for TIMESTAMP,          -- Don't process before this time
  worker_id VARCHAR(100),           -- Which worker claimed it
  claimed_at TIMESTAMP,
  started_at TIMESTAMP,
  completed_at TIMESTAMP,
  last_heartbeat_at TIMESTAMP,      -- For stale detection
  result JSONB,                     -- Output from handler
  error_message TEXT,
  retry_count INTEGER DEFAULT 0,
  max_retries INTEGER DEFAULT 3,
  created_at TIMESTAMP DEFAULT NOW(),
  updated_at TIMESTAMP DEFAULT NOW()
);
```

**Key indexes:**

- `idx_worker_tasks_pending_priority` - For efficient task claiming
- `idx_worker_tasks_active_dispensary` - Prevents concurrent tasks per store (partial unique index)

### Task Roles

| Role | Purpose | Per-Store | Scheduled |
|------|---------|-----------|-----------|
| `store_discovery` | Find new stores on a platform | No | Daily |
| `entry_point_discovery` | Resolve platform IDs | Yes | On-demand |
| `product_discovery` | Initial product fetch | Yes | After entry_point |
| `product_resync` | Price/stock updates | Yes | Every 4 hours |
| `analytics_refresh` | Refresh MVs | No | Daily |

### Task Lifecycle

```
pending → claimed → running → completed
                       ↓
                     failed
```

1. **pending** - Task is waiting to be picked up
2. **claimed** - Worker has claimed it (atomic via SELECT FOR UPDATE SKIP LOCKED)
3. **running** - Worker is actively processing
4. **completed** - Task finished successfully
5. **failed** - Task encountered an error
6. **stale** - Task lost its worker (recovered automatically; see the loop sketch after this list)
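
A simplified worker loop illustrating these transitions; every helper here is an assumed stand-in for the corresponding TaskService method, not the actual API:

```typescript
// Assumed stand-ins for TaskService methods.
declare function claimNextTask(workerId: string, role: string): Promise<{ id: number } | null>;
declare function markRunning(taskId: number): Promise<void>;
declare function runHandler(task: { id: number }): Promise<unknown>;
declare function markCompleted(taskId: number, result: unknown): Promise<void>;
declare function markFailed(taskId: number, err: unknown): Promise<void>;

async function workerLoop(workerId: string, role: string): Promise<void> {
  for (;;) {
    const task = await claimNextTask(workerId, role); // pending -> claimed (atomic)
    if (!task) {
      await new Promise((r) => setTimeout(r, 5000)); // POLL_INTERVAL_MS
      continue;
    }
    try {
      await markRunning(task.id);            // claimed -> running
      const result = await runHandler(task); // role-specific handler
      await markCompleted(task.id, result);  // running -> completed
    } catch (err) {
      await markFailed(task.id, err);        // running -> failed (retried if retry_count < max_retries)
    }
  }
}
```
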
## Files

### Core Files

| File | Purpose |
|------|---------|
| `src/tasks/task-service.ts` | TaskService - CRUD, claiming, capacity metrics |
| `src/tasks/task-worker.ts` | TaskWorker - Main worker loop |
| `src/tasks/index.ts` | Module exports |
| `src/routes/tasks.ts` | API endpoints |
| `migrations/074_worker_task_queue.sql` | Database schema |

### Task Handlers

| File | Role |
|------|------|
| `src/tasks/handlers/store-discovery.ts` | `store_discovery` |
| `src/tasks/handlers/entry-point-discovery.ts` | `entry_point_discovery` |
| `src/tasks/handlers/product-discovery.ts` | `product_discovery` |
| `src/tasks/handlers/product-resync.ts` | `product_resync` |
| `src/tasks/handlers/analytics-refresh.ts` | `analytics_refresh` |

## Running Workers

### Environment Variables

| Variable | Default | Description |
|----------|---------|-------------|
| `WORKER_ROLE` | (required) | Which task role to process |
| `WORKER_ID` | auto-generated | Custom worker identifier |
| `POLL_INTERVAL_MS` | 5000 | How often to check for tasks |
| `HEARTBEAT_INTERVAL_MS` | 30000 | How often to update heartbeat |

### Starting a Worker

```bash
# Start a product resync worker
WORKER_ROLE=product_resync npx tsx src/tasks/task-worker.ts

# Start with custom ID
WORKER_ROLE=product_resync WORKER_ID=resync-1 npx tsx src/tasks/task-worker.ts

# Start multiple workers for different roles
WORKER_ROLE=store_discovery npx tsx src/tasks/task-worker.ts &
WORKER_ROLE=product_resync npx tsx src/tasks/task-worker.ts &
```

### Kubernetes Deployment

```yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: task-worker-resync
spec:
  replicas: 3
  template:
    spec:
      containers:
        - name: worker
          image: code.cannabrands.app/creationshop/dispensary-scraper:latest
          command: ["npx", "tsx", "src/tasks/task-worker.ts"]
          env:
            - name: WORKER_ROLE
              value: "product_resync"
```

## API Endpoints

### Task Management

| Endpoint | Method | Description |
|----------|--------|-------------|
| `/api/tasks` | GET | List tasks with filters |
| `/api/tasks` | POST | Create a new task |
| `/api/tasks/:id` | GET | Get task by ID |
| `/api/tasks/counts` | GET | Get counts by status |
| `/api/tasks/capacity` | GET | Get capacity metrics |
| `/api/tasks/capacity/:role` | GET | Get role-specific capacity |
| `/api/tasks/recover-stale` | POST | Recover tasks from dead workers |

### Task Generation

| Endpoint | Method | Description |
|----------|--------|-------------|
| `/api/tasks/generate/resync` | POST | Generate daily resync tasks |
| `/api/tasks/generate/discovery` | POST | Create store discovery task |

### Migration (from legacy systems)

| Endpoint | Method | Description |
|----------|--------|-------------|
| `/api/tasks/migration/status` | GET | Compare old vs new systems |
| `/api/tasks/migration/disable-old-schedules` | POST | Disable job_schedules |
| `/api/tasks/migration/cancel-pending-crawl-jobs` | POST | Cancel old crawl jobs |
| `/api/tasks/migration/create-resync-tasks` | POST | Create tasks for all stores |
| `/api/tasks/migration/full-migrate` | POST | One-click migration |

### Role-Specific Endpoints

| Endpoint | Method | Description |
|----------|--------|-------------|
| `/api/tasks/role/:role/last-completion` | GET | Last completion time |
| `/api/tasks/role/:role/recent` | GET | Recent completions |
| `/api/tasks/store/:id/active` | GET | Check if store has active task |

## Capacity Planning
|
||||||
|
|
||||||
|
The `v_worker_capacity` view provides real-time metrics:
|
||||||
|
|
||||||
|
```sql
|
||||||
|
SELECT * FROM v_worker_capacity;
|
||||||
|
```
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
- `pending_tasks` - Tasks waiting to be claimed
|
||||||
|
- `ready_tasks` - Tasks ready now (scheduled_for is null or past)
|
||||||
|
- `claimed_tasks` - Tasks claimed but not started
|
||||||
|
- `running_tasks` - Tasks actively processing
|
||||||
|
- `completed_last_hour` - Recent completions
|
||||||
|
- `failed_last_hour` - Recent failures
|
||||||
|
- `active_workers` - Workers with recent heartbeats
|
||||||
|
- `avg_duration_sec` - Average task duration
|
||||||
|
- `tasks_per_worker_hour` - Throughput estimate
|
||||||
|
- `estimated_hours_to_drain` - Time to clear queue
|
||||||
|
|
||||||
|
### Scaling Recommendations
|
||||||
|
|
||||||
|
```javascript
|
||||||
|
// API: GET /api/tasks/capacity/:role
|
||||||
|
{
|
||||||
|
"role": "product_resync",
|
||||||
|
"pending_tasks": 500,
|
||||||
|
"active_workers": 3,
|
||||||
|
"workers_needed": {
|
||||||
|
"for_1_hour": 10,
|
||||||
|
"for_4_hours": 3,
|
||||||
|
"for_8_hours": 2
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Task Chaining

Tasks can automatically create follow-up tasks:

```
store_discovery → entry_point_discovery → product_discovery
                            ↓
           (store has platform_dispensary_id)
                            ↓
                   Daily resync tasks
```

The `chainNextTask()` method handles this automatically.

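A sketch of what that chaining step might look like, assuming a simple role-to-next-role map (the import path and exact signature are assumptions; the real `chainNextTask()` may attach extra payload and check conditions such as `platform_dispensary_id`):

```typescript
import { taskService } from '../services/task-service'; // hypothetical path

// Illustrative role -> follow-up role map
const NEXT_ROLE: Record<string, string | undefined> = {
  store_discovery: 'entry_point_discovery',
  entry_point_discovery: 'product_discovery',
};

async function chainNextTask(done: { role: string; dispensary_id?: number }): Promise<void> {
  const nextRole = NEXT_ROLE[done.role];
  if (!nextRole) return; // terminal role - nothing to chain
  await taskService.createTask({
    role: nextRole,
    dispensary_id: done.dispensary_id,
  });
}
```
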
## Stale Task Recovery

Tasks are considered stale if `last_heartbeat_at` is older than the threshold (default 10 minutes).

```sql
SELECT recover_stale_tasks(10); -- 10 minute threshold
```

Or via API:

```bash
curl -X POST /api/tasks/recover-stale \
  -H 'Content-Type: application/json' \
  -d '{"threshold_minutes": 10}'
```

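Conceptually, recovery just returns stale claims to the queue. A sketch of the idea (illustrative only; the actual `recover_stale_tasks()` function body is not shown here and may also bump retry counts and return a recovered-row count):

```sql
-- Illustrative sketch, not the actual function body
UPDATE worker_tasks
SET status = 'pending', worker_id = NULL
WHERE status IN ('claimed', 'running')
  AND last_heartbeat_at < NOW() - INTERVAL '10 minutes';
```
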
## Migration from Legacy Systems

### Legacy Systems Replaced

1. **job_schedules + job_run_logs** - Scheduled job definitions
2. **dispensary_crawl_jobs** - Per-dispensary crawl queue
3. **SyncOrchestrator + HydrationWorker** - Raw payload processing

### Migration Steps

**Option 1: One-Click Migration**

```bash
curl -X POST /api/tasks/migration/full-migrate
```

This will:

1. Disable all job_schedules
2. Cancel pending dispensary_crawl_jobs
3. Generate resync tasks for all stores
4. Create discovery and analytics tasks

**Option 2: Manual Migration**

```bash
# 1. Check current status
curl /api/tasks/migration/status

# 2. Disable old schedules
curl -X POST /api/tasks/migration/disable-old-schedules

# 3. Cancel pending crawl jobs
curl -X POST /api/tasks/migration/cancel-pending-crawl-jobs

# 4. Create resync tasks
curl -X POST /api/tasks/migration/create-resync-tasks \
  -H 'Content-Type: application/json' \
  -d '{"state_code": "AZ"}'

# 5. Generate daily resync schedule
curl -X POST /api/tasks/generate/resync \
  -H 'Content-Type: application/json' \
  -d '{"batches_per_day": 6}'
```

## Per-Store Locking

The system prevents concurrent tasks for the same store using a partial unique index:

```sql
CREATE UNIQUE INDEX idx_worker_tasks_active_dispensary
  ON worker_tasks (dispensary_id)
  WHERE dispensary_id IS NOT NULL
    AND status IN ('claimed', 'running');
```

This ensures only one task can be active per store at any time.

## Task Priority

Tasks are claimed in priority order (higher first), then by creation time:

```sql
ORDER BY priority DESC, created_at ASC
```

Default priorities:

- `store_discovery`: 0
- `entry_point_discovery`: 10 (high - new stores)
- `product_discovery`: 10 (high - new stores)
- `product_resync`: 0
- `analytics_refresh`: 0

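Together with the per-store lock above, a claim can be expressed as a single atomic statement. A sketch using the common `FOR UPDATE SKIP LOCKED` pattern (illustrative; the `claimed_at` column name is an assumption, and the real claim logic may differ):

```sql
-- Illustrative claim query; not necessarily the actual implementation
UPDATE worker_tasks
SET status = 'claimed', worker_id = $1, claimed_at = NOW()
WHERE id = (
  SELECT id FROM worker_tasks
  WHERE status = 'pending'
    AND (scheduled_for IS NULL OR scheduled_for <= NOW())
  ORDER BY priority DESC, created_at ASC
  LIMIT 1
  FOR UPDATE SKIP LOCKED
)
RETURNING *;
```
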
## Scheduled Tasks

Tasks can be scheduled for future execution:

```javascript
await taskService.createTask({
  role: 'product_resync',
  dispensary_id: 123,
  scheduled_for: new Date('2025-01-10T06:00:00Z'),
});
```

The `generate_resync_tasks()` function creates staggered tasks throughout the day:

```sql
SELECT generate_resync_tasks(6, '2025-01-10'); -- 6 batches = every 4 hours
```

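The staggering itself is simple arithmetic: with 6 batches per day, batch *n* lands `n × 4` hours into the day. A sketch of the offset math (illustrative only; the real function also distributes stores across the batches):

```sql
-- Illustrative: scheduled_for offsets for 6 batches on 2025-01-10
SELECT n AS batch_number,
       TIMESTAMP '2025-01-10 00:00:00' + (n * INTERVAL '4 hours') AS scheduled_for
FROM generate_series(0, 5) AS n;
```
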
## Dashboard Integration

The admin dashboard shows task queue status in the main overview:

```
Task Queue Summary
------------------
Pending:   45
Running:   3
Completed: 1,234
Failed:    12
```

Full task management is available at `/admin/tasks`.

## Error Handling

Failed tasks include the error message in `error_message` and can be retried:

```sql
-- View failed tasks
SELECT id, role, dispensary_id, error_message, retry_count
FROM worker_tasks
WHERE status = 'failed'
ORDER BY completed_at DESC
LIMIT 20;

-- Retry failed tasks
UPDATE worker_tasks
SET status = 'pending', retry_count = retry_count + 1
WHERE status = 'failed' AND retry_count < max_retries;
```

## Concurrent Task Processing (Added 2024-12)

Workers can now process multiple tasks concurrently within a single worker instance. This improves throughput by utilizing async I/O efficiently.

### Architecture

```
┌─────────────────────────────────────────────────────────────┐
│                         Pod (K8s)                           │
│                                                             │
│  ┌─────────────────────────────────────────────────────┐    │
│  │                     TaskWorker                      │    │
│  │                                                     │    │
│  │  ┌─────────┐  ┌─────────┐  ┌─────────┐              │    │
│  │  │ Task 1  │  │ Task 2  │  │ Task 3  │ (concurrent) │    │
│  │  └─────────┘  └─────────┘  └─────────┘              │    │
│  │                                                     │    │
│  │  Resource Monitor                                   │    │
│  │  ├── Memory: 65% (threshold: 85%)                   │    │
│  │  ├── CPU: 45% (threshold: 90%)                      │    │
│  │  └── Status: Normal                                 │    │
│  └─────────────────────────────────────────────────────┘    │
└─────────────────────────────────────────────────────────────┘
```

### Environment Variables

| Variable | Default | Description |
|----------|---------|-------------|
| `MAX_CONCURRENT_TASKS` | 3 | Maximum tasks a worker will run concurrently |
| `MEMORY_BACKOFF_THRESHOLD` | 0.85 | Back off when heap memory exceeds 85% |
| `CPU_BACKOFF_THRESHOLD` | 0.90 | Back off when CPU exceeds 90% |
| `BACKOFF_DURATION_MS` | 10000 | How long to wait when backing off (10s) |

### How It Works

1. **Main Loop**: The worker continuously tries to fill up to `MAX_CONCURRENT_TASKS` slots (see the sketch below)
2. **Resource Monitoring**: Before claiming a new task, the worker checks memory and CPU
3. **Backoff**: If resources exceed thresholds, the worker pauses and stops claiming new tasks
4. **Concurrent Execution**: Tasks run in parallel as independent `Promise`s - they don't block each other
5. **Graceful Shutdown**: On SIGTERM/decommission, the worker stops claiming but waits for active tasks

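A simplified sketch of that loop. The helper names stand in for the real implementations in `src/tasks/task-worker.ts`, and `POLL_INTERVAL_MS` is an assumed constant; the actual `mainLoop()` handles more edge cases:

```typescript
// Minimal sketch of the fill-to-capacity loop; claimNextTask(), runTask()
// and shouldBackOff() stand in for the real task-worker helpers.
interface Task { id: number; role: string; }

declare function claimNextTask(): Promise<Task | null>;
declare function runTask(task: Task): Promise<void>;
declare function shouldBackOff(): boolean;

const sleep = (ms: number) => new Promise<void>((r) => setTimeout(r, ms));

const MAX_CONCURRENT_TASKS = Number(process.env.MAX_CONCURRENT_TASKS ?? 3);
const POLL_INTERVAL_MS = 1000; // illustrative polling interval

let shuttingDown = false;
process.once('SIGTERM', () => { shuttingDown = true; });

const active = new Set<Promise<void>>();

async function mainLoop(): Promise<void> {
  while (!shuttingDown) {
    // Only claim when below capacity and resources are healthy
    if (active.size < MAX_CONCURRENT_TASKS && !shouldBackOff()) {
      const task = await claimNextTask(); // null when the queue is empty
      if (task) {
        const p: Promise<void> = runTask(task).finally(() => active.delete(p));
        active.add(p);
        continue; // immediately try to fill the next slot
      }
    }
    await sleep(POLL_INTERVAL_MS); // idle, at capacity, or backing off
  }
  // Graceful shutdown: stop claiming, wait for in-flight tasks to finish
  await Promise.allSettled(active);
}
```
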
### Resource Monitoring

```typescript
// ResourceStats interface
interface ResourceStats {
  memoryPercent: number;  // Current heap usage as decimal (0.0-1.0)
  memoryMb: number;       // Current heap used in MB
  memoryTotalMb: number;  // Total heap available in MB
  cpuPercent: number;     // CPU usage as percentage (0-100)
  isBackingOff: boolean;  // True if worker is in backoff state
  backoffReason: string;  // Why the worker is backing off
}
```

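Node exposes everything these fields need via `process.memoryUsage()` and `process.cpuUsage()`. A minimal sketch of how the stats could be derived (the real `getResourceStats()` in `task-worker.ts` may differ in detail):

```typescript
// Sketch: derive ResourceStats from Node's process APIs.
let lastCpu = process.cpuUsage();
let lastCpuAt = Date.now();

function getResourceStats(): ResourceStats {
  const mem = process.memoryUsage();

  // CPU percent = CPU time consumed since last sample / wall time elapsed
  const cpuDelta = process.cpuUsage(lastCpu); // microseconds since lastCpu
  const elapsedMs = Date.now() - lastCpuAt;
  const cpuPercent = elapsedMs > 0
    ? ((cpuDelta.user + cpuDelta.system) / 1000) / elapsedMs * 100
    : 0;
  lastCpu = process.cpuUsage();
  lastCpuAt = Date.now();

  return {
    memoryPercent: mem.heapUsed / mem.heapTotal,
    memoryMb: Math.round(mem.heapUsed / 1024 / 1024),
    memoryTotalMb: Math.round(mem.heapTotal / 1024 / 1024),
    cpuPercent,
    isBackingOff: false, // set by the backoff logic, not here
    backoffReason: '',
  };
}
```
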
### Heartbeat Data

Workers report the following in their heartbeat:

```json
{
  "worker_id": "worker-abc123",
  "current_task_id": 456,
  "current_task_ids": [456, 457, 458],
  "active_task_count": 3,
  "max_concurrent_tasks": 3,
  "status": "active",
  "resources": {
    "memory_mb": 256,
    "memory_total_mb": 512,
    "memory_rss_mb": 320,
    "memory_percent": 50,
    "cpu_user_ms": 12500,
    "cpu_system_ms": 3200,
    "cpu_percent": 45,
    "is_backing_off": false,
    "backoff_reason": null
  }
}
```

### Backoff Behavior

When resources exceed thresholds:

1. Worker logs the backoff reason:
   ```
   [TaskWorker] MyWorker backing off: Memory at 87.3% (threshold: 85%)
   ```
2. Worker stops claiming new tasks but continues existing tasks
3. After `BACKOFF_DURATION_MS`, worker rechecks resources
4. When resources return to normal:
   ```
   [TaskWorker] MyWorker resuming normal operation
   ```

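The threshold check itself is simple. A sketch of `shouldBackOff()`, under the assumption that it compares the stats above against the configured thresholds (the actual method lives at `src/tasks/task-worker.ts:184-196` and may differ):

```typescript
// Sketch of the threshold check that drives the backoff messages above.
const MEMORY_BACKOFF_THRESHOLD = Number(process.env.MEMORY_BACKOFF_THRESHOLD ?? 0.85);
const CPU_BACKOFF_THRESHOLD = Number(process.env.CPU_BACKOFF_THRESHOLD ?? 0.90);

function shouldBackOff(): boolean {
  const stats = getResourceStats();
  if (stats.memoryPercent > MEMORY_BACKOFF_THRESHOLD) {
    console.log(
      `[TaskWorker] backing off: Memory at ${(stats.memoryPercent * 100).toFixed(1)}% ` +
      `(threshold: ${MEMORY_BACKOFF_THRESHOLD * 100}%)`
    );
    return true;
  }
  if (stats.cpuPercent / 100 > CPU_BACKOFF_THRESHOLD) {
    console.log(`[TaskWorker] backing off: CPU at ${stats.cpuPercent.toFixed(1)}%`);
    return true;
  }
  return false;
}
```
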
### UI Display

The Workers Dashboard shows:

- **Tasks Column**: `2/3 tasks` (active/max concurrent)
- **Resources Column**: Memory % and CPU % with color coding
  - Green: < 50%
  - Yellow: 50-74%
  - Amber: 75-89%
  - Red: 90%+
- **Backing Off**: Orange warning badge when worker is in backoff state

### Task Count Badge Details

```
┌─────────────────────────────────────────────┐
│ Worker: "MyWorker"                          │
│ Tasks: 2/3 tasks   #456, #457               │
│ Resources: 🧠 65%  💻 45%                   │
│ Status: ● Active                            │
└─────────────────────────────────────────────┘
```

### Best Practices

1. **Start Conservative**: Use `MAX_CONCURRENT_TASKS=3` initially
2. **Monitor Resources**: Watch for frequent backoffs in logs
3. **Tune Per Workload**: I/O-bound tasks benefit from higher concurrency
4. **Scale Horizontally**: Add more pods rather than cranking concurrency too high

### Code References

| File | Purpose |
|------|---------|
| `src/tasks/task-worker.ts:68-71` | Concurrency environment variables |
| `src/tasks/task-worker.ts:104-111` | ResourceStats interface |
| `src/tasks/task-worker.ts:149-179` | getResourceStats() method |
| `src/tasks/task-worker.ts:184-196` | shouldBackOff() method |
| `src/tasks/task-worker.ts:462-516` | mainLoop() with concurrent claiming |
| `src/routes/worker-registry.ts:148-195` | Heartbeat endpoint handling |
| `cannaiq/src/pages/WorkersDashboard.tsx:233-305` | UI components for resources |

## Monitoring

### Logs

Workers log to stdout:

```
[TaskWorker] Starting worker worker-product_resync-a1b2c3d4 for role: product_resync
[TaskWorker] Claimed task 123 (product_resync) for dispensary 456
[TaskWorker] Task 123 completed successfully
```

### Health Check

Check if workers are active:

```sql
SELECT worker_id, role, COUNT(*), MAX(last_heartbeat_at)
FROM worker_tasks
WHERE last_heartbeat_at > NOW() - INTERVAL '5 minutes'
GROUP BY worker_id, role;
```

### Metrics

```sql
-- Tasks by status
SELECT status, COUNT(*) FROM worker_tasks GROUP BY status;

-- Tasks by role
SELECT role, status, COUNT(*) FROM worker_tasks GROUP BY role, status;

-- Average duration by role
SELECT role, AVG(EXTRACT(EPOCH FROM (completed_at - started_at))) as avg_seconds
FROM worker_tasks
WHERE status = 'completed' AND completed_at > NOW() - INTERVAL '24 hours'
GROUP BY role;
```

backend/k8s/cronjob-ip2location.yaml (new file, 69 lines)
@@ -0,0 +1,69 @@
apiVersion: batch/v1
kind: CronJob
metadata:
  name: ip2location-update
  namespace: default
spec:
  # Run on the 1st of every month at 3am UTC
  schedule: "0 3 1 * *"
  concurrencyPolicy: Forbid
  successfulJobsHistoryLimit: 3
  failedJobsHistoryLimit: 3
  jobTemplate:
    spec:
      template:
        spec:
          containers:
            - name: ip2location-updater
              image: curlimages/curl:latest
              command:
                - /bin/sh
                - -c
                - |
                  set -e
                  echo "Downloading IP2Location LITE DB5..."

                  # Download to temp
                  cd /tmp
                  curl -L -o ip2location.zip "https://www.ip2location.com/download/?token=${IP2LOCATION_TOKEN}&file=DB5LITEBIN"

                  # Extract
                  unzip -o ip2location.zip

                  # Find and copy the BIN file
                  BIN_FILE=$(ls *.BIN 2>/dev/null | head -1)
                  if [ -z "$BIN_FILE" ]; then
                    echo "ERROR: No BIN file found"
                    exit 1
                  fi

                  # Copy to shared volume
                  cp "$BIN_FILE" /data/IP2LOCATION-LITE-DB5.BIN

                  echo "Done! Database updated: /data/IP2LOCATION-LITE-DB5.BIN"
              env:
                - name: IP2LOCATION_TOKEN
                  valueFrom:
                    secretKeyRef:
                      name: dutchie-backend-secret
                      key: IP2LOCATION_TOKEN
              volumeMounts:
                - name: ip2location-data
                  mountPath: /data
          restartPolicy: OnFailure
          volumes:
            - name: ip2location-data
              persistentVolumeClaim:
                claimName: ip2location-pvc
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: ip2location-pvc
  namespace: default
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 100Mi

@@ -26,6 +26,12 @@ spec:
               name: dutchie-backend-config
           - secretRef:
               name: dutchie-backend-secret
+        env:
+          - name: IP2LOCATION_DB_PATH
+            value: /data/ip2location/IP2LOCATION-LITE-DB5.BIN
+        volumeMounts:
+          - name: ip2location-data
+            mountPath: /data/ip2location
         resources:
           requests:
             memory: "256Mi"
@@ -45,3 +51,7 @@ spec:
             port: 3010
           initialDelaySeconds: 5
           periodSeconds: 5
+      volumes:
+        - name: ip2location-data
+          persistentVolumeClaim:
+            claimName: ip2location-pvc

backend/k8s/scraper-worker-statefulset.yaml (new file, 77 lines)
@@ -0,0 +1,77 @@
apiVersion: v1
kind: Service
metadata:
  name: scraper-worker
  namespace: dispensary-scraper
  labels:
    app: scraper-worker
spec:
  clusterIP: None  # Headless service required for StatefulSet
  selector:
    app: scraper-worker
  ports:
    - port: 3010
      name: http
---
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: scraper-worker
  namespace: dispensary-scraper
spec:
  serviceName: scraper-worker
  replicas: 8
  podManagementPolicy: Parallel  # Start all pods at once
  updateStrategy:
    type: OnDelete  # Pods only update when manually deleted - no automatic restarts
  selector:
    matchLabels:
      app: scraper-worker
  template:
    metadata:
      labels:
        app: scraper-worker
    spec:
      terminationGracePeriodSeconds: 60
      imagePullSecrets:
        - name: regcred
      containers:
        - name: worker
          image: code.cannabrands.app/creationshop/dispensary-scraper:latest
          imagePullPolicy: Always
          command: ["node"]
          args: ["dist/tasks/task-worker.js"]
          env:
            - name: WORKER_MODE
              value: "true"
            - name: POD_NAME
              valueFrom:
                fieldRef:
                  fieldPath: metadata.name
            - name: MAX_CONCURRENT_TASKS
              value: "50"
            - name: API_BASE_URL
              value: http://scraper
            - name: NODE_OPTIONS
              value: --max-old-space-size=1500
          envFrom:
            - configMapRef:
                name: scraper-config
            - secretRef:
                name: scraper-secrets
          resources:
            requests:
              cpu: 100m
              memory: 1Gi
            limits:
              cpu: 500m
              memory: 2Gi
          livenessProbe:
            exec:
              command:
                - /bin/sh
                - -c
                - pgrep -f 'task-worker' > /dev/null
            initialDelaySeconds: 10
            periodSeconds: 30
            failureThreshold: 3

backend/migrations/051_worker_definitions.sql (new file, 119 lines)
@@ -0,0 +1,119 @@
-- Migration 051: Worker Definitions
-- Creates a dedicated workers table for named workers with roles and assignments

-- Workers table - defines named workers with roles
CREATE TABLE IF NOT EXISTS workers (
  id SERIAL PRIMARY KEY,
  name VARCHAR(100) NOT NULL UNIQUE,
  role VARCHAR(100) NOT NULL,
  description TEXT,
  enabled BOOLEAN DEFAULT TRUE,

  -- Schedule configuration (for dedicated crawl workers)
  schedule_type VARCHAR(50) DEFAULT 'interval', -- 'interval', 'cron', 'manual'
  interval_minutes INTEGER DEFAULT 240,
  cron_expression VARCHAR(100), -- e.g., '0 */4 * * *'
  jitter_minutes INTEGER DEFAULT 30,

  -- Assignment scope
  assignment_type VARCHAR(50) DEFAULT 'all', -- 'all', 'state', 'dispensary', 'chain'
  assigned_state_codes TEXT[], -- e.g., ['AZ', 'CA']
  assigned_dispensary_ids INTEGER[],
  assigned_chain_ids INTEGER[],

  -- Job configuration
  job_type VARCHAR(50) NOT NULL DEFAULT 'dutchie_product_crawl',
  job_config JSONB DEFAULT '{}',
  priority INTEGER DEFAULT 0,
  max_concurrent INTEGER DEFAULT 1,

  -- Status tracking
  status VARCHAR(50) DEFAULT 'idle', -- 'idle', 'running', 'paused', 'error'
  last_run_at TIMESTAMPTZ,
  last_status VARCHAR(50),
  last_error TEXT,
  last_duration_ms INTEGER,
  next_run_at TIMESTAMPTZ,
  current_job_id INTEGER,

  -- Metrics
  total_runs INTEGER DEFAULT 0,
  successful_runs INTEGER DEFAULT 0,
  failed_runs INTEGER DEFAULT 0,
  avg_duration_ms INTEGER,

  created_at TIMESTAMPTZ DEFAULT NOW(),
  updated_at TIMESTAMPTZ DEFAULT NOW()
);

-- Worker run history
CREATE TABLE IF NOT EXISTS worker_runs (
  id SERIAL PRIMARY KEY,
  worker_id INTEGER NOT NULL REFERENCES workers(id) ON DELETE CASCADE,
  started_at TIMESTAMPTZ DEFAULT NOW(),
  completed_at TIMESTAMPTZ,
  status VARCHAR(50) DEFAULT 'running', -- 'running', 'success', 'error', 'cancelled'
  duration_ms INTEGER,

  -- What was processed
  jobs_created INTEGER DEFAULT 0,
  jobs_completed INTEGER DEFAULT 0,
  jobs_failed INTEGER DEFAULT 0,
  dispensaries_crawled INTEGER DEFAULT 0,
  products_found INTEGER DEFAULT 0,

  error_message TEXT,
  metadata JSONB DEFAULT '{}',

  created_at TIMESTAMPTZ DEFAULT NOW()
);

-- Indexes for efficient lookups
CREATE INDEX IF NOT EXISTS idx_workers_enabled ON workers(enabled) WHERE enabled = TRUE;
CREATE INDEX IF NOT EXISTS idx_workers_next_run ON workers(next_run_at) WHERE enabled = TRUE;
CREATE INDEX IF NOT EXISTS idx_workers_status ON workers(status);
CREATE INDEX IF NOT EXISTS idx_worker_runs_worker_id ON worker_runs(worker_id);
CREATE INDEX IF NOT EXISTS idx_worker_runs_started_at ON worker_runs(started_at DESC);

-- Add worker_id to dispensary_crawl_jobs if not exists
DO $$
BEGIN
  IF NOT EXISTS (
    SELECT 1 FROM information_schema.columns
    WHERE table_name = 'dispensary_crawl_jobs' AND column_name = 'assigned_worker_id'
  ) THEN
    ALTER TABLE dispensary_crawl_jobs ADD COLUMN assigned_worker_id INTEGER REFERENCES workers(id);
  END IF;
END $$;

-- Migrate existing job_schedules workers to new workers table
INSERT INTO workers (name, role, description, enabled, interval_minutes, jitter_minutes, job_type, job_config, last_run_at, last_status, last_error, last_duration_ms, next_run_at)
SELECT
  worker_name,
  worker_role,
  description,
  enabled,
  base_interval_minutes,
  jitter_minutes,
  job_name,
  job_config,
  last_run_at,
  last_status,
  last_error_message,
  last_duration_ms,
  next_run_at
FROM job_schedules
WHERE worker_name IS NOT NULL
ON CONFLICT (name) DO UPDATE SET
  updated_at = NOW();

-- Available worker roles (reference)
COMMENT ON TABLE workers IS 'Named workers with specific roles and assignments. Roles include:
- product_sync: Crawls products from dispensary menus
- store_discovery: Discovers new dispensary locations
- entry_point_finder: Detects menu providers and resolves platform IDs
- analytics_refresh: Refreshes materialized views and analytics
- price_monitor: Monitors price changes and triggers alerts
- inventory_sync: Syncs inventory levels
- image_processor: Downloads and processes product images
- data_validator: Validates data integrity';

backend/migrations/052_seo_settings.sql (new file, 49 lines)
@@ -0,0 +1,49 @@
-- Migration 052: SEO Settings Table
-- Key/value store for SEO Orchestrator configuration

CREATE TABLE IF NOT EXISTS seo_settings (
  id SERIAL PRIMARY KEY,
  key TEXT UNIQUE NOT NULL,
  value JSONB NOT NULL,
  created_at TIMESTAMP DEFAULT NOW(),
  updated_at TIMESTAMP DEFAULT NOW()
);

-- Create index on key for fast lookups
CREATE INDEX IF NOT EXISTS idx_seo_settings_key ON seo_settings(key);

-- Seed with default settings
INSERT INTO seo_settings (key, value) VALUES
  -- Section 1: Global Content Generation Settings
  ('primary_prompt_template', '"You are a cannabis industry content expert. Generate SEO-optimized content for {{page_type}} pages about {{subject}}. Focus on: {{focus_areas}}. Maintain a {{tone}} tone and keep content {{length}}."'),
  ('regeneration_prompt_template', '"Regenerate the following SEO content with fresh perspectives. Original topic: {{subject}}. Improve upon: {{improvement_areas}}. Maintain compliance with cannabis industry standards."'),
  ('default_content_length', '"medium"'),
  ('tone_voice', '"informational"'),

  -- Section 2: Automatic Refresh Rules
  ('auto_refresh_interval', '"weekly"'),
  ('trigger_pct_product_change', 'true'),
  ('trigger_pct_brand_change', 'true'),
  ('trigger_new_stores', 'true'),
  ('trigger_market_shift', 'false'),
  ('webhook_url', '""'),
  ('notify_on_trigger', 'false'),

  -- Section 3: Page-Level Defaults
  ('default_title_template', '"{{state_name}} Dispensaries | Find Cannabis Near You | CannaiQ"'),
  ('default_meta_description_template', '"Discover the best dispensaries in {{state_name}}. Browse {{dispensary_count}}+ licensed retailers, compare prices, and find cannabis products near you."'),
  ('default_slug_template', '"dispensaries-{{state_code_lower}}"'),
  ('default_og_image_template', '"/images/seo/og-{{state_code_lower}}.jpg"'),
  ('enable_ai_images', 'false'),

  -- Section 4: Crawl / Dataset Configuration
  ('primary_data_provider', '"cannaiq"'),
  ('fallback_data_provider', '"dutchie"'),
  ('min_data_freshness_hours', '24'),
  ('stale_data_behavior', '"allow_with_warning"')
ON CONFLICT (key) DO NOTHING;

-- Record migration
INSERT INTO schema_migrations (version, name, applied_at)
VALUES ('052', 'seo_settings', NOW())
ON CONFLICT (version) DO NOTHING;

backend/migrations/066_dutchie_field_alignment.sql (new file, 140 lines)
@@ -0,0 +1,140 @@
-- Migration 066: Align dispensaries and discovery_locations tables with Dutchie field names
-- Uses snake_case convention (Postgres standard) mapped from Dutchie's camelCase
--
-- Changes:
-- 1. dispensaries: rename address→address1, zip→zipcode, remove company_name
-- 2. dispensaries: add missing Dutchie fields
-- 3. dutchie_discovery_locations: add missing Dutchie fields

-- ============================================================================
-- DISPENSARIES TABLE
-- ============================================================================

-- Rename address to address1 (matches Dutchie's address1)
ALTER TABLE dispensaries RENAME COLUMN address TO address1;

-- Rename zip to zipcode (matches Dutchie's zip, but we use zipcode for clarity)
ALTER TABLE dispensaries RENAME COLUMN zip TO zipcode;

-- Drop company_name (redundant with name)
ALTER TABLE dispensaries DROP COLUMN IF EXISTS company_name;

-- Add address2
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS address2 VARCHAR(255);

-- Add country
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS country VARCHAR(100) DEFAULT 'United States';

-- Add timezone
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS timezone VARCHAR(50);

-- Add email
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS email VARCHAR(255);

-- Add description
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS description TEXT;

-- Add logo_image (Dutchie: logoImage)
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS logo_image TEXT;

-- Add banner_image (Dutchie: bannerImage)
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS banner_image TEXT;

-- Add offer_pickup (Dutchie: offerPickup)
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS offer_pickup BOOLEAN DEFAULT TRUE;

-- Add offer_delivery (Dutchie: offerDelivery)
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS offer_delivery BOOLEAN DEFAULT FALSE;

-- Add offer_curbside_pickup (Dutchie: offerCurbsidePickup)
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS offer_curbside_pickup BOOLEAN DEFAULT FALSE;

-- Add is_medical (Dutchie: isMedical)
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS is_medical BOOLEAN DEFAULT FALSE;

-- Add is_recreational (Dutchie: isRecreational)
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS is_recreational BOOLEAN DEFAULT FALSE;

-- Add chain_slug (Dutchie: chain)
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS chain_slug VARCHAR(255);

-- Add enterprise_id (Dutchie: retailer.enterpriseId)
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS enterprise_id VARCHAR(100);

-- Add status (Dutchie: status - open/closed)
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS status VARCHAR(50);

-- Add c_name (Dutchie: cName - the URL slug used in embedded menus)
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS c_name VARCHAR(255);

-- ============================================================================
-- DUTCHIE_DISCOVERY_LOCATIONS TABLE
-- ============================================================================

-- Add phone
ALTER TABLE dutchie_discovery_locations ADD COLUMN IF NOT EXISTS phone VARCHAR(50);

-- Add website (Dutchie: embedBackUrl)
ALTER TABLE dutchie_discovery_locations ADD COLUMN IF NOT EXISTS website TEXT;

-- Add email
ALTER TABLE dutchie_discovery_locations ADD COLUMN IF NOT EXISTS email VARCHAR(255);

-- Add description
ALTER TABLE dutchie_discovery_locations ADD COLUMN IF NOT EXISTS description TEXT;

-- Add logo_image
ALTER TABLE dutchie_discovery_locations ADD COLUMN IF NOT EXISTS logo_image TEXT;

-- Add banner_image
ALTER TABLE dutchie_discovery_locations ADD COLUMN IF NOT EXISTS banner_image TEXT;

-- Add chain_slug
ALTER TABLE dutchie_discovery_locations ADD COLUMN IF NOT EXISTS chain_slug VARCHAR(255);

-- Add enterprise_id
ALTER TABLE dutchie_discovery_locations ADD COLUMN IF NOT EXISTS enterprise_id VARCHAR(100);

-- Add c_name
ALTER TABLE dutchie_discovery_locations ADD COLUMN IF NOT EXISTS c_name VARCHAR(255);

-- Add country
ALTER TABLE dutchie_discovery_locations ADD COLUMN IF NOT EXISTS country VARCHAR(100) DEFAULT 'United States';

-- Add store status
ALTER TABLE dutchie_discovery_locations ADD COLUMN IF NOT EXISTS store_status VARCHAR(50);

-- ============================================================================
-- INDEXES
-- ============================================================================

-- Index for chain lookups
CREATE INDEX IF NOT EXISTS idx_dispensaries_chain_slug ON dispensaries(chain_slug) WHERE chain_slug IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_discovery_locations_chain_slug ON dutchie_discovery_locations(chain_slug) WHERE chain_slug IS NOT NULL;

-- Index for enterprise lookups (for multi-location chains)
CREATE INDEX IF NOT EXISTS idx_dispensaries_enterprise_id ON dispensaries(enterprise_id) WHERE enterprise_id IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_discovery_locations_enterprise_id ON dutchie_discovery_locations(enterprise_id) WHERE enterprise_id IS NOT NULL;

-- Index for c_name lookups
CREATE INDEX IF NOT EXISTS idx_dispensaries_c_name ON dispensaries(c_name) WHERE c_name IS NOT NULL;

-- ============================================================================
-- COMMENTS
-- ============================================================================

COMMENT ON COLUMN dispensaries.address1 IS 'Street address line 1 (Dutchie: address1)';
COMMENT ON COLUMN dispensaries.address2 IS 'Street address line 2 (Dutchie: address2)';
COMMENT ON COLUMN dispensaries.zipcode IS 'ZIP/postal code (Dutchie: zip)';
COMMENT ON COLUMN dispensaries.c_name IS 'Dutchie URL slug for embedded menus (Dutchie: cName)';
COMMENT ON COLUMN dispensaries.chain_slug IS 'Chain identifier slug (Dutchie: chain)';
COMMENT ON COLUMN dispensaries.enterprise_id IS 'Parent enterprise UUID (Dutchie: retailer.enterpriseId)';
COMMENT ON COLUMN dispensaries.logo_image IS 'Logo image URL (Dutchie: logoImage)';
COMMENT ON COLUMN dispensaries.banner_image IS 'Banner image URL (Dutchie: bannerImage)';
COMMENT ON COLUMN dispensaries.offer_pickup IS 'Offers in-store pickup (Dutchie: offerPickup)';
COMMENT ON COLUMN dispensaries.offer_delivery IS 'Offers delivery (Dutchie: offerDelivery)';
COMMENT ON COLUMN dispensaries.offer_curbside_pickup IS 'Offers curbside pickup (Dutchie: offerCurbsidePickup)';
COMMENT ON COLUMN dispensaries.is_medical IS 'Licensed for medical sales (Dutchie: isMedical)';
COMMENT ON COLUMN dispensaries.is_recreational IS 'Licensed for recreational sales (Dutchie: isRecreational)';

SELECT 'Migration 066 completed: Dutchie field alignment' as status;

backend/migrations/067_promotion_log.sql (new file, 24 lines)
@@ -0,0 +1,24 @@
-- Promotion log table for tracking discovery → dispensary promotions
-- Tracks validation and promotion actions for audit/review

CREATE TABLE IF NOT EXISTS dutchie_promotion_log (
  id SERIAL PRIMARY KEY,
  discovery_id INTEGER REFERENCES dutchie_discovery_locations(id) ON DELETE SET NULL,
  dispensary_id INTEGER REFERENCES dispensaries(id) ON DELETE SET NULL,
  action VARCHAR(50) NOT NULL, -- 'validated', 'rejected', 'promoted_create', 'promoted_update', 'skipped'
  state_code VARCHAR(10),
  store_name VARCHAR(255),
  validation_errors TEXT[], -- Array of error messages if rejected
  field_changes JSONB, -- Before/after snapshot of changed fields
  triggered_by VARCHAR(100) DEFAULT 'auto', -- 'auto', 'manual', 'api'
  created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP
);

-- Indexes for efficient querying
CREATE INDEX IF NOT EXISTS idx_promotion_log_discovery_id ON dutchie_promotion_log(discovery_id);
CREATE INDEX IF NOT EXISTS idx_promotion_log_dispensary_id ON dutchie_promotion_log(dispensary_id);
CREATE INDEX IF NOT EXISTS idx_promotion_log_action ON dutchie_promotion_log(action);
CREATE INDEX IF NOT EXISTS idx_promotion_log_state_code ON dutchie_promotion_log(state_code);
CREATE INDEX IF NOT EXISTS idx_promotion_log_created_at ON dutchie_promotion_log(created_at DESC);

COMMENT ON TABLE dutchie_promotion_log IS 'Audit log for discovery location validation and promotion to dispensaries';

backend/migrations/068_crawler_status_alerts.sql (new file, 95 lines)
@@ -0,0 +1,95 @@
-- Migration 068: Crawler Status Alerts
-- Creates status_alerts table for dashboard notifications and status change logging

-- ============================================================
-- STATUS ALERTS TABLE
-- ============================================================

CREATE TABLE IF NOT EXISTS crawler_status_alerts (
  id SERIAL PRIMARY KEY,

  -- References
  dispensary_id INTEGER REFERENCES dispensaries(id),
  profile_id INTEGER REFERENCES dispensary_crawler_profiles(id),

  -- Alert info
  alert_type VARCHAR(50) NOT NULL, -- 'status_change', 'crawl_error', 'validation_failed', 'promoted', 'demoted'
  severity VARCHAR(20) DEFAULT 'info', -- 'info', 'warning', 'error', 'critical'

  -- Status transition
  previous_status VARCHAR(50),
  new_status VARCHAR(50),

  -- Context
  message TEXT,
  error_details JSONB,
  metadata JSONB, -- Additional context (product counts, error codes, etc.)

  -- Tracking
  acknowledged BOOLEAN DEFAULT FALSE,
  acknowledged_at TIMESTAMP WITH TIME ZONE,
  acknowledged_by VARCHAR(100),

  -- Timestamps
  created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP
);

-- Indexes for common queries
CREATE INDEX IF NOT EXISTS idx_crawler_status_alerts_dispensary ON crawler_status_alerts(dispensary_id);
CREATE INDEX IF NOT EXISTS idx_crawler_status_alerts_type ON crawler_status_alerts(alert_type);
CREATE INDEX IF NOT EXISTS idx_crawler_status_alerts_severity ON crawler_status_alerts(severity);
CREATE INDEX IF NOT EXISTS idx_crawler_status_alerts_unack ON crawler_status_alerts(acknowledged) WHERE acknowledged = FALSE;
CREATE INDEX IF NOT EXISTS idx_crawler_status_alerts_created ON crawler_status_alerts(created_at DESC);

-- ============================================================
-- STATUS DEFINITIONS (for reference/validation)
-- ============================================================

COMMENT ON TABLE crawler_status_alerts IS 'Crawler status change notifications for dashboard alerting';
COMMENT ON COLUMN crawler_status_alerts.alert_type IS 'Type: status_change, crawl_error, validation_failed, promoted, demoted';
COMMENT ON COLUMN crawler_status_alerts.severity IS 'Severity: info, warning, error, critical';
COMMENT ON COLUMN crawler_status_alerts.previous_status IS 'Previous crawler status before change';
COMMENT ON COLUMN crawler_status_alerts.new_status IS 'New crawler status after change';

-- ============================================================
-- STATUS TRACKING ON PROFILES
-- ============================================================

-- Add columns for status tracking if not exists
DO $$
BEGIN
  -- Consecutive success count for auto-promotion
  IF NOT EXISTS (SELECT 1 FROM information_schema.columns
                 WHERE table_name = 'dispensary_crawler_profiles' AND column_name = 'consecutive_successes') THEN
    ALTER TABLE dispensary_crawler_profiles ADD COLUMN consecutive_successes INTEGER DEFAULT 0;
  END IF;

  -- Consecutive failure count for auto-demotion
  IF NOT EXISTS (SELECT 1 FROM information_schema.columns
                 WHERE table_name = 'dispensary_crawler_profiles' AND column_name = 'consecutive_failures') THEN
    ALTER TABLE dispensary_crawler_profiles ADD COLUMN consecutive_failures INTEGER DEFAULT 0;
  END IF;

  -- Last status change timestamp
  IF NOT EXISTS (SELECT 1 FROM information_schema.columns
                 WHERE table_name = 'dispensary_crawler_profiles' AND column_name = 'status_changed_at') THEN
    ALTER TABLE dispensary_crawler_profiles ADD COLUMN status_changed_at TIMESTAMP WITH TIME ZONE;
  END IF;

  -- Status change reason
  IF NOT EXISTS (SELECT 1 FROM information_schema.columns
                 WHERE table_name = 'dispensary_crawler_profiles' AND column_name = 'status_reason') THEN
    ALTER TABLE dispensary_crawler_profiles ADD COLUMN status_reason TEXT;
  END IF;
END $$;

-- ============================================================
-- VALID STATUS VALUES
-- ============================================================
-- Status values for dispensary_crawler_profiles.status:
--   'sandbox'      - Newly created, being validated
--   'production'   - Healthy, actively crawled
--   'needs_manual' - Requires human intervention
--   'failing'      - Multiple consecutive failures
--   'disabled'     - Manually disabled
--   'legacy'       - No profile, uses default method (virtual status)

backend/migrations/069_six_stage_status.sql (new file, 163 lines)
@@ -0,0 +1,163 @@
-- Migration 069: Seven-Stage Status System
--
-- Implements explicit 7-stage pipeline for store lifecycle:
--   1. discovered - Found via Dutchie API, raw data
--   2. validated  - Passed field checks, ready for promotion
--   3. promoted   - In dispensaries table, has crawler profile
--   4. sandbox    - First crawl attempted, testing
--   5. hydrating  - Products are being loaded/updated
--   6. production - Healthy, scheduled crawls via Horizon
--   7. failing    - Crawl errors, needs attention

-- ============================================================
-- STAGE ENUM TYPE
-- ============================================================

DO $$
BEGIN
  -- Create enum if not exists
  IF NOT EXISTS (SELECT 1 FROM pg_type WHERE typname = 'store_stage') THEN
    CREATE TYPE store_stage AS ENUM (
      'discovered',
      'validated',
      'promoted',
      'sandbox',
      'hydrating',
      'production',
      'failing'
    );
  END IF;
END $$;

-- ============================================================
-- UPDATE DISCOVERY LOCATIONS TABLE
-- ============================================================

-- Add stage column to discovery locations (replaces status)
DO $$
BEGIN
  IF NOT EXISTS (SELECT 1 FROM information_schema.columns
                 WHERE table_name = 'dutchie_discovery_locations' AND column_name = 'stage') THEN
    ALTER TABLE dutchie_discovery_locations ADD COLUMN stage VARCHAR(20) DEFAULT 'discovered';
  END IF;
END $$;

-- Migrate existing status values to stage
UPDATE dutchie_discovery_locations
SET stage = CASE
  WHEN status = 'discovered' THEN 'discovered'
  WHEN status = 'verified' THEN 'validated'
  WHEN status = 'rejected' THEN 'failing'
  WHEN status = 'merged' THEN 'validated'
  ELSE 'discovered'
END
WHERE stage IS NULL OR stage = '';

-- ============================================================
-- UPDATE CRAWLER PROFILES TABLE
-- ============================================================

-- Ensure status column exists and update to new values
UPDATE dispensary_crawler_profiles
SET status = CASE
  WHEN status = 'sandbox' THEN 'sandbox'
  WHEN status = 'production' THEN 'production'
  WHEN status = 'needs_manual' THEN 'failing'
  WHEN status = 'failing' THEN 'failing'
  WHEN status = 'disabled' THEN 'failing'
  WHEN status IS NULL THEN 'promoted'
  ELSE 'promoted'
END;

-- ============================================================
-- ADD STAGE TRACKING TO DISPENSARIES
-- ============================================================

DO $$
BEGIN
  -- Add stage column to dispensaries for quick filtering
  IF NOT EXISTS (SELECT 1 FROM information_schema.columns
                 WHERE table_name = 'dispensaries' AND column_name = 'stage') THEN
    ALTER TABLE dispensaries ADD COLUMN stage VARCHAR(20) DEFAULT 'promoted';
  END IF;

  -- Add stage_changed_at for tracking
  IF NOT EXISTS (SELECT 1 FROM information_schema.columns
                 WHERE table_name = 'dispensaries' AND column_name = 'stage_changed_at') THEN
    ALTER TABLE dispensaries ADD COLUMN stage_changed_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP;
  END IF;

  -- Add first_crawl_at to track sandbox → production transition
  IF NOT EXISTS (SELECT 1 FROM information_schema.columns
                 WHERE table_name = 'dispensaries' AND column_name = 'first_crawl_at') THEN
    ALTER TABLE dispensaries ADD COLUMN first_crawl_at TIMESTAMP WITH TIME ZONE;
  END IF;

  -- Add last_successful_crawl_at
  IF NOT EXISTS (SELECT 1 FROM information_schema.columns
                 WHERE table_name = 'dispensaries' AND column_name = 'last_successful_crawl_at') THEN
    ALTER TABLE dispensaries ADD COLUMN last_successful_crawl_at TIMESTAMP WITH TIME ZONE;
  END IF;
END $$;

-- Set initial stage for existing dispensaries based on their crawler profile status
UPDATE dispensaries d
SET stage = COALESCE(
  (SELECT dcp.status FROM dispensary_crawler_profiles dcp
   WHERE dcp.dispensary_id = d.id AND dcp.enabled = true
   ORDER BY dcp.updated_at DESC LIMIT 1),
  'promoted'
)
WHERE d.stage IS NULL OR d.stage = '';

-- ============================================================
-- INDEXES FOR STAGE-BASED QUERIES
-- ============================================================

CREATE INDEX IF NOT EXISTS idx_dispensaries_stage ON dispensaries(stage);
CREATE INDEX IF NOT EXISTS idx_dispensaries_stage_state ON dispensaries(stage, state);
CREATE INDEX IF NOT EXISTS idx_discovery_locations_stage ON dutchie_discovery_locations(stage);
CREATE INDEX IF NOT EXISTS idx_crawler_profiles_status ON dispensary_crawler_profiles(status);

-- ============================================================
-- STAGE TRANSITION LOG
-- ============================================================

CREATE TABLE IF NOT EXISTS stage_transitions (
  id SERIAL PRIMARY KEY,

  -- What changed
  entity_type VARCHAR(20) NOT NULL, -- 'discovery_location' or 'dispensary'
  entity_id INTEGER NOT NULL,

  -- Stage change
  from_stage VARCHAR(20),
  to_stage VARCHAR(20) NOT NULL,

  -- Context
  trigger_type VARCHAR(50) NOT NULL, -- 'api', 'scheduler', 'manual', 'auto'
  trigger_endpoint VARCHAR(200),

  -- Outcome
  success BOOLEAN DEFAULT TRUE,
  error_message TEXT,
  metadata JSONB,

  -- Timing
  duration_ms INTEGER,
  created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP
);

CREATE INDEX IF NOT EXISTS idx_stage_transitions_entity ON stage_transitions(entity_type, entity_id);
CREATE INDEX IF NOT EXISTS idx_stage_transitions_to_stage ON stage_transitions(to_stage);
CREATE INDEX IF NOT EXISTS idx_stage_transitions_created ON stage_transitions(created_at DESC);

-- ============================================================
-- COMMENTS
-- ============================================================

COMMENT ON TABLE stage_transitions IS 'Audit log for all stage transitions in the pipeline';
COMMENT ON COLUMN dispensaries.stage IS 'Current pipeline stage: discovered, validated, promoted, sandbox, hydrating, production, failing';
COMMENT ON COLUMN dispensaries.stage_changed_at IS 'When the stage was last changed';
COMMENT ON COLUMN dispensaries.first_crawl_at IS 'When the first crawl was attempted (sandbox stage)';
COMMENT ON COLUMN dispensaries.last_successful_crawl_at IS 'When the last successful crawl completed';

backend/migrations/070_product_variants.sql (new file, 239 lines)
@@ -0,0 +1,239 @@
-- ============================================================================
-- Migration 070: Product Variants Tables
-- ============================================================================
--
-- Purpose: Store variant-level pricing and inventory as first-class entities
-- to enable time-series analytics, price comparisons, and sale tracking.
--
-- Enables queries like:
--   - Price history for a specific variant (1g Blue Dream over time)
--   - Sale frequency analysis (how often is this on special?)
--   - Cross-store price comparison (who has cheapest 1g flower?)
--   - Current specials across all stores
--
-- RULES:
--   - STRICTLY ADDITIVE (no DROP, DELETE, TRUNCATE)
--   - All new tables use IF NOT EXISTS
--   - All indexes use IF NOT EXISTS
--
-- ============================================================================

-- ============================================================================
-- SECTION 1: PRODUCT_VARIANTS TABLE (Current State)
-- ============================================================================
-- One row per product+option combination. Tracks current pricing/inventory.

CREATE TABLE IF NOT EXISTS product_variants (
  id SERIAL PRIMARY KEY,
  store_product_id INTEGER NOT NULL REFERENCES store_products(id) ON DELETE CASCADE,
  dispensary_id INTEGER NOT NULL REFERENCES dispensaries(id) ON DELETE CASCADE,

  -- Variant identity (from Dutchie POSMetaData.children)
  option VARCHAR(100) NOT NULL,  -- "1g", "3.5g", "1/8oz", "100mg"
  canonical_sku VARCHAR(100),    -- Dutchie canonicalSKU
  canonical_id VARCHAR(100),     -- Dutchie canonicalID
  canonical_name VARCHAR(500),   -- Dutchie canonicalName

  -- Current pricing (in dollars, not cents)
  price_rec NUMERIC(10,2),
  price_med NUMERIC(10,2),
  price_rec_special NUMERIC(10,2),
  price_med_special NUMERIC(10,2),

  -- Current inventory
  quantity INTEGER,
  quantity_available INTEGER,
  in_stock BOOLEAN DEFAULT TRUE,

  -- Special/sale status
  is_on_special BOOLEAN DEFAULT FALSE,

  -- Weight/size parsing (for analytics)
  weight_value NUMERIC(10,2), -- 1, 3.5, 28, etc.
  weight_unit VARCHAR(20),    -- g, oz, mg, ml, etc.

  -- Timestamps
  first_seen_at TIMESTAMPTZ DEFAULT NOW(),
  last_seen_at TIMESTAMPTZ DEFAULT NOW(),
  last_price_change_at TIMESTAMPTZ,
  last_stock_change_at TIMESTAMPTZ,

  created_at TIMESTAMPTZ DEFAULT NOW(),
  updated_at TIMESTAMPTZ DEFAULT NOW(),

  UNIQUE(store_product_id, option)
);

-- Indexes for common queries
CREATE INDEX IF NOT EXISTS idx_variants_store_product ON product_variants(store_product_id);
CREATE INDEX IF NOT EXISTS idx_variants_dispensary ON product_variants(dispensary_id);
CREATE INDEX IF NOT EXISTS idx_variants_option ON product_variants(option);
CREATE INDEX IF NOT EXISTS idx_variants_in_stock ON product_variants(dispensary_id, in_stock) WHERE in_stock = TRUE;
CREATE INDEX IF NOT EXISTS idx_variants_on_special ON product_variants(dispensary_id, is_on_special) WHERE is_on_special = TRUE;
CREATE INDEX IF NOT EXISTS idx_variants_canonical_sku ON product_variants(canonical_sku) WHERE canonical_sku IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_variants_price_rec ON product_variants(price_rec) WHERE price_rec IS NOT NULL;

COMMENT ON TABLE product_variants IS 'Current state of each product variant (weight/size option). One row per product+option.';
COMMENT ON COLUMN product_variants.option IS 'Weight/size option string from Dutchie (e.g., "1g", "3.5g", "1/8oz")';
COMMENT ON COLUMN product_variants.canonical_sku IS 'Dutchie POS SKU for cross-store matching';

-- ============================================================================
-- SECTION 2: PRODUCT_VARIANT_SNAPSHOTS TABLE (Historical Data)
-- ============================================================================
-- Time-series data for variant pricing. One row per variant per crawl.
-- CRITICAL: NEVER DELETE from this table.

CREATE TABLE IF NOT EXISTS product_variant_snapshots (
  id SERIAL PRIMARY KEY,
  product_variant_id INTEGER NOT NULL REFERENCES product_variants(id) ON DELETE CASCADE,
  store_product_id INTEGER REFERENCES store_products(id) ON DELETE SET NULL,
  dispensary_id INTEGER NOT NULL REFERENCES dispensaries(id) ON DELETE CASCADE,
  crawl_run_id INTEGER REFERENCES crawl_runs(id) ON DELETE SET NULL,

  -- Variant identity (denormalized for query performance)
  option VARCHAR(100) NOT NULL,

  -- Pricing at time of capture
  price_rec NUMERIC(10,2),
  price_med NUMERIC(10,2),
  price_rec_special NUMERIC(10,2),
  price_med_special NUMERIC(10,2),

  -- Inventory at time of capture
  quantity INTEGER,
  in_stock BOOLEAN DEFAULT TRUE,

  -- Special status at time of capture
  is_on_special BOOLEAN DEFAULT FALSE,

  -- Feed presence (FALSE = variant missing from crawl)
  is_present_in_feed BOOLEAN DEFAULT TRUE,

  -- Capture timestamp
  captured_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),

  created_at TIMESTAMPTZ DEFAULT NOW()
);

-- Indexes for time-series queries
CREATE INDEX IF NOT EXISTS idx_variant_snapshots_variant ON product_variant_snapshots(product_variant_id, captured_at DESC);
CREATE INDEX IF NOT EXISTS idx_variant_snapshots_dispensary ON product_variant_snapshots(dispensary_id, captured_at DESC);
CREATE INDEX IF NOT EXISTS idx_variant_snapshots_crawl ON product_variant_snapshots(crawl_run_id) WHERE crawl_run_id IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_variant_snapshots_captured ON product_variant_snapshots(captured_at DESC);
CREATE INDEX IF NOT EXISTS idx_variant_snapshots_special ON product_variant_snapshots(is_on_special, captured_at DESC) WHERE is_on_special = TRUE;
CREATE INDEX IF NOT EXISTS idx_variant_snapshots_option ON product_variant_snapshots(option, captured_at DESC);

COMMENT ON TABLE product_variant_snapshots IS 'Historical variant pricing/inventory. One row per variant per crawl. NEVER DELETE.';

-- ============================================================================
-- SECTION 3: USEFUL VIEWS
-- ============================================================================

-- View: Current specials across all stores
CREATE OR REPLACE VIEW v_current_specials AS
SELECT
  pv.id as variant_id,
  sp.id as product_id,
  sp.name_raw as product_name,
  sp.brand_name_raw as brand_name,
  sp.category_raw as category,
  d.id as dispensary_id,
  d.name as dispensary_name,
  d.city,
  d.state,
  pv.option,
  pv.price_rec,
  pv.price_rec_special,
  ROUND(((pv.price_rec - pv.price_rec_special) / NULLIF(pv.price_rec, 0)) * 100, 1) as discount_percent,
  pv.quantity,
  pv.in_stock,
  pv.last_seen_at
FROM product_variants pv
JOIN store_products sp ON sp.id = pv.store_product_id
JOIN dispensaries d ON d.id = pv.dispensary_id
WHERE pv.is_on_special = TRUE
  AND pv.in_stock = TRUE
  AND pv.price_rec_special IS NOT NULL
  AND pv.price_rec_special < pv.price_rec;

COMMENT ON VIEW v_current_specials IS 'All products currently on special across all stores';
|
||||||
|
|
||||||
|
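-- Example usage (illustrative; the city value is a placeholder): deepest
-- discounts in one city.
--   SELECT product_name, dispensary_name, price_rec, price_rec_special, discount_percent
--   FROM v_current_specials
--   WHERE city = 'Phoenix'
--   ORDER BY discount_percent DESC
--   LIMIT 20;
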
-- View: Price comparison for a product across stores
CREATE OR REPLACE VIEW v_price_comparison AS
SELECT
    sp.name_raw as product_name,
    sp.brand_name_raw as brand_name,
    sp.category_raw as category,
    pv.option,
    d.id as dispensary_id,
    d.name as dispensary_name,
    d.city,
    pv.price_rec,
    pv.price_rec_special,
    pv.is_on_special,
    pv.in_stock,
    pv.quantity,
    RANK() OVER (PARTITION BY sp.name_raw, pv.option ORDER BY COALESCE(pv.price_rec_special, pv.price_rec) ASC) as price_rank
FROM product_variants pv
JOIN store_products sp ON sp.id = pv.store_product_id
JOIN dispensaries d ON d.id = pv.dispensary_id
WHERE pv.in_stock = TRUE
  AND (pv.price_rec IS NOT NULL OR pv.price_rec_special IS NOT NULL);

COMMENT ON VIEW v_price_comparison IS 'Compare prices for same product across stores, ranked by price';

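-- Example usage (illustrative): price_rank = 1 marks the cheapest in-stock
-- offer for each product/option combination.
--   SELECT product_name, option, dispensary_name,
--          COALESCE(price_rec_special, price_rec) AS best_price
--   FROM v_price_comparison
--   WHERE price_rank = 1;
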
-- View: Latest snapshot per variant
CREATE OR REPLACE VIEW v_latest_variant_snapshots AS
SELECT DISTINCT ON (product_variant_id)
    pvs.*
FROM product_variant_snapshots pvs
ORDER BY product_variant_id, captured_at DESC;

-- ============================================================================
-- SECTION 4: HELPER FUNCTION FOR SALE FREQUENCY
-- ============================================================================

-- Function to calculate sale frequency for a variant
CREATE OR REPLACE FUNCTION get_variant_sale_stats(p_variant_id INTEGER, p_days INTEGER DEFAULT 30)
RETURNS TABLE (
    total_snapshots BIGINT,
    times_on_special BIGINT,
    special_frequency_pct NUMERIC,
    avg_discount_pct NUMERIC,
    min_price NUMERIC,
    max_price NUMERIC,
    avg_price NUMERIC
) AS $$
BEGIN
    RETURN QUERY
    SELECT
        COUNT(*)::BIGINT as total_snapshots,
        COUNT(*) FILTER (WHERE is_on_special)::BIGINT as times_on_special,
        ROUND((COUNT(*) FILTER (WHERE is_on_special)::NUMERIC / NULLIF(COUNT(*), 0)) * 100, 1) as special_frequency_pct,
        ROUND(AVG(
            CASE WHEN is_on_special AND price_rec_special IS NOT NULL AND price_rec IS NOT NULL
                THEN ((price_rec - price_rec_special) / NULLIF(price_rec, 0)) * 100
            END
        ), 1) as avg_discount_pct,
        MIN(COALESCE(price_rec_special, price_rec)) as min_price,
        MAX(price_rec) as max_price,
        ROUND(AVG(COALESCE(price_rec_special, price_rec)), 2) as avg_price
    FROM product_variant_snapshots
    WHERE product_variant_id = p_variant_id
      AND captured_at >= NOW() - (p_days || ' days')::INTERVAL;
END;
$$ LANGUAGE plpgsql;

COMMENT ON FUNCTION get_variant_sale_stats IS 'Get sale frequency and price stats for a variant over N days';

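-- Example usage (illustrative; variant id 42 is a placeholder):
--   SELECT * FROM get_variant_sale_stats(42);      -- default 30-day window
--   SELECT * FROM get_variant_sale_stats(42, 90);  -- 90-day window
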
-- ============================================================================
-- DONE
-- ============================================================================

SELECT 'Migration 070 completed. Product variants tables ready for time-series analytics.' AS status;
53  backend/migrations/071_harmonize_store_products.sql  Normal file
@@ -0,0 +1,53 @@
-- Migration 071: Harmonize store_products with dutchie_products
-- Adds missing columns to store_products to consolidate on a single canonical table

-- Product details
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS description TEXT;
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS weight VARCHAR(50);
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS weights JSONB;
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS measurements JSONB;

-- Cannabinoid/terpene data
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS effects JSONB;
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS terpenes JSONB;
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS cannabinoids_v2 JSONB;
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS thc_content NUMERIC(10,4);
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS cbd_content NUMERIC(10,4);

-- Images
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS images JSONB;
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS primary_image_url TEXT;

-- Inventory
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS total_quantity_available INTEGER DEFAULT 0;

-- Status/flags
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS status VARCHAR(50);
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS featured BOOLEAN DEFAULT FALSE;
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS coming_soon BOOLEAN DEFAULT FALSE;
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS visibility_lost BOOLEAN DEFAULT FALSE;
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS visibility_lost_at TIMESTAMP WITH TIME ZONE;
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS visibility_restored_at TIMESTAMP WITH TIME ZONE;

-- Threshold flags (Dutchie-specific)
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS is_below_threshold BOOLEAN DEFAULT FALSE;
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS is_below_kiosk_threshold BOOLEAN DEFAULT FALSE;
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS options_below_threshold BOOLEAN DEFAULT FALSE;
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS options_below_kiosk_threshold BOOLEAN DEFAULT FALSE;
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS certificate_of_analysis_enabled BOOLEAN DEFAULT FALSE;

-- Platform metadata
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS external_product_id VARCHAR(100);
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS c_name VARCHAR(500);
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS past_c_names TEXT[];
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS latest_raw_payload JSONB;
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS created_at_platform TIMESTAMP WITH TIME ZONE;
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS updated_at_platform TIMESTAMP WITH TIME ZONE;

-- Indexes for common queries
CREATE INDEX IF NOT EXISTS idx_store_products_external_id ON store_products(external_product_id);
CREATE INDEX IF NOT EXISTS idx_store_products_visibility_lost ON store_products(visibility_lost) WHERE visibility_lost = TRUE;
CREATE INDEX IF NOT EXISTS idx_store_products_status ON store_products(status);

-- Add comment
COMMENT ON TABLE store_products IS 'Canonical product table - consolidated from dutchie_products';
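-- Example usage (illustrative): the JSONB columns work with the standard
-- operators. The 'limonene' key is an assumed payload shape, not a schema
-- guarantee.
--   SELECT id, name_raw, thc_content
--   FROM store_products
--   WHERE terpenes ? 'limonene'
--     AND thc_content >= 20;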
74  backend/migrations/072_product_views.sql  Normal file
@@ -0,0 +1,74 @@
-- Migration 072: Create compatibility views for store_products and store_product_snapshots
-- These views provide backward-compatible column names for API routes

-- v_products view - aliases store_products columns to match legacy dutchie_products naming
CREATE OR REPLACE VIEW v_products AS
SELECT
    id,
    dispensary_id,
    provider_product_id as external_product_id,
    provider_product_id as dutchie_id,
    name_raw as name,
    brand_name_raw as brand_name,
    category_raw as type,
    subcategory_raw as subcategory,
    strain_type,
    thc_percent as thc,
    cbd_percent as cbd,
    stock_status,
    is_in_stock,
    stock_quantity,
    image_url,
    primary_image_url,
    images,
    effects,
    description,
    is_on_special,
    featured,
    medical_only,
    rec_only,
    external_product_id as external_id,
    provider,
    created_at,
    updated_at
FROM store_products;

-- v_product_snapshots view - aliases store_product_snapshots columns to match legacy naming
CREATE OR REPLACE VIEW v_product_snapshots AS
SELECT
    id,
    store_product_id,
    dispensary_id,
    provider,
    provider_product_id,
    crawl_run_id,
    captured_at as crawled_at,
    name_raw,
    brand_name_raw,
    category_raw,
    subcategory_raw,
    -- Convert price_rec (dollars) to rec_min_price_cents (cents)
    CASE WHEN price_rec IS NOT NULL THEN (price_rec * 100)::integer END as rec_min_price_cents,
    CASE WHEN price_rec IS NOT NULL THEN (price_rec * 100)::integer END as rec_max_price_cents,
    CASE WHEN price_rec_special IS NOT NULL THEN (price_rec_special * 100)::integer END as rec_min_special_price_cents,
    CASE WHEN price_med IS NOT NULL THEN (price_med * 100)::integer END as med_min_price_cents,
    CASE WHEN price_med IS NOT NULL THEN (price_med * 100)::integer END as med_max_price_cents,
    CASE WHEN price_med_special IS NOT NULL THEN (price_med_special * 100)::integer END as med_min_special_price_cents,
    is_on_special as special,
    discount_percent,
    is_in_stock,
    stock_quantity,
    stock_status,
    stock_quantity as total_quantity_available,
    thc_percent,
    cbd_percent,
    image_url,
    raw_data as options,
    created_at
FROM store_product_snapshots;

-- Add indexes for the views' underlying tables
CREATE INDEX IF NOT EXISTS idx_store_products_dispensary ON store_products(dispensary_id);
CREATE INDEX IF NOT EXISTS idx_store_products_stock ON store_products(stock_status);
CREATE INDEX IF NOT EXISTS idx_store_snapshots_product ON store_product_snapshots(store_product_id);
CREATE INDEX IF NOT EXISTS idx_store_snapshots_captured ON store_product_snapshots(captured_at DESC);
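-- Example usage (illustrative; dispensary id 123 is a placeholder): legacy
-- API code keeps querying the old dutchie_products column names via the view.
--   SELECT id, name, brand_name, thc, cbd
--   FROM v_products
--   WHERE dispensary_id = 123
--     AND is_in_stock = TRUE;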
12  backend/migrations/073_proxy_timezone.sql  Normal file
@@ -0,0 +1,12 @@
-- Add timezone column to proxies table for geo-consistent fingerprinting
-- This allows matching Accept-Language and other headers to proxy location

ALTER TABLE proxies
    ADD COLUMN IF NOT EXISTS timezone VARCHAR(50);

-- Add timezone to failed_proxies as well
ALTER TABLE failed_proxies
    ADD COLUMN IF NOT EXISTS timezone VARCHAR(50);

-- Comment explaining usage
COMMENT ON COLUMN proxies.timezone IS 'IANA timezone (e.g., America/Phoenix) for geo-consistent fingerprinting';
27  backend/migrations/074_worker_commands.sql  Normal file
@@ -0,0 +1,27 @@
-- Migration: Worker Commands Table
-- Purpose: Store commands for workers (decommission, etc.)
-- Workers poll this table after each task to check for commands

CREATE TABLE IF NOT EXISTS worker_commands (
    id SERIAL PRIMARY KEY,
    worker_id TEXT NOT NULL,
    command TEXT NOT NULL, -- 'decommission', 'pause', 'resume'
    reason TEXT,
    issued_by TEXT,
    issued_at TIMESTAMPTZ DEFAULT NOW(),
    acknowledged_at TIMESTAMPTZ,
    executed_at TIMESTAMPTZ,
    status TEXT DEFAULT 'pending' -- 'pending', 'acknowledged', 'executed', 'cancelled'
);

-- Index for worker lookups
CREATE INDEX IF NOT EXISTS idx_worker_commands_worker_id ON worker_commands(worker_id);
CREATE INDEX IF NOT EXISTS idx_worker_commands_pending ON worker_commands(worker_id, status) WHERE status = 'pending';

-- Add decommission_requested column to worker_registry for quick checks
ALTER TABLE worker_registry ADD COLUMN IF NOT EXISTS decommission_requested BOOLEAN DEFAULT FALSE;
ALTER TABLE worker_registry ADD COLUMN IF NOT EXISTS decommission_reason TEXT;
ALTER TABLE worker_registry ADD COLUMN IF NOT EXISTS decommission_requested_at TIMESTAMPTZ;

-- Comment
COMMENT ON TABLE worker_commands IS 'Commands issued to workers (decommission after task, pause, etc.)';
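-- Example usage (illustrative; the worker id is a placeholder):
--   -- Operator issues a command:
--   INSERT INTO worker_commands (worker_id, command, reason, issued_by)
--   VALUES ('pod-abc123', 'decommission', 'scale-down', 'admin');
--   -- Worker polls after each task:
--   SELECT id, command, reason FROM worker_commands
--   WHERE worker_id = 'pod-abc123' AND status = 'pending';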
322  backend/migrations/074_worker_task_queue.sql  Normal file
@@ -0,0 +1,322 @@
-- Migration 074: Worker Task Queue System
-- Implements role-based task queue with per-store locking and capacity tracking

-- Task queue table
CREATE TABLE IF NOT EXISTS worker_tasks (
    id SERIAL PRIMARY KEY,

    -- Task identification
    role VARCHAR(50) NOT NULL, -- store_discovery, entry_point_discovery, product_discovery, product_resync, analytics_refresh
    dispensary_id INTEGER REFERENCES dispensaries(id) ON DELETE CASCADE,
    platform VARCHAR(20), -- dutchie, jane, treez, etc.

    -- Task state
    status VARCHAR(20) NOT NULL DEFAULT 'pending',
    priority INTEGER DEFAULT 0, -- Higher = more urgent

    -- Scheduling
    scheduled_for TIMESTAMPTZ, -- For batch scheduling (e.g., every 4 hours)

    -- Ownership
    worker_id VARCHAR(100), -- Pod name or worker ID
    claimed_at TIMESTAMPTZ,
    started_at TIMESTAMPTZ,
    completed_at TIMESTAMPTZ,
    last_heartbeat_at TIMESTAMPTZ,

    -- Results
    result JSONB, -- Task output data
    error_message TEXT,
    retry_count INTEGER DEFAULT 0,
    max_retries INTEGER DEFAULT 3,

    -- Metadata
    created_at TIMESTAMPTZ DEFAULT NOW(),
    updated_at TIMESTAMPTZ DEFAULT NOW(),

    -- Constraints
    CONSTRAINT valid_status CHECK (status IN ('pending', 'claimed', 'running', 'completed', 'failed', 'stale'))
);

-- Indexes for efficient task claiming
CREATE INDEX IF NOT EXISTS idx_worker_tasks_pending
    ON worker_tasks(role, priority DESC, created_at ASC)
    WHERE status = 'pending';

CREATE INDEX IF NOT EXISTS idx_worker_tasks_claimed
    ON worker_tasks(worker_id, claimed_at)
    WHERE status = 'claimed';

CREATE INDEX IF NOT EXISTS idx_worker_tasks_running
    ON worker_tasks(worker_id, last_heartbeat_at)
    WHERE status = 'running';

CREATE INDEX IF NOT EXISTS idx_worker_tasks_dispensary
    ON worker_tasks(dispensary_id)
    WHERE dispensary_id IS NOT NULL;

CREATE INDEX IF NOT EXISTS idx_worker_tasks_scheduled
    ON worker_tasks(scheduled_for)
    WHERE status = 'pending' AND scheduled_for IS NOT NULL;

CREATE INDEX IF NOT EXISTS idx_worker_tasks_history
    ON worker_tasks(role, completed_at DESC)
    WHERE status IN ('completed', 'failed');

-- Partial unique index to prevent duplicate active tasks per store
-- Only one task can be claimed/running for a given dispensary at a time
CREATE UNIQUE INDEX IF NOT EXISTS idx_worker_tasks_unique_active_store
    ON worker_tasks(dispensary_id)
    WHERE status IN ('claimed', 'running') AND dispensary_id IS NOT NULL;

-- Worker registration table (tracks active workers)
CREATE TABLE IF NOT EXISTS worker_registry (
    id SERIAL PRIMARY KEY,
    worker_id VARCHAR(100) UNIQUE NOT NULL,
    role VARCHAR(50) NOT NULL,
    pod_name VARCHAR(100),
    hostname VARCHAR(100),
    started_at TIMESTAMPTZ DEFAULT NOW(),
    last_heartbeat_at TIMESTAMPTZ DEFAULT NOW(),
    tasks_completed INTEGER DEFAULT 0,
    tasks_failed INTEGER DEFAULT 0,
    status VARCHAR(20) DEFAULT 'active',

    CONSTRAINT valid_worker_status CHECK (status IN ('active', 'idle', 'offline'))
);

CREATE INDEX IF NOT EXISTS idx_worker_registry_role
    ON worker_registry(role, status);

CREATE INDEX IF NOT EXISTS idx_worker_registry_heartbeat
    ON worker_registry(last_heartbeat_at)
    WHERE status = 'active';

-- Task completion tracking (summarized history)
CREATE TABLE IF NOT EXISTS task_completion_log (
    id SERIAL PRIMARY KEY,
    role VARCHAR(50) NOT NULL,
    date DATE NOT NULL DEFAULT CURRENT_DATE,
    hour INTEGER NOT NULL DEFAULT EXTRACT(HOUR FROM NOW()),

    tasks_created INTEGER DEFAULT 0,
    tasks_completed INTEGER DEFAULT 0,
    tasks_failed INTEGER DEFAULT 0,

    avg_duration_sec NUMERIC(10,2),
    min_duration_sec NUMERIC(10,2),
    max_duration_sec NUMERIC(10,2),

    updated_at TIMESTAMPTZ DEFAULT NOW(),

    UNIQUE(role, date, hour)
);

-- Capacity planning view
CREATE OR REPLACE VIEW v_worker_capacity AS
SELECT
    role,
    COUNT(*) FILTER (WHERE status = 'pending') as pending_tasks,
    COUNT(*) FILTER (WHERE status = 'pending' AND (scheduled_for IS NULL OR scheduled_for <= NOW())) as ready_tasks,
    COUNT(*) FILTER (WHERE status = 'claimed') as claimed_tasks,
    COUNT(*) FILTER (WHERE status = 'running') as running_tasks,
    COUNT(*) FILTER (WHERE status = 'completed' AND completed_at > NOW() - INTERVAL '1 hour') as completed_last_hour,
    COUNT(*) FILTER (WHERE status = 'failed' AND completed_at > NOW() - INTERVAL '1 hour') as failed_last_hour,
    COUNT(DISTINCT worker_id) FILTER (WHERE status IN ('claimed', 'running')) as active_workers,
    AVG(EXTRACT(EPOCH FROM (completed_at - started_at)))
        FILTER (WHERE status = 'completed' AND completed_at > NOW() - INTERVAL '1 hour') as avg_duration_sec,
    -- Capacity planning metrics
    CASE
        WHEN COUNT(*) FILTER (WHERE status = 'completed' AND completed_at > NOW() - INTERVAL '1 hour') > 0
        THEN 3600.0 / NULLIF(AVG(EXTRACT(EPOCH FROM (completed_at - started_at)))
            FILTER (WHERE status = 'completed' AND completed_at > NOW() - INTERVAL '1 hour'), 0)
        ELSE NULL
    END as tasks_per_worker_hour,
    -- Estimated time to drain queue
    CASE
        WHEN COUNT(DISTINCT worker_id) FILTER (WHERE status IN ('claimed', 'running')) > 0
          AND COUNT(*) FILTER (WHERE status = 'completed' AND completed_at > NOW() - INTERVAL '1 hour') > 0
        THEN COUNT(*) FILTER (WHERE status = 'pending') / NULLIF(
            COUNT(DISTINCT worker_id) FILTER (WHERE status IN ('claimed', 'running')) *
            (3600.0 / NULLIF(AVG(EXTRACT(EPOCH FROM (completed_at - started_at)))
                FILTER (WHERE status = 'completed' AND completed_at > NOW() - INTERVAL '1 hour'), 0)),
            0
        )
        ELSE NULL
    END as estimated_hours_to_drain
FROM worker_tasks
GROUP BY role;

-- Task history view (for UI)
CREATE OR REPLACE VIEW v_task_history AS
SELECT
    t.id,
    t.role,
    t.dispensary_id,
    d.name as dispensary_name,
    t.platform,
    t.status,
    t.priority,
    t.worker_id,
    t.scheduled_for,
    t.claimed_at,
    t.started_at,
    t.completed_at,
    t.error_message,
    t.retry_count,
    t.created_at,
    EXTRACT(EPOCH FROM (t.completed_at - t.started_at)) as duration_sec
FROM worker_tasks t
LEFT JOIN dispensaries d ON d.id = t.dispensary_id
ORDER BY t.created_at DESC;

-- Function to claim a task atomically
CREATE OR REPLACE FUNCTION claim_task(
    p_role VARCHAR(50),
    p_worker_id VARCHAR(100)
) RETURNS worker_tasks AS $$
DECLARE
    claimed_task worker_tasks;
BEGIN
    UPDATE worker_tasks
    SET
        status = 'claimed',
        worker_id = p_worker_id,
        claimed_at = NOW(),
        updated_at = NOW()
    WHERE id = (
        SELECT id FROM worker_tasks
        WHERE role = p_role
          AND status = 'pending'
          AND (scheduled_for IS NULL OR scheduled_for <= NOW())
          -- Exclude stores that already have an active task
          AND (dispensary_id IS NULL OR dispensary_id NOT IN (
              SELECT dispensary_id FROM worker_tasks
              WHERE status IN ('claimed', 'running')
                AND dispensary_id IS NOT NULL
          ))
        ORDER BY priority DESC, created_at ASC
        LIMIT 1
        FOR UPDATE SKIP LOCKED
    )
    RETURNING * INTO claimed_task;

    RETURN claimed_task;
END;
$$ LANGUAGE plpgsql;

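-- Example usage (illustrative; worker id is a placeholder). A NULL id in the
-- returned row means no claimable task was available.
--   SELECT * FROM claim_task('product_resync', 'pod-abc123');
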
-- Function to mark stale tasks (workers that died)
CREATE OR REPLACE FUNCTION recover_stale_tasks(
    stale_threshold_minutes INTEGER DEFAULT 10
) RETURNS INTEGER AS $$
DECLARE
    recovered_count INTEGER;
BEGIN
    WITH stale AS (
        UPDATE worker_tasks
        SET
            status = 'pending',
            worker_id = NULL,
            claimed_at = NULL,
            started_at = NULL,
            retry_count = retry_count + 1,
            updated_at = NOW()
        WHERE status IN ('claimed', 'running')
          AND last_heartbeat_at < NOW() - (stale_threshold_minutes || ' minutes')::INTERVAL
          AND retry_count < max_retries
        RETURNING id
    )
    SELECT COUNT(*) INTO recovered_count FROM stale;

    -- Mark tasks that exceeded retries as failed
    UPDATE worker_tasks
    SET
        status = 'failed',
        error_message = 'Exceeded max retries after worker failures',
        completed_at = NOW(),
        updated_at = NOW()
    WHERE status IN ('claimed', 'running')
      AND last_heartbeat_at < NOW() - (stale_threshold_minutes || ' minutes')::INTERVAL
      AND retry_count >= max_retries;

    RETURN recovered_count;
END;
$$ LANGUAGE plpgsql;

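-- Example usage (illustrative): intended to be called periodically, e.g. once
-- a minute from the scheduler.
--   SELECT recover_stale_tasks(10);  -- 10-minute heartbeat threshold
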
-- Function to generate daily resync tasks
CREATE OR REPLACE FUNCTION generate_resync_tasks(
    p_batches_per_day INTEGER DEFAULT 6, -- Every 4 hours
    p_date DATE DEFAULT CURRENT_DATE
) RETURNS INTEGER AS $$
DECLARE
    store_count INTEGER;
    stores_per_batch INTEGER;
    batch_num INTEGER;
    batch_count INTEGER;
    scheduled_time TIMESTAMPTZ;
    created_count INTEGER := 0;
BEGIN
    -- Count active stores that need resync
    SELECT COUNT(*) INTO store_count
    FROM dispensaries
    WHERE crawl_enabled = true
      AND menu_type = 'dutchie'
      AND platform_dispensary_id IS NOT NULL;

    IF store_count = 0 THEN
        RETURN 0;
    END IF;

    stores_per_batch := CEIL(store_count::NUMERIC / p_batches_per_day);

    FOR batch_num IN 0..(p_batches_per_day - 1) LOOP
        scheduled_time := p_date + (batch_num * 4 || ' hours')::INTERVAL;

        INSERT INTO worker_tasks (role, dispensary_id, platform, scheduled_for, priority)
        SELECT
            'product_resync',
            d.id,
            'dutchie',
            scheduled_time,
            0
        FROM (
            SELECT id, ROW_NUMBER() OVER (ORDER BY id) as rn
            FROM dispensaries
            WHERE crawl_enabled = true
              AND menu_type = 'dutchie'
              AND platform_dispensary_id IS NOT NULL
        ) d
        WHERE d.rn > (batch_num * stores_per_batch)
          AND d.rn <= ((batch_num + 1) * stores_per_batch)
        ON CONFLICT DO NOTHING;

        -- GET DIAGNOSTICS only assigns a single item (no expressions), so
        -- capture the batch's row count first, then accumulate it.
        GET DIAGNOSTICS batch_count = ROW_COUNT;
        created_count := created_count + batch_count;
    END LOOP;

    RETURN created_count;
END;
$$ LANGUAGE plpgsql;

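-- Example usage (illustrative):
--   SELECT generate_resync_tasks();                     -- 6 batches today
--   SELECT generate_resync_tasks(6, CURRENT_DATE + 1);  -- schedule tomorrow
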
-- Trigger to update timestamp
CREATE OR REPLACE FUNCTION update_worker_tasks_timestamp()
RETURNS TRIGGER AS $$
BEGIN
    NEW.updated_at = NOW();
    RETURN NEW;
END;
$$ LANGUAGE plpgsql;

DROP TRIGGER IF EXISTS worker_tasks_updated_at ON worker_tasks;
CREATE TRIGGER worker_tasks_updated_at
    BEFORE UPDATE ON worker_tasks
    FOR EACH ROW
    EXECUTE FUNCTION update_worker_tasks_timestamp();

-- Comments
COMMENT ON TABLE worker_tasks IS 'Central task queue for all worker roles';
COMMENT ON TABLE worker_registry IS 'Registry of active workers and their stats';
COMMENT ON TABLE task_completion_log IS 'Hourly aggregated task completion metrics';
COMMENT ON VIEW v_worker_capacity IS 'Real-time capacity planning metrics per role';
COMMENT ON VIEW v_task_history IS 'Task history with dispensary details for UI';
COMMENT ON FUNCTION claim_task IS 'Atomically claim a task for a worker, respecting per-store locking';
COMMENT ON FUNCTION recover_stale_tasks IS 'Release tasks from dead workers back to pending';
COMMENT ON FUNCTION generate_resync_tasks IS 'Generate daily product resync tasks in batches';
13  backend/migrations/075_consecutive_misses.sql  Normal file
@@ -0,0 +1,13 @@
-- Migration 075: Add consecutive_misses column to store_products
-- Used to track how many consecutive crawls a product has been missing from the feed
-- After 3 consecutive misses, product is marked as OOS

ALTER TABLE store_products
    ADD COLUMN IF NOT EXISTS consecutive_misses INTEGER NOT NULL DEFAULT 0;

-- Index for finding products that need OOS check
CREATE INDEX IF NOT EXISTS idx_store_products_consecutive_misses
    ON store_products (dispensary_id, consecutive_misses)
    WHERE consecutive_misses > 0;

COMMENT ON COLUMN store_products.consecutive_misses IS 'Number of consecutive crawls where product was not in feed. Reset to 0 when seen. At 3, mark OOS.';
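-- Example (a minimal sketch of the intended crawl bookkeeping; the ids are
-- placeholders and the updates live in application code, not this migration):
--   -- Product missing from a crawl:
--   UPDATE store_products SET consecutive_misses = consecutive_misses + 1 WHERE id = 456;
--   -- Product seen again:
--   UPDATE store_products SET consecutive_misses = 0 WHERE id = 456;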
71  backend/migrations/076_visitor_analytics.sql  Normal file
@@ -0,0 +1,71 @@
-- Visitor location analytics for Findagram
-- Tracks visitor locations to understand popular areas

CREATE TABLE IF NOT EXISTS visitor_locations (
    id SERIAL PRIMARY KEY,

    -- Location data (from IP lookup)
    ip_hash VARCHAR(64), -- Hashed IP for privacy (SHA256)
    city VARCHAR(100),
    state VARCHAR(100),
    state_code VARCHAR(10),
    country VARCHAR(100),
    country_code VARCHAR(10),
    latitude DECIMAL(10, 7),
    longitude DECIMAL(10, 7),

    -- Visit metadata
    domain VARCHAR(50) NOT NULL, -- 'findagram.co', 'findadispo.com', etc.
    page_path VARCHAR(255), -- '/products', '/dispensaries/123', etc.
    referrer VARCHAR(500),
    user_agent VARCHAR(500),

    -- Session tracking
    session_id VARCHAR(64), -- For grouping page views in a session

    -- Timestamps
    created_at TIMESTAMPTZ DEFAULT NOW()
);

-- Indexes for analytics queries
CREATE INDEX IF NOT EXISTS idx_visitor_locations_domain ON visitor_locations(domain);
CREATE INDEX IF NOT EXISTS idx_visitor_locations_city_state ON visitor_locations(city, state_code);
CREATE INDEX IF NOT EXISTS idx_visitor_locations_created_at ON visitor_locations(created_at);
CREATE INDEX IF NOT EXISTS idx_visitor_locations_session ON visitor_locations(session_id);

-- Aggregated daily stats (materialized for performance)
CREATE TABLE IF NOT EXISTS visitor_location_stats (
    id SERIAL PRIMARY KEY,
    date DATE NOT NULL,
    domain VARCHAR(50) NOT NULL,
    city VARCHAR(100),
    state VARCHAR(100),
    state_code VARCHAR(10),
    country_code VARCHAR(10),

    -- Metrics
    visit_count INTEGER DEFAULT 0,
    unique_sessions INTEGER DEFAULT 0,

    UNIQUE(date, domain, city, state_code, country_code)
);

CREATE INDEX IF NOT EXISTS idx_visitor_stats_date ON visitor_location_stats(date);
CREATE INDEX IF NOT EXISTS idx_visitor_stats_domain ON visitor_location_stats(domain);
CREATE INDEX IF NOT EXISTS idx_visitor_stats_state ON visitor_location_stats(state_code);

-- View for easy querying of top locations
CREATE OR REPLACE VIEW v_top_visitor_locations AS
SELECT
    domain,
    city,
    state,
    state_code,
    country_code,
    COUNT(*) as total_visits,
    COUNT(DISTINCT session_id) as unique_sessions,
    MAX(created_at) as last_visit
FROM visitor_locations
WHERE created_at > NOW() - INTERVAL '30 days'
GROUP BY domain, city, state, state_code, country_code
ORDER BY total_visits DESC;
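-- Example (a minimal daily rollup sketch into visitor_location_stats,
-- assuming a scheduled job; not created by this migration):
--   INSERT INTO visitor_location_stats
--       (date, domain, city, state, state_code, country_code, visit_count, unique_sessions)
--   SELECT created_at::date, domain, city, state, state_code, country_code,
--          COUNT(*), COUNT(DISTINCT session_id)
--   FROM visitor_locations
--   WHERE created_at::date = CURRENT_DATE
--   GROUP BY 1, 2, 3, 4, 5, 6
--   ON CONFLICT (date, domain, city, state_code, country_code)
--   DO UPDATE SET visit_count = EXCLUDED.visit_count,
--                 unique_sessions = EXCLUDED.unique_sessions;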
141  backend/migrations/076_worker_registry.sql  Normal file
@@ -0,0 +1,141 @@
-- Migration 076: Worker Registry for Dynamic Workers
-- Workers register on startup, receive a friendly name, and report heartbeats

-- Name pool for workers (expandable, no hardcoding)
CREATE TABLE IF NOT EXISTS worker_name_pool (
    id SERIAL PRIMARY KEY,
    name VARCHAR(50) UNIQUE NOT NULL,
    in_use BOOLEAN DEFAULT FALSE,
    assigned_to VARCHAR(100), -- worker_id
    assigned_at TIMESTAMPTZ,
    created_at TIMESTAMPTZ DEFAULT NOW()
);

-- Seed with initial names (can add more via API)
INSERT INTO worker_name_pool (name) VALUES
    ('Alice'), ('Bella'), ('Clara'), ('Diana'), ('Elena'),
    ('Fiona'), ('Grace'), ('Hazel'), ('Iris'), ('Julia'),
    ('Katie'), ('Luna'), ('Mia'), ('Nora'), ('Olive'),
    ('Pearl'), ('Quinn'), ('Rosa'), ('Sara'), ('Tara'),
    ('Uma'), ('Vera'), ('Wendy'), ('Xena'), ('Yuki'), ('Zara'),
    ('Amber'), ('Blake'), ('Coral'), ('Dawn'), ('Echo'),
    ('Fleur'), ('Gem'), ('Haven'), ('Ivy'), ('Jade'),
    ('Kira'), ('Lotus'), ('Maple'), ('Nova'), ('Onyx'),
    ('Pixel'), ('Quest'), ('Raven'), ('Sage'), ('Terra'),
    ('Unity'), ('Violet'), ('Willow'), ('Xylo'), ('Yara'), ('Zen')
ON CONFLICT (name) DO NOTHING;

-- Worker registry - tracks active workers
CREATE TABLE IF NOT EXISTS worker_registry (
    id SERIAL PRIMARY KEY,
    worker_id VARCHAR(100) UNIQUE NOT NULL, -- e.g., "pod-abc123" or uuid
    friendly_name VARCHAR(50), -- assigned from pool
    role VARCHAR(50) NOT NULL, -- task role
    pod_name VARCHAR(100), -- k8s pod name
    hostname VARCHAR(100), -- machine hostname
    ip_address VARCHAR(50), -- worker IP
    status VARCHAR(20) DEFAULT 'starting', -- starting, active, idle, offline, terminated
    started_at TIMESTAMPTZ DEFAULT NOW(),
    last_heartbeat_at TIMESTAMPTZ DEFAULT NOW(),
    last_task_at TIMESTAMPTZ,
    tasks_completed INTEGER DEFAULT 0,
    tasks_failed INTEGER DEFAULT 0,
    current_task_id INTEGER,
    metadata JSONB DEFAULT '{}',
    created_at TIMESTAMPTZ DEFAULT NOW(),
    updated_at TIMESTAMPTZ DEFAULT NOW()
);

-- Indexes for worker registry
CREATE INDEX IF NOT EXISTS idx_worker_registry_status ON worker_registry(status);
CREATE INDEX IF NOT EXISTS idx_worker_registry_role ON worker_registry(role);
CREATE INDEX IF NOT EXISTS idx_worker_registry_heartbeat ON worker_registry(last_heartbeat_at);

-- Function to assign a name to a new worker
CREATE OR REPLACE FUNCTION assign_worker_name(p_worker_id VARCHAR(100))
RETURNS VARCHAR(50) AS $$
DECLARE
    v_name VARCHAR(50);
BEGIN
    -- Try to get an unused name
    UPDATE worker_name_pool
    SET in_use = TRUE, assigned_to = p_worker_id, assigned_at = NOW()
    WHERE id = (
        SELECT id FROM worker_name_pool
        WHERE in_use = FALSE
        ORDER BY RANDOM()
        LIMIT 1
        FOR UPDATE SKIP LOCKED
    )
    RETURNING name INTO v_name;

    -- If no names available, generate one
    IF v_name IS NULL THEN
        v_name := 'Worker-' || SUBSTRING(p_worker_id FROM 1 FOR 8);
    END IF;

    RETURN v_name;
END;
$$ LANGUAGE plpgsql;

-- Function to release a worker's name back to the pool
CREATE OR REPLACE FUNCTION release_worker_name(p_worker_id VARCHAR(100))
RETURNS VOID AS $$
BEGIN
    UPDATE worker_name_pool
    SET in_use = FALSE, assigned_to = NULL, assigned_at = NULL
    WHERE assigned_to = p_worker_id;
END;
$$ LANGUAGE plpgsql;

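-- Example usage (illustrative; worker id is a placeholder):
--   SELECT assign_worker_name('pod-abc123');   -- e.g. returns 'Luna'
--   SELECT release_worker_name('pod-abc123');  -- on shutdown
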
-- Function to mark stale workers as offline
CREATE OR REPLACE FUNCTION mark_stale_workers(stale_threshold_minutes INTEGER DEFAULT 5)
RETURNS INTEGER AS $$
DECLARE
    v_count INTEGER;
BEGIN
    UPDATE worker_registry
    SET status = 'offline', updated_at = NOW()
    WHERE status IN ('active', 'idle', 'starting')
      AND last_heartbeat_at < NOW() - (stale_threshold_minutes || ' minutes')::INTERVAL;
    -- UPDATE ... RETURNING cannot feed an aggregate like COUNT(*); read the
    -- affected row count via GET DIAGNOSTICS instead.
    GET DIAGNOSTICS v_count = ROW_COUNT;

    -- Release names from offline workers
    PERFORM release_worker_name(worker_id)
    FROM worker_registry
    WHERE status = 'offline'
      AND last_heartbeat_at < NOW() - INTERVAL '30 minutes';

    RETURN COALESCE(v_count, 0);
END;
$$ LANGUAGE plpgsql;

-- View for dashboard
CREATE OR REPLACE VIEW v_active_workers AS
SELECT
    wr.id,
    wr.worker_id,
    wr.friendly_name,
    wr.role,
    wr.status,
    wr.pod_name,
    wr.hostname,
    wr.started_at,
    wr.last_heartbeat_at,
    wr.last_task_at,
    wr.tasks_completed,
    wr.tasks_failed,
    wr.current_task_id,
    EXTRACT(EPOCH FROM (NOW() - wr.last_heartbeat_at)) as seconds_since_heartbeat,
    CASE
        WHEN wr.status = 'offline' THEN 'offline'
        WHEN wr.last_heartbeat_at < NOW() - INTERVAL '2 minutes' THEN 'stale'
        WHEN wr.current_task_id IS NOT NULL THEN 'busy'
        ELSE 'ready'
    END as health_status
FROM worker_registry wr
WHERE wr.status != 'terminated'
ORDER BY wr.status = 'active' DESC, wr.last_heartbeat_at DESC;

COMMENT ON TABLE worker_registry IS 'Tracks all workers that have registered with the system';
COMMENT ON TABLE worker_name_pool IS 'Pool of friendly names for workers - expandable via API';
35  backend/migrations/077_click_events_location.sql  Normal file
@@ -0,0 +1,35 @@
-- Migration: Add visitor location and dispensary name to click events
-- Captures where visitors are clicking from and which dispensary

-- Add visitor location columns
ALTER TABLE product_click_events
    ADD COLUMN IF NOT EXISTS visitor_city VARCHAR(100);

ALTER TABLE product_click_events
    ADD COLUMN IF NOT EXISTS visitor_state VARCHAR(10);

ALTER TABLE product_click_events
    ADD COLUMN IF NOT EXISTS visitor_lat DECIMAL(10, 7);

ALTER TABLE product_click_events
    ADD COLUMN IF NOT EXISTS visitor_lng DECIMAL(10, 7);

-- Add dispensary name for easier reporting
ALTER TABLE product_click_events
    ADD COLUMN IF NOT EXISTS dispensary_name VARCHAR(255);

-- Create index for location-based analytics
CREATE INDEX IF NOT EXISTS idx_product_click_events_visitor_state
    ON product_click_events(visitor_state)
    WHERE visitor_state IS NOT NULL;

CREATE INDEX IF NOT EXISTS idx_product_click_events_visitor_city
    ON product_click_events(visitor_city)
    WHERE visitor_city IS NOT NULL;

-- Add comments
COMMENT ON COLUMN product_click_events.visitor_city IS 'City where the visitor is located (from IP geolocation)';
COMMENT ON COLUMN product_click_events.visitor_state IS 'State where the visitor is located (from IP geolocation)';
COMMENT ON COLUMN product_click_events.visitor_lat IS 'Visitor latitude (from IP geolocation)';
COMMENT ON COLUMN product_click_events.visitor_lng IS 'Visitor longitude (from IP geolocation)';
COMMENT ON COLUMN product_click_events.dispensary_name IS 'Name of the dispensary (denormalized for easier reporting)';
8  backend/migrations/078_proxy_consecutive_403.sql  Normal file
@@ -0,0 +1,8 @@
-- Migration 078: Add consecutive_403_count to proxies table
-- Per workflow-12102025.md: Track consecutive 403s per proxy
-- After 3 consecutive 403s with different fingerprints → disable proxy

ALTER TABLE proxies ADD COLUMN IF NOT EXISTS consecutive_403_count INTEGER DEFAULT 0;

-- Add comment explaining the column
COMMENT ON COLUMN proxies.consecutive_403_count IS 'Tracks consecutive 403 blocks. Reset to 0 on success. Proxy disabled at 3.';
49  backend/migrations/079_task_schedules.sql  Normal file
@@ -0,0 +1,49 @@
-- Migration 079: Task Schedules for Database-Driven Scheduler
-- Per TASK_WORKFLOW_2024-12-10.md: Replaces node-cron with DB-driven scheduling
--
-- 2024-12-10: Created for reliable, multi-replica-safe task scheduling

-- task_schedules: Stores schedule definitions and state
CREATE TABLE IF NOT EXISTS task_schedules (
    id SERIAL PRIMARY KEY,
    name VARCHAR(100) NOT NULL UNIQUE,
    role VARCHAR(50) NOT NULL, -- TaskRole: product_refresh, store_discovery, etc.
    description TEXT,

    -- Schedule configuration
    enabled BOOLEAN DEFAULT TRUE,
    interval_hours INTEGER NOT NULL DEFAULT 4,
    priority INTEGER DEFAULT 0,

    -- Optional scope filters
    state_code VARCHAR(2), -- NULL = all states
    platform VARCHAR(50), -- NULL = all platforms

    -- Execution state (updated by scheduler)
    last_run_at TIMESTAMPTZ,
    next_run_at TIMESTAMPTZ,
    last_task_count INTEGER DEFAULT 0,
    last_error TEXT,

    created_at TIMESTAMPTZ DEFAULT NOW(),
    updated_at TIMESTAMPTZ DEFAULT NOW()
);

-- Indexes for scheduler queries
CREATE INDEX IF NOT EXISTS idx_task_schedules_enabled ON task_schedules(enabled) WHERE enabled = TRUE;
CREATE INDEX IF NOT EXISTS idx_task_schedules_next_run ON task_schedules(next_run_at) WHERE enabled = TRUE;

-- Insert default schedules
INSERT INTO task_schedules (name, role, interval_hours, priority, description, next_run_at)
VALUES
    ('product_refresh_all', 'product_refresh', 4, 0, 'Generate product refresh tasks for all crawl-enabled stores every 4 hours', NOW()),
    ('store_discovery_dutchie', 'store_discovery', 24, 5, 'Discover new Dutchie stores daily', NOW()),
    ('analytics_refresh', 'analytics_refresh', 6, 0, 'Refresh analytics materialized views every 6 hours', NOW())
ON CONFLICT (name) DO NOTHING;

-- Comment for documentation
COMMENT ON TABLE task_schedules IS 'Database-driven task scheduler configuration. Per TASK_WORKFLOW_2024-12-10.md:
- Schedules persist in DB (survive restarts)
- Uses SELECT FOR UPDATE SKIP LOCKED for multi-replica safety
- Scheduler polls every 60s and executes due schedules
- Creates tasks in worker_tasks for task-worker.ts to process';
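-- Example (a minimal sketch of the scheduler's claim query described in the
-- comment above; the actual polling loop lives in application code):
--   SELECT id, name, role FROM task_schedules
--   WHERE enabled = TRUE AND next_run_at <= NOW()
--   ORDER BY next_run_at
--   FOR UPDATE SKIP LOCKED;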
58  backend/migrations/080_raw_crawl_payloads.sql  Normal file
@@ -0,0 +1,58 @@
-- Migration 080: Raw Crawl Payloads Metadata Table
-- Per TASK_WORKFLOW_2024-12-10.md: Store full GraphQL payloads for historical analysis
--
-- Design Pattern: Metadata/Payload Separation
-- - Metadata (this table): Small, indexed, queryable
-- - Payload (filesystem): Gzipped JSON at storage_path
--
-- Benefits:
-- - Compare any two crawls to see what changed
-- - Replay/re-normalize historical data if logic changes
-- - Debug issues by seeing exactly what the API returned
-- - DB stays small, backups stay fast
--
-- Storage location: /storage/payloads/{year}/{month}/{day}/store_{id}_{timestamp}.json.gz
-- Compression: ~90% reduction (1.5MB -> 150KB per crawl)

CREATE TABLE IF NOT EXISTS raw_crawl_payloads (
    id SERIAL PRIMARY KEY,

    -- Links to crawl tracking
    crawl_run_id INTEGER REFERENCES crawl_runs(id) ON DELETE SET NULL,
    dispensary_id INTEGER NOT NULL REFERENCES dispensaries(id) ON DELETE CASCADE,

    -- File location (gzipped JSON)
    storage_path TEXT NOT NULL,

    -- Metadata for quick queries without loading file
    product_count INTEGER NOT NULL DEFAULT 0,
    size_bytes INTEGER,      -- Compressed size
    size_bytes_raw INTEGER,  -- Uncompressed size

    -- Timestamps
    fetched_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),

    -- Optional: checksum for integrity verification
    checksum_sha256 VARCHAR(64)
);

-- Indexes for common queries
CREATE INDEX IF NOT EXISTS idx_raw_crawl_payloads_dispensary
    ON raw_crawl_payloads(dispensary_id);

CREATE INDEX IF NOT EXISTS idx_raw_crawl_payloads_dispensary_fetched
    ON raw_crawl_payloads(dispensary_id, fetched_at DESC);

CREATE INDEX IF NOT EXISTS idx_raw_crawl_payloads_fetched
    ON raw_crawl_payloads(fetched_at DESC);

CREATE INDEX IF NOT EXISTS idx_raw_crawl_payloads_crawl_run
    ON raw_crawl_payloads(crawl_run_id)
    WHERE crawl_run_id IS NOT NULL;

-- Comments
COMMENT ON TABLE raw_crawl_payloads IS 'Metadata for raw GraphQL payloads stored on filesystem. Per TASK_WORKFLOW_2024-12-10.md: Full payloads enable historical diffs and replay.';
COMMENT ON COLUMN raw_crawl_payloads.storage_path IS 'Path to gzipped JSON file, e.g. /storage/payloads/2024/12/10/store_123_1702234567.json.gz';
COMMENT ON COLUMN raw_crawl_payloads.size_bytes IS 'Compressed file size in bytes';
COMMENT ON COLUMN raw_crawl_payloads.size_bytes_raw IS 'Uncompressed payload size in bytes';
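-- Example usage (illustrative; dispensary id 123 is a placeholder): fetch the
-- two most recent payload files for a store to diff them offline.
--   SELECT id, storage_path, product_count, fetched_at
--   FROM raw_crawl_payloads
--   WHERE dispensary_id = 123
--   ORDER BY fetched_at DESC
--   LIMIT 2;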
37  backend/migrations/081_payload_fetch_columns.sql  Normal file
@@ -0,0 +1,37 @@
-- Migration 081: Payload Fetch Columns
-- Per TASK_WORKFLOW_2024-12-10.md: Separates API fetch from data processing
--
-- New architecture:
-- - payload_fetch: Hits Dutchie API, saves raw payload to disk
-- - product_refresh: Reads local payload, normalizes, upserts to DB
--
-- This migration adds:
-- 1. payload column to worker_tasks (for task chaining data)
-- 2. processed_at column to raw_crawl_payloads (track when payload was processed)
-- 3. last_fetch_at column to dispensaries (track when last payload was fetched)

-- Add payload column to worker_tasks for task chaining
-- Used by payload_fetch to pass payload_id to product_refresh
ALTER TABLE worker_tasks
    ADD COLUMN IF NOT EXISTS payload JSONB DEFAULT NULL;

COMMENT ON COLUMN worker_tasks.payload IS 'Per TASK_WORKFLOW_2024-12-10.md: Task chaining data (e.g., payload_id from payload_fetch to product_refresh)';

-- Add processed_at to raw_crawl_payloads
-- Tracks when the payload was processed by product_refresh
ALTER TABLE raw_crawl_payloads
    ADD COLUMN IF NOT EXISTS processed_at TIMESTAMPTZ DEFAULT NULL;

COMMENT ON COLUMN raw_crawl_payloads.processed_at IS 'When this payload was processed by product_refresh handler';

-- Index for finding unprocessed payloads
CREATE INDEX IF NOT EXISTS idx_raw_crawl_payloads_unprocessed
    ON raw_crawl_payloads(dispensary_id, fetched_at DESC)
    WHERE processed_at IS NULL;

-- Add last_fetch_at to dispensaries
-- Tracks when the last payload was fetched (separate from last_crawl_at which is when processing completed)
ALTER TABLE dispensaries
    ADD COLUMN IF NOT EXISTS last_fetch_at TIMESTAMPTZ DEFAULT NULL;

COMMENT ON COLUMN dispensaries.last_fetch_at IS 'Per TASK_WORKFLOW_2024-12-10.md: When last payload was fetched from API (separate from last_crawl_at which is when processing completed)';
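-- Example usage (illustrative; dispensary id 123 is a placeholder): find the
-- latest unprocessed payload, served by idx_raw_crawl_payloads_unprocessed.
--   SELECT id, storage_path, fetched_at
--   FROM raw_crawl_payloads
--   WHERE dispensary_id = 123 AND processed_at IS NULL
--   ORDER BY fetched_at DESC
--   LIMIT 1;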
27  backend/migrations/082_proxy_notification_trigger.sql  Normal file
@@ -0,0 +1,27 @@
-- Migration: 082_proxy_notification_trigger
-- Date: 2024-12-11
-- Description: Add PostgreSQL NOTIFY trigger to alert workers when proxies are added

-- Create function to notify workers when active proxy is added/activated
CREATE OR REPLACE FUNCTION notify_proxy_added()
RETURNS TRIGGER AS $$
BEGIN
    -- Only notify if proxy is active
    IF NEW.active = true THEN
        PERFORM pg_notify('proxy_added', NEW.id::text);
    END IF;
    RETURN NEW;
END;
$$ LANGUAGE plpgsql;

-- Drop existing trigger if any
DROP TRIGGER IF EXISTS proxy_added_trigger ON proxies;

-- Create trigger on insert and update of active column
CREATE TRIGGER proxy_added_trigger
    AFTER INSERT OR UPDATE OF active ON proxies
    FOR EACH ROW
    EXECUTE FUNCTION notify_proxy_added();

COMMENT ON FUNCTION notify_proxy_added() IS
    'Sends PostgreSQL NOTIFY to proxy_added channel when an active proxy is added or activated. Workers LISTEN on this channel to wake up immediately.';
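-- Example usage (illustrative): a worker session subscribes with LISTEN; the
-- trigger delivers the new proxy id as the notification payload.
--   LISTEN proxy_added;
--   -- Manual test from another session (bypasses the trigger):
--   SELECT pg_notify('proxy_added', '42');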
88  backend/migrations/083_discovery_runs.sql  Normal file
@@ -0,0 +1,88 @@
-- Migration 083: Discovery Run Tracking
-- Tracks progress of store discovery runs step-by-step

-- Main discovery runs table
CREATE TABLE IF NOT EXISTS discovery_runs (
    id SERIAL PRIMARY KEY,
    platform VARCHAR(50) NOT NULL DEFAULT 'dutchie',
    status VARCHAR(20) NOT NULL DEFAULT 'running', -- running, completed, failed
    started_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    finished_at TIMESTAMPTZ,
    task_id INTEGER REFERENCES worker_task_queue(id),

    -- Totals
    states_total INTEGER DEFAULT 0,
    states_completed INTEGER DEFAULT 0,
    locations_discovered INTEGER DEFAULT 0,
    locations_promoted INTEGER DEFAULT 0,
    new_store_ids INTEGER[] DEFAULT '{}',

    -- Error info
    error_message TEXT,

    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);

-- Per-state progress within a run
CREATE TABLE IF NOT EXISTS discovery_run_states (
    id SERIAL PRIMARY KEY,
    run_id INTEGER NOT NULL REFERENCES discovery_runs(id) ON DELETE CASCADE,
    state_code VARCHAR(2) NOT NULL,
    status VARCHAR(20) NOT NULL DEFAULT 'pending', -- pending, running, completed, failed
    started_at TIMESTAMPTZ,
    finished_at TIMESTAMPTZ,

    -- Results
    cities_found INTEGER DEFAULT 0,
    locations_found INTEGER DEFAULT 0,
    locations_upserted INTEGER DEFAULT 0,
    new_dispensary_ids INTEGER[] DEFAULT '{}',

    -- Error info
    error_message TEXT,

    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),

    UNIQUE(run_id, state_code)
);

-- Step-by-step log for detailed progress tracking
CREATE TABLE IF NOT EXISTS discovery_run_steps (
    id SERIAL PRIMARY KEY,
    run_id INTEGER NOT NULL REFERENCES discovery_runs(id) ON DELETE CASCADE,
    state_code VARCHAR(2),
    step_name VARCHAR(100) NOT NULL,
    status VARCHAR(20) NOT NULL DEFAULT 'started', -- started, completed, failed
    started_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    finished_at TIMESTAMPTZ,

    -- Details (JSON for flexibility)
    details JSONB DEFAULT '{}',

    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);

-- Indexes for querying
CREATE INDEX IF NOT EXISTS idx_discovery_runs_status ON discovery_runs(status);
CREATE INDEX IF NOT EXISTS idx_discovery_runs_platform ON discovery_runs(platform);
CREATE INDEX IF NOT EXISTS idx_discovery_runs_started_at ON discovery_runs(started_at DESC);
CREATE INDEX IF NOT EXISTS idx_discovery_run_states_run_id ON discovery_run_states(run_id);
CREATE INDEX IF NOT EXISTS idx_discovery_run_steps_run_id ON discovery_run_steps(run_id);

-- View for latest run status per platform
CREATE OR REPLACE VIEW v_latest_discovery_runs AS
SELECT DISTINCT ON (platform)
    id,
    platform,
    status,
    started_at,
    finished_at,
    states_total,
    states_completed,
    locations_discovered,
    locations_promoted,
    array_length(new_store_ids, 1) as new_stores_count,
    error_message,
    EXTRACT(EPOCH FROM (COALESCE(finished_at, NOW()) - started_at)) as duration_seconds
FROM discovery_runs
ORDER BY platform, started_at DESC;
253  backend/migrations/084_dual_transport_preflight.sql  Normal file
@@ -0,0 +1,253 @@
-- Migration 084: Dual Transport Preflight System
-- Workers run both curl and http (Puppeteer) preflights on startup
-- Tasks can require a specific transport method

-- ===================================================================
-- PART 1: Add preflight columns to worker_registry
-- ===================================================================

-- Preflight status for curl/axios transport (proxy-based)
ALTER TABLE worker_registry
    ADD COLUMN IF NOT EXISTS preflight_curl_status VARCHAR(20) DEFAULT 'pending';

-- Preflight status for http/Puppeteer transport (browser-based)
ALTER TABLE worker_registry
    ADD COLUMN IF NOT EXISTS preflight_http_status VARCHAR(20) DEFAULT 'pending';

-- Timestamps for when each preflight completed
ALTER TABLE worker_registry
    ADD COLUMN IF NOT EXISTS preflight_curl_at TIMESTAMPTZ;

ALTER TABLE worker_registry
    ADD COLUMN IF NOT EXISTS preflight_http_at TIMESTAMPTZ;

-- Error messages for failed preflights
ALTER TABLE worker_registry
    ADD COLUMN IF NOT EXISTS preflight_curl_error TEXT;

ALTER TABLE worker_registry
    ADD COLUMN IF NOT EXISTS preflight_http_error TEXT;

-- Response time for successful preflights (ms)
ALTER TABLE worker_registry
    ADD COLUMN IF NOT EXISTS preflight_curl_ms INTEGER;

ALTER TABLE worker_registry
    ADD COLUMN IF NOT EXISTS preflight_http_ms INTEGER;

-- Constraints for preflight status values
ALTER TABLE worker_registry
    DROP CONSTRAINT IF EXISTS valid_preflight_curl_status;

ALTER TABLE worker_registry
    ADD CONSTRAINT valid_preflight_curl_status
    CHECK (preflight_curl_status IN ('pending', 'passed', 'failed', 'skipped'));

ALTER TABLE worker_registry
    DROP CONSTRAINT IF EXISTS valid_preflight_http_status;

ALTER TABLE worker_registry
    ADD CONSTRAINT valid_preflight_http_status
    CHECK (preflight_http_status IN ('pending', 'passed', 'failed', 'skipped'));

-- ===================================================================
-- PART 2: Add method column to worker_tasks
-- ===================================================================

-- Transport method requirement for the task
-- NULL   = no preference (any worker can claim)
-- 'curl' = requires curl/axios transport (proxy-based, fast)
-- 'http' = requires http/Puppeteer transport (browser-based, anti-detect)
ALTER TABLE worker_tasks
    ADD COLUMN IF NOT EXISTS method VARCHAR(10);

-- Constraint for valid method values
ALTER TABLE worker_tasks
    DROP CONSTRAINT IF EXISTS valid_task_method;

ALTER TABLE worker_tasks
    ADD CONSTRAINT valid_task_method
    CHECK (method IS NULL OR method IN ('curl', 'http'));

-- Partial index for method-based claiming of pending tasks
CREATE INDEX IF NOT EXISTS idx_worker_tasks_method
    ON worker_tasks(method)
    WHERE status = 'pending';

-- Backfill: default every existing task without a method to 'http',
-- since ALL current tasks require the Puppeteer/browser-based transport
UPDATE worker_tasks
SET method = 'http'
WHERE method IS NULL;
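
For illustration, a browser-only task would be enqueued with method = 'http'; the role and priority values below are hypothetical, and any worker whose http preflight passed could then claim it:

    INSERT INTO worker_tasks (role, status, priority, method)
    VALUES ('scraper', 'pending', 10, 'http');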

-- ===================================================================
-- PART 3: Update claim_task function for method compatibility
-- ===================================================================

CREATE OR REPLACE FUNCTION claim_task(
    p_role VARCHAR(50),
    p_worker_id VARCHAR(100),
    p_curl_passed BOOLEAN DEFAULT TRUE,
    p_http_passed BOOLEAN DEFAULT FALSE
) RETURNS worker_tasks AS $$
DECLARE
    claimed_task worker_tasks;
BEGIN
    UPDATE worker_tasks
    SET
        status = 'claimed',
        worker_id = p_worker_id,
        claimed_at = NOW(),
        updated_at = NOW()
    WHERE id = (
        SELECT id FROM worker_tasks
        WHERE role = p_role
          AND status = 'pending'
          AND (scheduled_for IS NULL OR scheduled_for <= NOW())
          -- Method compatibility: worker must have passed the required preflight
          AND (
            method IS NULL -- No preference, any worker can claim
            OR (method = 'curl' AND p_curl_passed = TRUE)
            OR (method = 'http' AND p_http_passed = TRUE)
          )
          -- Exclude stores that already have an active task
          AND (dispensary_id IS NULL OR dispensary_id NOT IN (
            SELECT dispensary_id FROM worker_tasks
            WHERE status IN ('claimed', 'running')
              AND dispensary_id IS NOT NULL
          ))
        ORDER BY priority DESC, created_at ASC
        LIMIT 1
        FOR UPDATE SKIP LOCKED
    )
    RETURNING * INTO claimed_task;

    RETURN claimed_task;
END;
$$ LANGUAGE plpgsql;
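
A claiming sketch (role and worker id are hypothetical): a worker whose http preflight passed but whose curl preflight failed flips the two boolean flags from their defaults; tasks with method IS NULL stay claimable by any worker:

    SELECT * FROM claim_task('scraper', 'worker-7f3a', FALSE, TRUE);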

-- ===================================================================
-- PART 4: Update v_active_workers view
-- ===================================================================

DROP VIEW IF EXISTS v_active_workers;

CREATE VIEW v_active_workers AS
SELECT
    wr.id,
    wr.worker_id,
    wr.friendly_name,
    wr.role,
    wr.status,
    wr.pod_name,
    wr.hostname,
    wr.started_at,
    wr.last_heartbeat_at,
    wr.last_task_at,
    wr.tasks_completed,
    wr.tasks_failed,
    wr.current_task_id,
    -- Preflight status
    wr.preflight_curl_status,
    wr.preflight_http_status,
    wr.preflight_curl_at,
    wr.preflight_http_at,
    wr.preflight_curl_error,
    wr.preflight_http_error,
    wr.preflight_curl_ms,
    wr.preflight_http_ms,
    -- Computed fields
    EXTRACT(EPOCH FROM (NOW() - wr.last_heartbeat_at)) as seconds_since_heartbeat,
    CASE
        WHEN wr.status = 'offline' THEN 'offline'
        WHEN wr.last_heartbeat_at < NOW() - INTERVAL '2 minutes' THEN 'stale'
        WHEN wr.current_task_id IS NOT NULL THEN 'busy'
        ELSE 'ready'
    END as health_status,
    -- Capability flags (can this worker handle curl/http tasks?)
    (wr.preflight_curl_status = 'passed') as can_curl,
    (wr.preflight_http_status = 'passed') as can_http
FROM worker_registry wr
WHERE wr.status != 'terminated'
ORDER BY wr.status = 'active' DESC, wr.last_heartbeat_at DESC;

-- ===================================================================
-- PART 5: View for task queue with method info
-- ===================================================================

DROP VIEW IF EXISTS v_task_history;

CREATE VIEW v_task_history AS
SELECT
    t.id,
    t.role,
    t.dispensary_id,
    d.name as dispensary_name,
    t.platform,
    t.status,
    t.priority,
    t.method,
    t.worker_id,
    t.scheduled_for,
    t.claimed_at,
    t.started_at,
    t.completed_at,
    t.error_message,
    t.retry_count,
    t.created_at,
    EXTRACT(EPOCH FROM (t.completed_at - t.started_at)) as duration_sec
FROM worker_tasks t
LEFT JOIN dispensaries d ON d.id = t.dispensary_id
ORDER BY t.created_at DESC;
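
A monitoring sketch on top of the view (annotation only): the pending backlog per transport method.

    SELECT method, COUNT(*) AS pending_tasks
    FROM v_task_history
    WHERE status = 'pending'
    GROUP BY method;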

-- ===================================================================
-- PART 6: Helper function to update worker preflight status
-- ===================================================================

CREATE OR REPLACE FUNCTION update_worker_preflight(
    p_worker_id VARCHAR(100),
    p_transport VARCHAR(10),  -- 'curl' or 'http'
    p_status VARCHAR(20),     -- 'passed', 'failed', 'skipped'
    p_response_ms INTEGER DEFAULT NULL,
    p_error TEXT DEFAULT NULL
) RETURNS VOID AS $$
BEGIN
    IF p_transport = 'curl' THEN
        UPDATE worker_registry
        SET
            preflight_curl_status = p_status,
            preflight_curl_at = NOW(),
            preflight_curl_ms = p_response_ms,
            preflight_curl_error = p_error,
            updated_at = NOW()
        WHERE worker_id = p_worker_id;
    ELSIF p_transport = 'http' THEN
        UPDATE worker_registry
        SET
            preflight_http_status = p_status,
            preflight_http_at = NOW(),
            preflight_http_ms = p_response_ms,
            preflight_http_error = p_error,
            updated_at = NOW()
        WHERE worker_id = p_worker_id;
    END IF;
END;
$$ LANGUAGE plpgsql;

-- ===================================================================
-- Comments
-- ===================================================================

COMMENT ON COLUMN worker_registry.preflight_curl_status IS 'Status of curl/axios preflight: pending, passed, failed, skipped';
COMMENT ON COLUMN worker_registry.preflight_http_status IS 'Status of http/Puppeteer preflight: pending, passed, failed, skipped';
COMMENT ON COLUMN worker_registry.preflight_curl_at IS 'When curl preflight completed';
COMMENT ON COLUMN worker_registry.preflight_http_at IS 'When http preflight completed';
COMMENT ON COLUMN worker_registry.preflight_curl_error IS 'Error message if curl preflight failed';
COMMENT ON COLUMN worker_registry.preflight_http_error IS 'Error message if http preflight failed';
COMMENT ON COLUMN worker_registry.preflight_curl_ms IS 'Response time of successful curl preflight (ms)';
COMMENT ON COLUMN worker_registry.preflight_http_ms IS 'Response time of successful http preflight (ms)';

COMMENT ON COLUMN worker_tasks.method IS 'Transport method required: NULL=any, curl=proxy-based, http=browser-based';

COMMENT ON FUNCTION claim_task IS 'Atomically claim a task, respecting method requirements and per-store locking';
COMMENT ON FUNCTION update_worker_preflight IS 'Update a worker''s preflight status for a given transport';
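
A reporting sketch (worker id, timing, and error text are hypothetical): on startup a worker records one result per transport before claiming work.

    SELECT update_worker_preflight('worker-7f3a', 'curl', 'passed', 412, NULL);
    SELECT update_worker_preflight('worker-7f3a', 'http', 'failed', NULL, 'proxy tunnel timeout');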

168 backend/migrations/085_preflight_ip_fingerprint.sql (Normal file)
@@ -0,0 +1,168 @@
-- Migration 085: Add IP and fingerprint columns for preflight reporting
-- These columns were missing from migration 084

-- ===================================================================
-- PART 1: Add IP address columns to worker_registry
-- ===================================================================

-- IP address detected during curl/axios preflight
ALTER TABLE worker_registry
    ADD COLUMN IF NOT EXISTS curl_ip VARCHAR(45);

-- IP address detected during http/Puppeteer preflight
ALTER TABLE worker_registry
    ADD COLUMN IF NOT EXISTS http_ip VARCHAR(45);

-- ===================================================================
-- PART 2: Add fingerprint data column
-- ===================================================================

-- Browser fingerprint data captured during Puppeteer preflight
ALTER TABLE worker_registry
    ADD COLUMN IF NOT EXISTS fingerprint_data JSONB;

-- ===================================================================
-- PART 3: Add combined preflight status/timestamp for convenience
-- ===================================================================

-- Overall preflight status (computed from both transports)
-- Values: 'pending', 'passed', 'partial', 'failed'
-- - 'pending': neither transport tested
-- - 'passed': both transports passed (or http passed for browser-only)
-- - 'partial': at least one passed
-- - 'failed': no transport passed
ALTER TABLE worker_registry
    ADD COLUMN IF NOT EXISTS preflight_status VARCHAR(20) DEFAULT 'pending';

-- Most recent preflight completion timestamp
ALTER TABLE worker_registry
    ADD COLUMN IF NOT EXISTS preflight_at TIMESTAMPTZ;
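
A fingerprint lookup sketch (annotation; the JSON key is an assumption about what the Puppeteer preflight captures):

    SELECT worker_id, fingerprint_data ->> 'userAgent' AS preflight_ua
    FROM worker_registry
    WHERE fingerprint_data IS NOT NULL;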

-- ===================================================================
-- PART 4: Update function to set preflight status
-- ===================================================================

CREATE OR REPLACE FUNCTION update_worker_preflight(
    p_worker_id VARCHAR(100),
    p_transport VARCHAR(10),  -- 'curl' or 'http'
    p_status VARCHAR(20),     -- 'passed', 'failed', 'skipped'
    p_ip VARCHAR(45) DEFAULT NULL,
    p_response_ms INTEGER DEFAULT NULL,
    p_error TEXT DEFAULT NULL,
    p_fingerprint JSONB DEFAULT NULL
) RETURNS VOID AS $$
DECLARE
    v_curl_status VARCHAR(20);
    v_http_status VARCHAR(20);
    v_overall_status VARCHAR(20);
BEGIN
    IF p_transport = 'curl' THEN
        UPDATE worker_registry
        SET
            preflight_curl_status = p_status,
            preflight_curl_at = NOW(),
            preflight_curl_ms = p_response_ms,
            preflight_curl_error = p_error,
            curl_ip = p_ip,
            updated_at = NOW()
        WHERE worker_id = p_worker_id;
    ELSIF p_transport = 'http' THEN
        UPDATE worker_registry
        SET
            preflight_http_status = p_status,
            preflight_http_at = NOW(),
            preflight_http_ms = p_response_ms,
            preflight_http_error = p_error,
            http_ip = p_ip,
            fingerprint_data = COALESCE(p_fingerprint, fingerprint_data),
            updated_at = NOW()
        WHERE worker_id = p_worker_id;
    END IF;

    -- Update overall preflight status
    SELECT preflight_curl_status, preflight_http_status
    INTO v_curl_status, v_http_status
    FROM worker_registry
    WHERE worker_id = p_worker_id;

    -- Compute overall status
    IF v_curl_status = 'passed' AND v_http_status = 'passed' THEN
        v_overall_status := 'passed';
    ELSIF v_curl_status = 'passed' OR v_http_status = 'passed' THEN
        v_overall_status := 'partial';
    ELSIF v_curl_status = 'failed' OR v_http_status = 'failed' THEN
        v_overall_status := 'failed';
    ELSE
        v_overall_status := 'pending';
    END IF;

    UPDATE worker_registry
    SET
        preflight_status = v_overall_status,
        preflight_at = NOW()
    WHERE worker_id = p_worker_id;
END;
$$ LANGUAGE plpgsql;
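
A reporting sketch for the extended helper (worker id, IP, timing, and fingerprint payload are hypothetical). Worth noting: because this parameter list differs from migration 084's version, CREATE OR REPLACE defines a second overload rather than replacing the old function, so callers should pass enough arguments (or use named notation) to disambiguate:

    SELECT update_worker_preflight(
        'worker-7f3a', 'http', 'passed',
        '203.0.113.27', 1840, NULL,
        '{"userAgent": "Mozilla/5.0", "webdriver": false}'::jsonb
    );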

-- ===================================================================
-- PART 5: Update v_active_workers view
-- ===================================================================

DROP VIEW IF EXISTS v_active_workers;

CREATE VIEW v_active_workers AS
SELECT
    wr.id,
    wr.worker_id,
    wr.friendly_name,
    wr.role,
    wr.status,
    wr.pod_name,
    wr.hostname,
    wr.started_at,
    wr.last_heartbeat_at,
    wr.last_task_at,
    wr.tasks_completed,
    wr.tasks_failed,
    wr.current_task_id,
    -- IP addresses from preflights
    wr.curl_ip,
    wr.http_ip,
    -- Combined preflight status
    wr.preflight_status,
    wr.preflight_at,
    -- Detailed preflight status per transport
    wr.preflight_curl_status,
    wr.preflight_http_status,
    wr.preflight_curl_at,
    wr.preflight_http_at,
    wr.preflight_curl_error,
    wr.preflight_http_error,
    wr.preflight_curl_ms,
    wr.preflight_http_ms,
    -- Fingerprint data
    wr.fingerprint_data,
    -- Computed fields
    EXTRACT(EPOCH FROM (NOW() - wr.last_heartbeat_at)) as seconds_since_heartbeat,
    CASE
        WHEN wr.status = 'offline' THEN 'offline'
        WHEN wr.last_heartbeat_at < NOW() - INTERVAL '2 minutes' THEN 'stale'
        WHEN wr.current_task_id IS NOT NULL THEN 'busy'
        ELSE 'ready'
    END as health_status,
    -- Capability flags (can this worker handle curl/http tasks?)
    (wr.preflight_curl_status = 'passed') as can_curl,
    (wr.preflight_http_status = 'passed') as can_http
FROM worker_registry wr
WHERE wr.status != 'terminated'
ORDER BY wr.status = 'active' DESC, wr.last_heartbeat_at DESC;
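
A dispatch-side sketch (annotation only): list ready workers that can take browser-based tasks.

    SELECT worker_id, http_ip, preflight_http_ms
    FROM v_active_workers
    WHERE health_status = 'ready' AND can_http;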

-- ===================================================================
-- Comments
-- ===================================================================

COMMENT ON COLUMN worker_registry.curl_ip IS 'IP address detected during curl/axios preflight';
COMMENT ON COLUMN worker_registry.http_ip IS 'IP address detected during Puppeteer preflight';
COMMENT ON COLUMN worker_registry.fingerprint_data IS 'Browser fingerprint captured during Puppeteer preflight';
COMMENT ON COLUMN worker_registry.preflight_status IS 'Overall preflight status: pending, passed, partial, failed';
COMMENT ON COLUMN worker_registry.preflight_at IS 'Most recent preflight completion timestamp';

305 backend/node_modules/.package-lock.json (generated, vendored)
@@ -1,6 +1,6 @@
 {
   "name": "dutchie-menus-backend",
-  "version": "1.5.1",
+  "version": "1.6.0",
   "lockfileVersion": 3,
   "requires": true,
   "packages": {
@@ -46,6 +46,97 @@
       "resolved": "https://registry.npmjs.org/@ioredis/commands/-/commands-1.4.0.tgz",
       "integrity": "sha512-aFT2yemJJo+TZCmieA7qnYGQooOS7QfNmYrzGtsYd3g9j5iDP8AimYYAesf79ohjbLG12XxC4nG5DyEnC88AsQ=="
     },
+    "node_modules/@jsep-plugin/assignment": {
+      "version": "1.3.0",
+      "resolved": "https://registry.npmjs.org/@jsep-plugin/assignment/-/assignment-1.3.0.tgz",
+      "integrity": "sha512-VVgV+CXrhbMI3aSusQyclHkenWSAm95WaiKrMxRFam3JSUiIaQjoMIw2sEs/OX4XifnqeQUN4DYbJjlA8EfktQ==",
+      "engines": {
+        "node": ">= 10.16.0"
+      },
+      "peerDependencies": {
+        "jsep": "^0.4.0||^1.0.0"
+      }
+    },
+    "node_modules/@jsep-plugin/regex": {
+      "version": "1.0.4",
+      "resolved": "https://registry.npmjs.org/@jsep-plugin/regex/-/regex-1.0.4.tgz",
+      "integrity": "sha512-q7qL4Mgjs1vByCaTnDFcBnV9HS7GVPJX5vyVoCgZHNSC9rjwIlmbXG5sUuorR5ndfHAIlJ8pVStxvjXHbNvtUg==",
+      "engines": {
+        "node": ">= 10.16.0"
+      },
+      "peerDependencies": {
+        "jsep": "^0.4.0||^1.0.0"
+      }
+    },
+    "node_modules/@kubernetes/client-node": {
+      "version": "1.4.0",
+      "resolved": "https://registry.npmjs.org/@kubernetes/client-node/-/client-node-1.4.0.tgz",
+      "integrity": "sha512-Zge3YvF7DJi264dU1b3wb/GmzR99JhUpqTvp+VGHfwZT+g7EOOYNScDJNZwXy9cszyIGPIs0VHr+kk8e95qqrA==",
+      "dependencies": {
+        "@types/js-yaml": "^4.0.1",
+        "@types/node": "^24.0.0",
+        "@types/node-fetch": "^2.6.13",
+        "@types/stream-buffers": "^3.0.3",
+        "form-data": "^4.0.0",
+        "hpagent": "^1.2.0",
+        "isomorphic-ws": "^5.0.0",
+        "js-yaml": "^4.1.0",
+        "jsonpath-plus": "^10.3.0",
+        "node-fetch": "^2.7.0",
+        "openid-client": "^6.1.3",
+        "rfc4648": "^1.3.0",
+        "socks-proxy-agent": "^8.0.4",
+        "stream-buffers": "^3.0.2",
+        "tar-fs": "^3.0.9",
+        "ws": "^8.18.2"
+      }
+    },
+    "node_modules/@kubernetes/client-node/node_modules/@types/node": {
+      "version": "24.10.3",
+      "resolved": "https://registry.npmjs.org/@types/node/-/node-24.10.3.tgz",
+      "integrity": "sha512-gqkrWUsS8hcm0r44yn7/xZeV1ERva/nLgrLxFRUGb7aoNMIJfZJ3AC261zDQuOAKC7MiXai1WCpYc48jAHoShQ==",
+      "dependencies": {
+        "undici-types": "~7.16.0"
+      }
+    },
+    "node_modules/@kubernetes/client-node/node_modules/tar-fs": {
+      "version": "3.1.1",
+      "resolved": "https://registry.npmjs.org/tar-fs/-/tar-fs-3.1.1.tgz",
+      "integrity": "sha512-LZA0oaPOc2fVo82Txf3gw+AkEd38szODlptMYejQUhndHMLQ9M059uXR+AfS7DNo0NpINvSqDsvyaCrBVkptWg==",
+      "dependencies": {
+        "pump": "^3.0.0",
+        "tar-stream": "^3.1.5"
+      },
+      "optionalDependencies": {
+        "bare-fs": "^4.0.1",
+        "bare-path": "^3.0.0"
+      }
+    },
+    "node_modules/@kubernetes/client-node/node_modules/undici-types": {
+      "version": "7.16.0",
+      "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.16.0.tgz",
+      "integrity": "sha512-Zz+aZWSj8LE6zoxD+xrjh4VfkIG8Ya6LvYkZqtUQGJPZjYl53ypCaUwWqo7eI0x66KBGeRo+mlBEkMSeSZ38Nw=="
+    },
+    "node_modules/@kubernetes/client-node/node_modules/ws": {
+      "version": "8.18.3",
+      "resolved": "https://registry.npmjs.org/ws/-/ws-8.18.3.tgz",
+      "integrity": "sha512-PEIGCY5tSlUt50cqyMXfCzX+oOPqN0vuGqWzbcJ2xvnkzkq46oOpz7dQaTDBdfICb4N14+GARUDw2XV2N4tvzg==",
+      "engines": {
+        "node": ">=10.0.0"
+      },
+      "peerDependencies": {
+        "bufferutil": "^4.0.1",
+        "utf-8-validate": ">=5.0.2"
+      },
+      "peerDependenciesMeta": {
+        "bufferutil": {
+          "optional": true
+        },
+        "utf-8-validate": {
+          "optional": true
+        }
+      }
+    },
     "node_modules/@mapbox/node-pre-gyp": {
       "version": "1.0.11",
       "resolved": "https://registry.npmjs.org/@mapbox/node-pre-gyp/-/node-pre-gyp-1.0.11.tgz",
@@ -251,6 +342,11 @@
       "integrity": "sha512-r8Tayk8HJnX0FztbZN7oVqGccWgw98T/0neJphO91KkmOzug1KkofZURD4UaD5uH8AqcFLfdPErnBod0u71/qg==",
       "dev": true
     },
+    "node_modules/@types/js-yaml": {
+      "version": "4.0.9",
+      "resolved": "https://registry.npmjs.org/@types/js-yaml/-/js-yaml-4.0.9.tgz",
+      "integrity": "sha512-k4MGaQl5TGo/iipqb2UDG2UwjXziSWkh0uysQelTlJpX1qGlpUZYm8PnO4DxG1qBomtJUdYJ6qR6xdIah10JLg=="
+    },
     "node_modules/@types/jsonwebtoken": {
       "version": "9.0.10",
       "resolved": "https://registry.npmjs.org/@types/jsonwebtoken/-/jsonwebtoken-9.0.10.tgz",
@@ -276,7 +372,6 @@
       "version": "20.19.25",
       "resolved": "https://registry.npmjs.org/@types/node/-/node-20.19.25.tgz",
       "integrity": "sha512-ZsJzA5thDQMSQO788d7IocwwQbI8B5OPzmqNvpf3NY/+MHDAS759Wo0gd2WQeXYt5AAAQjzcrTVC6SKCuYgoCQ==",
-      "devOptional": true,
       "dependencies": {
         "undici-types": "~6.21.0"
       }
@@ -287,6 +382,15 @@
       "integrity": "sha512-0ikrnug3/IyneSHqCBeslAhlK2aBfYek1fGo4bP4QnZPmiqSGRK+Oy7ZMisLWkesffJvQ1cqAcBnJC+8+nxIAg==",
       "dev": true
     },
+    "node_modules/@types/node-fetch": {
+      "version": "2.6.13",
+      "resolved": "https://registry.npmjs.org/@types/node-fetch/-/node-fetch-2.6.13.tgz",
+      "integrity": "sha512-QGpRVpzSaUs30JBSGPjOg4Uveu384erbHBoT1zeONvyCfwQxIkUshLAOqN/k9EjGviPRmWTTe6aH2qySWKTVSw==",
+      "dependencies": {
+        "@types/node": "*",
+        "form-data": "^4.0.4"
+      }
+    },
     "node_modules/@types/pg": {
       "version": "8.15.6",
       "resolved": "https://registry.npmjs.org/@types/pg/-/pg-8.15.6.tgz",
@@ -340,6 +444,14 @@
         "@types/node": "*"
       }
     },
+    "node_modules/@types/stream-buffers": {
+      "version": "3.0.8",
+      "resolved": "https://registry.npmjs.org/@types/stream-buffers/-/stream-buffers-3.0.8.tgz",
+      "integrity": "sha512-J+7VaHKNvlNPJPEJXX/fKa9DZtR/xPMwuIbe+yNOwp1YB+ApUOBv2aUpEoBJEi8nJgbgs1x8e73ttg0r1rSUdw==",
+      "dependencies": {
+        "@types/node": "*"
+      }
+    },
     "node_modules/@types/uuid": {
       "version": "9.0.8",
       "resolved": "https://registry.npmjs.org/@types/uuid/-/uuid-9.0.8.tgz",
@@ -520,6 +632,78 @@
         }
       }
     },
+    "node_modules/bare-fs": {
+      "version": "4.5.2",
+      "resolved": "https://registry.npmjs.org/bare-fs/-/bare-fs-4.5.2.tgz",
+      "integrity": "sha512-veTnRzkb6aPHOvSKIOy60KzURfBdUflr5VReI+NSaPL6xf+XLdONQgZgpYvUuZLVQ8dCqxpBAudaOM1+KpAUxw==",
+      "optional": true,
+      "dependencies": {
+        "bare-events": "^2.5.4",
+        "bare-path": "^3.0.0",
+        "bare-stream": "^2.6.4",
+        "bare-url": "^2.2.2",
+        "fast-fifo": "^1.3.2"
+      },
+      "engines": {
+        "bare": ">=1.16.0"
+      },
+      "peerDependencies": {
+        "bare-buffer": "*"
+      },
+      "peerDependenciesMeta": {
+        "bare-buffer": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/bare-os": {
+      "version": "3.6.2",
+      "resolved": "https://registry.npmjs.org/bare-os/-/bare-os-3.6.2.tgz",
+      "integrity": "sha512-T+V1+1srU2qYNBmJCXZkUY5vQ0B4FSlL3QDROnKQYOqeiQR8UbjNHlPa+TIbM4cuidiN9GaTaOZgSEgsvPbh5A==",
+      "optional": true,
+      "engines": {
+        "bare": ">=1.14.0"
+      }
+    },
+    "node_modules/bare-path": {
+      "version": "3.0.0",
+      "resolved": "https://registry.npmjs.org/bare-path/-/bare-path-3.0.0.tgz",
+      "integrity": "sha512-tyfW2cQcB5NN8Saijrhqn0Zh7AnFNsnczRcuWODH0eYAXBsJ5gVxAUuNr7tsHSC6IZ77cA0SitzT+s47kot8Mw==",
+      "optional": true,
+      "dependencies": {
+        "bare-os": "^3.0.1"
+      }
+    },
+    "node_modules/bare-stream": {
+      "version": "2.7.0",
+      "resolved": "https://registry.npmjs.org/bare-stream/-/bare-stream-2.7.0.tgz",
+      "integrity": "sha512-oyXQNicV1y8nc2aKffH+BUHFRXmx6VrPzlnaEvMhram0nPBrKcEdcyBg5r08D0i8VxngHFAiVyn1QKXpSG0B8A==",
+      "optional": true,
+      "dependencies": {
+        "streamx": "^2.21.0"
+      },
+      "peerDependencies": {
+        "bare-buffer": "*",
+        "bare-events": "*"
+      },
+      "peerDependenciesMeta": {
+        "bare-buffer": {
+          "optional": true
+        },
+        "bare-events": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/bare-url": {
+      "version": "2.3.2",
+      "resolved": "https://registry.npmjs.org/bare-url/-/bare-url-2.3.2.tgz",
+      "integrity": "sha512-ZMq4gd9ngV5aTMa5p9+UfY0b3skwhHELaDkhEHetMdX0LRkW9kzaym4oo/Eh+Ghm0CCDuMTsRIGM/ytUc1ZYmw==",
+      "optional": true,
+      "dependencies": {
+        "bare-path": "^3.0.0"
+      }
+    },
     "node_modules/base64-js": {
       "version": "1.5.1",
       "resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz",
@@ -1026,6 +1210,17 @@
         "url": "https://github.com/sponsors/fb55"
       }
     },
+    "node_modules/csv-parser": {
+      "version": "3.2.0",
+      "resolved": "https://registry.npmjs.org/csv-parser/-/csv-parser-3.2.0.tgz",
+      "integrity": "sha512-fgKbp+AJbn1h2dcAHKIdKNSSjfp43BZZykXsCjzALjKy80VXQNHPFJ6T9Afwdzoj24aMkq8GwDS7KGcDPpejrA==",
+      "bin": {
+        "csv-parser": "bin/csv-parser"
+      },
+      "engines": {
+        "node": ">= 10"
+      }
+    },
     "node_modules/data-uri-to-buffer": {
       "version": "6.0.2",
       "resolved": "https://registry.npmjs.org/data-uri-to-buffer/-/data-uri-to-buffer-6.0.2.tgz",
@@ -2008,6 +2203,14 @@
         "node": ">=16.0.0"
       }
     },
+    "node_modules/hpagent": {
+      "version": "1.2.0",
+      "resolved": "https://registry.npmjs.org/hpagent/-/hpagent-1.2.0.tgz",
+      "integrity": "sha512-A91dYTeIB6NoXG+PxTQpCCDDnfHsW9kc06Lvpu1TEe9gnd6ZFeiBoRO9JvzEv6xK7EX97/dUE8g/vBMTqTS3CA==",
+      "engines": {
+        "node": ">=14"
+      }
+    },
     "node_modules/htmlparser2": {
       "version": "10.0.0",
       "resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-10.0.0.tgz",
@@ -2235,6 +2438,14 @@
         "node": ">= 12"
       }
     },
+    "node_modules/ip2location-nodejs": {
+      "version": "9.7.0",
+      "resolved": "https://registry.npmjs.org/ip2location-nodejs/-/ip2location-nodejs-9.7.0.tgz",
+      "integrity": "sha512-eQ4T5TXm1cx0+pQcRycPiuaiRuoDEMd9O89Be7Ugk555qi9UY9enXSznkkqr3kQRyUaXx7zj5dORC5LGTPOttA==",
+      "dependencies": {
+        "csv-parser": "^3.0.0"
+      }
+    },
     "node_modules/ipaddr.js": {
       "version": "2.2.0",
       "resolved": "https://registry.npmjs.org/ipaddr.js/-/ipaddr.js-2.2.0.tgz",
@@ -2363,6 +2574,22 @@
         "node": ">=0.10.0"
       }
     },
+    "node_modules/isomorphic-ws": {
+      "version": "5.0.0",
+      "resolved": "https://registry.npmjs.org/isomorphic-ws/-/isomorphic-ws-5.0.0.tgz",
+      "integrity": "sha512-muId7Zzn9ywDsyXgTIafTry2sV3nySZeUDe6YedVd1Hvuuep5AsIlqK+XefWpYTyJG5e503F2xIuT2lcU6rCSw==",
+      "peerDependencies": {
+        "ws": "*"
+      }
+    },
+    "node_modules/jose": {
+      "version": "6.1.3",
+      "resolved": "https://registry.npmjs.org/jose/-/jose-6.1.3.tgz",
+      "integrity": "sha512-0TpaTfihd4QMNwrz/ob2Bp7X04yuxJkjRGi4aKmOqwhov54i6u79oCv7T+C7lo70MKH6BesI3vscD1yb/yzKXQ==",
+      "funding": {
+        "url": "https://github.com/sponsors/panva"
+      }
+    },
     "node_modules/js-tokens": {
       "version": "4.0.0",
       "resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz",
@@ -2379,6 +2606,14 @@
         "js-yaml": "bin/js-yaml.js"
       }
     },
+    "node_modules/jsep": {
+      "version": "1.4.0",
+      "resolved": "https://registry.npmjs.org/jsep/-/jsep-1.4.0.tgz",
+      "integrity": "sha512-B7qPcEVE3NVkmSJbaYxvv4cHkVW7DQsZz13pUMrfS8z8Q/BuShN+gcTXrUlPiGqM2/t/EEaI030bpxMqY8gMlw==",
+      "engines": {
+        "node": ">= 10.16.0"
+      }
+    },
     "node_modules/json-parse-even-better-errors": {
       "version": "2.3.1",
       "resolved": "https://registry.npmjs.org/json-parse-even-better-errors/-/json-parse-even-better-errors-2.3.1.tgz",
@@ -2400,6 +2635,23 @@
         "graceful-fs": "^4.1.6"
       }
     },
+    "node_modules/jsonpath-plus": {
+      "version": "10.3.0",
+      "resolved": "https://registry.npmjs.org/jsonpath-plus/-/jsonpath-plus-10.3.0.tgz",
+      "integrity": "sha512-8TNmfeTCk2Le33A3vRRwtuworG/L5RrgMvdjhKZxvyShO+mBu2fP50OWUjRLNtvw344DdDarFh9buFAZs5ujeA==",
+      "dependencies": {
+        "@jsep-plugin/assignment": "^1.3.0",
+        "@jsep-plugin/regex": "^1.0.4",
+        "jsep": "^1.4.0"
+      },
+      "bin": {
+        "jsonpath": "bin/jsonpath-cli.js",
+        "jsonpath-plus": "bin/jsonpath-cli.js"
+      },
+      "engines": {
+        "node": ">=18.0.0"
+      }
+    },
     "node_modules/jsonwebtoken": {
       "version": "9.0.2",
       "resolved": "https://registry.npmjs.org/jsonwebtoken/-/jsonwebtoken-9.0.2.tgz",
@@ -2474,6 +2726,11 @@
       "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz",
       "integrity": "sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg=="
     },
+    "node_modules/lodash.clonedeep": {
+      "version": "4.5.0",
+      "resolved": "https://registry.npmjs.org/lodash.clonedeep/-/lodash.clonedeep-4.5.0.tgz",
+      "integrity": "sha512-H5ZhCF25riFd9uB5UCkVKo61m3S/xZk1x4wA6yp/L3RFP6Z/eHH1ymQcGLo7J3GMPfm0V/7m1tryHuGVxpqEBQ=="
+    },
     "node_modules/lodash.defaults": {
       "version": "4.2.0",
       "resolved": "https://registry.npmjs.org/lodash.defaults/-/lodash.defaults-4.2.0.tgz",
@@ -2923,6 +3180,14 @@
         "url": "https://github.com/fb55/nth-check?sponsor=1"
       }
     },
+    "node_modules/oauth4webapi": {
+      "version": "3.8.3",
+      "resolved": "https://registry.npmjs.org/oauth4webapi/-/oauth4webapi-3.8.3.tgz",
+      "integrity": "sha512-pQ5BsX3QRTgnt5HxgHwgunIRaDXBdkT23tf8dfzmtTIL2LTpdmxgbpbBm0VgFWAIDlezQvQCTgnVIUmHupXHxw==",
+      "funding": {
+        "url": "https://github.com/sponsors/panva"
+      }
+    },
     "node_modules/object-assign": {
       "version": "4.1.1",
       "resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz",
@@ -2961,6 +3226,18 @@
         "wrappy": "1"
       }
     },
+    "node_modules/openid-client": {
+      "version": "6.8.1",
+      "resolved": "https://registry.npmjs.org/openid-client/-/openid-client-6.8.1.tgz",
+      "integrity": "sha512-VoYT6enBo6Vj2j3Q5Ec0AezS+9YGzQo1f5Xc42lreMGlfP4ljiXPKVDvCADh+XHCV/bqPu/wWSiCVXbJKvrODw==",
+      "dependencies": {
+        "jose": "^6.1.0",
+        "oauth4webapi": "^3.8.2"
+      },
+      "funding": {
+        "url": "https://github.com/sponsors/panva"
+      }
+    },
     "node_modules/pac-proxy-agent": {
       "version": "7.2.0",
       "resolved": "https://registry.npmjs.org/pac-proxy-agent/-/pac-proxy-agent-7.2.0.tgz",
@@ -3864,6 +4141,11 @@
         "url": "https://github.com/privatenumber/resolve-pkg-maps?sponsor=1"
       }
     },
+    "node_modules/rfc4648": {
+      "version": "1.5.4",
+      "resolved": "https://registry.npmjs.org/rfc4648/-/rfc4648-1.5.4.tgz",
+      "integrity": "sha512-rRg/6Lb+IGfJqO05HZkN50UtY7K/JhxJag1kP23+zyMfrvoB0B7RWv06MbOzoc79RgCdNTiUaNsTT1AJZ7Z+cg=="
+    },
     "node_modules/rimraf": {
       "version": "3.0.2",
       "resolved": "https://registry.npmjs.org/rimraf/-/rimraf-3.0.2.tgz",
@@ -4294,6 +4576,14 @@
         "node": ">= 0.8"
       }
     },
+    "node_modules/stream-buffers": {
+      "version": "3.0.3",
+      "resolved": "https://registry.npmjs.org/stream-buffers/-/stream-buffers-3.0.3.tgz",
+      "integrity": "sha512-pqMqwQCso0PBJt2PQmDO0cFj0lyqmiwOMiMSkVtRokl7e+ZTRYgDHKnuZNbqjiJXgsg4nuqtD/zxuo9KqTp0Yw==",
+      "engines": {
+        "node": ">= 0.10.0"
+      }
+    },
     "node_modules/streamx": {
       "version": "2.23.0",
       "resolved": "https://registry.npmjs.org/streamx/-/streamx-2.23.0.tgz",
@@ -4513,8 +4803,7 @@
     "node_modules/undici-types": {
       "version": "6.21.0",
       "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.21.0.tgz",
-      "integrity": "sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ==",
-      "devOptional": true
+      "integrity": "sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ=="
     },
     "node_modules/universalify": {
       "version": "2.0.1",
@@ -4537,6 +4826,14 @@
       "resolved": "https://registry.npmjs.org/urlpattern-polyfill/-/urlpattern-polyfill-10.0.0.tgz",
       "integrity": "sha512-H/A06tKD7sS1O1X2SshBVeA5FLycRpjqiBeqGKmBwBDBy28EnRjORxTNe269KSSr5un5qyWi1iL61wLxpd+ZOg=="
     },
+    "node_modules/user-agents": {
+      "version": "1.1.669",
+      "resolved": "https://registry.npmjs.org/user-agents/-/user-agents-1.1.669.tgz",
+      "integrity": "sha512-pbIzG+AOqCaIpySKJ4IAm1l0VyE4jMnK4y1thV8lm8PYxI+7X5uWcppOK7zY79TCKKTAnJH3/4gaVIZHsjrmJA==",
+      "dependencies": {
+        "lodash.clonedeep": "^4.5.0"
+      }
+    },
     "node_modules/util": {
       "version": "0.12.5",
       "resolved": "https://registry.npmjs.org/util/-/util-0.12.5.tgz",

310 backend/package-lock.json (generated)
@@ -1,13 +1,14 @@
|
|||||||
{
|
{
|
||||||
"name": "dutchie-menus-backend",
|
"name": "dutchie-menus-backend",
|
||||||
"version": "1.5.1",
|
"version": "1.6.0",
|
||||||
"lockfileVersion": 3,
|
"lockfileVersion": 3,
|
||||||
"requires": true,
|
"requires": true,
|
||||||
"packages": {
|
"packages": {
|
||||||
"": {
|
"": {
|
||||||
"name": "dutchie-menus-backend",
|
"name": "dutchie-menus-backend",
|
||||||
"version": "1.5.1",
|
"version": "1.6.0",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
|
"@kubernetes/client-node": "^1.4.0",
|
||||||
"@types/bcryptjs": "^3.0.0",
|
"@types/bcryptjs": "^3.0.0",
|
||||||
"axios": "^1.6.2",
|
"axios": "^1.6.2",
|
||||||
"bcrypt": "^5.1.1",
|
"bcrypt": "^5.1.1",
|
||||||
@@ -21,6 +22,7 @@
|
|||||||
"helmet": "^7.1.0",
|
"helmet": "^7.1.0",
|
||||||
"https-proxy-agent": "^7.0.2",
|
"https-proxy-agent": "^7.0.2",
|
||||||
"ioredis": "^5.8.2",
|
"ioredis": "^5.8.2",
|
||||||
|
"ip2location-nodejs": "^9.7.0",
|
||||||
"ipaddr.js": "^2.2.0",
|
"ipaddr.js": "^2.2.0",
|
||||||
"jsonwebtoken": "^9.0.2",
|
"jsonwebtoken": "^9.0.2",
|
||||||
"minio": "^7.1.3",
|
"minio": "^7.1.3",
|
||||||
@@ -33,6 +35,7 @@
|
|||||||
"puppeteer-extra-plugin-stealth": "^2.11.2",
|
"puppeteer-extra-plugin-stealth": "^2.11.2",
|
||||||
"sharp": "^0.32.0",
|
"sharp": "^0.32.0",
|
||||||
"socks-proxy-agent": "^8.0.2",
|
"socks-proxy-agent": "^8.0.2",
|
||||||
|
"user-agents": "^1.1.669",
|
||||||
"uuid": "^9.0.1",
|
"uuid": "^9.0.1",
|
||||||
"zod": "^3.22.4"
|
"zod": "^3.22.4"
|
||||||
},
|
},
|
||||||
@@ -491,6 +494,97 @@
|
|||||||
"resolved": "https://registry.npmjs.org/@ioredis/commands/-/commands-1.4.0.tgz",
|
"resolved": "https://registry.npmjs.org/@ioredis/commands/-/commands-1.4.0.tgz",
|
||||||
"integrity": "sha512-aFT2yemJJo+TZCmieA7qnYGQooOS7QfNmYrzGtsYd3g9j5iDP8AimYYAesf79ohjbLG12XxC4nG5DyEnC88AsQ=="
|
"integrity": "sha512-aFT2yemJJo+TZCmieA7qnYGQooOS7QfNmYrzGtsYd3g9j5iDP8AimYYAesf79ohjbLG12XxC4nG5DyEnC88AsQ=="
|
||||||
},
|
},
|
||||||
|
"node_modules/@jsep-plugin/assignment": {
|
||||||
|
"version": "1.3.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/@jsep-plugin/assignment/-/assignment-1.3.0.tgz",
|
||||||
|
"integrity": "sha512-VVgV+CXrhbMI3aSusQyclHkenWSAm95WaiKrMxRFam3JSUiIaQjoMIw2sEs/OX4XifnqeQUN4DYbJjlA8EfktQ==",
|
||||||
|
"engines": {
|
||||||
|
"node": ">= 10.16.0"
|
||||||
|
},
|
||||||
|
"peerDependencies": {
|
||||||
|
"jsep": "^0.4.0||^1.0.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/@jsep-plugin/regex": {
|
||||||
|
"version": "1.0.4",
|
||||||
|
"resolved": "https://registry.npmjs.org/@jsep-plugin/regex/-/regex-1.0.4.tgz",
|
||||||
|
"integrity": "sha512-q7qL4Mgjs1vByCaTnDFcBnV9HS7GVPJX5vyVoCgZHNSC9rjwIlmbXG5sUuorR5ndfHAIlJ8pVStxvjXHbNvtUg==",
|
||||||
|
"engines": {
|
||||||
|
"node": ">= 10.16.0"
|
||||||
|
},
|
||||||
|
"peerDependencies": {
|
||||||
|
"jsep": "^0.4.0||^1.0.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/@kubernetes/client-node": {
|
||||||
|
"version": "1.4.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/@kubernetes/client-node/-/client-node-1.4.0.tgz",
|
||||||
|
"integrity": "sha512-Zge3YvF7DJi264dU1b3wb/GmzR99JhUpqTvp+VGHfwZT+g7EOOYNScDJNZwXy9cszyIGPIs0VHr+kk8e95qqrA==",
|
||||||
|
"dependencies": {
|
||||||
|
"@types/js-yaml": "^4.0.1",
|
||||||
|
"@types/node": "^24.0.0",
|
||||||
|
"@types/node-fetch": "^2.6.13",
|
||||||
|
"@types/stream-buffers": "^3.0.3",
|
||||||
|
"form-data": "^4.0.0",
|
||||||
|
"hpagent": "^1.2.0",
|
||||||
|
"isomorphic-ws": "^5.0.0",
|
||||||
|
"js-yaml": "^4.1.0",
|
||||||
|
"jsonpath-plus": "^10.3.0",
|
||||||
|
"node-fetch": "^2.7.0",
|
||||||
|
"openid-client": "^6.1.3",
|
||||||
|
"rfc4648": "^1.3.0",
|
||||||
|
"socks-proxy-agent": "^8.0.4",
|
||||||
|
"stream-buffers": "^3.0.2",
|
||||||
|
"tar-fs": "^3.0.9",
|
||||||
|
"ws": "^8.18.2"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/@kubernetes/client-node/node_modules/@types/node": {
|
||||||
|
"version": "24.10.3",
|
||||||
|
"resolved": "https://registry.npmjs.org/@types/node/-/node-24.10.3.tgz",
|
||||||
|
"integrity": "sha512-gqkrWUsS8hcm0r44yn7/xZeV1ERva/nLgrLxFRUGb7aoNMIJfZJ3AC261zDQuOAKC7MiXai1WCpYc48jAHoShQ==",
|
||||||
|
"dependencies": {
|
||||||
|
"undici-types": "~7.16.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/@kubernetes/client-node/node_modules/tar-fs": {
|
||||||
|
"version": "3.1.1",
|
||||||
|
"resolved": "https://registry.npmjs.org/tar-fs/-/tar-fs-3.1.1.tgz",
|
||||||
|
"integrity": "sha512-LZA0oaPOc2fVo82Txf3gw+AkEd38szODlptMYejQUhndHMLQ9M059uXR+AfS7DNo0NpINvSqDsvyaCrBVkptWg==",
|
||||||
|
"dependencies": {
|
||||||
|
"pump": "^3.0.0",
|
||||||
|
"tar-stream": "^3.1.5"
|
||||||
|
},
|
||||||
|
"optionalDependencies": {
|
||||||
|
"bare-fs": "^4.0.1",
|
||||||
|
"bare-path": "^3.0.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/@kubernetes/client-node/node_modules/undici-types": {
|
||||||
|
"version": "7.16.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.16.0.tgz",
|
||||||
|
"integrity": "sha512-Zz+aZWSj8LE6zoxD+xrjh4VfkIG8Ya6LvYkZqtUQGJPZjYl53ypCaUwWqo7eI0x66KBGeRo+mlBEkMSeSZ38Nw=="
|
||||||
|
},
|
||||||
|
"node_modules/@kubernetes/client-node/node_modules/ws": {
|
||||||
|
"version": "8.18.3",
|
||||||
|
"resolved": "https://registry.npmjs.org/ws/-/ws-8.18.3.tgz",
|
||||||
|
"integrity": "sha512-PEIGCY5tSlUt50cqyMXfCzX+oOPqN0vuGqWzbcJ2xvnkzkq46oOpz7dQaTDBdfICb4N14+GARUDw2XV2N4tvzg==",
|
||||||
|
"engines": {
|
||||||
|
"node": ">=10.0.0"
|
||||||
|
},
|
||||||
|
"peerDependencies": {
|
||||||
|
"bufferutil": "^4.0.1",
|
||||||
|
"utf-8-validate": ">=5.0.2"
|
||||||
|
},
|
||||||
|
"peerDependenciesMeta": {
|
||||||
|
"bufferutil": {
|
||||||
|
"optional": true
|
||||||
|
},
|
||||||
|
"utf-8-validate": {
|
||||||
|
"optional": true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/@mapbox/node-pre-gyp": {
|
"node_modules/@mapbox/node-pre-gyp": {
|
||||||
"version": "1.0.11",
|
"version": "1.0.11",
|
||||||
"resolved": "https://registry.npmjs.org/@mapbox/node-pre-gyp/-/node-pre-gyp-1.0.11.tgz",
|
"resolved": "https://registry.npmjs.org/@mapbox/node-pre-gyp/-/node-pre-gyp-1.0.11.tgz",
|
||||||
@@ -756,6 +850,11 @@
|
|||||||
"integrity": "sha512-r8Tayk8HJnX0FztbZN7oVqGccWgw98T/0neJphO91KkmOzug1KkofZURD4UaD5uH8AqcFLfdPErnBod0u71/qg==",
|
"integrity": "sha512-r8Tayk8HJnX0FztbZN7oVqGccWgw98T/0neJphO91KkmOzug1KkofZURD4UaD5uH8AqcFLfdPErnBod0u71/qg==",
|
||||||
"dev": true
|
"dev": true
|
||||||
},
|
},
|
||||||
|
"node_modules/@types/js-yaml": {
|
||||||
|
"version": "4.0.9",
|
||||||
|
"resolved": "https://registry.npmjs.org/@types/js-yaml/-/js-yaml-4.0.9.tgz",
|
||||||
|
"integrity": "sha512-k4MGaQl5TGo/iipqb2UDG2UwjXziSWkh0uysQelTlJpX1qGlpUZYm8PnO4DxG1qBomtJUdYJ6qR6xdIah10JLg=="
|
||||||
|
},
|
||||||
"node_modules/@types/jsonwebtoken": {
|
"node_modules/@types/jsonwebtoken": {
|
||||||
"version": "9.0.10",
|
"version": "9.0.10",
|
||||||
"resolved": "https://registry.npmjs.org/@types/jsonwebtoken/-/jsonwebtoken-9.0.10.tgz",
|
"resolved": "https://registry.npmjs.org/@types/jsonwebtoken/-/jsonwebtoken-9.0.10.tgz",
|
||||||
@@ -781,7 +880,6 @@
|
|||||||
"version": "20.19.25",
|
"version": "20.19.25",
|
||||||
"resolved": "https://registry.npmjs.org/@types/node/-/node-20.19.25.tgz",
|
"resolved": "https://registry.npmjs.org/@types/node/-/node-20.19.25.tgz",
|
||||||
"integrity": "sha512-ZsJzA5thDQMSQO788d7IocwwQbI8B5OPzmqNvpf3NY/+MHDAS759Wo0gd2WQeXYt5AAAQjzcrTVC6SKCuYgoCQ==",
|
"integrity": "sha512-ZsJzA5thDQMSQO788d7IocwwQbI8B5OPzmqNvpf3NY/+MHDAS759Wo0gd2WQeXYt5AAAQjzcrTVC6SKCuYgoCQ==",
|
||||||
"devOptional": true,
|
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"undici-types": "~6.21.0"
|
"undici-types": "~6.21.0"
|
||||||
}
|
}
|
||||||
@@ -792,6 +890,15 @@
|
|||||||
"integrity": "sha512-0ikrnug3/IyneSHqCBeslAhlK2aBfYek1fGo4bP4QnZPmiqSGRK+Oy7ZMisLWkesffJvQ1cqAcBnJC+8+nxIAg==",
|
"integrity": "sha512-0ikrnug3/IyneSHqCBeslAhlK2aBfYek1fGo4bP4QnZPmiqSGRK+Oy7ZMisLWkesffJvQ1cqAcBnJC+8+nxIAg==",
|
||||||
"dev": true
|
"dev": true
|
||||||
},
|
},
|
||||||
|
"node_modules/@types/node-fetch": {
|
||||||
|
"version": "2.6.13",
|
||||||
|
"resolved": "https://registry.npmjs.org/@types/node-fetch/-/node-fetch-2.6.13.tgz",
|
||||||
|
"integrity": "sha512-QGpRVpzSaUs30JBSGPjOg4Uveu384erbHBoT1zeONvyCfwQxIkUshLAOqN/k9EjGviPRmWTTe6aH2qySWKTVSw==",
|
||||||
|
"dependencies": {
|
||||||
|
"@types/node": "*",
|
||||||
|
"form-data": "^4.0.4"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/@types/pg": {
|
"node_modules/@types/pg": {
|
||||||
"version": "8.15.6",
|
"version": "8.15.6",
|
||||||
"resolved": "https://registry.npmjs.org/@types/pg/-/pg-8.15.6.tgz",
|
"resolved": "https://registry.npmjs.org/@types/pg/-/pg-8.15.6.tgz",
|
||||||
@@ -845,6 +952,14 @@
|
|||||||
"@types/node": "*"
|
"@types/node": "*"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/@types/stream-buffers": {
|
||||||
|
"version": "3.0.8",
|
||||||
|
"resolved": "https://registry.npmjs.org/@types/stream-buffers/-/stream-buffers-3.0.8.tgz",
|
||||||
|
"integrity": "sha512-J+7VaHKNvlNPJPEJXX/fKa9DZtR/xPMwuIbe+yNOwp1YB+ApUOBv2aUpEoBJEi8nJgbgs1x8e73ttg0r1rSUdw==",
|
||||||
|
"dependencies": {
|
||||||
|
"@types/node": "*"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/@types/uuid": {
|
"node_modules/@types/uuid": {
|
||||||
"version": "9.0.8",
|
"version": "9.0.8",
|
||||||
"resolved": "https://registry.npmjs.org/@types/uuid/-/uuid-9.0.8.tgz",
|
"resolved": "https://registry.npmjs.org/@types/uuid/-/uuid-9.0.8.tgz",
|
||||||
@@ -1025,6 +1140,78 @@
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/bare-fs": {
|
||||||
|
"version": "4.5.2",
|
||||||
|
"resolved": "https://registry.npmjs.org/bare-fs/-/bare-fs-4.5.2.tgz",
|
||||||
|
"integrity": "sha512-veTnRzkb6aPHOvSKIOy60KzURfBdUflr5VReI+NSaPL6xf+XLdONQgZgpYvUuZLVQ8dCqxpBAudaOM1+KpAUxw==",
|
||||||
|
"optional": true,
|
||||||
|
"dependencies": {
|
||||||
|
"bare-events": "^2.5.4",
|
||||||
|
"bare-path": "^3.0.0",
|
||||||
|
"bare-stream": "^2.6.4",
|
||||||
|
"bare-url": "^2.2.2",
|
||||||
|
"fast-fifo": "^1.3.2"
|
||||||
|
},
|
||||||
|
"engines": {
|
||||||
|
"bare": ">=1.16.0"
|
||||||
|
},
|
||||||
|
"peerDependencies": {
|
||||||
|
"bare-buffer": "*"
|
||||||
|
},
|
||||||
|
"peerDependenciesMeta": {
|
||||||
|
"bare-buffer": {
|
||||||
|
"optional": true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/bare-os": {
|
||||||
|
"version": "3.6.2",
|
||||||
|
"resolved": "https://registry.npmjs.org/bare-os/-/bare-os-3.6.2.tgz",
|
||||||
|
"integrity": "sha512-T+V1+1srU2qYNBmJCXZkUY5vQ0B4FSlL3QDROnKQYOqeiQR8UbjNHlPa+TIbM4cuidiN9GaTaOZgSEgsvPbh5A==",
|
||||||
|
"optional": true,
|
||||||
|
"engines": {
|
||||||
|
"bare": ">=1.14.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/bare-path": {
|
||||||
|
"version": "3.0.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/bare-path/-/bare-path-3.0.0.tgz",
|
||||||
|
"integrity": "sha512-tyfW2cQcB5NN8Saijrhqn0Zh7AnFNsnczRcuWODH0eYAXBsJ5gVxAUuNr7tsHSC6IZ77cA0SitzT+s47kot8Mw==",
|
||||||
|
"optional": true,
|
||||||
|
"dependencies": {
|
||||||
|
"bare-os": "^3.0.1"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/bare-stream": {
|
||||||
|
"version": "2.7.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/bare-stream/-/bare-stream-2.7.0.tgz",
|
||||||
|
"integrity": "sha512-oyXQNicV1y8nc2aKffH+BUHFRXmx6VrPzlnaEvMhram0nPBrKcEdcyBg5r08D0i8VxngHFAiVyn1QKXpSG0B8A==",
|
||||||
|
"optional": true,
|
||||||
|
"dependencies": {
|
||||||
|
"streamx": "^2.21.0"
|
||||||
|
},
|
||||||
|
"peerDependencies": {
|
||||||
|
"bare-buffer": "*",
|
||||||
|
"bare-events": "*"
|
||||||
|
},
|
||||||
|
"peerDependenciesMeta": {
|
||||||
|
"bare-buffer": {
|
||||||
|
"optional": true
|
||||||
|
},
|
||||||
|
"bare-events": {
|
||||||
|
"optional": true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/bare-url": {
|
||||||
|
"version": "2.3.2",
|
||||||
|
"resolved": "https://registry.npmjs.org/bare-url/-/bare-url-2.3.2.tgz",
|
||||||
|
"integrity": "sha512-ZMq4gd9ngV5aTMa5p9+UfY0b3skwhHELaDkhEHetMdX0LRkW9kzaym4oo/Eh+Ghm0CCDuMTsRIGM/ytUc1ZYmw==",
|
||||||
|
"optional": true,
|
||||||
|
"dependencies": {
|
||||||
|
"bare-path": "^3.0.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/base64-js": {
|
"node_modules/base64-js": {
|
||||||
"version": "1.5.1",
|
"version": "1.5.1",
|
||||||
"resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz",
|
"resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz",
|
||||||
@@ -1531,6 +1718,17 @@
|
|||||||
"url": "https://github.com/sponsors/fb55"
|
"url": "https://github.com/sponsors/fb55"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/csv-parser": {
|
||||||
|
"version": "3.2.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/csv-parser/-/csv-parser-3.2.0.tgz",
|
||||||
|
"integrity": "sha512-fgKbp+AJbn1h2dcAHKIdKNSSjfp43BZZykXsCjzALjKy80VXQNHPFJ6T9Afwdzoj24aMkq8GwDS7KGcDPpejrA==",
|
||||||
|
"bin": {
|
||||||
|
"csv-parser": "bin/csv-parser"
|
||||||
|
},
|
||||||
|
"engines": {
|
||||||
|
"node": ">= 10"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/data-uri-to-buffer": {
|
"node_modules/data-uri-to-buffer": {
|
||||||
"version": "6.0.2",
|
"version": "6.0.2",
|
||||||
"resolved": "https://registry.npmjs.org/data-uri-to-buffer/-/data-uri-to-buffer-6.0.2.tgz",
|
"resolved": "https://registry.npmjs.org/data-uri-to-buffer/-/data-uri-to-buffer-6.0.2.tgz",
|
||||||
@@ -2527,6 +2725,14 @@
|
|||||||
"node": ">=16.0.0"
|
"node": ">=16.0.0"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/hpagent": {
|
||||||
|
"version": "1.2.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/hpagent/-/hpagent-1.2.0.tgz",
|
||||||
|
"integrity": "sha512-A91dYTeIB6NoXG+PxTQpCCDDnfHsW9kc06Lvpu1TEe9gnd6ZFeiBoRO9JvzEv6xK7EX97/dUE8g/vBMTqTS3CA==",
|
||||||
|
"engines": {
|
||||||
|
"node": ">=14"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/htmlparser2": {
|
"node_modules/htmlparser2": {
|
||||||
"version": "10.0.0",
|
"version": "10.0.0",
|
||||||
"resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-10.0.0.tgz",
|
"resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-10.0.0.tgz",
|
||||||
@@ -2754,6 +2960,14 @@
         "node": ">= 12"
       }
     },
+    "node_modules/ip2location-nodejs": {
+      "version": "9.7.0",
+      "resolved": "https://registry.npmjs.org/ip2location-nodejs/-/ip2location-nodejs-9.7.0.tgz",
+      "integrity": "sha512-eQ4T5TXm1cx0+pQcRycPiuaiRuoDEMd9O89Be7Ugk555qi9UY9enXSznkkqr3kQRyUaXx7zj5dORC5LGTPOttA==",
+      "dependencies": {
+        "csv-parser": "^3.0.0"
+      }
+    },
     "node_modules/ipaddr.js": {
       "version": "2.2.0",
       "resolved": "https://registry.npmjs.org/ipaddr.js/-/ipaddr.js-2.2.0.tgz",
@@ -2882,6 +3096,22 @@
         "node": ">=0.10.0"
       }
     },
+    "node_modules/isomorphic-ws": {
+      "version": "5.0.0",
+      "resolved": "https://registry.npmjs.org/isomorphic-ws/-/isomorphic-ws-5.0.0.tgz",
+      "integrity": "sha512-muId7Zzn9ywDsyXgTIafTry2sV3nySZeUDe6YedVd1Hvuuep5AsIlqK+XefWpYTyJG5e503F2xIuT2lcU6rCSw==",
+      "peerDependencies": {
+        "ws": "*"
+      }
+    },
+    "node_modules/jose": {
+      "version": "6.1.3",
+      "resolved": "https://registry.npmjs.org/jose/-/jose-6.1.3.tgz",
+      "integrity": "sha512-0TpaTfihd4QMNwrz/ob2Bp7X04yuxJkjRGi4aKmOqwhov54i6u79oCv7T+C7lo70MKH6BesI3vscD1yb/yzKXQ==",
+      "funding": {
+        "url": "https://github.com/sponsors/panva"
+      }
+    },
     "node_modules/js-tokens": {
       "version": "4.0.0",
       "resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz",
@@ -2898,6 +3128,14 @@
         "js-yaml": "bin/js-yaml.js"
       }
     },
+    "node_modules/jsep": {
+      "version": "1.4.0",
+      "resolved": "https://registry.npmjs.org/jsep/-/jsep-1.4.0.tgz",
+      "integrity": "sha512-B7qPcEVE3NVkmSJbaYxvv4cHkVW7DQsZz13pUMrfS8z8Q/BuShN+gcTXrUlPiGqM2/t/EEaI030bpxMqY8gMlw==",
+      "engines": {
+        "node": ">= 10.16.0"
+      }
+    },
     "node_modules/json-parse-even-better-errors": {
       "version": "2.3.1",
       "resolved": "https://registry.npmjs.org/json-parse-even-better-errors/-/json-parse-even-better-errors-2.3.1.tgz",
@@ -2919,6 +3157,23 @@
         "graceful-fs": "^4.1.6"
       }
     },
+    "node_modules/jsonpath-plus": {
+      "version": "10.3.0",
+      "resolved": "https://registry.npmjs.org/jsonpath-plus/-/jsonpath-plus-10.3.0.tgz",
+      "integrity": "sha512-8TNmfeTCk2Le33A3vRRwtuworG/L5RrgMvdjhKZxvyShO+mBu2fP50OWUjRLNtvw344DdDarFh9buFAZs5ujeA==",
+      "dependencies": {
+        "@jsep-plugin/assignment": "^1.3.0",
+        "@jsep-plugin/regex": "^1.0.4",
+        "jsep": "^1.4.0"
+      },
+      "bin": {
+        "jsonpath": "bin/jsonpath-cli.js",
+        "jsonpath-plus": "bin/jsonpath-cli.js"
+      },
+      "engines": {
+        "node": ">=18.0.0"
+      }
+    },
     "node_modules/jsonwebtoken": {
       "version": "9.0.2",
       "resolved": "https://registry.npmjs.org/jsonwebtoken/-/jsonwebtoken-9.0.2.tgz",
@@ -2993,6 +3248,11 @@
       "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz",
       "integrity": "sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg=="
     },
+    "node_modules/lodash.clonedeep": {
+      "version": "4.5.0",
+      "resolved": "https://registry.npmjs.org/lodash.clonedeep/-/lodash.clonedeep-4.5.0.tgz",
+      "integrity": "sha512-H5ZhCF25riFd9uB5UCkVKo61m3S/xZk1x4wA6yp/L3RFP6Z/eHH1ymQcGLo7J3GMPfm0V/7m1tryHuGVxpqEBQ=="
+    },
     "node_modules/lodash.defaults": {
       "version": "4.2.0",
       "resolved": "https://registry.npmjs.org/lodash.defaults/-/lodash.defaults-4.2.0.tgz",
@@ -3442,6 +3702,14 @@
         "url": "https://github.com/fb55/nth-check?sponsor=1"
       }
     },
+    "node_modules/oauth4webapi": {
+      "version": "3.8.3",
+      "resolved": "https://registry.npmjs.org/oauth4webapi/-/oauth4webapi-3.8.3.tgz",
+      "integrity": "sha512-pQ5BsX3QRTgnt5HxgHwgunIRaDXBdkT23tf8dfzmtTIL2LTpdmxgbpbBm0VgFWAIDlezQvQCTgnVIUmHupXHxw==",
+      "funding": {
+        "url": "https://github.com/sponsors/panva"
+      }
+    },
     "node_modules/object-assign": {
       "version": "4.1.1",
       "resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz",
@@ -3480,6 +3748,18 @@
         "wrappy": "1"
       }
     },
+    "node_modules/openid-client": {
+      "version": "6.8.1",
+      "resolved": "https://registry.npmjs.org/openid-client/-/openid-client-6.8.1.tgz",
+      "integrity": "sha512-VoYT6enBo6Vj2j3Q5Ec0AezS+9YGzQo1f5Xc42lreMGlfP4ljiXPKVDvCADh+XHCV/bqPu/wWSiCVXbJKvrODw==",
+      "dependencies": {
+        "jose": "^6.1.0",
+        "oauth4webapi": "^3.8.2"
+      },
+      "funding": {
+        "url": "https://github.com/sponsors/panva"
+      }
+    },
     "node_modules/pac-proxy-agent": {
       "version": "7.2.0",
       "resolved": "https://registry.npmjs.org/pac-proxy-agent/-/pac-proxy-agent-7.2.0.tgz",
@@ -4396,6 +4676,11 @@
         "url": "https://github.com/privatenumber/resolve-pkg-maps?sponsor=1"
       }
     },
+    "node_modules/rfc4648": {
+      "version": "1.5.4",
+      "resolved": "https://registry.npmjs.org/rfc4648/-/rfc4648-1.5.4.tgz",
+      "integrity": "sha512-rRg/6Lb+IGfJqO05HZkN50UtY7K/JhxJag1kP23+zyMfrvoB0B7RWv06MbOzoc79RgCdNTiUaNsTT1AJZ7Z+cg=="
+    },
     "node_modules/rimraf": {
       "version": "3.0.2",
       "resolved": "https://registry.npmjs.org/rimraf/-/rimraf-3.0.2.tgz",
@@ -4826,6 +5111,14 @@
         "node": ">= 0.8"
       }
     },
+    "node_modules/stream-buffers": {
+      "version": "3.0.3",
+      "resolved": "https://registry.npmjs.org/stream-buffers/-/stream-buffers-3.0.3.tgz",
+      "integrity": "sha512-pqMqwQCso0PBJt2PQmDO0cFj0lyqmiwOMiMSkVtRokl7e+ZTRYgDHKnuZNbqjiJXgsg4nuqtD/zxuo9KqTp0Yw==",
+      "engines": {
+        "node": ">= 0.10.0"
+      }
+    },
     "node_modules/streamx": {
       "version": "2.23.0",
       "resolved": "https://registry.npmjs.org/streamx/-/streamx-2.23.0.tgz",
@@ -5045,8 +5338,7 @@
     "node_modules/undici-types": {
       "version": "6.21.0",
       "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.21.0.tgz",
-      "integrity": "sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ==",
-      "devOptional": true
+      "integrity": "sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ=="
     },
     "node_modules/universalify": {
       "version": "2.0.1",
@@ -5069,6 +5361,14 @@
       "resolved": "https://registry.npmjs.org/urlpattern-polyfill/-/urlpattern-polyfill-10.0.0.tgz",
       "integrity": "sha512-H/A06tKD7sS1O1X2SshBVeA5FLycRpjqiBeqGKmBwBDBy28EnRjORxTNe269KSSr5un5qyWi1iL61wLxpd+ZOg=="
     },
+    "node_modules/user-agents": {
+      "version": "1.1.669",
+      "resolved": "https://registry.npmjs.org/user-agents/-/user-agents-1.1.669.tgz",
+      "integrity": "sha512-pbIzG+AOqCaIpySKJ4IAm1l0VyE4jMnK4y1thV8lm8PYxI+7X5uWcppOK7zY79TCKKTAnJH3/4gaVIZHsjrmJA==",
+      "dependencies": {
+        "lodash.clonedeep": "^4.5.0"
+      }
+    },
     "node_modules/util": {
       "version": "0.12.5",
       "resolved": "https://registry.npmjs.org/util/-/util-0.12.5.tgz",
@@ -1,6 +1,6 @@
 {
   "name": "dutchie-menus-backend",
-  "version": "1.5.1",
+  "version": "1.6.0",
   "description": "Backend API for Dutchie Menus scraper and management",
   "main": "dist/index.js",
   "scripts": {
@@ -22,6 +22,7 @@
     "seed:dt:cities:bulk": "tsx src/scripts/seed-dt-cities-bulk.ts"
   },
   "dependencies": {
+    "@kubernetes/client-node": "^1.4.0",
     "@types/bcryptjs": "^3.0.0",
     "axios": "^1.6.2",
     "bcrypt": "^5.1.1",
@@ -35,6 +36,7 @@
     "helmet": "^7.1.0",
     "https-proxy-agent": "^7.0.2",
     "ioredis": "^5.8.2",
+    "ip2location-nodejs": "^9.7.0",
     "ipaddr.js": "^2.2.0",
     "jsonwebtoken": "^9.0.2",
     "minio": "^7.1.3",
@@ -47,6 +49,7 @@
     "puppeteer-extra-plugin-stealth": "^2.11.2",
     "sharp": "^0.32.0",
     "socks-proxy-agent": "^8.0.2",
+    "user-agents": "^1.1.669",
     "uuid": "^9.0.1",
     "zod": "^3.22.4"
   },
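Of the dependencies added here, `user-agents` (which is why `lodash.clonedeep` also lands in the lockfile above) supplies randomized, realistic browser user-agent strings for the scraper. A minimal sketch of the package's documented API, not code from this changeset; the `deviceCategory` filter is illustrative:

```typescript
// Hedged sketch of the user-agents package's documented API;
// not taken from this repository.
import UserAgent from 'user-agents';

// Random desktop user agent, regenerated per scrape session.
const userAgent = new UserAgent({ deviceCategory: 'desktop' });
console.log(userAgent.toString());

// random() derives a fresh agent using the same filters.
console.log(userAgent.random().toString());
```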
BIN  backend/public/downloads/cannaiq-menus-1.5.4.zip  (new binary file)
Binary file not shown.

BIN  backend/public/downloads/cannaiq-menus-1.6.0.zip  (new binary file)
Binary file not shown.

backend/public/downloads/cannaiq-menus-latest.zip  (new symbolic link, 1 line)
@@ -0,0 +1 @@
+cannaiq-menus-1.6.0.zip
backend/scripts/download-ip2location.sh  (new executable file, 65 lines)
@@ -0,0 +1,65 @@
+#!/bin/bash
+# Download IP2Location LITE DB3 (City-level) database
+# Free for commercial use with attribution
+# https://lite.ip2location.com/database/db3-ip-country-region-city
+
+set -e
+
+DATA_DIR="${1:-./data/ip2location}"
+DB_FILE="IP2LOCATION-LITE-DB3.BIN"
+
+mkdir -p "$DATA_DIR"
+cd "$DATA_DIR"
+
+echo "Downloading IP2Location LITE DB3 database..."
+
+# IP2Location LITE DB3 - includes city, region, country, lat/lng
+# You need to register at https://lite.ip2location.com/ to get a download token
+# Then set IP2LOCATION_TOKEN environment variable
+
+if [ -z "$IP2LOCATION_TOKEN" ]; then
+  echo ""
+  echo "ERROR: IP2LOCATION_TOKEN not set"
+  echo ""
+  echo "To download the database:"
+  echo "1. Register free at https://lite.ip2location.com/"
+  echo "2. Get your download token from the dashboard"
+  echo "3. Run: IP2LOCATION_TOKEN=your_token ./scripts/download-ip2location.sh"
+  echo ""
+  exit 1
+fi
+
+# Download DB3.LITE (IPv4 + City)
+DOWNLOAD_URL="https://www.ip2location.com/download/?token=${IP2LOCATION_TOKEN}&file=DB3LITEBIN"
+
+echo "Downloading from IP2Location..."
+curl -L -o ip2location.zip "$DOWNLOAD_URL"
+
+echo "Extracting..."
+unzip -o ip2location.zip
+
+# Rename to standard name
+if [ -f "IP2LOCATION-LITE-DB3.BIN" ]; then
+  echo "Database ready: $DATA_DIR/IP2LOCATION-LITE-DB3.BIN"
+elif [ -f "IP-COUNTRY-REGION-CITY.BIN" ]; then
+  mv "IP-COUNTRY-REGION-CITY.BIN" "$DB_FILE"
+  echo "Database ready: $DATA_DIR/$DB_FILE"
+else
+  # Find whatever BIN file was extracted
+  BIN_FILE=$(ls *.BIN 2>/dev/null | head -1)
+  if [ -n "$BIN_FILE" ]; then
+    mv "$BIN_FILE" "$DB_FILE"
+    echo "Database ready: $DATA_DIR/$DB_FILE"
+  else
+    echo "ERROR: No BIN file found in archive"
+    ls -la
+    exit 1
+  fi
+fi
+
+# Cleanup
+rm -f ip2location.zip *.txt LICENSE* README*
+
+echo ""
+echo "Done! Database saved to: $DATA_DIR/$DB_FILE"
+echo "Update monthly by re-running this script."
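The script above only fetches the database; at runtime the new `ip2location-nodejs` dependency reads it. A minimal sketch, assuming the library's documented `IP2Location` class and the script's default output path; `lookupCity` is a hypothetical helper, not a function in this changeset:

```typescript
// Hedged sketch: ip2location-nodejs's documented open()/getAll() API.
// lookupCity is hypothetical; the BIN path matches the script's default.
import { IP2Location } from 'ip2location-nodejs';

const db = new IP2Location();
db.open('./data/ip2location/IP2LOCATION-LITE-DB3.BIN');

export function lookupCity(ip: string) {
  // DB3 resolves country, region, and city (plus lat/lng).
  const result = db.getAll(ip);
  return {
    country: result.countryShort,
    region: result.region,
    city: result.city,
  };
}
```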
backend/src/_deprecated/DONT_USE.md  (new file, 46 lines)
@@ -0,0 +1,46 @@
+# DEPRECATED CODE - DO NOT USE
+
+**These directories contain OLD, ABANDONED code.**
+
+## What's Here
+
+| Directory | What It Was | Why Deprecated |
+|-----------|-------------|----------------|
+| `hydration/` | Old pipeline for processing crawl data | Replaced by `src/tasks/handlers/` |
+| `scraper-v2/` | Old Puppeteer-based scraper engine | Replaced by curl-based `src/platforms/dutchie/client.ts` |
+| `canonical-hydration/` | Intermediate step toward canonical schema | Merged into task handlers |
+
+## What to Use Instead
+
+| Old (DONT USE) | New (USE THIS) |
+|----------------|----------------|
+| `hydration/normalizers/dutchie.ts` | `src/tasks/handlers/product-refresh.ts` |
+| `hydration/producer.ts` | `src/tasks/handlers/payload-fetch.ts` |
+| `scraper-v2/engine.ts` | `src/platforms/dutchie/client.ts` |
+| `scraper-v2/scheduler.ts` | `src/services/task-scheduler.ts` |
+
+## Why Keep This Code?
+
+- Historical reference only
+- Some patterns may be useful for debugging
+- Will be deleted once confirmed not needed
+
+## Claude Instructions
+
+**IF YOU ARE CLAUDE:**
+
+1. NEVER import from `src/_deprecated/`
+2. NEVER reference these files as examples
+3. NEVER try to "fix" or "update" code in here
+4. If you see imports from these directories, suggest replacing them
+
+**Correct imports:**
+
+```typescript
+// GOOD
+import { executeGraphQL } from '../platforms/dutchie/client';
+import { pool } from '../db/pool';
+
+// BAD - DO NOT USE
+import { something } from '../_deprecated/hydration/...';
+import { something } from '../_deprecated/scraper-v2/...';
+```
backend/src/_deprecated/scraper-v2/canonical-pipeline.ts  (new file, 353 lines)
@@ -0,0 +1,353 @@
+/**
+ * Canonical Database Pipeline
+ *
+ * Writes scraped products to the canonical tables:
+ * - store_products (current state)
+ * - store_product_snapshots (historical)
+ * - product_variants (per-weight pricing)
+ * - product_variant_snapshots (variant history)
+ *
+ * This replaces the legacy DatabasePipeline that wrote to the `products` table.
+ */
+
+import { ItemPipeline, Product } from './types';
+import { logger } from '../services/logger';
+import { pool } from '../db/pool';
+import { v4 as uuidv4 } from 'uuid';
+
+interface VariantData {
+  option: string;
+  priceRec: number | null;
+  priceMed: number | null;
+  priceRecSpecial: number | null;
+  priceMedSpecial: number | null;
+  quantity: number | null;
+  inStock: boolean;
+  isOnSpecial: boolean;
+}
+
+/**
+ * Parse a weight string like "1g", "3.5g", "1/8oz" into value and unit
+ */
+function parseWeight(option: string): { value: number | null; unit: string | null } {
+  if (!option) return { value: null, unit: null };
+
+  // Match patterns like "1g", "3.5g", "1/8oz", "100mg"
+  const match = option.match(/^([\d.\/]+)\s*(g|oz|mg|ml|each|pk|ct)?$/i);
+  if (!match) return { value: null, unit: null };
+
+  let value: number | null = null;
+  const rawValue = match[1];
+  const unit = match[2]?.toLowerCase() || null;
+
+  // Handle fractions like "1/8"
+  if (rawValue.includes('/')) {
+    const [num, denom] = rawValue.split('/');
+    value = parseFloat(num) / parseFloat(denom);
+  } else {
+    value = parseFloat(rawValue);
+  }
+
+  if (isNaN(value)) value = null;
+
+  return { value, unit };
+}
+
+/**
+ * Canonical Database Pipeline - saves items to canonical tables
+ *
+ * TABLES:
+ * - store_products: Current product state per store
+ * - store_product_snapshots: Historical snapshot per crawl
+ * - product_variants: Current variant state (per-weight pricing)
+ * - product_variant_snapshots: Historical variant snapshots
+ */
+export class CanonicalDatabasePipeline implements ItemPipeline<Product> {
+  name = 'CanonicalDatabasePipeline';
+  priority = 10; // Low priority - runs last
+
+  private crawlRunId: number | null = null;
+
+  setCrawlRunId(id: number): void {
+    this.crawlRunId = id;
+  }
+
+  async process(item: Product, spider: string): Promise<Product | null> {
+    const client = await pool.connect();
+
+    try {
+      // Extract metadata set by spider
+      const dispensaryId = (item as any).dispensaryId;
+      const categoryName = (item as any).categoryName;
+      const variants: VariantData[] = (item as any).variants || [];
+
+      if (!dispensaryId) {
+        logger.error('pipeline', `Missing dispensaryId for ${item.name}`);
+        return null;
+      }
+
+      const externalProductId = item.dutchieProductId || null;
+      const provider = 'dutchie';
+
+      // Determine stock status
+      const isInStock = (item as any).inStock !== false;
+      const stockQuantity = (item as any).stockQuantity || null;
+
+      // Extract pricing
+      const priceRec = item.price || null;
+      const priceMed = (item as any).priceMed || null;
+
+      let storeProductId: number | null = null;
+      let isNewProduct = false;
+
+      // ============================================================
+      // UPSERT store_products
+      // ============================================================
+
+      const upsertResult = await client.query(`
+        INSERT INTO store_products (
+          dispensary_id, provider, provider_product_id,
+          name_raw, brand_name_raw, category_raw,
+          price_rec, price_med,
+          thc_percent, cbd_percent,
+          is_in_stock, stock_quantity,
+          image_url, source_url,
+          raw_data,
+          first_seen_at, last_seen_at,
+          created_at, updated_at
+        ) VALUES (
+          $1, $2, $3,
+          $4, $5, $6,
+          $7, $8,
+          $9, $10,
+          $11, $12,
+          $13, $14,
+          $15,
+          NOW(), NOW(),
+          NOW(), NOW()
+        )
+        ON CONFLICT (dispensary_id, provider, provider_product_id)
+        DO UPDATE SET
+          name_raw = EXCLUDED.name_raw,
+          brand_name_raw = EXCLUDED.brand_name_raw,
+          category_raw = EXCLUDED.category_raw,
+          price_rec = EXCLUDED.price_rec,
+          price_med = EXCLUDED.price_med,
+          thc_percent = EXCLUDED.thc_percent,
+          cbd_percent = EXCLUDED.cbd_percent,
+          is_in_stock = EXCLUDED.is_in_stock,
+          stock_quantity = EXCLUDED.stock_quantity,
+          image_url = COALESCE(EXCLUDED.image_url, store_products.image_url),
+          source_url = EXCLUDED.source_url,
+          raw_data = EXCLUDED.raw_data,
+          last_seen_at = NOW(),
+          updated_at = NOW()
+        RETURNING id, (xmax = 0) as is_new
+      `, [
+        dispensaryId, provider, externalProductId,
+        item.name, item.brand || null, categoryName || null,
+        priceRec, priceMed,
+        item.thcPercentage || null, item.cbdPercentage || null,
+        isInStock, stockQuantity,
+        item.imageUrl || null, item.dutchieUrl || null,
+        JSON.stringify(item.metadata || {}),
+      ]);
+
+      storeProductId = upsertResult.rows[0].id;
+      isNewProduct = upsertResult.rows[0].is_new;
+
+      logger.debug('pipeline', `${isNewProduct ? 'Inserted' : 'Updated'} canonical product: ${item.name} (ID: ${storeProductId})`);
+
+      // ============================================================
+      // INSERT store_product_snapshots
+      // ============================================================
+
+      await client.query(`
+        INSERT INTO store_product_snapshots (
+          store_product_id, dispensary_id, crawl_run_id,
+          price_rec, price_med,
+          is_in_stock, stock_quantity,
+          is_present_in_feed,
+          captured_at, created_at
+        ) VALUES (
+          $1, $2, $3,
+          $4, $5,
+          $6, $7,
+          TRUE,
+          NOW(), NOW()
+        )
+        ON CONFLICT (store_product_id, crawl_run_id) WHERE crawl_run_id IS NOT NULL
+        DO UPDATE SET
+          price_rec = EXCLUDED.price_rec,
+          price_med = EXCLUDED.price_med,
+          is_in_stock = EXCLUDED.is_in_stock,
+          stock_quantity = EXCLUDED.stock_quantity
+      `, [
+        storeProductId, dispensaryId, this.crawlRunId,
+        priceRec, priceMed,
+        isInStock, stockQuantity,
+      ]);
+
+      // ============================================================
+      // UPSERT product_variants (if variants exist)
+      // ============================================================
+
+      if (variants.length > 0) {
+        for (const variant of variants) {
+          const { value: weightValue, unit: weightUnit } = parseWeight(variant.option);
+
+          const variantResult = await client.query(`
+            INSERT INTO product_variants (
+              store_product_id, dispensary_id,
+              option,
+              price_rec, price_med, price_rec_special, price_med_special,
+              quantity, quantity_available, in_stock, is_on_special,
+              weight_value, weight_unit,
+              first_seen_at, last_seen_at,
+              created_at, updated_at
+            ) VALUES (
+              $1, $2,
+              $3,
+              $4, $5, $6, $7,
+              $8, $8, $9, $10,
+              $11, $12,
+              NOW(), NOW(),
+              NOW(), NOW()
+            )
+            ON CONFLICT (store_product_id, option)
+            DO UPDATE SET
+              price_rec = EXCLUDED.price_rec,
+              price_med = EXCLUDED.price_med,
+              price_rec_special = EXCLUDED.price_rec_special,
+              price_med_special = EXCLUDED.price_med_special,
+              quantity = EXCLUDED.quantity,
+              quantity_available = EXCLUDED.quantity_available,
+              in_stock = EXCLUDED.in_stock,
+              is_on_special = EXCLUDED.is_on_special,
+              weight_value = EXCLUDED.weight_value,
+              weight_unit = EXCLUDED.weight_unit,
+              last_seen_at = NOW(),
+              last_price_change_at = CASE
+                WHEN product_variants.price_rec IS DISTINCT FROM EXCLUDED.price_rec
+                  OR product_variants.price_rec_special IS DISTINCT FROM EXCLUDED.price_rec_special
+                THEN NOW()
+                ELSE product_variants.last_price_change_at
+              END,
+              last_stock_change_at = CASE
+                WHEN product_variants.in_stock IS DISTINCT FROM EXCLUDED.in_stock
+                THEN NOW()
+                ELSE product_variants.last_stock_change_at
+              END,
+              updated_at = NOW()
+            RETURNING id
+          `, [
+            storeProductId, dispensaryId,
+            variant.option,
+            variant.priceRec, variant.priceMed, variant.priceRecSpecial, variant.priceMedSpecial,
+            variant.quantity, variant.inStock, variant.isOnSpecial,
+            weightValue, weightUnit,
+          ]);
+
+          const variantId = variantResult.rows[0].id;
+
+          // Insert variant snapshot
+          await client.query(`
+            INSERT INTO product_variant_snapshots (
+              product_variant_id, store_product_id, dispensary_id, crawl_run_id,
+              option,
+              price_rec, price_med, price_rec_special, price_med_special,
+              quantity, in_stock, is_on_special,
+              is_present_in_feed,
+              captured_at, created_at
+            ) VALUES (
+              $1, $2, $3, $4,
+              $5,
+              $6, $7, $8, $9,
+              $10, $11, $12,
+              TRUE,
+              NOW(), NOW()
+            )
+          `, [
+            variantId, storeProductId, dispensaryId, this.crawlRunId,
+            variant.option,
+            variant.priceRec, variant.priceMed, variant.priceRecSpecial, variant.priceMedSpecial,
+            variant.quantity, variant.inStock, variant.isOnSpecial,
+          ]);
+        }
+
+        logger.debug('pipeline', `Upserted ${variants.length} variants for ${item.name}`);
+      }
+
+      // Attach metadata for stats tracking
+      (item as any).isNewProduct = isNewProduct;
+      (item as any).storeProductId = storeProductId;
+
+      return item;
+
+    } catch (error) {
+      logger.error('pipeline', `Failed to save canonical product ${item.name}: ${error}`);
+      return null;
+    } finally {
+      client.release();
+    }
+  }
+}
+
+/**
+ * Create a crawl run record before starting a crawl
+ */
+export async function createCrawlRun(
+  dispensaryId: number,
+  provider: string = 'dutchie',
+  triggerType: string = 'manual'
+): Promise<number> {
+  const result = await pool.query(`
+    INSERT INTO crawl_runs (
+      dispensary_id, provider,
+      started_at, status, trigger_type
+    ) VALUES ($1, $2, NOW(), 'running', $3)
+    RETURNING id
+  `, [dispensaryId, provider, triggerType]);
+
+  return result.rows[0].id;
+}
+
+/**
+ * Complete a crawl run with stats
+ */
+export async function completeCrawlRun(
+  crawlRunId: number,
+  stats: {
+    productsFound: number;
+    productsNew: number;
+    productsUpdated: number;
+    snapshotsWritten: number;
+    variantsUpserted?: number;
+    status?: 'completed' | 'failed' | 'partial';
+    error?: string;
+  }
+): Promise<void> {
+  await pool.query(`
+    UPDATE crawl_runs SET
+      finished_at = NOW(),
+      status = $2,
+      products_found = $3,
+      products_new = $4,
+      products_updated = $5,
+      snapshots_written = $6,
+      metadata = jsonb_build_object(
+        'variants_upserted', $7,
+        'error', $8
+      )
+    WHERE id = $1
+  `, [
+    crawlRunId,
+    stats.status || 'completed',
+    stats.productsFound,
+    stats.productsNew,
+    stats.productsUpdated,
+    stats.snapshotsWritten,
+    stats.variantsUpserted || 0,
+    stats.error || null,
+  ]);
}
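Tracing the regex and the fraction branch in `parseWeight` above, its expected behavior on typical weight strings (values derived by hand from the code, not test output from this PR):

```typescript
// Worked examples for parseWeight, derived from the regex above.
parseWeight('3.5g');   // { value: 3.5, unit: 'g' }
parseWeight('1/8oz');  // { value: 0.125, unit: 'oz' }  -- fraction branch
parseWeight('100mg');  // { value: 100, unit: 'mg' }
parseWeight('2pk');    // { value: 2, unit: 'pk' }
parseWeight('bundle'); // { value: null, unit: null }   -- no regex match
```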
@@ -2,6 +2,7 @@ import { RequestScheduler } from './scheduler';
 import { Downloader } from './downloader';
 import { MiddlewareEngine, UserAgentMiddleware, ProxyMiddleware, RateLimitMiddleware, RetryMiddleware, BotDetectionMiddleware, StealthMiddleware } from './middlewares';
 import { PipelineEngine, ValidationPipeline, SanitizationPipeline, DeduplicationPipeline, ImagePipeline, DatabasePipeline, StatsPipeline } from './pipelines';
+import { CanonicalDatabasePipeline, createCrawlRun, completeCrawlRun } from './canonical-pipeline';
 import { ScraperRequest, ScraperResponse, ParseResult, Product, ScraperStats } from './types';
 import { logger } from '../services/logger';
 import { pool } from '../db/pool';
@@ -65,6 +66,9 @@ export class ScraperEngine {
     this.pipelineEngine.use(new DeduplicationPipeline());
     this.pipelineEngine.use(new ImagePipeline());
     this.pipelineEngine.use(new StatsPipeline());
+    // Use canonical pipeline for writing to store_products/product_variants
+    this.pipelineEngine.use(new CanonicalDatabasePipeline());
+    // Keep legacy pipeline for backwards compatibility with existing stores table
     this.pipelineEngine.use(new DatabasePipeline());
   }
 
@@ -39,6 +39,11 @@ export {
   DatabasePipeline,
   StatsPipeline
 } from './pipelines';
+export {
+  CanonicalDatabasePipeline,
+  createCrawlRun,
+  completeCrawlRun
+} from './canonical-pipeline';
 export * from './types';
 
 // Main API functions
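The newly exported `createCrawlRun`/`completeCrawlRun` helpers bracket a crawl so that every snapshot row carries a run id. A minimal sketch of the intended call order, assembled from the definitions above; the engine's real call site is not shown in this diff:

```typescript
// Hedged sketch of the run-tracking flow; not the engine's actual code.
import {
  CanonicalDatabasePipeline,
  createCrawlRun,
  completeCrawlRun,
} from './canonical-pipeline';

async function crawlWithRunTracking(dispensaryId: number) {
  const pipeline = new CanonicalDatabasePipeline();
  const crawlRunId = await createCrawlRun(dispensaryId, 'dutchie', 'manual');
  pipeline.setCrawlRunId(crawlRunId); // snapshots now reference this run

  try {
    // ... feed scraped items through pipeline.process(item, spiderName) ...
    await completeCrawlRun(crawlRunId, {
      productsFound: 0, // placeholders: real stats come from StatsPipeline
      productsNew: 0,
      productsUpdated: 0,
      snapshotsWritten: 0,
      status: 'completed',
    });
  } catch (err) {
    await completeCrawlRun(crawlRunId, {
      productsFound: 0,
      productsNew: 0,
      productsUpdated: 0,
      snapshotsWritten: 0,
      status: 'failed',
      error: err instanceof Error ? err.message : String(err),
    });
  }
}
```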
backend/src/_deprecated/scripts/test-crawl-to-canonical.ts  (new file, 277 lines)
@@ -0,0 +1,277 @@
+#!/usr/bin/env npx tsx
+/**
+ * Test Script: Crawl a single dispensary and write to canonical tables
+ *
+ * This script:
+ * 1. Fetches products from Dutchie GraphQL
+ * 2. Normalizes via DutchieNormalizer
+ * 3. Writes to store_products, product_variants, snapshots via hydrateToCanonical
+ *
+ * Usage:
+ *   npx tsx src/scripts/test-crawl-to-canonical.ts <dispensaryId>
+ *   npx tsx src/scripts/test-crawl-to-canonical.ts 235
+ */
+
+import { Pool } from 'pg';
+import dotenv from 'dotenv';
+import {
+  executeGraphQL,
+  GRAPHQL_HASHES,
+  DUTCHIE_CONFIG,
+} from '../platforms/dutchie';
+import {
+  DutchieNormalizer,
+  hydrateToCanonical,
+} from '../hydration';
+import { initializeImageStorage } from '../utils/image-storage';
+
+dotenv.config();
+
+// ============================================================
+// DATABASE CONNECTION
+// ============================================================
+
+function getConnectionString(): string {
+  if (process.env.CANNAIQ_DB_URL) {
+    return process.env.CANNAIQ_DB_URL;
+  }
+  if (process.env.DATABASE_URL) {
+    return process.env.DATABASE_URL;
+  }
+  const host = process.env.CANNAIQ_DB_HOST || 'localhost';
+  const port = process.env.CANNAIQ_DB_PORT || '54320';
+  const name = process.env.CANNAIQ_DB_NAME || 'dutchie_menus';
+  const user = process.env.CANNAIQ_DB_USER || 'dutchie';
+  const pass = process.env.CANNAIQ_DB_PASS || 'dutchie_local_pass';
+  return `postgresql://${user}:${pass}@${host}:${port}/${name}`;
+}
+
+const pool = new Pool({ connectionString: getConnectionString() });
+
+// ============================================================
+// FETCH PRODUCTS FROM DUTCHIE
+// ============================================================
+
+interface FetchResult {
+  products: any[];
+  totalPages: number;
+  totalProducts: number;
+}
+
+async function fetchAllProducts(platformDispensaryId: string, cName: string): Promise<FetchResult> {
+  const allProducts: any[] = [];
+  let page = 0;
+  let totalPages = 1;
+  let totalProducts = 0;
+
+  console.log(`[Fetch] Starting fetch for ${platformDispensaryId} (cName: ${cName})`);
+
+  while (page < totalPages && page < DUTCHIE_CONFIG.maxPages) {
+    const variables = {
+      includeEnterpriseSpecials: false,
+      productsFilter: {
+        dispensaryId: platformDispensaryId,
+        pricingType: 'rec',
+        Status: 'Active', // 'Active' = in-stock products with pricing
+        types: [],
+        useCache: true,
+        isDefaultSort: true,
+        sortBy: 'popularSortIdx',
+        sortDirection: 1,
+        bypassOnlineThresholds: true,
+        isKioskMenu: false,
+        removeProductsBelowOptionThresholds: false,
+      },
+      page,
+      perPage: DUTCHIE_CONFIG.perPage,
+    };
+
+    try {
+      const result = await executeGraphQL(
+        'FilteredProducts',
+        variables,
+        GRAPHQL_HASHES.FilteredProducts,
+        { cName, maxRetries: 3 }
+      );
+
+      const data = result?.data?.filteredProducts;
+      if (!data) {
+        console.error(`[Fetch] No data returned for page ${page}`);
+        break;
+      }
+
+      const products = data.products || [];
+      totalProducts = data.queryInfo?.totalCount || 0;
+      totalPages = Math.ceil(totalProducts / DUTCHIE_CONFIG.perPage);
+
+      allProducts.push(...products);
+      console.log(`[Fetch] Page ${page + 1}/${totalPages}: ${products.length} products (total so far: ${allProducts.length})`);
+
+      page++;
+
+      if (page < totalPages) {
+        await new Promise(r => setTimeout(r, DUTCHIE_CONFIG.pageDelayMs));
+      }
+    } catch (error: any) {
+      console.error(`[Fetch] Error on page ${page}: ${error.message}`);
+      break;
+    }
+  }
+
+  return { products: allProducts, totalPages, totalProducts };
+}
+
+// ============================================================
+// MAIN
+// ============================================================
+
+async function main() {
+  const dispensaryId = parseInt(process.argv[2], 10);
+
+  if (!dispensaryId) {
+    console.error('Usage: npx tsx src/scripts/test-crawl-to-canonical.ts <dispensaryId>');
+    console.error('Example: npx tsx src/scripts/test-crawl-to-canonical.ts 235');
+    process.exit(1);
+  }
+
+  console.log('============================================================');
+  console.log(`Test Crawl to Canonical - Dispensary ${dispensaryId}`);
+  console.log('============================================================\n');
+
+  // Initialize image storage
+  console.log('[Init] Initializing image storage...');
+  await initializeImageStorage();
+  console.log('  Image storage ready\n');
+
+  try {
+    // Step 1: Get dispensary info
+    console.log('[Step 1] Getting dispensary info...');
+    const dispResult = await pool.query(`
+      SELECT id, name, platform_dispensary_id, menu_url
+      FROM dispensaries
+      WHERE id = $1
+    `, [dispensaryId]);
+
+    if (dispResult.rows.length === 0) {
+      throw new Error(`Dispensary ${dispensaryId} not found`);
+    }
+
+    const disp = dispResult.rows[0];
+    console.log(`  Name: ${disp.name}`);
+    console.log(`  Platform ID: ${disp.platform_dispensary_id}`);
+    console.log(`  Menu URL: ${disp.menu_url}`);
+
+    if (!disp.platform_dispensary_id) {
+      throw new Error('Dispensary does not have a platform_dispensary_id');
+    }
+
+    // Extract cName from menu_url
+    const cNameMatch = disp.menu_url?.match(/\/(?:embedded-menu|dispensary)\/([^/?]+)/);
+    const cName = cNameMatch ? cNameMatch[1] : 'dispensary';
+    console.log(`  cName: ${cName}\n`);
+
+    // Step 2: Fetch products from Dutchie
+    console.log('[Step 2] Fetching products from Dutchie GraphQL...');
+    const fetchResult = await fetchAllProducts(disp.platform_dispensary_id, cName);
+    console.log(`  Total products fetched: ${fetchResult.products.length}\n`);
+
+    if (fetchResult.products.length === 0) {
+      console.log('No products fetched. Exiting.');
+      process.exit(0);
+    }
+
+    // Step 3: Normalize
+    console.log('[Step 3] Normalizing products...');
+    const normalizer = new DutchieNormalizer();
+
+    // Construct a RawPayload structure that the normalizer expects
+    // The normalizer.normalize() expects: { raw_json, dispensary_id, ... }
+    const rawPayloadForValidation = {
+      products: fetchResult.products,
+      queryInfo: {
+        totalCount: fetchResult.totalProducts,
+      },
+    };
+
+    const validation = normalizer.validatePayload(rawPayloadForValidation);
+    if (!validation.valid) {
+      console.error(`  Validation failed: ${validation.errors?.join(', ')}`);
+      process.exit(1);
+    }
+    console.log(`  Validation: PASS`);
+
+    // Build proper RawPayload for normalize()
+    const rawPayload = {
+      id: `test-${Date.now()}`,
+      dispensary_id: dispensaryId,
+      crawl_run_id: null,
+      platform: 'dutchie',
+      payload_version: 1,
+      raw_json: rawPayloadForValidation,
+      product_count: fetchResult.totalProducts,
+      pricing_type: 'rec',
+      crawl_mode: 'active',
+      fetched_at: new Date(),
+      processed: false,
+      normalized_at: null,
+      hydration_error: null,
+      hydration_attempts: 0,
+      created_at: new Date(),
+    };
+
+    const normResult = normalizer.normalize(rawPayload);
+    console.log(`  Normalized products: ${normResult.products.length}`);
+    console.log(`  Brands extracted: ${normResult.brands.length}`);
+    console.log(`  Sample product: ${normResult.products[0]?.name}\n`);
+
+    // Step 4: Write to canonical tables
+    console.log('[Step 4] Writing to canonical tables via hydrateToCanonical...');
+    const hydrateResult = await hydrateToCanonical(
+      pool,
+      dispensaryId,
+      normResult,
+      null // no crawl_run_id for this test
+    );
+
+    console.log(`  Products upserted: ${hydrateResult.productsUpserted}`);
+    console.log(`  Products new: ${hydrateResult.productsNew}`);
+    console.log(`  Snapshots created: ${hydrateResult.snapshotsCreated}`);
+    console.log(`  Variants upserted: ${hydrateResult.variantsUpserted}`);
+    console.log(`  Brands created: ${hydrateResult.brandsCreated}\n`);
+
+    // Step 5: Verify
+    console.log('[Step 5] Verifying data in canonical tables...');
+
+    const productCount = await pool.query(`
+      SELECT COUNT(*) as count FROM store_products WHERE dispensary_id = $1
+    `, [dispensaryId]);
+    console.log(`  store_products count: ${productCount.rows[0].count}`);
+
+    const variantCount = await pool.query(`
+      SELECT COUNT(*) as count FROM product_variants WHERE dispensary_id = $1
+    `, [dispensaryId]);
+    console.log(`  product_variants count: ${variantCount.rows[0].count}`);
+
+    const snapshotCount = await pool.query(`
+      SELECT COUNT(*) as count FROM store_product_snapshots WHERE dispensary_id = $1
+    `, [dispensaryId]);
+    console.log(`  store_product_snapshots count: ${snapshotCount.rows[0].count}`);
+
+    console.log('\n============================================================');
+    console.log('SUCCESS - Crawl and hydration complete!');
+    console.log('============================================================');
+
+  } catch (error: any) {
+    console.error('\n============================================================');
+    console.error('ERROR:', error.message);
+    console.error('============================================================');
+    if (error.stack) {
+      console.error(error.stack);
+    }
+    process.exit(1);
+  } finally {
+    await pool.end();
+  }
+}
+
+main();
@@ -3,7 +3,7 @@ import StealthPlugin from 'puppeteer-extra-plugin-stealth';
 import { Browser, Page } from 'puppeteer';
 import { SocksProxyAgent } from 'socks-proxy-agent';
 import { pool } from '../db/pool';
-import { uploadImageFromUrl, getImageUrl } from '../utils/minio';
+import { downloadProductImageLegacy } from '../utils/image-storage';
 import { logger } from './logger';
 import { registerScraper, updateScraperStats, completeScraper } from '../routes/scraper-monitor';
 import { incrementProxyFailure, getActiveProxy, isBotDetectionError, putProxyInTimeout } from './proxy';
@@ -767,7 +767,8 @@ export async function saveProducts(storeId: number, categoryId: number, products
 
     if (product.imageUrl && !localImagePath) {
       try {
-        localImagePath = await uploadImageFromUrl(product.imageUrl, productId);
+        const result = await downloadProductImageLegacy(product.imageUrl, 0, productId);
+        localImagePath = result.urls?.original || null;
         await client.query(`
           UPDATE products
           SET local_image_path = $1
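For readers tracing the swap away from `uploadImageFromUrl`, here is a signature for `downloadProductImageLegacy` inferred purely from this call site; the real definition lives in `src/utils/image-storage` and may differ:

```typescript
// Hypothetical shape, inferred only from the call above -- NOT the actual
// definition. The second argument's meaning (passed as 0 here) is not
// visible in this diff.
interface LegacyImageResult {
  urls?: {
    original?: string;
  };
}

declare function downloadProductImageLegacy(
  imageUrl: string,
  secondArg: number,
  productId: number | string
): Promise<LegacyImageResult>;
```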
backend/src/_deprecated/system/routes/index.ts  (new file, 584 lines)
@@ -0,0 +1,584 @@
+/**
+ * System API Routes
+ *
+ * Provides REST API endpoints for system monitoring and control:
+ * - /api/system/sync/* - Sync orchestrator
+ * - /api/system/dlq/* - Dead-letter queue
+ * - /api/system/integrity/* - Integrity checks
+ * - /api/system/fix/* - Auto-fix routines
+ * - /api/system/alerts/* - System alerts
+ * - /metrics - Prometheus metrics
+ *
+ * Phase 5: Full Production Sync + Monitoring
+ */
+
+import { Router, Request, Response } from 'express';
+import { Pool } from 'pg';
+import {
+  SyncOrchestrator,
+  MetricsService,
+  DLQService,
+  AlertService,
+  IntegrityService,
+  AutoFixService,
+} from '../services';
+
+export function createSystemRouter(pool: Pool): Router {
+  const router = Router();
+
+  // Initialize services
+  const metrics = new MetricsService(pool);
+  const dlq = new DLQService(pool);
+  const alerts = new AlertService(pool);
+  const integrity = new IntegrityService(pool, alerts);
+  const autoFix = new AutoFixService(pool, alerts);
+  const orchestrator = new SyncOrchestrator(pool, metrics, dlq, alerts);
+
+  // ============================================================
+  // SYNC ORCHESTRATOR ENDPOINTS
+  // ============================================================
+
+  /**
+   * GET /api/system/sync/status
+   * Get current sync status
+   */
+  router.get('/sync/status', async (_req: Request, res: Response) => {
+    try {
+      const status = await orchestrator.getStatus();
+      res.json(status);
+    } catch (error) {
+      console.error('[System] Sync status error:', error);
+      res.status(500).json({ error: 'Failed to get sync status' });
+    }
+  });
+
+  /**
+   * POST /api/system/sync/run
+   * Trigger a sync run
+   */
+  router.post('/sync/run', async (req: Request, res: Response) => {
+    try {
+      const triggeredBy = req.body.triggeredBy || 'api';
+      const result = await orchestrator.runSync();
+      res.json({
+        success: true,
+        triggeredBy,
+        metrics: result,
+      });
+    } catch (error) {
+      console.error('[System] Sync run error:', error);
+      res.status(500).json({
+        success: false,
+        error: error instanceof Error ? error.message : 'Sync run failed',
+      });
+    }
+  });
+
+  /**
+   * GET /api/system/sync/queue-depth
+   * Get queue depth information
+   */
+  router.get('/sync/queue-depth', async (_req: Request, res: Response) => {
+    try {
+      const depth = await orchestrator.getQueueDepth();
+      res.json(depth);
+    } catch (error) {
+      console.error('[System] Queue depth error:', error);
+      res.status(500).json({ error: 'Failed to get queue depth' });
+    }
+  });
+
+  /**
+   * GET /api/system/sync/health
+   * Get sync health status
+   */
+  router.get('/sync/health', async (_req: Request, res: Response) => {
+    try {
+      const health = await orchestrator.getHealth();
+      res.status(health.healthy ? 200 : 503).json(health);
+    } catch (error) {
+      console.error('[System] Health check error:', error);
+      res.status(500).json({ healthy: false, error: 'Health check failed' });
+    }
+  });
+
+  /**
+   * POST /api/system/sync/pause
+   * Pause the orchestrator
+   */
+  router.post('/sync/pause', async (req: Request, res: Response) => {
+    try {
+      const reason = req.body.reason || 'Manual pause';
+      await orchestrator.pause(reason);
+      res.json({ success: true, message: 'Orchestrator paused' });
+    } catch (error) {
+      console.error('[System] Pause error:', error);
+      res.status(500).json({ error: 'Failed to pause orchestrator' });
+    }
+  });
+
+  /**
+   * POST /api/system/sync/resume
+   * Resume the orchestrator
+   */
+  router.post('/sync/resume', async (_req: Request, res: Response) => {
+    try {
+      await orchestrator.resume();
+      res.json({ success: true, message: 'Orchestrator resumed' });
+    } catch (error) {
+      console.error('[System] Resume error:', error);
+      res.status(500).json({ error: 'Failed to resume orchestrator' });
+    }
+  });
+
+  // ============================================================
+  // DLQ ENDPOINTS
+  // ============================================================
+
+  /**
+   * GET /api/system/dlq
+   * List DLQ payloads
+   */
+  router.get('/dlq', async (req: Request, res: Response) => {
+    try {
+      const options = {
+        status: req.query.status as string,
+        errorType: req.query.errorType as string,
+        dispensaryId: req.query.dispensaryId ? parseInt(req.query.dispensaryId as string) : undefined,
+        limit: req.query.limit ? parseInt(req.query.limit as string) : 50,
+        offset: req.query.offset ? parseInt(req.query.offset as string) : 0,
+      };
+
+      const result = await dlq.listPayloads(options);
+      res.json(result);
+    } catch (error) {
+      console.error('[System] DLQ list error:', error);
+      res.status(500).json({ error: 'Failed to list DLQ payloads' });
+    }
+  });
+
+  /**
+   * GET /api/system/dlq/stats
+   * Get DLQ statistics
+   */
+  router.get('/dlq/stats', async (_req: Request, res: Response) => {
+    try {
+      const stats = await dlq.getStats();
+      res.json(stats);
+    } catch (error) {
+      console.error('[System] DLQ stats error:', error);
+      res.status(500).json({ error: 'Failed to get DLQ stats' });
+    }
+  });
+
+  /**
+   * GET /api/system/dlq/summary
+   * Get DLQ summary by error type
+   */
+  router.get('/dlq/summary', async (_req: Request, res: Response) => {
+    try {
+      const summary = await dlq.getSummary();
+      res.json(summary);
+    } catch (error) {
+      console.error('[System] DLQ summary error:', error);
+      res.status(500).json({ error: 'Failed to get DLQ summary' });
+    }
+  });
+
+  /**
+   * GET /api/system/dlq/:id
+   * Get a specific DLQ payload
+   */
+  router.get('/dlq/:id', async (req: Request, res: Response) => {
+    try {
+      const payload = await dlq.getPayload(req.params.id);
+      if (!payload) {
+        return res.status(404).json({ error: 'Payload not found' });
+      }
+      res.json(payload);
+    } catch (error) {
+      console.error('[System] DLQ get error:', error);
+      res.status(500).json({ error: 'Failed to get DLQ payload' });
+    }
+  });
+
+  /**
+   * POST /api/system/dlq/:id/retry
+   * Retry a DLQ payload
+   */
+  router.post('/dlq/:id/retry', async (req: Request, res: Response) => {
+    try {
+      const result = await dlq.retryPayload(req.params.id);
+      if (result.success) {
+        res.json(result);
+      } else {
+        res.status(400).json(result);
+      }
+    } catch (error) {
+      console.error('[System] DLQ retry error:', error);
+      res.status(500).json({ error: 'Failed to retry payload' });
+    }
+  });
+
+  /**
+   * POST /api/system/dlq/:id/abandon
+   * Abandon a DLQ payload
+   */
+  router.post('/dlq/:id/abandon', async (req: Request, res: Response) => {
+    try {
+      const reason = req.body.reason || 'Manually abandoned';
+      const abandonedBy = req.body.abandonedBy || 'api';
+      const success = await dlq.abandonPayload(req.params.id, reason, abandonedBy);
+      res.json({ success });
+    } catch (error) {
+      console.error('[System] DLQ abandon error:', error);
+      res.status(500).json({ error: 'Failed to abandon payload' });
+    }
+  });
+
+  /**
+   * POST /api/system/dlq/bulk-retry
+   * Bulk retry payloads by error type
+   */
+  router.post('/dlq/bulk-retry', async (req: Request, res: Response) => {
+    try {
+      const { errorType } = req.body;
+      if (!errorType) {
+        return res.status(400).json({ error: 'errorType is required' });
+      }
+      const result = await dlq.bulkRetryByErrorType(errorType);
+      res.json(result);
+    } catch (error) {
+      console.error('[System] DLQ bulk retry error:', error);
+      res.status(500).json({ error: 'Failed to bulk retry' });
+    }
+  });
+
+  // ============================================================
+  // INTEGRITY CHECK ENDPOINTS
+  // ============================================================
+
+  /**
+   * POST /api/system/integrity/run
+   * Run all integrity checks
+   */
+  router.post('/integrity/run', async (req: Request, res: Response) => {
+    try {
+      const triggeredBy = req.body.triggeredBy || 'api';
+      const result = await integrity.runAllChecks(triggeredBy);
+      res.json(result);
+    } catch (error) {
+      console.error('[System] Integrity run error:', error);
+      res.status(500).json({ error: 'Failed to run integrity checks' });
+    }
+  });
+
+  /**
+   * GET /api/system/integrity/runs
+   * Get recent integrity check runs
+   */
+  router.get('/integrity/runs', async (req: Request, res: Response) => {
+    try {
+      const limit = req.query.limit ? parseInt(req.query.limit as string) : 10;
+      const runs = await integrity.getRecentRuns(limit);
+      res.json(runs);
+    } catch (error) {
+      console.error('[System] Integrity runs error:', error);
+      res.status(500).json({ error: 'Failed to get integrity runs' });
+    }
+  });
+
+  /**
+   * GET /api/system/integrity/runs/:runId
+   * Get results for a specific integrity run
+   */
+  router.get('/integrity/runs/:runId', async (req: Request, res: Response) => {
+    try {
+      const results = await integrity.getRunResults(req.params.runId);
+      res.json(results);
+    } catch (error) {
+      console.error('[System] Integrity run results error:', error);
+      res.status(500).json({ error: 'Failed to get run results' });
+    }
+  });
+
+  // ============================================================
+  // AUTO-FIX ENDPOINTS
+  // ============================================================
+
+  /**
+   * GET /api/system/fix/routines
+   * Get available fix routines
+   */
+  router.get('/fix/routines', (_req: Request, res: Response) => {
+    try {
+      const routines = autoFix.getAvailableRoutines();
+      res.json(routines);
+    } catch (error) {
+      console.error('[System] Get routines error:', error);
+      res.status(500).json({ error: 'Failed to get routines' });
+    }
+  });
+
+  /**
+   * POST /api/system/fix/:routine
+   * Run a fix routine
+   */
+  router.post('/fix/:routine', async (req: Request, res: Response) => {
+    try {
+      const routineName = req.params.routine;
+      const dryRun = req.body.dryRun === true;
+      const triggeredBy = req.body.triggeredBy || 'api';
+
+      const result = await autoFix.runRoutine(routineName as any, triggeredBy, { dryRun });
+      res.json(result);
+    } catch (error) {
+      console.error('[System] Fix routine error:', error);
+      res.status(500).json({ error: 'Failed to run fix routine' });
+    }
+  });
+
+  /**
+   * GET /api/system/fix/runs
+   * Get recent fix runs
+   */
+  router.get('/fix/runs', async (req: Request, res: Response) => {
+    try {
+      const limit = req.query.limit ? parseInt(req.query.limit as string) : 20;
+      const runs = await autoFix.getRecentRuns(limit);
+      res.json(runs);
+    } catch (error) {
+      console.error('[System] Fix runs error:', error);
+      res.status(500).json({ error: 'Failed to get fix runs' });
+    }
+  });
+
+  // ============================================================
+  // ALERTS ENDPOINTS
+  // ============================================================
+
+  /**
+   * GET /api/system/alerts
+   * List alerts
+   */
+  router.get('/alerts', async (req: Request, res: Response) => {
+    try {
+      const options = {
+        status: req.query.status as any,
+        severity: req.query.severity as any,
+        type: req.query.type as string,
+        limit: req.query.limit ? parseInt(req.query.limit as string) : 50,
+        offset: req.query.offset ? parseInt(req.query.offset as string) : 0,
+      };
+
+      const result = await alerts.listAlerts(options);
+      res.json(result);
+    } catch (error) {
+      console.error('[System] Alerts list error:', error);
+      res.status(500).json({ error: 'Failed to list alerts' });
+    }
+  });
+
+  /**
+   * GET /api/system/alerts/active
+   * Get active alerts
+   */
+  router.get('/alerts/active', async (_req: Request, res: Response) => {
+    try {
+      const activeAlerts = await alerts.getActiveAlerts();
+      res.json(activeAlerts);
+    } catch (error) {
+      console.error('[System] Active alerts error:', error);
+      res.status(500).json({ error: 'Failed to get active alerts' });
+    }
+  });
+
+  /**
+   * GET /api/system/alerts/summary
+   * Get alert summary
+   */
+  router.get('/alerts/summary', async (_req: Request, res: Response) => {
+    try {
+      const summary = await alerts.getSummary();
+      res.json(summary);
+    } catch (error) {
+      console.error('[System] Alerts summary error:', error);
+      res.status(500).json({ error: 'Failed to get alerts summary' });
+    }
+  });
+
+  /**
+   * POST /api/system/alerts/:id/acknowledge
+   * Acknowledge an alert
+   */
+  router.post('/alerts/:id/acknowledge', async (req: Request, res: Response) => {
+    try {
+      const alertId = parseInt(req.params.id);
+      const acknowledgedBy = req.body.acknowledgedBy || 'api';
+      const success = await alerts.acknowledgeAlert(alertId, acknowledgedBy);
+      res.json({ success });
+    } catch (error) {
+      console.error('[System] Acknowledge alert error:', error);
+      res.status(500).json({ error: 'Failed to acknowledge alert' });
+    }
+  });
+
+  /**
+   * POST /api/system/alerts/:id/resolve
+   * Resolve an alert
+   */
+  router.post('/alerts/:id/resolve', async (req: Request, res: Response) => {
+    try {
+      const alertId = parseInt(req.params.id);
+      const resolvedBy = req.body.resolvedBy || 'api';
+      const success = await alerts.resolveAlert(alertId, resolvedBy);
+      res.json({ success });
+    } catch (error) {
+      console.error('[System] Resolve alert error:', error);
+      res.status(500).json({ error: 'Failed to resolve alert' });
+    }
+  });
+
+  /**
+   * POST /api/system/alerts/bulk-acknowledge
+   * Bulk acknowledge alerts
+   */
+  router.post('/alerts/bulk-acknowledge', async (req: Request, res: Response) => {
+    try {
+      const { ids, acknowledgedBy } = req.body;
+      if (!ids || !Array.isArray(ids)) {
+        return res.status(400).json({ error: 'ids array is required' });
+      }
+      const count = await alerts.bulkAcknowledge(ids, acknowledgedBy || 'api');
+      res.json({ acknowledged: count });
|
||||||
|
} catch (error) {
|
||||||
|
console.error('[System] Bulk acknowledge error:', error);
|
||||||
|
res.status(500).json({ error: 'Failed to bulk acknowledge' });
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
// ============================================================
|
||||||
|
// METRICS ENDPOINTS
|
||||||
|
// ============================================================
|
||||||
|
|
||||||
|
/**
|
||||||
|
* GET /api/system/metrics
|
||||||
|
* Get all current metrics
|
||||||
|
*/
|
||||||
|
router.get('/metrics', async (_req: Request, res: Response) => {
|
||||||
|
try {
|
||||||
|
const allMetrics = await metrics.getAllMetrics();
|
||||||
|
res.json(allMetrics);
|
||||||
|
} catch (error) {
|
||||||
|
console.error('[System] Metrics error:', error);
|
||||||
|
res.status(500).json({ error: 'Failed to get metrics' });
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
/**
|
||||||
|
* GET /api/system/metrics/:name
|
||||||
|
* Get a specific metric
|
||||||
|
*/
|
||||||
|
router.get('/metrics/:name', async (req: Request, res: Response) => {
|
||||||
|
try {
|
||||||
|
const metric = await metrics.getMetric(req.params.name);
|
||||||
|
if (!metric) {
|
||||||
|
return res.status(404).json({ error: 'Metric not found' });
|
||||||
|
}
|
||||||
|
res.json(metric);
|
||||||
|
} catch (error) {
|
||||||
|
console.error('[System] Metric error:', error);
|
||||||
|
res.status(500).json({ error: 'Failed to get metric' });
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
/**
|
||||||
|
* GET /api/system/metrics/:name/history
|
||||||
|
* Get metric time series
|
||||||
|
*/
|
||||||
|
router.get('/metrics/:name/history', async (req: Request, res: Response) => {
|
||||||
|
try {
|
||||||
|
const hours = req.query.hours ? parseInt(req.query.hours as string) : 24;
|
||||||
|
const history = await metrics.getMetricHistory(req.params.name, hours);
|
||||||
|
res.json(history);
|
||||||
|
} catch (error) {
|
||||||
|
console.error('[System] Metric history error:', error);
|
||||||
|
res.status(500).json({ error: 'Failed to get metric history' });
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
/**
|
||||||
|
* GET /api/system/errors
|
||||||
|
* Get error summary
|
||||||
|
*/
|
||||||
|
router.get('/errors', async (_req: Request, res: Response) => {
|
||||||
|
try {
|
||||||
|
const summary = await metrics.getErrorSummary();
|
||||||
|
res.json(summary);
|
||||||
|
} catch (error) {
|
||||||
|
console.error('[System] Error summary error:', error);
|
||||||
|
res.status(500).json({ error: 'Failed to get error summary' });
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
/**
|
||||||
|
* GET /api/system/errors/recent
|
||||||
|
* Get recent errors
|
||||||
|
*/
|
||||||
|
router.get('/errors/recent', async (req: Request, res: Response) => {
|
||||||
|
try {
|
||||||
|
const limit = req.query.limit ? parseInt(req.query.limit as string) : 50;
|
||||||
|
const errorType = req.query.type as string;
|
||||||
|
const errors = await metrics.getRecentErrors(limit, errorType);
|
||||||
|
res.json(errors);
|
||||||
|
} catch (error) {
|
||||||
|
console.error('[System] Recent errors error:', error);
|
||||||
|
res.status(500).json({ error: 'Failed to get recent errors' });
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
/**
|
||||||
|
* POST /api/system/errors/acknowledge
|
||||||
|
* Acknowledge errors
|
||||||
|
*/
|
||||||
|
router.post('/errors/acknowledge', async (req: Request, res: Response) => {
|
||||||
|
try {
|
||||||
|
const { ids, acknowledgedBy } = req.body;
|
||||||
|
if (!ids || !Array.isArray(ids)) {
|
||||||
|
return res.status(400).json({ error: 'ids array is required' });
|
||||||
|
}
|
||||||
|
const count = await metrics.acknowledgeErrors(ids, acknowledgedBy || 'api');
|
||||||
|
res.json({ acknowledged: count });
|
||||||
|
} catch (error) {
|
||||||
|
console.error('[System] Acknowledge errors error:', error);
|
||||||
|
res.status(500).json({ error: 'Failed to acknowledge errors' });
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
return router;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create Prometheus metrics endpoint (standalone)
|
||||||
|
*/
|
||||||
|
export function createPrometheusRouter(pool: Pool): Router {
|
||||||
|
const router = Router();
|
||||||
|
const metrics = new MetricsService(pool);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* GET /metrics
|
||||||
|
* Prometheus-compatible metrics endpoint
|
||||||
|
*/
|
||||||
|
router.get('/', async (_req: Request, res: Response) => {
|
||||||
|
try {
|
||||||
|
const prometheusOutput = await metrics.getPrometheusMetrics();
|
||||||
|
res.set('Content-Type', 'text/plain; version=0.0.4');
|
||||||
|
res.send(prometheusOutput);
|
||||||
|
} catch (error) {
|
||||||
|
console.error('[Prometheus] Metrics error:', error);
|
||||||
|
res.status(500).send('# Error generating metrics');
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
return router;
|
||||||
|
}
|
||||||
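For reference, a minimal sketch of wiring these routers into an Express app. The name of the enclosing factory above is not visible in this hunk, so createSystemRouter and its import path are assumptions; createPrometheusRouter(pool) is taken from the code as shown.

import express from 'express';
import { Pool } from 'pg';
// Hypothetical import path and factory name; adjust to the actual module.
import { createSystemRouter, createPrometheusRouter } from './routes/system';

const pool = new Pool({ connectionString: process.env.DATABASE_URL });
const app = express();

app.use(express.json());
app.use('/api/system', createSystemRouter(pool));  // integrity, fix, alerts, metrics endpoints
app.use('/metrics', createPrometheusRouter(pool)); // Prometheus scrape target (text/plain; version=0.0.4)

app.listen(3000);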
@@ -17,6 +17,7 @@ import { Pool } from 'pg';
 import { MetricsService } from './metrics';
 import { DLQService } from './dlq';
 import { AlertService } from './alerts';
+import { DutchieNormalizer, hydrateToCanonical } from '../../hydration';

 export type OrchestratorStatus = 'RUNNING' | 'SLEEPING' | 'LOCKED' | 'PAUSED' | 'ERROR';

@@ -90,6 +91,7 @@ export class SyncOrchestrator {
   private workerId: string;
   private isRunning: boolean = false;
   private pollInterval: NodeJS.Timeout | null = null;
+  private normalizer: DutchieNormalizer;

   constructor(
     pool: Pool,
@@ -103,6 +105,7 @@ export class SyncOrchestrator {
     this.dlq = dlq;
     this.alerts = alerts;
     this.workerId = workerId || `orchestrator-${process.env.HOSTNAME || process.pid}`;
+    this.normalizer = new DutchieNormalizer();
   }

   /**
@@ -503,7 +506,7 @@ export class SyncOrchestrator {
   }

   /**
-   * Process a single payload
+   * Process a single payload - now uses canonical tables via hydration pipeline
    */
   private async processPayload(
     payload: any,
@@ -518,25 +521,52 @@ export class SyncOrchestrator {

     // Parse products from raw JSON
     const rawData = payload.raw_json;
-    const products = this.extractProducts(rawData);

-    if (!products || products.length === 0) {
+    // Validate the payload using normalizer
+    const validation = this.normalizer.validatePayload(rawData);
+    if (!validation.valid) {
       // Mark as processed with warning
       await this.pool.query(`
         UPDATE raw_payloads
         SET processed = TRUE,
             normalized_at = NOW(),
-            hydration_error = 'No products found in payload'
+            hydration_error = $2
+        WHERE id = $1
+      `, [payload.id, validation.errors.join('; ')]);
+
+      return { productsUpserted: 0, productsInserted: 0, productsUpdated: 0, snapshotsCreated: 0 };
+    }
+
+    // Normalize the payload using the hydration normalizer
+    const normResult = this.normalizer.normalize(rawData);
+
+    if (normResult.products.length === 0) {
+      // Mark as processed with warning
+      await this.pool.query(`
+        UPDATE raw_payloads
+        SET processed = TRUE,
+            normalized_at = NOW(),
+            hydration_error = 'No products found in payload after normalization'
         WHERE id = $1
       `, [payload.id]);

       return { productsUpserted: 0, productsInserted: 0, productsUpdated: 0, snapshotsCreated: 0 };
     }

-    // Upsert products to canonical table
-    const result = await this.upsertProducts(payload.dispensary_id, products);
+    // Get or create crawl_run for this payload
+    const crawlRunId = await this.getOrCreateCrawlRun(payload.dispensary_id, payload.id);

-    // Create snapshots
+    // Use canonical hydration to write to store_products, product_variants, etc.
+    const hydrateResult = await hydrateToCanonical(
+      this.pool,
+      payload.dispensary_id,
+      normResult,
+      crawlRunId
+    );
+
+    // Also write to legacy tables for backwards compatibility
+    const products = this.extractProducts(rawData);
+    await this.upsertProducts(payload.dispensary_id, products);
     const snapshotsCreated = await this.createSnapshots(payload.dispensary_id, products, payload.id);

     // Calculate latency
@@ -551,14 +581,32 @@ export class SyncOrchestrator {
       WHERE id = $1
     `, [payload.id]);

+    // Return combined metrics (canonical + legacy)
     return {
-      productsUpserted: result.upserted,
-      productsInserted: result.inserted,
-      productsUpdated: result.updated,
-      snapshotsCreated,
+      productsUpserted: hydrateResult.productsUpserted,
+      productsInserted: hydrateResult.productsNew,
+      productsUpdated: hydrateResult.productsUpdated,
+      snapshotsCreated: hydrateResult.snapshotsCreated + snapshotsCreated,
     };
   }

+  /**
+   * Get or create a crawl_run record for tracking
+   */
+  private async getOrCreateCrawlRun(dispensaryId: number, payloadId: string): Promise<number | null> {
+    try {
+      const result = await this.pool.query(`
+        INSERT INTO crawl_runs (dispensary_id, provider, started_at, status, trigger_type, metadata)
+        VALUES ($1, 'dutchie', NOW(), 'running', 'hydration', jsonb_build_object('payload_id', $2))
+        RETURNING id
+      `, [dispensaryId, payloadId]);
+      return result.rows[0].id;
+    } catch (error) {
+      console.warn('[SyncOrchestrator] Could not create crawl_run:', error);
+      return null;
+    }
+  }
+
   /**
    * Extract products from raw payload
    */
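The orchestrator only exercises a small surface of the normalizer. A hedged TypeScript sketch of the shapes it relies on, inferred purely from the calls in the hunk above (the real DutchieNormalizer in ../../hydration may differ in detail):

// Shapes inferred from usage; names beyond those in the diff are assumptions.
interface ValidationOutcome {
  valid: boolean;
  errors: string[]; // joined with '; ' into raw_payloads.hydration_error
}

interface NormalizedResult {
  products: unknown[]; // an empty array short-circuits hydration
}

interface NormalizerLike {
  validatePayload(raw: unknown): ValidationOutcome;
  normalize(raw: unknown): NormalizedResult;
}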
@@ -29,6 +29,12 @@ const TRUSTED_ORIGINS = [
   'http://localhost:5173',
 ];

+// Pattern-based trusted origins (wildcards)
+const TRUSTED_ORIGIN_PATTERNS = [
+  /^https:\/\/.*\.cannabrands\.app$/, // *.cannabrands.app
+  /^https:\/\/.*\.cannaiq\.co$/,      // *.cannaiq.co
+];
+
 // Trusted IPs for internal pod-to-pod communication
 const TRUSTED_IPS = [
   '127.0.0.1',
@@ -42,8 +48,16 @@ const TRUSTED_IPS = [
 function isTrustedRequest(req: Request): boolean {
   // Check origin header
   const origin = req.headers.origin;
-  if (origin && TRUSTED_ORIGINS.includes(origin)) {
-    return true;
+  if (origin) {
+    if (TRUSTED_ORIGINS.includes(origin)) {
+      return true;
+    }
+    // Check pattern-based origins (wildcards like *.cannabrands.app)
+    for (const pattern of TRUSTED_ORIGIN_PATTERNS) {
+      if (pattern.test(origin)) {
+        return true;
+      }
+    }
   }

   // Check referer header (for same-origin requests without CORS)
@@ -54,6 +68,18 @@ function isTrustedRequest(req: Request): boolean {
         return true;
       }
     }
+    // Check pattern-based referers
+    try {
+      const refererUrl = new URL(referer);
+      const refererOrigin = refererUrl.origin;
+      for (const pattern of TRUSTED_ORIGIN_PATTERNS) {
+        if (pattern.test(refererOrigin)) {
+          return true;
+        }
+      }
+    } catch {
+      // Invalid referer URL, skip
+    }
   }

   // Check IP for internal requests (pod-to-pod, localhost)
@@ -127,7 +153,53 @@ export async function authenticateUser(email: string, password: string): Promise
 }

 export async function authMiddleware(req: AuthRequest, res: Response, next: NextFunction) {
-  // Allow trusted origins/IPs to bypass auth (internal services, same-origin)
+  const authHeader = req.headers.authorization;
+
+  // If a Bearer token is provided, always try to use it first (logged-in user)
+  if (authHeader && authHeader.startsWith('Bearer ')) {
+    const token = authHeader.substring(7);
+
+    // Try JWT first
+    const jwtUser = verifyToken(token);
+
+    if (jwtUser) {
+      req.user = jwtUser;
+      return next();
+    }
+
+    // If JWT fails, try API token
+    try {
+      const result = await pool.query(`
+        SELECT id, name, rate_limit, active, expires_at, allowed_endpoints
+        FROM api_tokens
+        WHERE token = $1
+      `, [token]);
+
+      if (result.rows.length > 0) {
+        const apiToken = result.rows[0];
+        if (!apiToken.active) {
+          return res.status(401).json({ error: 'API token is inactive' });
+        }
+        if (apiToken.expires_at && new Date(apiToken.expires_at) < new Date()) {
+          return res.status(401).json({ error: 'API token has expired' });
+        }
+        req.user = {
+          id: 0,
+          email: `api:${apiToken.name}`,
+          role: 'api_token'
+        };
+        req.apiToken = apiToken;
+        return next();
+      }
+    } catch (err) {
+      console.error('API token lookup error:', err);
+    }
+
+    // Token provided but invalid
+    return res.status(401).json({ error: 'Invalid token' });
+  }
+
+  // No token provided - check trusted origins for API access (WordPress, etc.)
   if (isTrustedRequest(req)) {
     req.user = {
       id: 0,
@@ -137,80 +209,10 @@ export async function authMiddleware(req: AuthRequest, res: Response, next: NextFunction) {
     return next();
   }

-  const authHeader = req.headers.authorization;
-
-  if (!authHeader || !authHeader.startsWith('Bearer ')) {
-    return res.status(401).json({ error: 'No token provided' });
-  }
-
-  const token = authHeader.substring(7);
-
-  // Try JWT first
-  const jwtUser = verifyToken(token);
-
-  if (jwtUser) {
-    req.user = jwtUser;
-    return next();
-  }
-
-  // If JWT fails, try API token
-  try {
-    const result = await pool.query(`
-      SELECT id, name, rate_limit, active, expires_at, allowed_endpoints
-      FROM api_tokens
-      WHERE token = $1
-    `, [token]);
-
-    if (result.rows.length === 0) {
-      return res.status(401).json({ error: 'Invalid token' });
-    }
-
-    const apiToken = result.rows[0];
-
-    // Check if token is active
-    if (!apiToken.active) {
-      return res.status(401).json({ error: 'Token is disabled' });
-    }
-
-    // Check if token is expired
-    if (apiToken.expires_at && new Date(apiToken.expires_at) < new Date()) {
-      return res.status(401).json({ error: 'Token has expired' });
-    }
-
-    // Check allowed endpoints
-    if (apiToken.allowed_endpoints && apiToken.allowed_endpoints.length > 0) {
-      const isAllowed = apiToken.allowed_endpoints.some((pattern: string) => {
-        // Simple wildcard matching
-        const regex = new RegExp('^' + pattern.replace('*', '.*') + '$');
-        return regex.test(req.path);
-      });
-
-      if (!isAllowed) {
-        return res.status(403).json({ error: 'Endpoint not allowed for this token' });
-      }
-    }
-
-    // Set API token on request for tracking
-    req.apiToken = {
-      id: apiToken.id,
-      name: apiToken.name,
-      rate_limit: apiToken.rate_limit
-    };
-
-    // Set a generic user for compatibility with existing code
-    req.user = {
-      id: apiToken.id,
-      email: `api-token-${apiToken.id}@system`,
-      role: 'api'
-    };
-
-    next();
-  } catch (error) {
-    console.error('Error verifying API token:', error);
-    return res.status(500).json({ error: 'Authentication failed' });
-  }
+  return res.status(401).json({ error: 'No token provided' });
 }

 /**
  * Require specific role(s) to access endpoint.
  *
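A quick standalone sanity check of the wildcard patterns added above (a sketch; the regexes are copied verbatim from the diff). Note the `$` anchor is what keeps look-alike hosts out, and that the Origin header never carries a path, so the unanchored `.*` is safe in practice:

const TRUSTED_ORIGIN_PATTERNS = [
  /^https:\/\/.*\.cannabrands\.app$/,
  /^https:\/\/.*\.cannaiq\.co$/,
];

console.log(TRUSTED_ORIGIN_PATTERNS[0].test('https://admin.cannabrands.app')); // true
console.log(TRUSTED_ORIGIN_PATTERNS[0].test('http://admin.cannabrands.app'));  // false, http not matched
console.log(TRUSTED_ORIGIN_PATTERNS[1].test('https://cannaiq.co'));            // false, apex has no subdomain
console.log(TRUSTED_ORIGIN_PATTERNS[1].test('https://evil-cannaiq.co'));       // false, '-' is not the required '.'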
backend/src/db/auto-migrate.ts (new file, 141 lines)
@@ -0,0 +1,141 @@
/**
 * Auto-Migration System
 *
 * Runs SQL migration files from the migrations/ folder automatically on server startup.
 * Uses a schema_migrations table to track which migrations have been applied.
 *
 * Safe to run multiple times - only applies new migrations.
 */

import { Pool } from 'pg';
import fs from 'fs';
import path from 'path';

const MIGRATIONS_DIR = path.join(__dirname, '../../migrations');

/**
 * Ensure schema_migrations table exists
 */
async function ensureMigrationsTable(pool: Pool): Promise<void> {
  await pool.query(`
    CREATE TABLE IF NOT EXISTS schema_migrations (
      id SERIAL PRIMARY KEY,
      name VARCHAR(255) UNIQUE NOT NULL,
      applied_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
    )
  `);
}

/**
 * Get list of already-applied migrations
 */
async function getAppliedMigrations(pool: Pool): Promise<Set<string>> {
  const result = await pool.query('SELECT name FROM schema_migrations');
  return new Set(result.rows.map(row => row.name));
}

/**
 * Get list of migration files from disk
 */
function getMigrationFiles(): string[] {
  if (!fs.existsSync(MIGRATIONS_DIR)) {
    console.log('[AutoMigrate] No migrations directory found');
    return [];
  }

  return fs.readdirSync(MIGRATIONS_DIR)
    .filter(f => f.endsWith('.sql'))
    .sort(); // Sort alphabetically (001_, 002_, etc.)
}

/**
 * Run a single migration file
 */
async function runMigration(pool: Pool, filename: string): Promise<void> {
  const filepath = path.join(MIGRATIONS_DIR, filename);
  const sql = fs.readFileSync(filepath, 'utf8');

  const client = await pool.connect();
  try {
    await client.query('BEGIN');

    // Run the migration SQL
    await client.query(sql);

    // Record that this migration was applied
    await client.query(
      'INSERT INTO schema_migrations (name) VALUES ($1) ON CONFLICT (name) DO NOTHING',
      [filename]
    );

    await client.query('COMMIT');
    console.log(`[AutoMigrate] ✓ Applied: ${filename}`);
  } catch (error: any) {
    await client.query('ROLLBACK');
    console.error(`[AutoMigrate] ✗ Failed: ${filename}`);
    throw error;
  } finally {
    client.release();
  }
}

/**
 * Run all pending migrations
 *
 * @param pool - Database connection pool
 * @returns Number of migrations applied
 */
export async function runAutoMigrations(pool: Pool): Promise<number> {
  console.log('[AutoMigrate] Checking for pending migrations...');

  try {
    // Ensure migrations table exists
    await ensureMigrationsTable(pool);

    // Get applied and available migrations
    const applied = await getAppliedMigrations(pool);
    const available = getMigrationFiles();

    // Find pending migrations
    const pending = available.filter(f => !applied.has(f));

    if (pending.length === 0) {
      console.log('[AutoMigrate] No pending migrations');
      return 0;
    }

    console.log(`[AutoMigrate] Found ${pending.length} pending migrations`);

    // Run each pending migration in order
    for (const filename of pending) {
      await runMigration(pool, filename);
    }

    console.log(`[AutoMigrate] Successfully applied ${pending.length} migrations`);
    return pending.length;

  } catch (error: any) {
    console.error('[AutoMigrate] Migration failed:', error.message);
    // Don't crash the server - log and continue
    // The specific failing migration will have been rolled back
    return -1;
  }
}

/**
 * Check migration status without running anything
 */
export async function checkMigrationStatus(pool: Pool): Promise<{
  applied: string[];
  pending: string[];
}> {
  await ensureMigrationsTable(pool);

  const applied = await getAppliedMigrations(pool);
  const available = getMigrationFiles();

  return {
    applied: available.filter(f => applied.has(f)),
    pending: available.filter(f => !applied.has(f)),
  };
}
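A minimal sketch of calling this at server startup. The entry point shown here is an assumption, not part of this diff; only runAutoMigrations and checkMigrationStatus come from the module above:

import express from 'express';
import { Pool } from 'pg';
// Hypothetical import path relative to backend/src/index.ts.
import { runAutoMigrations, checkMigrationStatus } from './db/auto-migrate';

async function start() {
  const pool = new Pool({ connectionString: process.env.DATABASE_URL });

  // Apply any pending .sql files before accepting traffic.
  // Returns -1 on failure; by design above, the server still boots.
  const applied = await runAutoMigrations(pool);
  console.log(`migrations applied: ${applied}`);

  const status = await checkMigrationStatus(pool);
  console.log(`pending after startup: ${status.pending.length}`);

  express().listen(3000);
}

start();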
@@ -372,6 +372,51 @@ async function runMigrations() {
       ON CONFLICT (key) DO NOTHING;
     `);

+    // SEO Pages table
+    await client.query(`
+      CREATE TABLE IF NOT EXISTS seo_pages (
+        id SERIAL PRIMARY KEY,
+        type VARCHAR(50) NOT NULL,
+        slug VARCHAR(255) NOT NULL UNIQUE,
+        page_key VARCHAR(255) NOT NULL,
+        primary_keyword VARCHAR(255),
+        status VARCHAR(50) DEFAULT 'pending_generation',
+        data_source VARCHAR(100),
+        meta_title VARCHAR(255),
+        meta_description TEXT,
+        last_generated_at TIMESTAMPTZ,
+        last_reviewed_at TIMESTAMPTZ,
+        created_at TIMESTAMPTZ DEFAULT NOW(),
+        updated_at TIMESTAMPTZ DEFAULT NOW()
+      );
+      CREATE INDEX IF NOT EXISTS idx_seo_pages_type ON seo_pages(type);
+      CREATE INDEX IF NOT EXISTS idx_seo_pages_status ON seo_pages(status);
+      CREATE INDEX IF NOT EXISTS idx_seo_pages_slug ON seo_pages(slug);
+    `);
+
+    // SEO Page Contents table
+    await client.query(`
+      CREATE TABLE IF NOT EXISTS seo_page_contents (
+        id SERIAL PRIMARY KEY,
+        page_id INTEGER NOT NULL REFERENCES seo_pages(id) ON DELETE CASCADE,
+        version INTEGER DEFAULT 1,
+        blocks JSONB NOT NULL DEFAULT '[]',
+        meta JSONB NOT NULL DEFAULT '{}',
+        meta_title VARCHAR(255),
+        meta_description TEXT,
+        h1 VARCHAR(255),
+        canonical_url TEXT,
+        og_title VARCHAR(255),
+        og_description TEXT,
+        og_image_url TEXT,
+        generated_by VARCHAR(50) DEFAULT 'claude',
+        created_at TIMESTAMPTZ DEFAULT NOW(),
+        updated_at TIMESTAMPTZ DEFAULT NOW(),
+        UNIQUE(page_id, version)
+      );
+      CREATE INDEX IF NOT EXISTS idx_seo_page_contents_page ON seo_page_contents(page_id);
+    `);
+
     await client.query('COMMIT');
     console.log('✅ Migrations completed successfully');
   } catch (error) {
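A hedged example of how a page row and its first content version might be created against the schema above; the slugs, keys, and titles are illustrative only, and `client` is assumed to be the same connected pg client:

await client.query(`
  INSERT INTO seo_pages (type, slug, page_key, primary_keyword)
  VALUES ('city', 'dispensaries-phoenix-az', 'city:phoenix:az', 'dispensaries in phoenix')
  ON CONFLICT (slug) DO NOTHING
`);
await client.query(`
  INSERT INTO seo_page_contents (page_id, version, blocks, meta_title)
  SELECT id, 1, '[]'::jsonb, 'Phoenix Dispensaries'
  FROM seo_pages WHERE slug = 'dispensaries-phoenix-az'
  ON CONFLICT (page_id, version) DO NOTHING
`);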
backend/src/db/run-migrations.ts (new file, 200 lines)
@@ -0,0 +1,200 @@
#!/usr/bin/env npx tsx
/**
 * Database Migration Runner
 *
 * Runs SQL migrations from backend/migrations/*.sql in order.
 * Tracks applied migrations in schema_migrations table.
 *
 * Usage:
 *   npx tsx src/db/run-migrations.ts
 *
 * Environment:
 *   DATABASE_URL or CANNAIQ_DB_* variables
 */

import { Pool } from 'pg';
import * as fs from 'fs/promises';
import * as path from 'path';
import dotenv from 'dotenv';

dotenv.config();

function getConnectionString(): string {
  if (process.env.DATABASE_URL) {
    return process.env.DATABASE_URL;
  }
  if (process.env.CANNAIQ_DB_URL) {
    return process.env.CANNAIQ_DB_URL;
  }

  const host = process.env.CANNAIQ_DB_HOST || 'localhost';
  const port = process.env.CANNAIQ_DB_PORT || '54320';
  const name = process.env.CANNAIQ_DB_NAME || 'dutchie_menus';
  const user = process.env.CANNAIQ_DB_USER || 'dutchie';
  const pass = process.env.CANNAIQ_DB_PASS || 'dutchie_local_pass';

  return `postgresql://${user}:${pass}@${host}:${port}/${name}`;
}

interface MigrationFile {
  filename: string;
  number: number;
  path: string;
}

async function getMigrationFiles(migrationsDir: string): Promise<MigrationFile[]> {
  const files = await fs.readdir(migrationsDir);

  const migrations: MigrationFile[] = files
    .filter(f => f.endsWith('.sql'))
    .map(filename => {
      // Extract number from filename like "005_api_tokens.sql" or "073_proxy_timezone.sql"
      const match = filename.match(/^(\d+)_/);
      if (!match) return null;

      return {
        filename,
        number: parseInt(match[1], 10),
        path: path.join(migrationsDir, filename),
      };
    })
    .filter((m): m is MigrationFile => m !== null)
    .sort((a, b) => a.number - b.number);

  return migrations;
}

async function ensureMigrationsTable(pool: Pool): Promise<void> {
  // Migrate to filename-based tracking (handles duplicate version numbers)
  // Check if old version-based PK exists
  const pkCheck = await pool.query(`
    SELECT constraint_name FROM information_schema.table_constraints
    WHERE table_name = 'schema_migrations' AND constraint_type = 'PRIMARY KEY'
  `);

  if (pkCheck.rows.length === 0) {
    // Table doesn't exist, create with filename as PK
    await pool.query(`
      CREATE TABLE IF NOT EXISTS schema_migrations (
        filename VARCHAR(255) NOT NULL PRIMARY KEY,
        version VARCHAR(10),
        name VARCHAR(255),
        applied_at TIMESTAMPTZ DEFAULT NOW()
      )
    `);
  } else {
    // Table exists - add filename column if missing
    await pool.query(`
      ALTER TABLE schema_migrations ADD COLUMN IF NOT EXISTS filename VARCHAR(255)
    `);
    // Populate filename from version+name for existing rows
    await pool.query(`
      UPDATE schema_migrations SET filename = version || '_' || name || '.sql'
      WHERE filename IS NULL
    `);
  }
}

async function getAppliedMigrations(pool: Pool): Promise<Set<string>> {
  // Try filename first, fall back to version_name combo
  const result = await pool.query(`
    SELECT COALESCE(filename, version || '_' || name || '.sql') as filename
    FROM schema_migrations
  `);
  return new Set(result.rows.map(r => r.filename));
}

async function applyMigration(pool: Pool, migration: MigrationFile): Promise<void> {
  const sql = await fs.readFile(migration.path, 'utf-8');

  // Extract version and name from filename like "005_api_tokens.sql"
  const version = String(migration.number).padStart(3, '0');
  const name = migration.filename.replace(/^\d+_/, '').replace(/\.sql$/, '');

  const client = await pool.connect();
  try {
    await client.query('BEGIN');

    // Run the migration SQL
    await client.query(sql);

    // Record that it was applied - use INSERT with ON CONFLICT for safety
    await client.query(`
      INSERT INTO schema_migrations (filename, version, name)
      VALUES ($1, $2, $3)
      ON CONFLICT DO NOTHING
    `, [migration.filename, version, name]);

    await client.query('COMMIT');
  } catch (error) {
    await client.query('ROLLBACK');
    throw error;
  } finally {
    client.release();
  }
}

async function main() {
  const pool = new Pool({ connectionString: getConnectionString() });

  // Migrations directory relative to this file
  const migrationsDir = path.resolve(__dirname, '../../migrations');

  console.log('╔════════════════════════════════════════════════════════════╗');
  console.log('║                 DATABASE MIGRATION RUNNER                  ║');
  console.log('╚════════════════════════════════════════════════════════════╝');
  console.log(`Migrations dir: ${migrationsDir}`);
  console.log('');

  try {
    // Ensure tracking table exists
    await ensureMigrationsTable(pool);

    // Get all migration files
    const allMigrations = await getMigrationFiles(migrationsDir);
    console.log(`Found ${allMigrations.length} migration files`);

    // Get already-applied migrations
    const applied = await getAppliedMigrations(pool);
    console.log(`Already applied: ${applied.size} migrations`);
    console.log('');

    // Find pending migrations (compare by filename)
    const pending = allMigrations.filter(m => !applied.has(m.filename));

    if (pending.length === 0) {
      console.log('✅ No pending migrations. Database is up to date.');
      return; // pool is closed in the finally block; ending it here too would throw
    }

    console.log(`Pending migrations: ${pending.length}`);
    console.log('─'.repeat(60));

    // Apply each pending migration
    for (const migration of pending) {
      process.stdout.write(`  ${migration.filename}... `);
      try {
        await applyMigration(pool, migration);
        console.log('✅');
      } catch (error: any) {
        console.log('❌');
        console.error(`\nError applying ${migration.filename}:`);
        console.error(error.message);
        process.exit(1);
      }
    }

    console.log('');
    console.log('═'.repeat(60));
    console.log(`✅ Applied ${pending.length} migrations successfully`);

  } catch (error: any) {
    console.error('Migration runner failed:', error.message);
    process.exit(1);
  } finally {
    await pool.end();
  }
}

main();
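The filename-based key matters when two files share a version prefix; with a version-based primary key the second would be skipped or rejected, while keying on the full filename keeps them distinct. A small sketch (073_store_status.sql is a hypothetical filename):

// Both parse to number 73, but each stays a separate schema_migrations row
// because the table is keyed on the full filename.
const files = ['073_proxy_timezone.sql', '073_store_status.sql'];
for (const f of files) {
  const match = f.match(/^(\d+)_/); // same regex as getMigrationFiles
  console.log(f, '->', parseInt(match![1], 10));
}
// 073_proxy_timezone.sql -> 73
// 073_store_status.sql -> 73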
@@ -3,14 +3,23 @@
  *
  * Main orchestrator for the Dutchie store discovery pipeline.
  *
- * Flow:
- * 1. Discover cities from Dutchie (or use seeded cities)
- * 2. For each city, discover store locations
- * 3. Upsert all data to discovery tables
- * 4. Admin verifies locations manually
- * 5. Verified locations are promoted to canonical dispensaries
+ * AUTOMATED FLOW (as of 2025-01):
+ * 1. Fetch cities dynamically from Dutchie GraphQL (getAllCitiesByState)
+ * 2. For each city, discover store locations via ConsumerDispensaries query
+ * 3. Upsert locations to dutchie_discovery_locations (keyed by platform_location_id)
+ * 4. AUTO-VALIDATE: Check required fields (name, city, state, platform_menu_url, platform_location_id)
+ * 5. AUTO-PROMOTE: Valid locations are upserted to dispensaries table with crawl_enabled=true
+ * 6. All actions logged to dutchie_promotion_log for audit
  *
- * This module does NOT create canonical dispensaries automatically.
+ * Tables involved:
+ * - dutchie_discovery_cities: Known cities for each state
+ * - dutchie_discovery_locations: Raw discovered store data
+ * - dispensaries: Canonical store records (promoted from discovery)
+ * - dutchie_promotion_log: Audit trail for validation/promotion
+ *
+ * Usage:
+ *   npx tsx src/scripts/run-discovery.ts discover:state AZ
+ *   npx tsx src/scripts/run-discovery.ts discover:state CA
  */

 import { Pool } from 'pg';
@@ -24,11 +33,12 @@ import {
   getCitiesToCrawl,
   getCityBySlug,
   seedKnownCities,
-  ARIZONA_CITIES,
 } from './city-discovery';
 import {
   discoverLocationsForCity,
+  getCitiesForState,
 } from './location-discovery';
+import { promoteDiscoveredLocations } from './promotion';

 // ============================================================
 // FULL DISCOVERY
@@ -162,12 +172,161 @@ export async function runFullDiscovery(
   console.log(`Errors: ${totalErrors}`);
   }

+  // Per TASK_WORKFLOW_2024-12-10.md: Track new dispensary IDs for task chaining
+  let newDispensaryIds: number[] = [];
+
+  // Step 4: Auto-validate and promote discovered locations
+  if (!dryRun && totalLocationsUpserted > 0) {
+    console.log('\n[Discovery] Step 4: Auto-promoting discovered locations...');
+    const promotionResult = await promoteDiscoveredLocations(stateCode, false);
+    console.log(`[Discovery] Promotion complete:`);
+    console.log(`  Created: ${promotionResult.created} new dispensaries`);
+    console.log(`  Updated: ${promotionResult.updated} existing dispensaries`);
+    console.log(`  Rejected: ${promotionResult.rejected} (validation failed)`);
+
+    // Per TASK_WORKFLOW_2024-12-10.md: Capture new IDs for task chaining
+    newDispensaryIds = promotionResult.newDispensaryIds;
+    if (newDispensaryIds.length > 0) {
+      console.log(`  New store IDs for crawl: [${newDispensaryIds.join(', ')}]`);
+    }
+
+    if (promotionResult.rejectedRecords.length > 0) {
+      console.log(`  Rejection reasons:`);
+      promotionResult.rejectedRecords.slice(0, 5).forEach(r => {
+        console.log(`    - ${r.name}: ${r.errors.join(', ')}`);
+      });
+      if (promotionResult.rejectedRecords.length > 5) {
+        console.log(`    ... and ${promotionResult.rejectedRecords.length - 5} more`);
+      }
+    }
+  }
+
+  // Step 5: Detect dropped stores (in DB but not in discovery results)
+  if (!dryRun) {
+    console.log('\n[Discovery] Step 5: Detecting dropped stores...');
+    const droppedResult = await detectDroppedStores(pool, stateCode);
+    if (droppedResult.droppedCount > 0) {
+      console.log(`[Discovery] Found ${droppedResult.droppedCount} dropped stores:`);
+      droppedResult.droppedStores.slice(0, 10).forEach(s => {
+        console.log(`  - ${s.name} (${s.city}, ${s.state}) - last seen: ${s.lastSeenAt}`);
+      });
+      if (droppedResult.droppedCount > 10) {
+        console.log(`  ... and ${droppedResult.droppedCount - 10} more`);
+      }
+    } else {
+      console.log(`[Discovery] No dropped stores detected`);
+    }
+  }
+
   return {
     cities: cityResult,
     locations: locationResults,
     totalLocationsFound,
     totalLocationsUpserted,
     durationMs,
+    // Per TASK_WORKFLOW_2024-12-10.md: Return new IDs for task chaining
+    newDispensaryIds,
+  };
+}
+
+// ============================================================
+// DROPPED STORE DETECTION
+// ============================================================
+
+export interface DroppedStoreResult {
+  droppedCount: number;
+  droppedStores: Array<{
+    id: number;
+    name: string;
+    city: string;
+    state: string;
+    platformDispensaryId: string;
+    lastSeenAt: string;
+  }>;
+}
+
+/**
+ * Detect stores that exist in dispensaries but were not found in discovery.
+ * Marks them as status='dropped' for manual review.
+ *
+ * A store is considered "dropped" if:
+ * 1. It has a platform_dispensary_id (was verified via Dutchie)
+ * 2. It was NOT seen in the latest discovery crawl (last_seen_at in discovery < 24h ago)
+ * 3. It's currently marked as 'open' status
+ */
+export async function detectDroppedStores(
+  pool: Pool,
+  stateCode?: string
+): Promise<DroppedStoreResult> {
+  // Find dispensaries that:
+  // 1. Have platform_dispensary_id (verified Dutchie stores)
+  // 2. Are currently 'open' status
+  // 3. Have a linked discovery record that wasn't seen in the last discovery run
+  //    (last_seen_at in dutchie_discovery_locations is older than 24 hours)
+  const params: any[] = [];
+  let stateFilter = '';
+
+  if (stateCode) {
+    stateFilter = ` AND d.state = $1`;
+    params.push(stateCode);
+  }
+
+  const query = `
+    WITH recently_seen AS (
+      SELECT DISTINCT platform_location_id
+      FROM dutchie_discovery_locations
+      WHERE last_seen_at > NOW() - INTERVAL '24 hours'
+        AND active = true
+    )
+    SELECT
+      d.id,
+      d.name,
+      d.city,
+      d.state,
+      d.platform_dispensary_id,
+      d.updated_at as last_seen_at
+    FROM dispensaries d
+    WHERE d.platform_dispensary_id IS NOT NULL
+      AND d.platform = 'dutchie'
+      AND (d.status = 'open' OR d.status IS NULL)
+      AND d.crawl_enabled = true
+      AND d.platform_dispensary_id NOT IN (SELECT platform_location_id FROM recently_seen)
+      ${stateFilter}
+    ORDER BY d.name
+  `;
+
+  const result = await pool.query(query, params);
+  const droppedStores = result.rows;
+
+  // Mark these stores as 'dropped' status
+  if (droppedStores.length > 0) {
+    const ids = droppedStores.map(s => s.id);
+    await pool.query(`
+      UPDATE dispensaries
+      SET status = 'dropped', updated_at = NOW()
+      WHERE id = ANY($1::int[])
+    `, [ids]);
+
+    // Log to promotion log for audit
+    for (const store of droppedStores) {
+      await pool.query(`
+        INSERT INTO dutchie_promotion_log
+          (dispensary_id, action, state_code, store_name, triggered_by)
+        VALUES ($1, 'dropped', $2, $3, 'discovery_detection')
+      `, [store.id, store.state, store.name]);
+    }
+  }
+
+  return {
+    droppedCount: droppedStores.length,
+    droppedStores: droppedStores.map(s => ({
+      id: s.id,
+      name: s.name,
+      city: s.city,
+      state: s.state,
+      platformDispensaryId: s.platform_dispensary_id,
+      lastSeenAt: s.last_seen_at,
+    })),
   };
 }

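A hedged usage sketch of the new detector outside the discovery flow, for example from a one-off script. The import path is hypothetical; the signature and result shape are taken from the code above:

import { Pool } from 'pg';
// Hypothetical import path; detectDroppedStores lives in the discovery module above.
import { detectDroppedStores } from './discovery-crawler';

async function main() {
  const pool = new Pool({ connectionString: process.env.DATABASE_URL });
  const result = await detectDroppedStores(pool, 'AZ'); // stateCode filter is optional
  console.log(`${result.droppedCount} stores marked as dropped`);
  for (const s of result.droppedStores) {
    console.log(`- ${s.name} (${s.city}, ${s.state}), last seen ${s.lastSeenAt}`);
  }
  await pool.end();
}

main();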
@@ -235,11 +394,19 @@ export async function discoverState(

   console.log(`[Discovery] Discovering state: ${stateCode}`);

-  // Seed known cities for this state
-  if (stateCode === 'AZ') {
-    console.log('[Discovery] Seeding Arizona cities...');
-    const seeded = await seedKnownCities(pool, ARIZONA_CITIES);
-    console.log(`[Discovery] Seeded ${seeded.created} new cities, ${seeded.updated} updated`);
+  // Dynamically fetch and seed cities for this state
+  console.log(`[Discovery] Fetching cities for ${stateCode} from Dutchie...`);
+  const cityNames = await getCitiesForState(stateCode);
+  if (cityNames.length > 0) {
+    const cities = cityNames.map(name => ({
+      name,
+      slug: name.toLowerCase().replace(/\s+/g, '-').replace(/[^a-z0-9-]/g, ''),
+      stateCode,
+    }));
+    const seeded = await seedKnownCities(pool, cities);
+    console.log(`[Discovery] Seeded ${seeded.created} new cities, ${seeded.updated} updated for ${stateCode}`);
+  } else {
+    console.log(`[Discovery] No cities found for ${stateCode}`);
   }

   // Run full discovery for this state
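The slug chain above lowercases, hyphenates whitespace, then strips anything outside [a-z0-9-]; a quick sketch of its behavior:

const slugify = (name: string) =>
  name.toLowerCase().replace(/\s+/g, '-').replace(/[^a-z0-9-]/g, '');

slugify('Sierra Vista');  // 'sierra-vista'
slugify("Coeur d'Alene"); // 'coeur-dalene', apostrophe dropped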
@@ -13,7 +13,6 @@ export {
   getCitiesToCrawl,
   getCityBySlug,
   seedKnownCities,
-  ARIZONA_CITIES,
 } from './city-discovery';

 // Location Discovery
@@ -33,5 +32,17 @@ export {
   DiscoveryStats,
 } from './discovery-crawler';
+
+// Promotion
+export {
+  validateForPromotion,
+  validateDiscoveredLocations,
+  promoteDiscoveredLocations,
+  promoteSingleLocation,
+  ValidationResult,
+  ValidationSummary,
+  PromotionResult,
+  PromotionSummary,
+} from './promotion';

 // Routes
 export { createDiscoveryRoutes } from './routes';
@@ -134,10 +134,10 @@ export interface StateWithCities {
 }

 /**
- * Fetch all states with their cities from Dutchie's __NEXT_DATA__
+ * Fetch all states with their cities via direct GraphQL query
  *
- * This fetches a city page and extracts the statesWithDispensaries data
- * which contains all states and their cities where Dutchie has dispensaries.
+ * Uses the getAllCitiesByState persisted query which returns all states
+ * and cities where Dutchie has dispensaries.
  */
 export async function fetchStatesWithDispensaries(
   options: { verbose?: boolean } = {}
@@ -147,84 +147,53 @@ export async function fetchStatesWithDispensaries(
   // Initialize proxy if USE_PROXY=true
   await initDiscoveryProxy();

-  console.log('[LocationDiscovery] Fetching statesWithDispensaries from Dutchie...');
+  console.log('[LocationDiscovery] Fetching statesWithDispensaries via GraphQL...');

-  // Fetch any city page to get the __NEXT_DATA__ with statesWithDispensaries
-  // Using a known city that's likely to exist
-  const result = await fetchPage('/dispensaries/az/phoenix', { maxRetries: 3 });
+  try {
+    // Use direct GraphQL query - much cleaner than scraping __NEXT_DATA__
+    const result = await executeGraphQL(
+      'getAllCitiesByState',
+      {}, // No variables needed
+      GRAPHQL_HASHES.GetAllCitiesByState,
+      { maxRetries: 3, retryOn403: true }
+    );

-  if (!result || result.status !== 200) {
-    console.error('[LocationDiscovery] Failed to fetch city page');
-    return [];
-  }
-
-  const nextData = extractNextData(result.html);
-  if (!nextData) {
-    console.error('[LocationDiscovery] No __NEXT_DATA__ found');
-    return [];
-  }
-
-  // Extract statesWithDispensaries from Apollo state
-  const apolloState = nextData.props?.pageProps?.initialApolloState;
-  if (!apolloState) {
-    console.error('[LocationDiscovery] No initialApolloState found');
-    return [];
-  }
-
-  // Find ROOT_QUERY.statesWithDispensaries
-  const rootQuery = apolloState['ROOT_QUERY'];
-  if (!rootQuery) {
-    console.error('[LocationDiscovery] No ROOT_QUERY found');
-    return [];
-  }
-
-  // The statesWithDispensaries is at ROOT_QUERY.statesWithDispensaries
-  const statesRefs = rootQuery.statesWithDispensaries;
-  if (!Array.isArray(statesRefs)) {
-    console.error('[LocationDiscovery] statesWithDispensaries not found or not an array');
-    return [];
-  }
-
-  // Resolve the references to actual state data
-  const states: StateWithCities[] = [];
-  for (const ref of statesRefs) {
-    // ref might be { __ref: "StateWithDispensaries:0" } or direct object
-    let stateData: any;
-
-    if (ref && ref.__ref) {
-      stateData = apolloState[ref.__ref];
-    } else {
-      stateData = ref;
-    }
-
-    if (stateData && stateData.name) {
-      // Parse cities JSON array if it's a string
-      let cities = stateData.cities;
-      if (typeof cities === 'string') {
-        try {
-          cities = JSON.parse(cities);
-        } catch {
-          cities = [];
-        }
-      }
-
-      states.push({
-        name: stateData.name,
-        country: stateData.country || 'US',
-        cities: Array.isArray(cities) ? cities : [],
-      });
+    const statesData = result?.data?.statesWithDispensaries;
+    if (!Array.isArray(statesData)) {
+      console.error('[LocationDiscovery] statesWithDispensaries not found in response');
+      return [];
     }
-  }

-  if (verbose) {
-    console.log(`[LocationDiscovery] Found ${states.length} states`);
-    for (const state of states) {
-      console.log(`  ${state.name}: ${state.cities.length} cities`);
+    // Map to our StateWithCities format
+    const states: StateWithCities[] = [];
+    for (const state of statesData) {
+      if (state && state.name) {
+        // Filter out null cities
+        const cities = Array.isArray(state.cities)
+          ? state.cities.filter((c: string | null) => c !== null)
+          : [];
+
+        states.push({
+          name: state.name,
+          country: state.country || 'US',
+          cities,
+        });
+      }
     }
-  }

-  console.log(`[LocationDiscovery] Loaded ${states.length} states with cities`);
-  return states;
+    if (verbose) {
+      console.log(`[LocationDiscovery] Found ${states.length} states`);
+      for (const state of states) {
+        console.log(`  ${state.name}: ${state.cities.length} cities`);
+      }
+    }
+
+    console.log(`[LocationDiscovery] Loaded ${states.length} states with cities`);
+    return states;
+  } catch (error: any) {
+    console.error(`[LocationDiscovery] Failed to fetch states: ${error.message}`);
+    return [];
+  }
 }

 /**
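A hedged usage sketch of the rewritten fetcher; the function and its option are exported exactly as shown above, and the returned StateWithCities fields come from the interface in this file:

const states = await fetchStatesWithDispensaries({ verbose: true });
for (const s of states) {
  console.log(`${s.name} (${s.country}): ${s.cities.length} cities`);
}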
@@ -751,31 +720,57 @@ async function scrapeLocationCards(

/**
 * Normalize a raw location response to a consistent format.
+ * Maps Dutchie camelCase fields to our snake_case equivalents.
 */
function normalizeLocationResponse(raw: any): DutchieLocationResponse {
  const slug = raw.slug || raw.cName || raw.urlSlug || '';
  const id = raw.id || raw._id || raw.dispensaryId || '';

+  // Extract location data - GraphQL response nests address info in .location
+  const loc = raw.location || {};
+
+  // Extract coordinates from geometry.coordinates [longitude, latitude]
+  const coords = loc.geometry?.coordinates || [];
+  const longitude = coords[0] || raw.longitude || raw.lng || loc.longitude || loc.lng;
+  const latitude = coords[1] || raw.latitude || raw.lat || loc.latitude || loc.lat;
+
  return {
    id,
    name: raw.name || raw.dispensaryName || '',
    slug,
-    address: raw.address || raw.fullAddress || '',
-    address1: raw.address1 || raw.addressLine1 || raw.streetAddress || '',
-    address2: raw.address2 || raw.addressLine2 || '',
-    city: raw.city || '',
-    state: raw.state || raw.stateCode || '',
-    zip: raw.zip || raw.zipCode || raw.postalCode || '',
-    country: raw.country || raw.countryCode || 'US',
-    latitude: raw.latitude || raw.lat || raw.location?.latitude,
-    longitude: raw.longitude || raw.lng || raw.location?.longitude,
+    cName: raw.cName || raw.slug || '',
+    address: raw.address || raw.fullAddress || loc.ln1 || '',
+    address1: raw.address1 || raw.addressLine1 || raw.streetAddress || loc.ln1 || '',
+    address2: raw.address2 || raw.addressLine2 || loc.ln2 || '',
+    city: raw.city || loc.city || '',
+    state: raw.state || raw.stateCode || loc.state || '',
+    zip: raw.zip || raw.zipCode || raw.postalCode || loc.zipcode || loc.zip || '',
+    country: raw.country || raw.countryCode || loc.country || 'United States',
+    latitude,
+    longitude,
    timezone: raw.timezone || raw.tz || '',
    menuUrl: raw.menuUrl || (slug ? `https://dutchie.com/dispensary/${slug}` : ''),
    retailType: raw.retailType || raw.type || '',
+    // Service offerings
    offerPickup: raw.offerPickup ?? raw.storeSettings?.offerPickup ?? true,
    offerDelivery: raw.offerDelivery ?? raw.storeSettings?.offerDelivery ?? false,
-    isRecreational: raw.isRecreational ?? raw.retailType?.includes('Recreational') ?? true,
-    isMedical: raw.isMedical ?? raw.retailType?.includes('Medical') ?? true,
+    offerCurbsidePickup: raw.offerCurbsidePickup ?? false,
+    // License types
+    isRecreational: raw.isRecreational ?? raw.recDispensary ?? raw.retailType?.includes('Recreational') ?? true,
+    isMedical: raw.isMedical ?? raw.medicalDispensary ?? raw.retailType?.includes('Medical') ?? true,
+    // Contact info
+    phone: raw.phone || '',
+    email: raw.email || '',
+    website: raw.embedBackUrl || '',
+    // Branding
+    description: raw.description || '',
+    logoImage: raw.logoImage || '',
+    bannerImage: raw.bannerImage || '',
+    // Chain/enterprise info
+    chainSlug: raw.chain || '',
+    enterpriseId: raw.retailer?.enterpriseId || '',
+    // Status
+    status: raw.status || '',
    // Preserve raw data
    ...raw,
  };
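Editor's note on the coordinate extraction above: GeoJSON-style geometry.coordinates arrays are ordered [longitude, latitude], so index 0 is the east-west value. Also note that `||` would let a legitimate 0 coordinate fall through to the raw fields. A minimal sketch with hypothetical sample values:

// GeoJSON convention: [longitude, latitude]
const sample = { location: { geometry: { coordinates: [-112.074, 33.448] } } };
const coords = sample.location.geometry?.coordinates ?? [];
const longitude = coords[0]; // -112.074 (x, east-west)
const latitude = coords[1];  //   33.448 (y, north-south)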
@@ -826,15 +821,27 @@ export async function upsertLocation(
        offers_pickup,
        is_recreational,
        is_medical,
+       phone,
+       website,
+       email,
+       description,
+       logo_image,
+       banner_image,
+       chain_slug,
+       enterprise_id,
+       c_name,
+       country,
+       store_status,
        last_seen_at,
        updated_at
-     ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20, $21, NOW(), NOW())
+     ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20, $21, $22, $23, $24, $25, $26, $27, $28, $29, $30, $31, $32, NOW(), NOW())
      ON CONFLICT (platform, platform_location_id)
      DO UPDATE SET
        name = EXCLUDED.name,
        platform_menu_url = EXCLUDED.platform_menu_url,
        raw_address = COALESCE(EXCLUDED.raw_address, dutchie_discovery_locations.raw_address),
        address_line1 = COALESCE(EXCLUDED.address_line1, dutchie_discovery_locations.address_line1),
+       address_line2 = COALESCE(EXCLUDED.address_line2, dutchie_discovery_locations.address_line2),
        city = COALESCE(EXCLUDED.city, dutchie_discovery_locations.city),
        state_code = COALESCE(EXCLUDED.state_code, dutchie_discovery_locations.state_code),
        postal_code = COALESCE(EXCLUDED.postal_code, dutchie_discovery_locations.postal_code),
@@ -846,6 +853,17 @@ export async function upsertLocation(
        offers_pickup = COALESCE(EXCLUDED.offers_pickup, dutchie_discovery_locations.offers_pickup),
        is_recreational = COALESCE(EXCLUDED.is_recreational, dutchie_discovery_locations.is_recreational),
        is_medical = COALESCE(EXCLUDED.is_medical, dutchie_discovery_locations.is_medical),
+       phone = COALESCE(EXCLUDED.phone, dutchie_discovery_locations.phone),
+       website = COALESCE(EXCLUDED.website, dutchie_discovery_locations.website),
+       email = COALESCE(EXCLUDED.email, dutchie_discovery_locations.email),
+       description = COALESCE(EXCLUDED.description, dutchie_discovery_locations.description),
+       logo_image = COALESCE(EXCLUDED.logo_image, dutchie_discovery_locations.logo_image),
+       banner_image = COALESCE(EXCLUDED.banner_image, dutchie_discovery_locations.banner_image),
+       chain_slug = COALESCE(EXCLUDED.chain_slug, dutchie_discovery_locations.chain_slug),
+       enterprise_id = COALESCE(EXCLUDED.enterprise_id, dutchie_discovery_locations.enterprise_id),
+       c_name = COALESCE(EXCLUDED.c_name, dutchie_discovery_locations.c_name),
+       country = COALESCE(EXCLUDED.country, dutchie_discovery_locations.country),
+       store_status = COALESCE(EXCLUDED.store_status, dutchie_discovery_locations.store_status),
        last_seen_at = NOW(),
        updated_at = NOW()
      RETURNING id, (xmax = 0) as is_new`,
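Editor's note on `(xmax = 0) as is_new`: in PostgreSQL, a freshly inserted row version has xmax = 0, while a row rewritten by ON CONFLICT ... DO UPDATE carries a non-zero xmax, so this expression distinguishes insert from update in a single round trip. A standalone illustration (the table name is hypothetical):

// Hypothetical table, for illustration only
const { rows } = await pool.query(`
  INSERT INTO demo_kv (k, v) VALUES ($1, $2)
  ON CONFLICT (k) DO UPDATE SET v = EXCLUDED.v
  RETURNING (xmax = 0) AS is_new
`, ['alpha', 1]);
console.log(rows[0].is_new); // true on first call, false on a conflicting re-run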
@@ -861,7 +879,7 @@ export async function upsertLocation(
      location.city || null,
      location.state || null,
      location.zip || null,
-     location.country || 'US',
+     location.country || 'United States',
      location.latitude || null,
      location.longitude || null,
      location.timezone || null,
@@ -871,6 +889,17 @@ export async function upsertLocation(
      location.offerPickup ?? null,
      location.isRecreational ?? null,
      location.isMedical ?? null,
+     location.phone || null,
+     location.website || null,
+     location.email || null,
+     location.description || null,
+     location.logoImage || null,
+     location.bannerImage || null,
+     location.chainSlug || null,
+     location.enterpriseId || null,
+     location.cName || null,
+     location.country || 'United States',
+     location.status || null,
    ]
  );
587 backend/src/discovery/promotion.ts Normal file
@@ -0,0 +1,587 @@
/**
 * Discovery Promotion Service
 *
 * Handles the promotion of discovery locations to dispensaries:
 * 1. Discovery → Raw data in dutchie_discovery_locations (status='discovered')
 * 2. Validation → Check required fields, reject incomplete records
 * 3. Promotion → Idempotent upsert to dispensaries, link back via dispensary_id
 */

import { pool } from '../db/pool';
import { DiscoveryLocationRow, DiscoveryStatus } from './types';

// ============================================================
// VALIDATION
// ============================================================

export interface ValidationResult {
  valid: boolean;
  errors: string[];
}

export interface ValidationSummary {
  totalChecked: number;
  validCount: number;
  invalidCount: number;
  invalidRecords: Array<{
    id: number;
    name: string;
    errors: string[];
  }>;
}

/**
 * Validate a single discovery location has all required fields for promotion
 */
export function validateForPromotion(loc: DiscoveryLocationRow): ValidationResult {
  const errors: string[] = [];

  // Required fields
  if (!loc.platform_location_id) {
    errors.push('Missing platform_location_id');
  }
  if (!loc.name || loc.name.trim() === '') {
    errors.push('Missing name');
  }
  if (!loc.city || loc.city.trim() === '') {
    errors.push('Missing city');
  }
  if (!loc.state_code || loc.state_code.trim() === '') {
    errors.push('Missing state_code');
  }
  if (!loc.platform_menu_url) {
    errors.push('Missing platform_menu_url');
  }

  return {
    valid: errors.length === 0,
    errors,
  };
}
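A quick usage sketch for the validator above (the row literal is hypothetical and trimmed to the checked fields; a real DiscoveryLocationRow carries many more columns):

const result = validateForPromotion({
  id: 1,
  name: 'Example Dispensary',
  city: '',                          // missing -> flagged
  state_code: 'AZ',
  platform_location_id: 'abc123',
  platform_menu_url: 'https://dutchie.com/dispensary/example',
} as DiscoveryLocationRow);
console.log(result); // { valid: false, errors: ['Missing city'] }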
/**
 * Validate all discovered locations and return summary
 */
export async function validateDiscoveredLocations(
  stateCode?: string
): Promise<ValidationSummary> {
  let query = `
    SELECT * FROM dutchie_discovery_locations
    WHERE status = 'discovered'
  `;
  const params: string[] = [];

  if (stateCode) {
    query += ` AND state_code = $1`;
    params.push(stateCode);
  }

  const result = await pool.query(query, params);
  const locations = result.rows as DiscoveryLocationRow[];

  const invalidRecords: ValidationSummary['invalidRecords'] = [];
  let validCount = 0;

  for (const loc of locations) {
    const validation = validateForPromotion(loc);
    if (validation.valid) {
      validCount++;
    } else {
      invalidRecords.push({
        id: loc.id,
        name: loc.name,
        errors: validation.errors,
      });
    }
  }

  return {
    totalChecked: locations.length,
    validCount,
    invalidCount: invalidRecords.length,
    invalidRecords,
  };
}
// ============================================================
// PROMOTION
// ============================================================

export interface PromotionResult {
  discoveryId: number;
  dispensaryId: number;
  action: 'created' | 'updated' | 'skipped';
  name: string;
}

export interface PromotionSummary {
  totalProcessed: number;
  created: number;
  updated: number;
  skipped: number;
  rejected: number;
  results: PromotionResult[];
  rejectedRecords: Array<{
    id: number;
    name: string;
    errors: string[];
  }>;
  durationMs: number;
  // Per TASK_WORKFLOW_2024-12-10.md: Track new dispensary IDs for task chaining
  newDispensaryIds: number[];
}

/**
 * Generate a URL-safe slug from name, city, and state
 */
function generateSlug(name: string, city: string, state: string): string {
  const base = `${name}-${city}-${state}`
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, '-')
    .replace(/^-|-$/g, '')
    .substring(0, 100);
  return base;
}
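Worked example for generateSlug (the input values are illustrative):

generateSlug("Joe's Dispensary #2", 'Mesa', 'AZ');
// -> 'joe-s-dispensary-2-mesa-az'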
/**
 * Log a promotion action to dutchie_promotion_log
 */
async function logPromotionAction(
  action: string,
  discoveryId: number | null,
  dispensaryId: number | null,
  stateCode: string | null,
  storeName: string | null,
  validationErrors: string[] | null = null,
  fieldChanges: Record<string, any> | null = null,
  triggeredBy: string = 'auto'
): Promise<void> {
  await pool.query(`
    INSERT INTO dutchie_promotion_log
      (discovery_id, dispensary_id, action, state_code, store_name, validation_errors, field_changes, triggered_by)
    VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
  `, [
    discoveryId,
    dispensaryId,
    action,
    stateCode,
    storeName,
    validationErrors,
    fieldChanges ? JSON.stringify(fieldChanges) : null,
    triggeredBy,
  ]);
}

/**
 * Create a status alert for the dashboard
 */
export async function createStatusAlert(
  dispensaryId: number,
  profileId: number | null,
  alertType: string,
  severity: 'info' | 'warning' | 'error' | 'critical',
  message: string,
  previousStatus?: string | null,
  newStatus?: string | null,
  metadata?: Record<string, any>
): Promise<number> {
  const result = await pool.query(`
    INSERT INTO crawler_status_alerts
      (dispensary_id, profile_id, alert_type, severity, message, previous_status, new_status, metadata)
    VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
    RETURNING id
  `, [
    dispensaryId,
    profileId,
    alertType,
    severity,
    message,
    previousStatus || null,
    newStatus || null,
    metadata ? JSON.stringify(metadata) : null,
  ]);
  return result.rows[0].id;
}
/**
 * Create or update crawler profile for a dispensary with initial sandbox status
 */
async function ensureCrawlerProfile(
  dispensaryId: number,
  dispensaryName: string,
  platformDispensaryId: string
): Promise<{ profileId: number; created: boolean }> {
  // Check if profile already exists
  const existingResult = await pool.query(`
    SELECT id FROM dispensary_crawler_profiles
    WHERE dispensary_id = $1 AND enabled = true
    LIMIT 1
  `, [dispensaryId]);

  if (existingResult.rows.length > 0) {
    return { profileId: existingResult.rows[0].id, created: false };
  }

  // Create new profile with sandbox status
  const profileKey = dispensaryName
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, '-')
    .replace(/^-|-$/g, '')
    .substring(0, 50);

  const insertResult = await pool.query(`
    INSERT INTO dispensary_crawler_profiles (
      dispensary_id,
      profile_name,
      profile_key,
      crawler_type,
      status,
      status_reason,
      status_changed_at,
      config,
      enabled,
      consecutive_successes,
      consecutive_failures,
      created_at,
      updated_at
    ) VALUES (
      $1, $2, $3, 'dutchie', 'sandbox', 'Newly promoted from discovery', CURRENT_TIMESTAMP,
      $4::jsonb, true, 0, 0, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP
    )
    RETURNING id
  `, [
    dispensaryId,
    dispensaryName,
    profileKey,
    JSON.stringify({
      platformDispensaryId,
      useBothModes: true,
      downloadImages: true,
      trackStock: true,
    }),
  ]);

  const profileId = insertResult.rows[0].id;

  // Create status alert for new sandbox store
  await createStatusAlert(
    dispensaryId,
    profileId,
    'promoted',
    'info',
    `${dispensaryName} promoted to sandbox - awaiting first successful crawl`,
    null,
    'sandbox',
    { source: 'discovery_promotion', platformDispensaryId }
  );

  return { profileId, created: true };
}
/**
 * Promote a single discovery location to dispensaries table
 * Idempotent: uses ON CONFLICT on platform_dispensary_id
 */
async function promoteLocation(
  loc: DiscoveryLocationRow
): Promise<PromotionResult> {
  const slug = loc.platform_slug || generateSlug(loc.name, loc.city || '', loc.state_code || '');

  // Upsert into dispensaries
  // ON CONFLICT by platform_dispensary_id ensures idempotency
  const upsertResult = await pool.query(`
    INSERT INTO dispensaries (
      platform,
      name,
      slug,
      city,
      state,
      address1,
      address2,
      zipcode,
      postal_code,
      phone,
      website,
      email,
      latitude,
      longitude,
      timezone,
      platform_dispensary_id,
      menu_url,
      menu_type,
      description,
      logo_image,
      banner_image,
      offer_pickup,
      offer_delivery,
      is_medical,
      is_recreational,
      chain_slug,
      enterprise_id,
      c_name,
      country,
      status,
      crawl_enabled,
      dutchie_verified,
      dutchie_verified_at,
      dutchie_discovery_id,
      created_at,
      updated_at
    ) VALUES (
      $1, $2, $3, $4, $5, $6, $7, $8, $9, $10,
      $11, $12, $13, $14, $15, $16, $17, $18, $19, $20,
      $21, $22, $23, $24, $25, $26, $27, $28, $29, $30,
      $31, $32, $33, $34, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP
    )
    ON CONFLICT (platform_dispensary_id) WHERE platform_dispensary_id IS NOT NULL
    DO UPDATE SET
      name = EXCLUDED.name,
      city = EXCLUDED.city,
      state = EXCLUDED.state,
      address1 = EXCLUDED.address1,
      address2 = EXCLUDED.address2,
      zipcode = EXCLUDED.zipcode,
      postal_code = EXCLUDED.postal_code,
      phone = EXCLUDED.phone,
      website = EXCLUDED.website,
      email = EXCLUDED.email,
      latitude = EXCLUDED.latitude,
      longitude = EXCLUDED.longitude,
      timezone = EXCLUDED.timezone,
      menu_url = EXCLUDED.menu_url,
      description = EXCLUDED.description,
      logo_image = EXCLUDED.logo_image,
      banner_image = EXCLUDED.banner_image,
      offer_pickup = EXCLUDED.offer_pickup,
      offer_delivery = EXCLUDED.offer_delivery,
      is_medical = EXCLUDED.is_medical,
      is_recreational = EXCLUDED.is_recreational,
      chain_slug = EXCLUDED.chain_slug,
      enterprise_id = EXCLUDED.enterprise_id,
      c_name = EXCLUDED.c_name,
      country = EXCLUDED.country,
      status = EXCLUDED.status,
      dutchie_discovery_id = EXCLUDED.dutchie_discovery_id,
      updated_at = CURRENT_TIMESTAMP
    RETURNING id, (xmax = 0) AS inserted
  `, [
    loc.platform || 'dutchie',        // $1 platform
    loc.name,                         // $2 name
    slug,                             // $3 slug
    loc.city,                         // $4 city
    loc.state_code,                   // $5 state
    loc.address_line1,                // $6 address1
    loc.address_line2,                // $7 address2
    loc.postal_code,                  // $8 zipcode
    loc.postal_code,                  // $9 postal_code
    loc.phone,                        // $10 phone
    loc.website,                      // $11 website
    loc.email,                        // $12 email
    loc.latitude,                     // $13 latitude
    loc.longitude,                    // $14 longitude
    loc.timezone,                     // $15 timezone
    loc.platform_location_id,         // $16 platform_dispensary_id
    loc.platform_menu_url,            // $17 menu_url
    'dutchie',                        // $18 menu_type
    loc.description,                  // $19 description
    loc.logo_image,                   // $20 logo_image
    loc.banner_image,                 // $21 banner_image
    loc.offers_pickup ?? true,        // $22 offer_pickup
    loc.offers_delivery ?? false,     // $23 offer_delivery
    loc.is_medical ?? false,          // $24 is_medical
    loc.is_recreational ?? true,      // $25 is_recreational
    loc.chain_slug,                   // $26 chain_slug
    loc.enterprise_id,                // $27 enterprise_id
    loc.c_name,                       // $28 c_name
    loc.country || 'United States',   // $29 country
    loc.store_status || 'open',       // $30 status
    true,                             // $31 crawl_enabled
    true,                             // $32 dutchie_verified
    new Date(),                       // $33 dutchie_verified_at
    loc.id,                           // $34 dutchie_discovery_id
  ]);

  const dispensaryId = upsertResult.rows[0].id;
  const wasInserted = upsertResult.rows[0].inserted;

  // Link discovery location back to dispensary and update status
  await pool.query(`
    UPDATE dutchie_discovery_locations
    SET
      dispensary_id = $1,
      status = 'verified',
      verified_at = CURRENT_TIMESTAMP,
      verified_by = 'auto-promotion'
    WHERE id = $2
  `, [dispensaryId, loc.id]);

  // Create crawler profile with sandbox status for new dispensaries
  if (wasInserted && loc.platform_location_id) {
    await ensureCrawlerProfile(dispensaryId, loc.name, loc.platform_location_id);
  }

  const action = wasInserted ? 'promoted_create' : 'promoted_update';

  // Log the promotion
  await logPromotionAction(
    action,
    loc.id,
    dispensaryId,
    loc.state_code,
    loc.name,
    null,
    { slug, city: loc.city, platform_location_id: loc.platform_location_id }
  );

  return {
    discoveryId: loc.id,
    dispensaryId,
    action: wasInserted ? 'created' : 'updated',
    name: loc.name,
  };
}
/**
|
||||||
|
* Promote all valid discovered locations to dispensaries
|
||||||
|
*
|
||||||
|
* @param stateCode Optional filter by state (e.g., 'CA', 'AZ')
|
||||||
|
* @param dryRun If true, only validate without making changes
|
||||||
|
*/
|
||||||
|
export async function promoteDiscoveredLocations(
|
||||||
|
stateCode?: string,
|
||||||
|
dryRun = false
|
||||||
|
): Promise<PromotionSummary> {
|
||||||
|
const startTime = Date.now();
|
||||||
|
|
||||||
|
let query = `
|
||||||
|
SELECT * FROM dutchie_discovery_locations
|
||||||
|
WHERE status = 'discovered'
|
||||||
|
`;
|
||||||
|
const params: string[] = [];
|
||||||
|
|
||||||
|
if (stateCode) {
|
||||||
|
query += ` AND state_code = $1`;
|
||||||
|
params.push(stateCode);
|
||||||
|
}
|
||||||
|
|
||||||
|
query += ` ORDER BY id`;
|
||||||
|
|
||||||
|
const result = await pool.query(query, params);
|
||||||
|
const locations = result.rows as DiscoveryLocationRow[];
|
||||||
|
|
||||||
|
const results: PromotionResult[] = [];
|
||||||
|
const rejectedRecords: PromotionSummary['rejectedRecords'] = [];
|
||||||
|
// Per TASK_WORKFLOW_2024-12-10.md: Track new dispensary IDs for task chaining
|
||||||
|
const newDispensaryIds: number[] = [];
|
||||||
|
let created = 0;
|
||||||
|
let updated = 0;
|
||||||
|
let skipped = 0;
|
||||||
|
let rejected = 0;
|
||||||
|
|
||||||
|
for (const loc of locations) {
|
||||||
|
// Step 2: Validation
|
||||||
|
const validation = validateForPromotion(loc);
|
||||||
|
|
||||||
|
if (!validation.valid) {
|
||||||
|
rejected++;
|
||||||
|
rejectedRecords.push({
|
||||||
|
id: loc.id,
|
||||||
|
name: loc.name,
|
||||||
|
errors: validation.errors,
|
||||||
|
});
|
||||||
|
|
||||||
|
// Mark as rejected if not dry run
|
||||||
|
if (!dryRun) {
|
||||||
|
await pool.query(`
|
||||||
|
UPDATE dutchie_discovery_locations
|
||||||
|
SET status = 'rejected', notes = $1
|
||||||
|
WHERE id = $2
|
||||||
|
`, [validation.errors.join('; '), loc.id]);
|
||||||
|
|
||||||
|
// Log the rejection
|
||||||
|
await logPromotionAction(
|
||||||
|
'rejected',
|
||||||
|
loc.id,
|
||||||
|
null,
|
||||||
|
loc.state_code,
|
||||||
|
loc.name,
|
||||||
|
validation.errors
|
||||||
|
);
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Step 3: Promotion (skip if dry run)
|
||||||
|
if (dryRun) {
|
||||||
|
skipped++;
|
||||||
|
results.push({
|
||||||
|
discoveryId: loc.id,
|
||||||
|
dispensaryId: 0,
|
||||||
|
action: 'skipped',
|
||||||
|
name: loc.name,
|
||||||
|
});
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
const promotionResult = await promoteLocation(loc);
|
||||||
|
results.push(promotionResult);
|
||||||
|
|
||||||
|
if (promotionResult.action === 'created') {
|
||||||
|
created++;
|
||||||
|
// Per TASK_WORKFLOW_2024-12-10.md: Track new IDs for task chaining
|
||||||
|
newDispensaryIds.push(promotionResult.dispensaryId);
|
||||||
|
} else {
|
||||||
|
updated++;
|
||||||
|
}
|
||||||
|
} catch (error: any) {
|
||||||
|
console.error(`Failed to promote location ${loc.id} (${loc.name}):`, error.message);
|
||||||
|
rejected++;
|
||||||
|
rejectedRecords.push({
|
||||||
|
id: loc.id,
|
||||||
|
name: loc.name,
|
||||||
|
errors: [`Promotion error: ${error.message}`],
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return {
|
||||||
|
totalProcessed: locations.length,
|
||||||
|
created,
|
||||||
|
updated,
|
||||||
|
skipped,
|
||||||
|
rejected,
|
||||||
|
results,
|
||||||
|
rejectedRecords,
|
||||||
|
durationMs: Date.now() - startTime,
|
||||||
|
// Per TASK_WORKFLOW_2024-12-10.md: Return new IDs for task chaining
|
||||||
|
newDispensaryIds,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Promote a single discovery location by ID
|
||||||
|
*/
|
||||||
|
export async function promoteSingleLocation(
|
||||||
|
discoveryId: number
|
||||||
|
): Promise<PromotionResult> {
|
||||||
|
const result = await pool.query(
|
||||||
|
`SELECT * FROM dutchie_discovery_locations WHERE id = $1`,
|
||||||
|
[discoveryId]
|
||||||
|
);
|
||||||
|
|
||||||
|
if (result.rows.length === 0) {
|
||||||
|
throw new Error(`Discovery location ${discoveryId} not found`);
|
||||||
|
}
|
||||||
|
|
||||||
|
const loc = result.rows[0] as DiscoveryLocationRow;
|
||||||
|
|
||||||
|
// Validate
|
||||||
|
const validation = validateForPromotion(loc);
|
||||||
|
if (!validation.valid) {
|
||||||
|
throw new Error(`Validation failed: ${validation.errors.join(', ')}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Promote
|
||||||
|
return promoteLocation(loc);
|
||||||
|
}
|
||||||
@@ -18,8 +18,8 @@ import {
  getCitiesToCrawl,
  getCityBySlug,
  seedKnownCities,
- ARIZONA_CITIES,
} from './city-discovery';
+import { getCitiesForState } from './location-discovery';
import {
  DiscoveryLocation,
  DiscoveryCity,
@@ -27,6 +27,11 @@ import {
  mapLocationRowToLocation,
  mapCityRowToCity,
} from './types';
+import {
+  validateDiscoveredLocations,
+  promoteDiscoveredLocations,
+  promoteSingleLocation,
+} from './promotion';

export function createDiscoveryRoutes(pool: Pool): Router {
  const router = Router();
@@ -53,44 +58,44 @@ export function createDiscoveryRoutes(pool: Pool): Router {
        offset = '0',
      } = req.query;

-     let whereClause = 'WHERE platform = $1 AND active = TRUE';
+     let whereClause = 'WHERE dl.platform = $1 AND dl.active = TRUE';
      const params: any[] = [platform];
      let paramIndex = 2;

      if (status) {
-       whereClause += ` AND status = $${paramIndex}`;
+       whereClause += ` AND dl.status = $${paramIndex}`;
        params.push(status);
        paramIndex++;
      }

      if (stateCode) {
-       whereClause += ` AND state_code = $${paramIndex}`;
+       whereClause += ` AND dl.state_code = $${paramIndex}`;
        params.push(stateCode);
        paramIndex++;
      }

      if (countryCode) {
-       whereClause += ` AND country_code = $${paramIndex}`;
+       whereClause += ` AND dl.country_code = $${paramIndex}`;
        params.push(countryCode);
        paramIndex++;
      }

      if (city) {
-       whereClause += ` AND city ILIKE $${paramIndex}`;
+       whereClause += ` AND dl.city ILIKE $${paramIndex}`;
        params.push(`%${city}%`);
        paramIndex++;
      }

      if (search) {
-       whereClause += ` AND (name ILIKE $${paramIndex} OR platform_slug ILIKE $${paramIndex})`;
+       whereClause += ` AND (dl.name ILIKE $${paramIndex} OR dl.platform_slug ILIKE $${paramIndex})`;
        params.push(`%${search}%`);
        paramIndex++;
      }

      if (hasDispensary === 'true') {
-       whereClause += ' AND dispensary_id IS NOT NULL';
+       whereClause += ' AND dl.dispensary_id IS NOT NULL';
      } else if (hasDispensary === 'false') {
-       whereClause += ' AND dispensary_id IS NULL';
+       whereClause += ' AND dl.dispensary_id IS NULL';
      }

      params.push(parseInt(limit as string, 10), parseInt(offset as string, 10));
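Editor's note: every filter now qualifies its column with the `dl.` alias, which becomes necessary once `dutchie_discovery_locations dl` is joined to a table with overlapping column names. The SELECT itself is outside this hunk; a hypothetical sketch of how the built clause composes:

// Hypothetical composition - the actual SELECT is not shown in this hunk
const sql = `
  SELECT dl.*, d.name AS dispensary_name
  FROM dutchie_discovery_locations dl
  LEFT JOIN dispensaries d ON dl.dispensary_id = d.id
  ${whereClause}
  ORDER BY dl.last_seen_at DESC
  LIMIT $${paramIndex} OFFSET $${paramIndex + 1}
`;
const { rows } = await pool.query(sql, params);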
@@ -705,15 +710,22 @@ export function createDiscoveryRoutes(pool: Pool): Router {
        return res.status(400).json({ error: 'stateCode is required' });
      }

-     let cities: any[] = [];
-     if (stateCode === 'AZ') {
-       cities = ARIZONA_CITIES;
-     } else {
+     // Dynamically fetch cities from Dutchie for any state
+     const cityNames = await getCitiesForState(stateCode as string);
+
+     if (cityNames.length === 0) {
        return res.status(400).json({
-         error: `No predefined cities for state: ${stateCode}. Add cities to city-discovery.ts`,
+         error: `No cities found for state: ${stateCode}`,
        });
      }

+     // Convert to seed format
+     const cities = cityNames.map(name => ({
+       name,
+       slug: name.toLowerCase().replace(/\s+/g, '-').replace(/[^a-z0-9-]/g, ''),
+       stateCode: stateCode as string,
+     }));

      const result = await seedKnownCities(pool, cities);

      res.json({
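Worked example of the city-to-seed mapping above (city names are illustrative):

['Sun City West', 'Fort McDowell'].map(name => ({
  name,
  slug: name.toLowerCase().replace(/\s+/g, '-').replace(/[^a-z0-9-]/g, ''),
  stateCode: 'AZ',
}));
// -> [{ name: 'Sun City West', slug: 'sun-city-west', stateCode: 'AZ' },
//     { name: 'Fort McDowell', slug: 'fort-mcdowell', stateCode: 'AZ' }]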
@@ -834,6 +846,136 @@ export function createDiscoveryRoutes(pool: Pool): Router {
    }
  });

+ // ============================================================
+ // PROMOTION ENDPOINTS
+ // ============================================================
+
+ /**
+  * GET /api/discovery/admin/validate
+  * Validate discovered locations before promotion
+  */
+ router.get('/admin/validate', async (req: Request, res: Response) => {
+   try {
+     const { stateCode } = req.query;
+     const summary = await validateDiscoveredLocations(stateCode as string | undefined);
+
+     res.json({
+       success: true,
+       ...summary,
+     });
+   } catch (error: any) {
+     res.status(500).json({ error: error.message });
+   }
+ });
+
+ /**
+  * POST /api/discovery/admin/promote
+  * Promote all valid discovered locations to dispensaries (idempotent)
+  *
+  * Body params:
+  * - stateCode: Filter by state (e.g., 'CA', 'AZ')
+  * - dryRun: If true, only validate without making changes
+  */
+ router.post('/admin/promote', async (req: Request, res: Response) => {
+   try {
+     const { stateCode, dryRun = false } = req.body;
+
+     console.log(`[Discovery API] Starting promotion for ${stateCode || 'all states'} (dryRun=${dryRun})`);
+     const summary = await promoteDiscoveredLocations(stateCode, dryRun);
+
+     res.json({
+       success: true,
+       ...summary,
+     });
+   } catch (error: any) {
+     res.status(500).json({ error: error.message });
+   }
+ });
+
+ /**
+  * POST /api/discovery/admin/promote/:id
+  * Promote a single discovery location by ID
+  */
+ router.post('/admin/promote/:id', async (req: Request, res: Response) => {
+   try {
+     const { id } = req.params;
+
+     console.log(`[Discovery API] Promoting single location ${id}`);
+     const result = await promoteSingleLocation(parseInt(id, 10));
+
+     res.json({
+       success: true,
+       ...result,
+     });
+   } catch (error: any) {
+     res.status(500).json({ error: error.message });
+   }
+ });
+
+ // ============================================================
+ // PROMOTION LOG
+ // ============================================================
+
+ /**
+  * GET /api/discovery/promotion-log
+  * Get promotion audit log
+  */
+ router.get('/promotion-log', async (req: Request, res: Response) => {
+   try {
+     const { state, dispensary_id, limit = '100' } = req.query;
+
+     let whereClause = 'WHERE 1=1';
+     const params: any[] = [];
+     let paramIndex = 1;
+
+     if (state) {
+       whereClause += ` AND pl.state_code = $${paramIndex}`;
+       params.push(state);
+       paramIndex++;
+     }
+
+     if (dispensary_id) {
+       whereClause += ` AND pl.dispensary_id = $${paramIndex}`;
+       params.push(parseInt(dispensary_id as string, 10));
+       paramIndex++;
+     }
+
+     params.push(parseInt(limit as string, 10));
+
+     const { rows } = await pool.query(`
+       SELECT
+         pl.*,
+         dl.name as discovery_name,
+         d.name as dispensary_name
+       FROM dutchie_promotion_log pl
+       LEFT JOIN dutchie_discovery_locations dl ON pl.discovery_id = dl.id
+       LEFT JOIN dispensaries d ON pl.dispensary_id = d.id
+       ${whereClause}
+       ORDER BY pl.created_at DESC
+       LIMIT $${paramIndex}
+     `, params);
+
+     res.json({
+       logs: rows.map((r: any) => ({
+         id: r.id,
+         discoveryId: r.discovery_id,
+         dispensaryId: r.dispensary_id,
+         action: r.action,
+         stateCode: r.state_code,
+         storeName: r.store_name,
+         validationErrors: r.validation_errors,
+         fieldChanges: r.field_changes,
+         triggeredBy: r.triggered_by,
+         createdAt: r.created_at,
+         discoveryName: r.discovery_name,
+         dispensaryName: r.dispensary_name,
+       })),
+     });
+   } catch (error: any) {
+     res.status(500).json({ error: error.message });
+   }
+ });

  return router;
}
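Hedged usage sketch for the new admin endpoints, assuming the router is mounted at /api/discovery as the doc comments indicate (host and port are placeholders):

// Dry-run a promotion pass over Arizona, then inspect the audit log
const res = await fetch('http://localhost:3000/api/discovery/admin/promote', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({ stateCode: 'AZ', dryRun: true }),
});
const summary = await res.json(); // { success, totalProcessed, created, updated, ... }

const log = await fetch('http://localhost:3000/api/discovery/promotion-log?state=AZ&limit=20');
const { logs } = await log.json();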
@@ -60,6 +60,7 @@ export interface DiscoveryLocation {
  stateCode: string | null;
  postalCode: string | null;
  countryCode: string | null;
+ country: string | null;
  latitude: number | null;
  longitude: number | null;
  timezone: string | null;
@@ -72,6 +73,18 @@ export interface DiscoveryLocation {
  offersPickup: boolean | null;
  isRecreational: boolean | null;
  isMedical: boolean | null;
+ // New Dutchie fields
+ phone: string | null;
+ website: string | null;
+ email: string | null;
+ description: string | null;
+ logoImage: string | null;
+ bannerImage: string | null;
+ chainSlug: string | null;
+ enterpriseId: string | null;
+ cName: string | null;
+ storeStatus: string | null;
+ // Timestamps
  firstSeenAt: Date;
  lastSeenAt: Date;
  lastCheckedAt: Date | null;
@@ -96,6 +109,7 @@ export interface DiscoveryLocationRow {
  state_code: string | null;
  postal_code: string | null;
  country_code: string | null;
+ country: string | null;
  latitude: number | null;
  longitude: number | null;
  timezone: string | null;
@@ -108,6 +122,18 @@ export interface DiscoveryLocationRow {
  offers_pickup: boolean | null;
  is_recreational: boolean | null;
  is_medical: boolean | null;
+ // New Dutchie fields (snake_case for DB row)
+ phone: string | null;
+ website: string | null;
+ email: string | null;
+ description: string | null;
+ logo_image: string | null;
+ banner_image: string | null;
+ chain_slug: string | null;
+ enterprise_id: string | null;
+ c_name: string | null;
+ store_status: string | null;
+ // Timestamps
  first_seen_at: Date;
  last_seen_at: Date;
  last_checked_at: Date | null;
@@ -185,6 +211,8 @@ export interface FullDiscoveryResult {
  totalLocationsFound: number;
  totalLocationsUpserted: number;
  durationMs: number;
+ // Per TASK_WORKFLOW_2024-12-10.md: Track new dispensary IDs for task chaining
+ newDispensaryIds?: number[];
}

// ============================================================
@@ -245,6 +273,7 @@ export function mapLocationRowToLocation(row: DiscoveryLocationRow): DiscoveryLo
  stateCode: row.state_code,
  postalCode: row.postal_code,
  countryCode: row.country_code,
+ country: row.country,
  latitude: row.latitude,
  longitude: row.longitude,
  timezone: row.timezone,
@@ -257,6 +286,18 @@ export function mapLocationRowToLocation(row: DiscoveryLocationRow): DiscoveryLo
  offersPickup: row.offers_pickup,
  isRecreational: row.is_recreational,
  isMedical: row.is_medical,
+ // New Dutchie fields
+ phone: row.phone,
+ website: row.website,
+ email: row.email,
+ description: row.description,
+ logoImage: row.logo_image,
+ bannerImage: row.banner_image,
+ chainSlug: row.chain_slug,
+ enterpriseId: row.enterprise_id,
+ cName: row.c_name,
+ storeStatus: row.store_status,
+ // Timestamps
  firstSeenAt: row.first_seen_at,
  lastSeenAt: row.last_seen_at,
  lastCheckedAt: row.last_checked_at,
@@ -16,6 +16,12 @@ import {
  NormalizedBrand,
  NormalizationResult,
} from './types';
+import {
+  downloadProductImage,
+  ProductImageContext,
+  isImageStorageReady,
+  LocalImageSizes,
+} from '../utils/image-storage';

const BATCH_SIZE = 100;

@@ -23,10 +29,21 @@ const BATCH_SIZE = 100;
// PRODUCT UPSERTS
// ============================================================

+export interface NewProductInfo {
+  id: number;                    // store_products.id
+  externalProductId: string;     // provider_product_id
+  name: string;
+  brandName: string | null;
+  primaryImageUrl: string | null;
+  hasLocalImage?: boolean;       // True if local_image_path is already set
+}
+
export interface UpsertProductsResult {
  upserted: number;
  new: number;
  updated: number;
+ newProducts: NewProductInfo[];             // Details of newly created products
+ productsNeedingImages: NewProductInfo[];   // Products (new or updated) that need image downloads
}

/**
@@ -41,12 +58,14 @@ export async function upsertStoreProducts(
  options: { dryRun?: boolean } = {}
): Promise<UpsertProductsResult> {
  if (products.length === 0) {
-   return { upserted: 0, new: 0, updated: 0 };
+   return { upserted: 0, new: 0, updated: 0, newProducts: [], productsNeedingImages: [] };
  }

  const { dryRun = false } = options;
  let newCount = 0;
  let updatedCount = 0;
+ const newProducts: NewProductInfo[] = [];
+ const productsNeedingImages: NewProductInfo[] = [];

  // Process in batches
  for (let i = 0; i < products.length; i += BATCH_SIZE) {
@@ -68,10 +87,10 @@ export async function upsertStoreProducts(
      const result = await client.query(
        `INSERT INTO store_products (
          dispensary_id, provider, provider_product_id, provider_brand_id,
-         name, brand_name, category, subcategory,
+         name_raw, brand_name_raw, category_raw, subcategory_raw,
          price_rec, price_med, price_rec_special, price_med_special,
          is_on_special, discount_percent,
-         is_in_stock, stock_status,
+         is_in_stock, stock_status, stock_quantity, total_quantity_available,
          thc_percent, cbd_percent,
          image_url,
          first_seen_at, last_seen_at, updated_at
@@ -80,17 +99,17 @@ export async function upsertStoreProducts(
          $5, $6, $7, $8,
          $9, $10, $11, $12,
          $13, $14,
-         $15, $16,
-         $17, $18,
-         $19,
+         $15, $16, $17, $17,
+         $18, $19,
+         $20,
          NOW(), NOW(), NOW()
        )
        ON CONFLICT (dispensary_id, provider, provider_product_id)
        DO UPDATE SET
-         name = EXCLUDED.name,
-         brand_name = EXCLUDED.brand_name,
-         category = EXCLUDED.category,
-         subcategory = EXCLUDED.subcategory,
+         name_raw = EXCLUDED.name_raw,
+         brand_name_raw = EXCLUDED.brand_name_raw,
+         category_raw = EXCLUDED.category_raw,
+         subcategory_raw = EXCLUDED.subcategory_raw,
          price_rec = EXCLUDED.price_rec,
          price_med = EXCLUDED.price_med,
          price_rec_special = EXCLUDED.price_rec_special,
@@ -99,12 +118,14 @@ export async function upsertStoreProducts(
          discount_percent = EXCLUDED.discount_percent,
          is_in_stock = EXCLUDED.is_in_stock,
          stock_status = EXCLUDED.stock_status,
+         stock_quantity = EXCLUDED.stock_quantity,
+         total_quantity_available = EXCLUDED.total_quantity_available,
          thc_percent = EXCLUDED.thc_percent,
          cbd_percent = EXCLUDED.cbd_percent,
          image_url = EXCLUDED.image_url,
          last_seen_at = NOW(),
          updated_at = NOW()
-       RETURNING (xmax = 0) as is_new`,
+       RETURNING id, (xmax = 0) as is_new, (local_image_path IS NOT NULL) as has_local_image`,
        [
          product.dispensaryId,
          product.platform,
@@ -122,16 +143,38 @@ export async function upsertStoreProducts(
          productPricing?.discountPercent,
          productAvailability?.inStock ?? true,
          productAvailability?.stockStatus || 'unknown',
-         product.thcPercent,
-         product.cbdPercent,
+         productAvailability?.quantity ?? null, // stock_quantity and total_quantity_available
+         // Clamp THC/CBD to valid percentage range (0-100) - some products report mg as %
+         product.thcPercent !== null && product.thcPercent <= 100 ? product.thcPercent : null,
+         product.cbdPercent !== null && product.cbdPercent <= 100 ? product.cbdPercent : null,
          product.primaryImageUrl,
        ]
      );

-     if (result.rows[0]?.is_new) {
+     const row = result.rows[0];
+     const productInfo: NewProductInfo = {
+       id: row.id,
+       externalProductId: product.externalProductId,
+       name: product.name,
+       brandName: product.brandName,
+       primaryImageUrl: product.primaryImageUrl,
+       hasLocalImage: row.has_local_image,
+     };
+
+     if (row.is_new) {
        newCount++;
+       // Track new products
+       newProducts.push(productInfo);
+       // New products always need images (if they have a source URL)
+       if (product.primaryImageUrl && !row.has_local_image) {
+         productsNeedingImages.push(productInfo);
+       }
      } else {
        updatedCount++;
+       // Updated products need images only if they don't have a local image yet
+       if (product.primaryImageUrl && !row.has_local_image) {
+         productsNeedingImages.push(productInfo);
+       }
      }
    }

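A tiny illustration of the clamp logic above (values are hypothetical): a plausible potency passes through, while a milligram value mistakenly reported as a percent is stored as NULL.

const clamp = (v: number | null) => (v !== null && v <= 100 ? v : null);
clamp(22.5); // 22.5 - plausible flower potency
clamp(500);  // null - almost certainly milligrams, not percent
clamp(null); // null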
@@ -148,6 +191,8 @@ export async function upsertStoreProducts(
    upserted: newCount + updatedCount,
    new: newCount,
    updated: updatedCount,
+   newProducts,
+   productsNeedingImages,
  };
}

@@ -212,8 +257,9 @@ export async function createStoreProductSnapshots(
        productAvailability?.inStock ?? true,
        productAvailability?.quantity,
        productAvailability?.stockStatus || 'unknown',
-       product.thcPercent,
-       product.cbdPercent,
+       // Clamp THC/CBD to valid percentage range (0-100) - some products report mg as %
+       product.thcPercent !== null && product.thcPercent <= 100 ? product.thcPercent : null,
+       product.cbdPercent !== null && product.cbdPercent <= 100 ? product.cbdPercent : null,
        product.primaryImageUrl,
        JSON.stringify(product.rawProduct),
      ]);
@@ -229,7 +275,7 @@ export async function createStoreProductSnapshots(
      `INSERT INTO store_product_snapshots (
        dispensary_id, provider, provider_product_id, crawl_run_id,
        captured_at,
-       name, brand_name, category, subcategory,
+       name_raw, brand_name_raw, category_raw, subcategory_raw,
        price_rec, price_med, price_rec_special, price_med_special,
        is_on_special, discount_percent,
        is_in_stock, stock_quantity, stock_status,
@@ -245,6 +291,202 @@ export async function createStoreProductSnapshots(
  return { created };
}

+// ============================================================
+// VARIANT UPSERTS
+// ============================================================
+
+export interface UpsertVariantsResult {
+  upserted: number;
+  new: number;
+  updated: number;
+  snapshotsCreated: number;
+}
+
+/**
+ * Extract variant data from raw Dutchie product
+ */
+function extractVariantsFromRaw(rawProduct: any): any[] {
+  const children = rawProduct?.POSMetaData?.children || [];
+  return children.map((child: any) => ({
+    option: child.option || child.key || '',
+    canonicalSku: child.canonicalSKU || null,
+    canonicalId: child.canonicalID || null,
+    canonicalName: child.canonicalName || null,
+    priceRec: child.recPrice || child.price || null,
+    priceMed: child.medPrice || null,
+    priceRecSpecial: child.recSpecialPrice || null,
+    priceMedSpecial: child.medSpecialPrice || null,
+    quantity: child.quantityAvailable ?? child.quantity ?? null,
+    inStock: (child.quantityAvailable ?? child.quantity ?? 0) > 0,
+  }));
+}
+
+/**
+ * Parse weight value and unit from option string
+ * e.g., "1g"    -> { value: 1, unit: "g" }
+ *       "3.5g"  -> { value: 3.5, unit: "g" }
+ *       "1/8oz" -> { value: 0.125, unit: "oz" }
+ */
+function parseWeight(option: string): { value: number | null; unit: string | null } {
+  if (!option) return { value: null, unit: null };
+
+  // Handle fractions like "1/8oz"
+  const fractionMatch = option.match(/^(\d+)\/(\d+)\s*(g|oz|mg|ml)?$/i);
+  if (fractionMatch) {
+    const value = parseInt(fractionMatch[1]) / parseInt(fractionMatch[2]);
+    return { value, unit: fractionMatch[3]?.toLowerCase() || 'oz' };
+  }
+
+  // Handle decimals like "3.5g" or "100mg"
+  const decimalMatch = option.match(/^([\d.]+)\s*(g|oz|mg|ml|each)?$/i);
+  if (decimalMatch) {
+    return {
+      value: parseFloat(decimalMatch[1]),
+      unit: decimalMatch[2]?.toLowerCase() || null
+    };
+  }
+
+  return { value: null, unit: null };
+}
+
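Quick usage check for parseWeight (inputs are illustrative):

parseWeight('3.5g');  // { value: 3.5, unit: 'g' }
parseWeight('1/8oz'); // { value: 0.125, unit: 'oz' }
parseWeight('1/2');   // { value: 0.5, unit: 'oz' } - bare fractions default to ounces
parseWeight('N/A');   // { value: null, unit: null }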
|
/**
|
||||||
|
* Upsert variants for products and create variant snapshots
|
||||||
|
*/
|
||||||
|
export async function upsertProductVariants(
|
||||||
|
pool: Pool,
|
||||||
|
dispensaryId: number,
|
||||||
|
products: NormalizedProduct[],
|
||||||
|
crawlRunId: number | null,
|
||||||
|
options: { dryRun?: boolean } = {}
|
||||||
|
): Promise<UpsertVariantsResult> {
|
||||||
|
if (products.length === 0) {
|
||||||
|
return { upserted: 0, new: 0, updated: 0, snapshotsCreated: 0 };
|
||||||
|
}
|
||||||
|
|
||||||
|
const { dryRun = false } = options;
|
||||||
|
let newCount = 0;
|
||||||
|
let updatedCount = 0;
|
||||||
|
let snapshotsCreated = 0;
|
||||||
|
|
||||||
|
for (const product of products) {
|
||||||
|
// Get the store_product_id for this product
|
||||||
|
const productResult = await pool.query(
|
||||||
|
`SELECT id FROM store_products
|
||||||
|
WHERE dispensary_id = $1 AND provider = $2 AND provider_product_id = $3`,
|
||||||
|
[dispensaryId, product.platform, product.externalProductId]
|
||||||
|
);
|
||||||
|
|
||||||
|
if (productResult.rows.length === 0) {
|
||||||
|
continue; // Product not found, skip variants
|
||||||
|
}
|
||||||
|
|
||||||
|
const storeProductId = productResult.rows[0].id;
|
||||||
|
const variants = extractVariantsFromRaw(product.rawProduct);
|
||||||
|
|
||||||
|
if (variants.length === 0) {
|
||||||
|
continue; // No variants to process
|
||||||
|
}
|
||||||
|
|
||||||
|
if (dryRun) {
|
||||||
|
console.log(`[DryRun] Would upsert ${variants.length} variants for product ${product.externalProductId}`);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (const variant of variants) {
|
||||||
|
const { value: weightValue, unit: weightUnit } = parseWeight(variant.option);
|
||||||
|
const isOnSpecial = (variant.priceRecSpecial !== null && variant.priceRecSpecial < variant.priceRec) ||
|
||||||
|
(variant.priceMedSpecial !== null && variant.priceMedSpecial < variant.priceMed);
|
||||||
|
|
||||||
|
// Upsert variant
|
||||||
|
const variantResult = await pool.query(
|
||||||
|
`INSERT INTO product_variants (
|
||||||
|
store_product_id, dispensary_id,
|
||||||
|
option, canonical_sku, canonical_id, canonical_name,
|
||||||
|
price_rec, price_med, price_rec_special, price_med_special,
|
||||||
|
quantity, quantity_available, in_stock, is_on_special,
|
||||||
|
weight_value, weight_unit,
|
||||||
|
first_seen_at, last_seen_at, updated_at
|
||||||
|
) VALUES (
|
||||||
|
$1, $2,
|
||||||
|
$3, $4, $5, $6,
|
||||||
|
$7, $8, $9, $10,
|
||||||
|
$11, $11, $12, $13,
|
||||||
|
$14, $15,
|
||||||
|
NOW(), NOW(), NOW()
|
||||||
|
)
|
||||||
|
ON CONFLICT (store_product_id, option)
|
||||||
|
DO UPDATE SET
|
||||||
|
canonical_sku = COALESCE(EXCLUDED.canonical_sku, product_variants.canonical_sku),
|
||||||
|
canonical_id = COALESCE(EXCLUDED.canonical_id, product_variants.canonical_id),
|
||||||
|
canonical_name = COALESCE(EXCLUDED.canonical_name, product_variants.canonical_name),
|
||||||
|
price_rec = EXCLUDED.price_rec,
|
||||||
|
price_med = EXCLUDED.price_med,
|
||||||
|
price_rec_special = EXCLUDED.price_rec_special,
|
||||||
|
price_med_special = EXCLUDED.price_med_special,
|
||||||
|
quantity = EXCLUDED.quantity,
|
||||||
|
quantity_available = EXCLUDED.quantity_available,
|
||||||
|
in_stock = EXCLUDED.in_stock,
|
||||||
|
is_on_special = EXCLUDED.is_on_special,
|
||||||
|
weight_value = COALESCE(EXCLUDED.weight_value, product_variants.weight_value),
|
||||||
|
weight_unit = COALESCE(EXCLUDED.weight_unit, product_variants.weight_unit),
|
||||||
|
last_seen_at = NOW(),
|
||||||
|
last_price_change_at = CASE
|
||||||
|
WHEN product_variants.price_rec IS DISTINCT FROM EXCLUDED.price_rec
|
||||||
|
OR product_variants.price_rec_special IS DISTINCT FROM EXCLUDED.price_rec_special
|
||||||
|
THEN NOW()
|
||||||
|
ELSE product_variants.last_price_change_at
|
||||||
|
END,
|
||||||
|
last_stock_change_at = CASE
|
||||||
|
WHEN product_variants.quantity IS DISTINCT FROM EXCLUDED.quantity
|
||||||
|
THEN NOW()
|
||||||
|
ELSE product_variants.last_stock_change_at
|
||||||
|
END,
|
||||||
|
updated_at = NOW()
|
||||||
|
RETURNING id, (xmax = 0) as is_new`,
|
||||||
|
[
|
||||||
|
storeProductId, dispensaryId,
|
||||||
|
variant.option, variant.canonicalSku, variant.canonicalId, variant.canonicalName,
|
||||||
|
variant.priceRec, variant.priceMed, variant.priceRecSpecial, variant.priceMedSpecial,
|
||||||
|
variant.quantity, variant.inStock, isOnSpecial,
|
||||||
|
weightValue, weightUnit,
|
||||||
|
]
|
||||||
|
);
|
||||||
|
|
||||||
|
const variantId = variantResult.rows[0].id;
|
||||||
|
if (variantResult.rows[0]?.is_new) {
|
||||||
|
newCount++;
|
||||||
|
} else {
|
||||||
|
updatedCount++;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create variant snapshot
|
||||||
|
await pool.query(
|
||||||
|
`INSERT INTO product_variant_snapshots (
|
||||||
|
product_variant_id, store_product_id, dispensary_id, crawl_run_id,
|
||||||
|
option,
|
||||||
|
price_rec, price_med, price_rec_special, price_med_special,
|
||||||
|
quantity, in_stock, is_on_special,
|
||||||
|
captured_at
|
||||||
|
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, NOW())`,
|
||||||
|
[
|
||||||
|
variantId, storeProductId, dispensaryId, crawlRunId,
|
||||||
|
variant.option,
|
||||||
|
variant.priceRec, variant.priceMed, variant.priceRecSpecial, variant.priceMedSpecial,
|
||||||
|
variant.quantity, variant.inStock, isOnSpecial,
|
||||||
|
]
|
||||||
|
);
|
||||||
|
snapshotsCreated++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return {
|
||||||
|
upserted: newCount + updatedCount,
|
||||||
|
new: newCount,
|
||||||
|
updated: updatedCount,
|
||||||
|
snapshotsCreated,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
// ============================================================
|
// ============================================================
|
||||||
// DISCONTINUED PRODUCTS
|
// DISCONTINUED PRODUCTS
|
||||||
// ============================================================
|
// ============================================================
|
||||||
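Two details in the hunk above are easy to miss: Postgres sets the system column `xmax` to 0 for freshly inserted rows, so `RETURNING (xmax = 0) as is_new` separates inserts from conflict-updates without a second query, and `$11` is bound twice deliberately so `quantity_available` is seeded from `quantity`. Given the decimal branch visible at the top, `parseWeight('3.5g')` presumably yields `{ value: 3.5, unit: 'g' }`. A minimal calling sketch for the new function — the pool wiring and IDs are illustrative placeholders, not part of this diff:

import { Pool } from 'pg';

async function runVariantUpsert(normProducts: NormalizedProduct[]) {
  // Placeholder connection; real config lives elsewhere in this repo.
  const pool = new Pool({ connectionString: process.env.DATABASE_URL });

  const result = await upsertProductVariants(
    pool,
    42,              // dispensaryId (placeholder)
    normProducts,    // output of the normalization step
    null,            // crawlRunId: null for an ad-hoc run
    { dryRun: true } // log intended writes without touching the DB
  );
  console.log(`${result.new} new, ${result.updated} updated, ${result.snapshotsCreated} snapshots`);
}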
@@ -366,6 +608,19 @@ export async function upsertBrands(
 // FULL HYDRATION
 // ============================================================
 
+export interface ImageDownloadResult {
+  downloaded: number;
+  skipped: number;
+  failed: number;
+  bytesTotal: number;
+}
+
+export interface DispensaryContext {
+  stateCode: string;
+  storeSlug: string;
+  hasExistingProducts?: boolean; // True if store already has products with local images
+}
+
 export interface HydratePayloadResult {
   productsUpserted: number;
   productsNew: number;
@@ -373,6 +628,157 @@ export interface HydratePayloadResult {
   productsDiscontinued: number;
   snapshotsCreated: number;
   brandsCreated: number;
+  variantsUpserted: number;
+  variantsNew: number;
+  variantSnapshotsCreated: number;
+  imagesDownloaded: number;
+  imagesSkipped: number;
+  imagesFailed: number;
+  imagesBytesTotal: number;
+}
+
+/**
+ * Helper to create slug from string
+ */
+function slugify(str: string): string {
+  return str
+    .toLowerCase()
+    .replace(/[^a-z0-9]+/g, '-')
+    .replace(/^-+|-+$/g, '')
+    .substring(0, 50) || 'unknown';
+}
+
+/**
+ * Download images for new products and update their local paths
+ */
+export async function downloadProductImages(
+  pool: Pool,
+  newProducts: NewProductInfo[],
+  dispensaryContext: DispensaryContext,
+  options: { dryRun?: boolean; concurrency?: number } = {}
+): Promise<ImageDownloadResult> {
+  const { dryRun = false, concurrency = 5 } = options;
+
+  // Filter products that have images to download
+  const productsWithImages = newProducts.filter(p => p.primaryImageUrl);
+
+  if (productsWithImages.length === 0) {
+    return { downloaded: 0, skipped: 0, failed: 0, bytesTotal: 0 };
+  }
+
+  // Check if image storage is ready
+  if (!isImageStorageReady()) {
+    console.warn('[ImageDownload] Image storage not initialized, skipping downloads');
+    return { downloaded: 0, skipped: productsWithImages.length, failed: 0, bytesTotal: 0 };
+  }
+
+  if (dryRun) {
+    console.log(`[DryRun] Would download ${productsWithImages.length} images`);
+    return { downloaded: 0, skipped: productsWithImages.length, failed: 0, bytesTotal: 0 };
+  }
+
+  let downloaded = 0;
+  let skipped = 0;
+  let failed = 0;
+  let bytesTotal = 0;
+
+  // Process in batches with concurrency limit
+  for (let i = 0; i < productsWithImages.length; i += concurrency) {
+    const batch = productsWithImages.slice(i, i + concurrency);
+
+    const results = await Promise.allSettled(
+      batch.map(async (product) => {
+        const ctx: ProductImageContext = {
+          stateCode: dispensaryContext.stateCode,
+          storeSlug: dispensaryContext.storeSlug,
+          brandSlug: slugify(product.brandName || 'unknown'),
+          productId: product.externalProductId,
+        };
+
+        const result = await downloadProductImage(product.primaryImageUrl!, ctx, { skipIfExists: true });
+
+        if (result.success) {
+          // Update the database with local image path
+          const imagesJson = JSON.stringify({
+            full: result.urls!.full,
+            medium: result.urls!.medium,
+            thumb: result.urls!.thumb,
+          });
+
+          await pool.query(
+            `UPDATE store_products
+             SET local_image_path = $1, images = $2
+             WHERE id = $3`,
+            [result.urls!.full, imagesJson, product.id]
+          );
+        }
+
+        return result;
+      })
+    );
+
+    for (const result of results) {
+      if (result.status === 'fulfilled') {
+        const downloadResult = result.value;
+        if (downloadResult.success) {
+          if (downloadResult.skipped) {
+            skipped++;
+          } else {
+            downloaded++;
+            bytesTotal += downloadResult.bytesDownloaded || 0;
+          }
+        } else {
+          failed++;
+          console.warn(`[ImageDownload] Failed: ${downloadResult.error}`);
+        }
+      } else {
+        failed++;
+        console.error(`[ImageDownload] Error:`, result.reason);
+      }
+    }
+  }
+
+  console.log(`[ImageDownload] Downloaded: ${downloaded}, Skipped: ${skipped}, Failed: ${failed}, Bytes: ${bytesTotal}`);
+  return { downloaded, skipped, failed, bytesTotal };
+}
+
+/**
+ * Get dispensary context for image paths
+ * Also checks if this dispensary already has products with local images
+ * to skip unnecessary filesystem checks for existing stores
+ */
+async function getDispensaryContext(pool: Pool, dispensaryId: number): Promise<DispensaryContext | null> {
+  try {
+    const result = await pool.query(
+      `SELECT
+         d.state,
+         d.slug,
+         d.name,
+         EXISTS(
+           SELECT 1 FROM store_products sp
+           WHERE sp.dispensary_id = d.id
+             AND sp.local_image_path IS NOT NULL
+           LIMIT 1
+         ) as has_local_images
+       FROM dispensaries d
+       WHERE d.id = $1`,
+      [dispensaryId]
+    );
+
+    if (result.rows.length === 0) {
+      return null;
+    }
+
+    const row = result.rows[0];
+    return {
+      stateCode: row.state || 'unknown',
+      storeSlug: row.slug || slugify(row.name || `store-${dispensaryId}`),
+      hasExistingProducts: row.has_local_images,
+    };
+  } catch (error) {
+    console.error('[getDispensaryContext] Error:', error);
+    return null;
+  }
+}
 }
 
 /**
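Two small behaviors in this hunk are worth spelling out. `slugify` collapses every non-alphanumeric run to a hyphen and caps slugs at 50 characters, so `slugify('Wyld Raspberry 2:1')` yields 'wyld-raspberry-2-1' and an all-punctuation input falls back to 'unknown'. The download loop bounds concurrency by slicing into batches and awaiting `Promise.allSettled`, so one failed image never aborts the rest of its batch. The same pattern in isolation (a generic sketch, not a helper from this codebase):

async function mapBatched<T, R>(
  items: T[],
  size: number,
  worker: (item: T) => Promise<R>
): Promise<PromiseSettledResult<R>[]> {
  const settled: PromiseSettledResult<R>[] = [];
  for (let i = 0; i < items.length; i += size) {
    // At most `size` workers run at once; allSettled collects
    // failures as rejected results instead of throwing.
    settled.push(...(await Promise.allSettled(items.slice(i, i + size).map(worker))));
  }
  return settled;
}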
@@ -383,9 +789,9 @@ export async function hydrateToCanonical(
   dispensaryId: number,
   normResult: NormalizationResult,
   crawlRunId: number | null,
-  options: { dryRun?: boolean } = {}
+  options: { dryRun?: boolean; downloadImages?: boolean } = {}
 ): Promise<HydratePayloadResult> {
-  const { dryRun = false } = options;
+  const { dryRun = false, downloadImages: shouldDownloadImages = true } = options;
 
   // 1. Upsert brands
   const brandResult = await upsertBrands(pool, normResult.brands, { dryRun });
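The widened options type keeps image fetching on by default (`downloadImages` falls back to `true` in the destructuring above) while letting callers opt out. For instance (the IDs are placeholders):

// Skip image fetching, e.g. in tests or when the image volume is not mounted.
const summary = await hydrateToCanonical(pool, 42, normResult, null, {
  downloadImages: false,
});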
@@ -399,7 +805,7 @@ export async function hydrateToCanonical(
     { dryRun }
   );
 
-  // 3. Create snapshots
+  // 3. Create product snapshots
   const snapshotResult = await createStoreProductSnapshots(
     pool,
     dispensaryId,
@@ -410,7 +816,16 @@ export async function hydrateToCanonical(
     { dryRun }
   );
 
-  // 4. Mark discontinued products
+  // 4. Upsert variants and create variant snapshots
+  const variantResult = await upsertProductVariants(
+    pool,
+    dispensaryId,
+    normResult.products,
+    crawlRunId,
+    { dryRun }
+  );
+
+  // 5. Mark discontinued products
   const currentProductIds = new Set(
     normResult.products.map((p) => p.externalProductId)
   );
@@ -424,6 +839,36 @@ export async function hydrateToCanonical(
     { dryRun }
   );
 
+  // 6. Download images for products that need them
+  // This includes:
+  // - New products (always need images)
+  // - Updated products that don't have local images yet (backfill)
+  // This avoids:
+  // - Filesystem checks for products that already have local images
+  // - Unnecessary HTTP requests for products with existing images
+  let imageResult: ImageDownloadResult = { downloaded: 0, skipped: 0, failed: 0, bytesTotal: 0 };
+
+  if (shouldDownloadImages && productResult.productsNeedingImages.length > 0) {
+    const dispensaryContext = await getDispensaryContext(pool, dispensaryId);
+
+    if (dispensaryContext) {
+      const newCount = productResult.productsNeedingImages.filter(p => !p.hasLocalImage).length;
+      const backfillCount = productResult.productsNeedingImages.length - newCount;
+      console.log(`[Hydration] Downloading images for ${productResult.productsNeedingImages.length} products (${newCount} new, ${backfillCount} backfill)...`);
+      imageResult = await downloadProductImages(
+        pool,
+        productResult.productsNeedingImages,
+        dispensaryContext,
+        { dryRun }
+      );
+    } else {
+      console.warn(`[Hydration] Could not get dispensary context for ID ${dispensaryId}, skipping image downloads`);
+    }
+  } else if (productResult.productsNeedingImages.length === 0 && productResult.upserted > 0) {
+    // All products already have local images
+    console.log(`[Hydration] All ${productResult.upserted} products already have local images, skipping downloads`);
+  }
+
   return {
     productsUpserted: productResult.upserted,
     productsNew: productResult.new,
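The image step above leans on `productResult.productsNeedingImages`, populated by the product-upsert step earlier in this function (outside this excerpt); each entry exposes `hasLocalImage`, which the log uses to split new rows from backfill candidates. Pieced together from the fields referenced in this file, `NewProductInfo` is presumably shaped roughly like this — an inferred sketch, not a definition from the diff:

// Inferred from usage in downloadProductImages and the gating above.
interface NewProductInfo {
  id: number;                     // store_products.id, used for the UPDATE
  externalProductId: string;      // provider product ID, used in image paths
  brandName: string | null;       // slugified into the brand directory
  primaryImageUrl: string | null; // source URL; entries without one are filtered out
  hasLocalImage?: boolean;        // distinguishes backfill rows from brand-new ones
}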
@@ -431,5 +876,12 @@ export async function hydrateToCanonical(
     productsDiscontinued: discontinuedCount,
     snapshotsCreated: snapshotResult.created,
     brandsCreated: brandResult.new,
+    variantsUpserted: variantResult.upserted,
+    variantsNew: variantResult.new,
+    variantSnapshotsCreated: variantResult.snapshotsCreated,
+    imagesDownloaded: imageResult.downloaded,
+    imagesSkipped: imageResult.skipped,
+    imagesFailed: imageResult.failed,
+    imagesBytesTotal: imageResult.bytesTotal,
   };
 }
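With the counters added above, a caller can report an entire hydration pass from one result object — an illustrative sketch, assuming `pool` and `normResult` from the surrounding pipeline:

const r = await hydrateToCanonical(pool, dispensaryId, normResult, crawlRunId);
console.log(
  `products ${r.productsUpserted} (${r.productsNew} new), ` +
    `variants ${r.variantsUpserted} (${r.variantsNew} new, ${r.variantSnapshotsCreated} snapshots), ` +
    `images ${r.imagesDownloaded} downloaded / ${r.imagesSkipped} skipped / ${r.imagesFailed} failed`
);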