Compare commits
228 Commits
fix/ci-rem
...
develop
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
754a46c56f | ||
|
|
e450d2e99e | ||
|
|
205a8b3159 | ||
|
|
8bd29d11bb | ||
|
|
4e7b3d2336 | ||
|
|
849123693a | ||
|
|
a1227f77b9 | ||
|
|
415e89a012 | ||
|
|
45844c6281 | ||
|
|
24c9586d81 | ||
|
|
f8d61446d5 | ||
|
|
0f859d1c75 | ||
|
|
52dc669782 | ||
|
|
2e47996354 | ||
|
|
f25d4eaf27 | ||
|
|
61a6be888c | ||
|
|
09c2b3a0e1 | ||
|
|
cec34198c7 | ||
|
|
3c10e07e45 | ||
|
|
3582c2e9e2 | ||
|
|
c6874977ee | ||
|
|
68430f5c22 | ||
|
|
ccefd325aa | ||
|
|
e119c5af53 | ||
|
|
e61224aaed | ||
|
|
7cf1b7643f | ||
|
|
74f813d68f | ||
|
|
f38f1024de | ||
|
|
358099c58a | ||
|
|
7fdcfc4fc4 | ||
|
|
541b461283 | ||
|
|
8f25cf10ab | ||
|
|
79e434212f | ||
|
|
600172eff6 | ||
|
|
4c12763fa1 | ||
|
|
2cb9a093f4 | ||
|
|
15ab40a820 | ||
|
|
2708fbe319 | ||
|
|
231d49e3e8 | ||
|
|
17defa046c | ||
|
|
d76a5fb3c5 | ||
|
|
f19fc59583 | ||
|
|
4c183c87a9 | ||
|
|
ffa05f89c4 | ||
|
|
9aa885211e | ||
|
|
b24b2fbc89 | ||
|
|
f7371de3d1 | ||
|
|
98970acf13 | ||
|
|
f0e636ac70 | ||
|
|
138ade07e1 | ||
|
|
728168e799 | ||
|
|
42c6bb7424 | ||
|
|
b32e847270 | ||
|
|
287627195c | ||
|
|
bfb965fa44 | ||
|
|
7bbc77a854 | ||
|
|
39ba522643 | ||
|
|
6ea4cd0d05 | ||
|
|
520cba9d31 | ||
|
|
331b6273ac | ||
|
|
d4a18cc3ce | ||
|
|
977803d862 | ||
|
|
48c640aae5 | ||
|
|
918a1c6b26 | ||
|
|
c7541ec2eb | ||
|
|
8676762d6b | ||
|
|
3f393ef77f | ||
|
|
a8360c7260 | ||
|
|
0979c9c37a | ||
|
|
b607fd7f44 | ||
|
|
bf988529eb | ||
|
|
04153a2efa | ||
|
|
a1a6876064 | ||
|
|
83466a03c3 | ||
|
|
35d6a17740 | ||
|
|
294d3db7a2 | ||
|
|
bbbd21ba94 | ||
|
|
3496be3064 | ||
|
|
af859a85f9 | ||
|
|
d3f5e4ef4b | ||
|
|
abef265ae9 | ||
|
|
b28a91fca5 | ||
|
|
60b221e7fb | ||
|
|
15cb657f13 | ||
|
|
f15920e508 | ||
|
|
9518ca48a5 | ||
|
|
3e9667571f | ||
|
|
8f6efd377b | ||
|
|
83e9718d78 | ||
|
|
f5cb17e1d4 | ||
|
|
f48a503e82 | ||
|
|
e7b392141a | ||
|
|
15a5a4239e | ||
|
|
20d7534b93 | ||
|
|
698995e46f | ||
|
|
1861e18396 | ||
|
|
eedc027ff6 | ||
|
|
ec5fcd9bc4 | ||
|
|
58150dafa6 | ||
|
|
06adab7225 | ||
|
|
38d7678a2e | ||
|
|
aac1181f3d | ||
|
|
4eaf7e50d7 | ||
|
|
4cb4e1c502 | ||
|
|
f0bb454ca2 | ||
|
|
b8bdc48c1e | ||
|
|
8173fd2845 | ||
|
|
3921e66933 | ||
|
|
ad79605961 | ||
|
|
6439de5cd4 | ||
|
|
b51ba17d32 | ||
|
|
2d631dfad0 | ||
|
|
072388ffb2 | ||
|
|
b456fe5097 | ||
|
|
eb5b2a876e | ||
|
|
ad09aadcc9 | ||
|
|
a020e31a46 | ||
|
|
83f629fec4 | ||
|
|
d810592bf2 | ||
|
|
d02c347ef6 | ||
|
|
d779a08bbf | ||
|
|
1490c60d2a | ||
|
|
ba15802a77 | ||
|
|
d8a22fba53 | ||
|
|
cf99ef9e09 | ||
|
|
3d0ea21007 | ||
|
|
023cfc127f | ||
|
|
5ea92e25af | ||
|
|
3b8171d94e | ||
|
|
d7da0b938d | ||
|
|
88e590d026 | ||
|
|
c215d11a84 | ||
|
|
59e0e45f8f | ||
|
|
e9a688fbb3 | ||
|
|
8b3ae40089 | ||
|
|
a8fec97bcb | ||
|
|
c969c7385b | ||
|
|
5084cb1a85 | ||
|
|
ec6843dfd6 | ||
|
|
268429b86c | ||
|
|
5c08135007 | ||
|
|
9f0d68d4c9 | ||
|
|
e11400566e | ||
|
|
987ed062d5 | ||
|
|
e50f54e621 | ||
|
|
983cd71fc2 | ||
|
|
7849ee0256 | ||
|
|
432842f442 | ||
|
|
94ebbb2497 | ||
|
|
e826a4dd3e | ||
|
|
b7e96359ef | ||
|
|
b1c1955082 | ||
|
|
95c23fcdff | ||
|
|
7067db68fc | ||
|
|
271faf0f00 | ||
|
|
291a8279bd | ||
|
|
b69d03c02f | ||
|
|
54f59c6082 | ||
|
|
c16c3083b1 | ||
|
|
656b00332e | ||
|
|
843f6ded75 | ||
|
|
0175a6817e | ||
|
|
24dd301d84 | ||
|
|
1d6211db19 | ||
|
|
e62f927218 | ||
|
|
675f42841e | ||
|
|
472dbdf418 | ||
|
|
5fcc03aff4 | ||
|
|
2d489e068b | ||
|
|
470097eb19 | ||
|
|
5af86edf83 | ||
|
|
55b26e9153 | ||
|
|
97bfdb9618 | ||
|
|
6f49c5e84a | ||
|
|
a6f09ee6e3 | ||
|
|
c62f8cbf06 | ||
|
|
e4e8438d8b | ||
|
|
822d2b0609 | ||
|
|
dfd36dacf8 | ||
|
|
4ea7139ed5 | ||
|
|
63023a4061 | ||
|
|
13a80e893e | ||
|
|
c98c409f59 | ||
|
|
6c8993f7bd | ||
|
|
92f88fdcd6 | ||
|
|
fd4a9b1434 | ||
|
|
832ef1cf83 | ||
|
|
b05eaceaf0 | ||
|
|
909470d3dc | ||
|
|
9a24b4896c | ||
|
|
dd8fce6e35 | ||
|
|
65b96d9cb9 | ||
|
|
f82eed4dc3 | ||
|
|
d997ec51a2 | ||
|
|
6490df9faf | ||
|
|
d86190912f | ||
|
|
a077f81c65 | ||
|
|
6bcadd9e71 | ||
|
|
a77bf8611a | ||
|
|
33feca3138 | ||
|
|
7d85a97b63 | ||
|
|
ce081effd4 | ||
|
|
daab0ae9b2 | ||
|
|
2ed088b4d8 | ||
|
|
d3c49fa246 | ||
|
|
52cb5014fd | ||
|
|
50654be910 | ||
|
|
cdab71a1ee | ||
|
|
a35976b9e9 | ||
|
|
c68210c485 | ||
|
|
f2864bd2ad | ||
|
|
eca9e85242 | ||
|
|
3f958fbff3 | ||
|
|
c84ef0396b | ||
|
|
e1c67dcee5 | ||
|
|
34c8a8cc67 | ||
|
|
6cd1f55119 | ||
|
|
e918234928 | ||
|
|
888a608485 | ||
|
|
b5c3b05246 | ||
|
|
fdce5e0302 | ||
|
|
4679b245de | ||
|
|
a837070f54 | ||
|
|
5a929e9803 | ||
|
|
52b0fad410 | ||
|
|
9944031eea | ||
|
|
2babaa7136 | ||
|
|
beb16ad0cb |
7
.gitignore
vendored
7
.gitignore
vendored
@@ -51,3 +51,10 @@ coverage/
|
|||||||
*.tmp
|
*.tmp
|
||||||
*.temp
|
*.temp
|
||||||
llm-scraper/
|
llm-scraper/
|
||||||
|
|
||||||
|
# Claude Code
|
||||||
|
.claude/
|
||||||
|
|
||||||
|
# Test/debug scripts
|
||||||
|
backend/scripts/test-*.ts
|
||||||
|
backend/scripts/run-*.ts
|
||||||
|
|||||||
184
.woodpecker.yml
184
.woodpecker.yml
@@ -3,7 +3,7 @@ steps:
|
|||||||
# PR VALIDATION: Parallel type checks (PRs only)
|
# PR VALIDATION: Parallel type checks (PRs only)
|
||||||
# ===========================================
|
# ===========================================
|
||||||
typecheck-backend:
|
typecheck-backend:
|
||||||
image: code.cannabrands.app/creationshop/node:20
|
image: node:22
|
||||||
commands:
|
commands:
|
||||||
- cd backend
|
- cd backend
|
||||||
- npm ci --prefer-offline
|
- npm ci --prefer-offline
|
||||||
@@ -13,7 +13,7 @@ steps:
|
|||||||
event: pull_request
|
event: pull_request
|
||||||
|
|
||||||
typecheck-cannaiq:
|
typecheck-cannaiq:
|
||||||
image: code.cannabrands.app/creationshop/node:20
|
image: node:22
|
||||||
commands:
|
commands:
|
||||||
- cd cannaiq
|
- cd cannaiq
|
||||||
- npm ci --prefer-offline
|
- npm ci --prefer-offline
|
||||||
@@ -23,7 +23,7 @@ steps:
|
|||||||
event: pull_request
|
event: pull_request
|
||||||
|
|
||||||
typecheck-findadispo:
|
typecheck-findadispo:
|
||||||
image: code.cannabrands.app/creationshop/node:20
|
image: node:22
|
||||||
commands:
|
commands:
|
||||||
- cd findadispo/frontend
|
- cd findadispo/frontend
|
||||||
- npm ci --prefer-offline
|
- npm ci --prefer-offline
|
||||||
@@ -33,7 +33,7 @@ steps:
|
|||||||
event: pull_request
|
event: pull_request
|
||||||
|
|
||||||
typecheck-findagram:
|
typecheck-findagram:
|
||||||
image: code.cannabrands.app/creationshop/node:20
|
image: node:22
|
||||||
commands:
|
commands:
|
||||||
- cd findagram/frontend
|
- cd findagram/frontend
|
||||||
- npm ci --prefer-offline
|
- npm ci --prefer-offline
|
||||||
@@ -58,7 +58,7 @@ steps:
|
|||||||
-H "Authorization: token $GITEA_TOKEN" \
|
-H "Authorization: token $GITEA_TOKEN" \
|
||||||
-H "Content-Type: application/json" \
|
-H "Content-Type: application/json" \
|
||||||
-d '{"Do":"merge"}' \
|
-d '{"Do":"merge"}' \
|
||||||
"https://code.cannabrands.app/api/v1/repos/Creationshop/dispensary-scraper/pulls/${CI_COMMIT_PULL_REQUEST}/merge"
|
"https://git.spdy.io/api/v1/repos/Creationshop/cannaiq/pulls/${CI_COMMIT_PULL_REQUEST}/merge"
|
||||||
depends_on:
|
depends_on:
|
||||||
- typecheck-backend
|
- typecheck-backend
|
||||||
- typecheck-cannaiq
|
- typecheck-cannaiq
|
||||||
@@ -68,124 +68,128 @@ steps:
|
|||||||
event: pull_request
|
event: pull_request
|
||||||
|
|
||||||
# ===========================================
|
# ===========================================
|
||||||
# MASTER DEPLOY: Parallel Docker builds
|
# DOCKER: Multi-stage builds with layer caching
|
||||||
# ===========================================
|
# ===========================================
|
||||||
docker-backend:
|
docker-backend:
|
||||||
image: woodpeckerci/plugin-docker-buildx
|
image: gcr.io/kaniko-project/executor:debug
|
||||||
settings:
|
commands:
|
||||||
registry: code.cannabrands.app
|
- /kaniko/executor
|
||||||
repo: code.cannabrands.app/creationshop/dispensary-scraper
|
--context=/woodpecker/src/git.spdy.io/Creationshop/cannaiq/backend
|
||||||
tags:
|
--dockerfile=/woodpecker/src/git.spdy.io/Creationshop/cannaiq/backend/Dockerfile
|
||||||
- latest
|
--destination=10.100.9.70:5000/cannaiq/backend:latest
|
||||||
- ${CI_COMMIT_SHA:0:8}
|
--destination=10.100.9.70:5000/cannaiq/backend:sha-${CI_COMMIT_SHA:0:8}
|
||||||
dockerfile: backend/Dockerfile
|
--build-arg=APP_BUILD_VERSION=sha-${CI_COMMIT_SHA:0:8}
|
||||||
context: backend
|
--build-arg=APP_GIT_SHA=${CI_COMMIT_SHA}
|
||||||
username:
|
--build-arg=APP_BUILD_TIME=${CI_PIPELINE_CREATED}
|
||||||
from_secret: registry_username
|
--registry-mirror=10.100.9.70:5000
|
||||||
password:
|
--insecure-registry=10.100.9.70:5000
|
||||||
from_secret: registry_password
|
--cache=true
|
||||||
platforms: linux/amd64
|
--cache-repo=10.100.9.70:5000/cannaiq/cache-backend
|
||||||
provenance: false
|
--cache-ttl=168h
|
||||||
build_args:
|
|
||||||
APP_BUILD_VERSION: ${CI_COMMIT_SHA:0:8}
|
|
||||||
APP_GIT_SHA: ${CI_COMMIT_SHA}
|
|
||||||
APP_BUILD_TIME: ${CI_PIPELINE_CREATED}
|
|
||||||
CONTAINER_IMAGE_TAG: ${CI_COMMIT_SHA:0:8}
|
|
||||||
depends_on: []
|
depends_on: []
|
||||||
when:
|
when:
|
||||||
branch: master
|
branch: [master, develop]
|
||||||
event: push
|
event: push
|
||||||
|
|
||||||
docker-cannaiq:
|
docker-cannaiq:
|
||||||
image: woodpeckerci/plugin-docker-buildx
|
image: gcr.io/kaniko-project/executor:debug
|
||||||
settings:
|
commands:
|
||||||
registry: code.cannabrands.app
|
- /kaniko/executor
|
||||||
repo: code.cannabrands.app/creationshop/cannaiq-frontend
|
--context=/woodpecker/src/git.spdy.io/Creationshop/cannaiq/cannaiq
|
||||||
tags:
|
--dockerfile=/woodpecker/src/git.spdy.io/Creationshop/cannaiq/cannaiq/Dockerfile
|
||||||
- latest
|
--destination=10.100.9.70:5000/cannaiq/frontend:latest
|
||||||
- ${CI_COMMIT_SHA:0:8}
|
--destination=10.100.9.70:5000/cannaiq/frontend:sha-${CI_COMMIT_SHA:0:8}
|
||||||
dockerfile: cannaiq/Dockerfile
|
--registry-mirror=10.100.9.70:5000
|
||||||
context: cannaiq
|
--insecure-registry=10.100.9.70:5000
|
||||||
username:
|
--cache=true
|
||||||
from_secret: registry_username
|
--cache-repo=10.100.9.70:5000/cannaiq/cache-cannaiq
|
||||||
password:
|
--cache-ttl=168h
|
||||||
from_secret: registry_password
|
|
||||||
platforms: linux/amd64
|
|
||||||
provenance: false
|
|
||||||
depends_on: []
|
depends_on: []
|
||||||
when:
|
when:
|
||||||
branch: master
|
branch: [master, develop]
|
||||||
event: push
|
event: push
|
||||||
|
|
||||||
docker-findadispo:
|
docker-findadispo:
|
||||||
image: woodpeckerci/plugin-docker-buildx
|
image: gcr.io/kaniko-project/executor:debug
|
||||||
settings:
|
commands:
|
||||||
registry: code.cannabrands.app
|
- /kaniko/executor
|
||||||
repo: code.cannabrands.app/creationshop/findadispo-frontend
|
--context=/woodpecker/src/git.spdy.io/Creationshop/cannaiq/findadispo/frontend
|
||||||
tags:
|
--dockerfile=/woodpecker/src/git.spdy.io/Creationshop/cannaiq/findadispo/frontend/Dockerfile
|
||||||
- latest
|
--destination=10.100.9.70:5000/cannaiq/findadispo:latest
|
||||||
- ${CI_COMMIT_SHA:0:8}
|
--destination=10.100.9.70:5000/cannaiq/findadispo:sha-${CI_COMMIT_SHA:0:8}
|
||||||
dockerfile: findadispo/frontend/Dockerfile
|
--registry-mirror=10.100.9.70:5000
|
||||||
context: findadispo/frontend
|
--insecure-registry=10.100.9.70:5000
|
||||||
username:
|
--cache=true
|
||||||
from_secret: registry_username
|
--cache-repo=10.100.9.70:5000/cannaiq/cache-findadispo
|
||||||
password:
|
--cache-ttl=168h
|
||||||
from_secret: registry_password
|
|
||||||
platforms: linux/amd64
|
|
||||||
provenance: false
|
|
||||||
depends_on: []
|
depends_on: []
|
||||||
when:
|
when:
|
||||||
branch: master
|
branch: [master, develop]
|
||||||
event: push
|
event: push
|
||||||
|
|
||||||
docker-findagram:
|
docker-findagram:
|
||||||
image: woodpeckerci/plugin-docker-buildx
|
image: gcr.io/kaniko-project/executor:debug
|
||||||
settings:
|
commands:
|
||||||
registry: code.cannabrands.app
|
- /kaniko/executor
|
||||||
repo: code.cannabrands.app/creationshop/findagram-frontend
|
--context=/woodpecker/src/git.spdy.io/Creationshop/cannaiq/findagram/frontend
|
||||||
tags:
|
--dockerfile=/woodpecker/src/git.spdy.io/Creationshop/cannaiq/findagram/frontend/Dockerfile
|
||||||
- latest
|
--destination=10.100.9.70:5000/cannaiq/findagram:latest
|
||||||
- ${CI_COMMIT_SHA:0:8}
|
--destination=10.100.9.70:5000/cannaiq/findagram:sha-${CI_COMMIT_SHA:0:8}
|
||||||
dockerfile: findagram/frontend/Dockerfile
|
--registry-mirror=10.100.9.70:5000
|
||||||
context: findagram/frontend
|
--insecure-registry=10.100.9.70:5000
|
||||||
username:
|
--cache=true
|
||||||
from_secret: registry_username
|
--cache-repo=10.100.9.70:5000/cannaiq/cache-findagram
|
||||||
password:
|
--cache-ttl=168h
|
||||||
from_secret: registry_password
|
|
||||||
platforms: linux/amd64
|
|
||||||
provenance: false
|
|
||||||
depends_on: []
|
depends_on: []
|
||||||
when:
|
when:
|
||||||
branch: master
|
branch: [master, develop]
|
||||||
event: push
|
event: push
|
||||||
|
|
||||||
# ===========================================
|
# ===========================================
|
||||||
# STAGE 3: Deploy and Run Migrations
|
# DEPLOY: Pull from local registry
|
||||||
# ===========================================
|
# ===========================================
|
||||||
deploy:
|
deploy:
|
||||||
image: bitnami/kubectl:latest
|
image: bitnami/kubectl:latest
|
||||||
environment:
|
environment:
|
||||||
KUBECONFIG_CONTENT:
|
K8S_TOKEN:
|
||||||
from_secret: kubeconfig_data
|
from_secret: k8s_token
|
||||||
commands:
|
commands:
|
||||||
- mkdir -p ~/.kube
|
- mkdir -p ~/.kube
|
||||||
- echo "$KUBECONFIG_CONTENT" | tr -d '[:space:]' | base64 -d > ~/.kube/config
|
- |
|
||||||
|
cat > ~/.kube/config << KUBEEOF
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Config
|
||||||
|
clusters:
|
||||||
|
- cluster:
|
||||||
|
certificate-authority-data: LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUJkakNDQVIyZ0F3SUJBZ0lCQURBS0JnZ3Foa2pPUFFRREFqQWpNU0V3SHdZRFZRUUREQmhyTTNNdGMyVnkKZG1WeUxXTmhRREUzTmpVM05UUTNPRE13SGhjTk1qVXhNakUwTWpNeU5qSXpXaGNOTXpVeE1qRXlNak15TmpJegpXakFqTVNFd0h3WURWUVFEREJock0zTXRjMlZ5ZG1WeUxXTmhRREUzTmpVM05UUTNPRE13V1RBVEJnY3Foa2pPClBRSUJCZ2dxaGtqT1BRTUJCd05DQUFRWDRNdFJRTW5lWVJVV0s2cjZ3VEV2WjAxNnV4T3NUR3JJZ013TXVnNGwKajQ1bHZ6ZkM1WE1NY1pESnUxZ0t1dVJhVGxlb0xVOVJnSERIUUI4TUwzNTJvMEl3UURBT0JnTlZIUThCQWY4RQpCQU1DQXFRd0R3WURWUjBUQVFIL0JBVXdBd0VCL3pBZEJnTlZIUTRFRmdRVXIzNDZpNE42TFhzaEZsREhvSlU0CjJ1RjZseGN3Q2dZSUtvWkl6ajBFQXdJRFJ3QXdSQUlnVUtqdWRFQWJyS1JDVHROVXZTc1Rmb3FEaHFSeDM5MkYKTFFSVWlKK0hCVElDSUJqOFIxbG1zSnFSRkRHMEpwMGN4OG5ZZnFCaElRQzh6WWdRdTdBZmR4L3IKLS0tLS1FTkQgQ0VSVElGSUNBVEUtLS0tLQo=
|
||||||
|
server: https://10.100.6.10:6443
|
||||||
|
name: spdy-k3s
|
||||||
|
contexts:
|
||||||
|
- context:
|
||||||
|
cluster: spdy-k3s
|
||||||
|
namespace: cannaiq
|
||||||
|
user: cannaiq-admin
|
||||||
|
name: cannaiq
|
||||||
|
current-context: cannaiq
|
||||||
|
users:
|
||||||
|
- name: cannaiq-admin
|
||||||
|
user:
|
||||||
|
token: $K8S_TOKEN
|
||||||
|
KUBEEOF
|
||||||
- chmod 600 ~/.kube/config
|
- chmod 600 ~/.kube/config
|
||||||
# Deploy backend first
|
- kubectl set image deployment/scraper scraper=10.100.9.70:5000/cannaiq/backend:sha-${CI_COMMIT_SHA:0:8} -n cannaiq
|
||||||
- kubectl set image deployment/scraper scraper=code.cannabrands.app/creationshop/dispensary-scraper:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
|
- kubectl rollout status deployment/scraper -n cannaiq --timeout=300s
|
||||||
- kubectl rollout status deployment/scraper -n dispensary-scraper --timeout=300s
|
- REPLICAS=$(kubectl get deployment scraper-worker -n cannaiq -o jsonpath='{.spec.replicas}'); if [ "$REPLICAS" = "0" ]; then kubectl scale deployment/scraper-worker --replicas=5 -n cannaiq; fi
|
||||||
# Note: Migrations run automatically at startup via auto-migrate
|
- kubectl set image deployment/scraper-worker worker=10.100.9.70:5000/cannaiq/backend:sha-${CI_COMMIT_SHA:0:8} -n cannaiq
|
||||||
# Deploy remaining services
|
- kubectl set image deployment/cannaiq-frontend cannaiq-frontend=10.100.9.70:5000/cannaiq/frontend:sha-${CI_COMMIT_SHA:0:8} -n cannaiq
|
||||||
- kubectl set image deployment/scraper-worker worker=code.cannabrands.app/creationshop/dispensary-scraper:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
|
- kubectl set image deployment/findadispo-frontend findadispo-frontend=10.100.9.70:5000/cannaiq/findadispo:sha-${CI_COMMIT_SHA:0:8} -n cannaiq
|
||||||
- kubectl set image deployment/cannaiq-frontend cannaiq-frontend=code.cannabrands.app/creationshop/cannaiq-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
|
- kubectl set image deployment/findagram-frontend findagram-frontend=10.100.9.70:5000/cannaiq/findagram:sha-${CI_COMMIT_SHA:0:8} -n cannaiq
|
||||||
- kubectl set image deployment/findadispo-frontend findadispo-frontend=code.cannabrands.app/creationshop/findadispo-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
|
- kubectl rollout status deployment/cannaiq-frontend -n cannaiq --timeout=120s
|
||||||
- kubectl set image deployment/findagram-frontend findagram-frontend=code.cannabrands.app/creationshop/findagram-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
|
|
||||||
- kubectl rollout status deployment/cannaiq-frontend -n dispensary-scraper --timeout=120s
|
|
||||||
depends_on:
|
depends_on:
|
||||||
- docker-backend
|
- docker-backend
|
||||||
- docker-cannaiq
|
- docker-cannaiq
|
||||||
- docker-findadispo
|
- docker-findadispo
|
||||||
- docker-findagram
|
- docker-findagram
|
||||||
when:
|
when:
|
||||||
branch: master
|
branch: [master, develop]
|
||||||
event: push
|
event: push
|
||||||
|
|||||||
@@ -1,191 +0,0 @@
|
|||||||
steps:
|
|
||||||
# ===========================================
|
|
||||||
# PR VALIDATION: Parallel type checks (PRs only)
|
|
||||||
# ===========================================
|
|
||||||
typecheck-backend:
|
|
||||||
image: code.cannabrands.app/creationshop/node:20
|
|
||||||
commands:
|
|
||||||
- cd backend
|
|
||||||
- npm ci --prefer-offline
|
|
||||||
- npx tsc --noEmit
|
|
||||||
depends_on: []
|
|
||||||
when:
|
|
||||||
event: pull_request
|
|
||||||
|
|
||||||
typecheck-cannaiq:
|
|
||||||
image: code.cannabrands.app/creationshop/node:20
|
|
||||||
commands:
|
|
||||||
- cd cannaiq
|
|
||||||
- npm ci --prefer-offline
|
|
||||||
- npx tsc --noEmit
|
|
||||||
depends_on: []
|
|
||||||
when:
|
|
||||||
event: pull_request
|
|
||||||
|
|
||||||
typecheck-findadispo:
|
|
||||||
image: code.cannabrands.app/creationshop/node:20
|
|
||||||
commands:
|
|
||||||
- cd findadispo/frontend
|
|
||||||
- npm ci --prefer-offline
|
|
||||||
- npx tsc --noEmit 2>/dev/null || true
|
|
||||||
depends_on: []
|
|
||||||
when:
|
|
||||||
event: pull_request
|
|
||||||
|
|
||||||
typecheck-findagram:
|
|
||||||
image: code.cannabrands.app/creationshop/node:20
|
|
||||||
commands:
|
|
||||||
- cd findagram/frontend
|
|
||||||
- npm ci --prefer-offline
|
|
||||||
- npx tsc --noEmit 2>/dev/null || true
|
|
||||||
depends_on: []
|
|
||||||
when:
|
|
||||||
event: pull_request
|
|
||||||
|
|
||||||
# ===========================================
|
|
||||||
# AUTO-MERGE: Merge PR after all checks pass
|
|
||||||
# ===========================================
|
|
||||||
auto-merge:
|
|
||||||
image: alpine:latest
|
|
||||||
environment:
|
|
||||||
GITEA_TOKEN:
|
|
||||||
from_secret: gitea_token
|
|
||||||
commands:
|
|
||||||
- apk add --no-cache curl
|
|
||||||
- |
|
|
||||||
echo "Merging PR #${CI_COMMIT_PULL_REQUEST}..."
|
|
||||||
curl -s -X POST \
|
|
||||||
-H "Authorization: token $GITEA_TOKEN" \
|
|
||||||
-H "Content-Type: application/json" \
|
|
||||||
-d '{"Do":"merge"}' \
|
|
||||||
"https://code.cannabrands.app/api/v1/repos/Creationshop/dispensary-scraper/pulls/${CI_COMMIT_PULL_REQUEST}/merge"
|
|
||||||
depends_on:
|
|
||||||
- typecheck-backend
|
|
||||||
- typecheck-cannaiq
|
|
||||||
- typecheck-findadispo
|
|
||||||
- typecheck-findagram
|
|
||||||
when:
|
|
||||||
event: pull_request
|
|
||||||
|
|
||||||
# ===========================================
|
|
||||||
# MASTER DEPLOY: Parallel Docker builds
|
|
||||||
# ===========================================
|
|
||||||
docker-backend:
|
|
||||||
image: woodpeckerci/plugin-docker-buildx
|
|
||||||
settings:
|
|
||||||
registry: code.cannabrands.app
|
|
||||||
repo: code.cannabrands.app/creationshop/dispensary-scraper
|
|
||||||
tags:
|
|
||||||
- latest
|
|
||||||
- ${CI_COMMIT_SHA:0:8}
|
|
||||||
dockerfile: backend/Dockerfile
|
|
||||||
context: backend
|
|
||||||
username:
|
|
||||||
from_secret: registry_username
|
|
||||||
password:
|
|
||||||
from_secret: registry_password
|
|
||||||
platforms: linux/amd64
|
|
||||||
provenance: false
|
|
||||||
build_args:
|
|
||||||
APP_BUILD_VERSION: ${CI_COMMIT_SHA:0:8}
|
|
||||||
APP_GIT_SHA: ${CI_COMMIT_SHA}
|
|
||||||
APP_BUILD_TIME: ${CI_PIPELINE_CREATED}
|
|
||||||
CONTAINER_IMAGE_TAG: ${CI_COMMIT_SHA:0:8}
|
|
||||||
depends_on: []
|
|
||||||
when:
|
|
||||||
branch: master
|
|
||||||
event: push
|
|
||||||
|
|
||||||
docker-cannaiq:
|
|
||||||
image: woodpeckerci/plugin-docker-buildx
|
|
||||||
settings:
|
|
||||||
registry: code.cannabrands.app
|
|
||||||
repo: code.cannabrands.app/creationshop/cannaiq-frontend
|
|
||||||
tags:
|
|
||||||
- latest
|
|
||||||
- ${CI_COMMIT_SHA:0:8}
|
|
||||||
dockerfile: cannaiq/Dockerfile
|
|
||||||
context: cannaiq
|
|
||||||
username:
|
|
||||||
from_secret: registry_username
|
|
||||||
password:
|
|
||||||
from_secret: registry_password
|
|
||||||
platforms: linux/amd64
|
|
||||||
provenance: false
|
|
||||||
depends_on: []
|
|
||||||
when:
|
|
||||||
branch: master
|
|
||||||
event: push
|
|
||||||
|
|
||||||
docker-findadispo:
|
|
||||||
image: woodpeckerci/plugin-docker-buildx
|
|
||||||
settings:
|
|
||||||
registry: code.cannabrands.app
|
|
||||||
repo: code.cannabrands.app/creationshop/findadispo-frontend
|
|
||||||
tags:
|
|
||||||
- latest
|
|
||||||
- ${CI_COMMIT_SHA:0:8}
|
|
||||||
dockerfile: findadispo/frontend/Dockerfile
|
|
||||||
context: findadispo/frontend
|
|
||||||
username:
|
|
||||||
from_secret: registry_username
|
|
||||||
password:
|
|
||||||
from_secret: registry_password
|
|
||||||
platforms: linux/amd64
|
|
||||||
provenance: false
|
|
||||||
depends_on: []
|
|
||||||
when:
|
|
||||||
branch: master
|
|
||||||
event: push
|
|
||||||
|
|
||||||
docker-findagram:
|
|
||||||
image: woodpeckerci/plugin-docker-buildx
|
|
||||||
settings:
|
|
||||||
registry: code.cannabrands.app
|
|
||||||
repo: code.cannabrands.app/creationshop/findagram-frontend
|
|
||||||
tags:
|
|
||||||
- latest
|
|
||||||
- ${CI_COMMIT_SHA:0:8}
|
|
||||||
dockerfile: findagram/frontend/Dockerfile
|
|
||||||
context: findagram/frontend
|
|
||||||
username:
|
|
||||||
from_secret: registry_username
|
|
||||||
password:
|
|
||||||
from_secret: registry_password
|
|
||||||
platforms: linux/amd64
|
|
||||||
provenance: false
|
|
||||||
depends_on: []
|
|
||||||
when:
|
|
||||||
branch: master
|
|
||||||
event: push
|
|
||||||
|
|
||||||
# ===========================================
|
|
||||||
# STAGE 3: Deploy and Run Migrations
|
|
||||||
# ===========================================
|
|
||||||
deploy:
|
|
||||||
image: bitnami/kubectl:latest
|
|
||||||
environment:
|
|
||||||
KUBECONFIG_CONTENT:
|
|
||||||
from_secret: kubeconfig_data
|
|
||||||
commands:
|
|
||||||
- mkdir -p ~/.kube
|
|
||||||
- echo "$KUBECONFIG_CONTENT" | tr -d '[:space:]' | base64 -d > ~/.kube/config
|
|
||||||
- chmod 600 ~/.kube/config
|
|
||||||
# Deploy backend first
|
|
||||||
- kubectl set image deployment/scraper scraper=code.cannabrands.app/creationshop/dispensary-scraper:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
|
|
||||||
- kubectl rollout status deployment/scraper -n dispensary-scraper --timeout=300s
|
|
||||||
# Note: Migrations run automatically at startup via auto-migrate
|
|
||||||
# Deploy remaining services
|
|
||||||
- kubectl set image deployment/scraper-worker worker=code.cannabrands.app/creationshop/dispensary-scraper:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
|
|
||||||
- kubectl set image deployment/cannaiq-frontend cannaiq-frontend=code.cannabrands.app/creationshop/cannaiq-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
|
|
||||||
- kubectl set image deployment/findadispo-frontend findadispo-frontend=code.cannabrands.app/creationshop/findadispo-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
|
|
||||||
- kubectl set image deployment/findagram-frontend findagram-frontend=code.cannabrands.app/creationshop/findagram-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
|
|
||||||
- kubectl rollout status deployment/cannaiq-frontend -n dispensary-scraper --timeout=120s
|
|
||||||
depends_on:
|
|
||||||
- docker-backend
|
|
||||||
- docker-cannaiq
|
|
||||||
- docker-findadispo
|
|
||||||
- docker-findagram
|
|
||||||
when:
|
|
||||||
branch: master
|
|
||||||
event: push
|
|
||||||
@@ -1,17 +1,33 @@
|
|||||||
# Build stage
|
# Build stage
|
||||||
# Image: code.cannabrands.app/creationshop/dispensary-scraper
|
# Image: git.spdy.io/creationshop/dispensary-scraper
|
||||||
FROM code.cannabrands.app/creationshop/node:20-slim AS builder
|
FROM node:22-slim AS builder
|
||||||
|
|
||||||
|
# Install build tools for native modules (bcrypt, sharp)
|
||||||
|
RUN apt-get update && apt-get install -y \
|
||||||
|
python3 \
|
||||||
|
build-essential \
|
||||||
|
--no-install-recommends \
|
||||||
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
|
|
||||||
COPY package*.json ./
|
COPY package*.json ./
|
||||||
RUN npm install
|
|
||||||
|
# Install dependencies with retry and fallback registry
|
||||||
|
RUN npm config set fetch-retries 3 && \
|
||||||
|
npm config set fetch-retry-mintimeout 20000 && \
|
||||||
|
npm config set fetch-retry-maxtimeout 120000 && \
|
||||||
|
npm install || \
|
||||||
|
(npm config set registry https://registry.npmmirror.com && npm install)
|
||||||
|
|
||||||
COPY . .
|
COPY . .
|
||||||
RUN npm run build
|
RUN npm run build
|
||||||
|
|
||||||
|
# Prune dev dependencies for smaller production image
|
||||||
|
RUN npm prune --production
|
||||||
|
|
||||||
# Production stage
|
# Production stage
|
||||||
FROM code.cannabrands.app/creationshop/node:20-slim
|
FROM node:22-slim
|
||||||
|
|
||||||
# Build arguments for version info
|
# Build arguments for version info
|
||||||
ARG APP_BUILD_VERSION=dev
|
ARG APP_BUILD_VERSION=dev
|
||||||
@@ -44,8 +60,7 @@ ENV PUPPETEER_EXECUTABLE_PATH=/usr/bin/chromium
|
|||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
|
|
||||||
COPY package*.json ./
|
COPY package*.json ./
|
||||||
RUN npm install --omit=dev
|
COPY --from=builder /app/node_modules ./node_modules
|
||||||
|
|
||||||
COPY --from=builder /app/dist ./dist
|
COPY --from=builder /app/dist ./dist
|
||||||
|
|
||||||
# Copy migrations for auto-migrate on startup
|
# Copy migrations for auto-migrate on startup
|
||||||
|
|||||||
268
backend/docs/CODEBASE_MAP.md
Normal file
268
backend/docs/CODEBASE_MAP.md
Normal file
@@ -0,0 +1,268 @@
|
|||||||
|
# CannaiQ Backend Codebase Map
|
||||||
|
|
||||||
|
**Last Updated:** 2025-12-12
|
||||||
|
**Purpose:** Help Claude and developers understand which code is current vs deprecated
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Quick Reference: What to Use
|
||||||
|
|
||||||
|
### For Crawling/Scraping
|
||||||
|
| Task | Use This | NOT This |
|
||||||
|
|------|----------|----------|
|
||||||
|
| Fetch products | `src/tasks/handlers/payload-fetch.ts` | `src/hydration/*` |
|
||||||
|
| Process products | `src/tasks/handlers/product-refresh.ts` | `src/scraper-v2/*` |
|
||||||
|
| GraphQL client | `src/platforms/dutchie/client.ts` | `src/dutchie-az/services/graphql-client.ts` |
|
||||||
|
| Worker system | `src/tasks/task-worker.ts` | `src/dutchie-az/services/worker.ts` |
|
||||||
|
|
||||||
|
### For Database
|
||||||
|
| Task | Use This | NOT This |
|
||||||
|
|------|----------|----------|
|
||||||
|
| Get DB pool | `src/db/pool.ts` | `src/dutchie-az/db/connection.ts` |
|
||||||
|
| Run migrations | `src/db/migrate.ts` (CLI only) | Never import at runtime |
|
||||||
|
| Query products | `store_products` table | `products`, `dutchie_products` |
|
||||||
|
| Query stores | `dispensaries` table | `stores` table |
|
||||||
|
|
||||||
|
### For Discovery
|
||||||
|
| Task | Use This |
|
||||||
|
|------|----------|
|
||||||
|
| Discover stores | `src/discovery/*.ts` |
|
||||||
|
| Run discovery | `npx tsx src/scripts/run-discovery.ts` |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Directory Status
|
||||||
|
|
||||||
|
### ACTIVE DIRECTORIES (Use These)
|
||||||
|
|
||||||
|
```
|
||||||
|
src/
|
||||||
|
├── auth/ # JWT/session auth, middleware
|
||||||
|
├── db/ # Database pool, migrations
|
||||||
|
├── discovery/ # Dutchie store discovery pipeline
|
||||||
|
├── middleware/ # Express middleware
|
||||||
|
├── multi-state/ # Multi-state query support
|
||||||
|
├── platforms/ # Platform-specific clients (Dutchie, Jane, etc)
|
||||||
|
│ └── dutchie/ # THE Dutchie client - use this one
|
||||||
|
├── routes/ # Express API routes
|
||||||
|
├── services/ # Core services (logger, scheduler, etc)
|
||||||
|
├── tasks/ # Task system (workers, handlers, scheduler)
|
||||||
|
│ └── handlers/ # Task handlers (payload_fetch, product_refresh, etc)
|
||||||
|
├── types/ # TypeScript types
|
||||||
|
└── utils/ # Utilities (storage, image processing)
|
||||||
|
```
|
||||||
|
|
||||||
|
### DEPRECATED DIRECTORIES (DO NOT USE)
|
||||||
|
|
||||||
|
```
|
||||||
|
src/
|
||||||
|
├── hydration/ # DEPRECATED - Old pipeline approach
|
||||||
|
├── scraper-v2/ # DEPRECATED - Old scraper engine
|
||||||
|
├── canonical-hydration/ # DEPRECATED - Merged into tasks/handlers
|
||||||
|
├── dutchie-az/ # PARTIAL - Some parts deprecated, some active
|
||||||
|
│ ├── db/ # DEPRECATED - Use src/db/pool.ts
|
||||||
|
│ └── services/ # PARTIAL - worker.ts still runs, graphql-client.ts deprecated
|
||||||
|
├── portals/ # FUTURE - Not yet implemented
|
||||||
|
├── seo/ # PARTIAL - Settings work, templates WIP
|
||||||
|
└── system/ # DEPRECATED - Old orchestration system
|
||||||
|
```
|
||||||
|
|
||||||
|
### DEPRECATED FILES (DO NOT USE)
|
||||||
|
|
||||||
|
```
|
||||||
|
src/dutchie-az/db/connection.ts # Use src/db/pool.ts instead
|
||||||
|
src/dutchie-az/services/graphql-client.ts # Use src/platforms/dutchie/client.ts
|
||||||
|
src/hydration/*.ts # Entire directory deprecated
|
||||||
|
src/scraper-v2/*.ts # Entire directory deprecated
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Key Files Reference
|
||||||
|
|
||||||
|
### Entry Points
|
||||||
|
| File | Purpose | Status |
|
||||||
|
|------|---------|--------|
|
||||||
|
| `src/index.ts` | Main Express server | ACTIVE |
|
||||||
|
| `src/dutchie-az/services/worker.ts` | Worker process entry (older worker, still runs; use `src/tasks/task-worker.ts` for new work) | ACTIVE |
|
||||||
|
| `src/tasks/task-worker.ts` | Task worker (new system) | ACTIVE |
|
||||||
|
|
||||||
|
### Dutchie Integration
|
||||||
|
| File | Purpose | Status |
|
||||||
|
|------|---------|--------|
|
||||||
|
| `src/platforms/dutchie/client.ts` | GraphQL client, hashes, curl | **PRIMARY** |
|
||||||
|
| `src/platforms/dutchie/queries.ts` | High-level query functions | ACTIVE |
|
||||||
|
| `src/platforms/dutchie/index.ts` | Re-exports | ACTIVE |
|
||||||
|
|
||||||
|
### Task Handlers
|
||||||
|
| File | Purpose | Status |
|
||||||
|
|------|---------|--------|
|
||||||
|
| `src/tasks/handlers/payload-fetch.ts` | Fetch products from Dutchie | **PRIMARY** |
|
||||||
|
| `src/tasks/handlers/product-refresh.ts` | Process payload into DB | **PRIMARY** |
|
||||||
|
| `src/tasks/handlers/entry-point-discovery.ts` | Resolve platform IDs (auto-healing) | **PRIMARY** |
|
||||||
|
| `src/tasks/handlers/menu-detection.ts` | Detect menu type | ACTIVE |
|
||||||
|
| `src/tasks/handlers/id-resolution.ts` | Resolve platform IDs (legacy) | LEGACY |
|
||||||
|
| `src/tasks/handlers/image-download.ts` | Download product images | ACTIVE |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Transport Rules (CRITICAL)
|
||||||
|
|
||||||
|
**Browser-based (Puppeteer) is the DEFAULT transport. curl is ONLY allowed when explicitly specified.**
|
||||||
|
|
||||||
|
### Transport Selection
|
||||||
|
| `task.method` | Transport Used | Notes |
|
||||||
|
|---------------|----------------|-------|
|
||||||
|
| `null` | Browser (Puppeteer) | DEFAULT - use this for most tasks |
|
||||||
|
| `'http'` | Browser (Puppeteer) | Explicit browser request |
|
||||||
|
| `'curl'` | curl-impersonate | ONLY when explicitly needed |
|
||||||
|
|
||||||
|
### Why Browser-First?
|
||||||
|
1. **Anti-detection**: Puppeteer with StealthPlugin evades bot detection
|
||||||
|
2. **Session cookies**: Browser maintains session state automatically
|
||||||
|
3. **Fingerprinting**: Real browser fingerprint (TLS, headers, etc.)
|
||||||
|
4. **Age gates**: Browser can click through age verification
|
||||||
|
|
||||||
|
### Entry Point Discovery Auto-Healing
|
||||||
|
The `entry_point_discovery` handler uses a healing strategy:
|
||||||
|
|
||||||
|
```
|
||||||
|
1. FIRST: Check dutchie_discovery_locations for existing platform_location_id
|
||||||
|
- By linked dutchie_discovery_id
|
||||||
|
- By slug match in discovery data
|
||||||
|
→ If found, NO network call needed
|
||||||
|
|
||||||
|
2. SECOND: Browser-based GraphQL (Puppeteer)
|
||||||
|
- 5x retries for network/proxy failures
|
||||||
|
- On HTTP 403: rotate proxy and retry
|
||||||
|
- On HTTP 404 after 2 attempts: mark as 'removed'
|
||||||
|
|
||||||
|
3. HARD FAILURE: After exhausting options → 'needs_investigation'
|
||||||
|
```
|
||||||
|
|
||||||
|
### DO NOT Use curl Unless:
|
||||||
|
- Task explicitly has `method = 'curl'`
|
||||||
|
- You're testing curl-impersonate binaries
|
||||||
|
- The API explicitly requires curl fingerprinting
|
||||||
|
|
||||||
|
### Files
|
||||||
|
| File | Transport | Purpose |
|
||||||
|
|------|-----------|---------|
|
||||||
|
| `src/services/puppeteer-preflight.ts` | Browser | Preflight check |
|
||||||
|
| `src/services/curl-preflight.ts` | curl | Preflight check |
|
||||||
|
| `src/tasks/handlers/entry-point-discovery.ts` | Browser | Platform ID resolution |
|
||||||
|
| `src/tasks/handlers/payload-fetch.ts` | Both | Product fetching |
|
||||||
|
|
||||||
|
### Database
|
||||||
|
| File | Purpose | Status |
|
||||||
|
|------|---------|--------|
|
||||||
|
| `src/db/pool.ts` | Canonical DB pool | **PRIMARY** |
|
||||||
|
| `src/db/migrate.ts` | Migration runner (CLI only) | CLI ONLY |
|
||||||
|
| `src/db/auto-migrate.ts` | Auto-run migrations on startup | ACTIVE |
|
||||||
|
|
||||||
|
### Configuration
|
||||||
|
| File | Purpose | Status |
|
||||||
|
|------|---------|--------|
|
||||||
|
| `.env` | Environment variables | ACTIVE |
|
||||||
|
| `package.json` | Dependencies | ACTIVE |
|
||||||
|
| `tsconfig.json` | TypeScript config | ACTIVE |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## GraphQL Hashes (CRITICAL)
|
||||||
|
|
||||||
|
The correct hashes are in `src/platforms/dutchie/client.ts`:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
export const GRAPHQL_HASHES = {
|
||||||
|
FilteredProducts: 'ee29c060826dc41c527e470e9ae502c9b2c169720faa0a9f5d25e1b9a530a4a0',
|
||||||
|
GetAddressBasedDispensaryData: '13461f73abf7268770dfd05fe7e10c523084b2bb916a929c08efe3d87531977b',
|
||||||
|
ConsumerDispensaries: '0a5bfa6ca1d64ae47bcccb7c8077c87147cbc4e6982c17ceec97a2a4948b311b',
|
||||||
|
GetAllCitiesByState: 'ae547a0466ace5a48f91e55bf6699eacd87e3a42841560f0c0eabed5a0a920e6',
|
||||||
|
};
|
||||||
|
```
|
||||||
|
|
||||||
|
**ALWAYS** use `Status: 'Active'` for FilteredProducts (not `null` or `'All'`).
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Scripts Reference
|
||||||
|
|
||||||
|
### Useful Scripts (in `src/scripts/`)
|
||||||
|
| Script | Purpose |
|
||||||
|
|--------|---------|
|
||||||
|
| `run-discovery.ts` | Run Dutchie discovery |
|
||||||
|
| `crawl-single-store.ts` | Test crawl a single store |
|
||||||
|
| `test-dutchie-graphql.ts` | Test GraphQL queries |
|
||||||
|
|
||||||
|
### One-Off Scripts (you probably won't need these)
|
||||||
|
| Script | Purpose |
|
||||||
|
|--------|---------|
|
||||||
|
| `harmonize-az-dispensaries.ts` | One-time data cleanup |
|
||||||
|
| `bootstrap-stores-for-dispensaries.ts` | One-time migration |
|
||||||
|
| `backfill-*.ts` | Historical backfill scripts |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## API Routes
|
||||||
|
|
||||||
|
### Active Routes (in `src/routes/`)
|
||||||
|
| Route File | Mount Point | Purpose |
|
||||||
|
|------------|-------------|---------|
|
||||||
|
| `auth.ts` | `/api/auth` | Login/logout/session |
|
||||||
|
| `stores.ts` | `/api/stores` | Store CRUD |
|
||||||
|
| `dashboard.ts` | `/api/dashboard` | Dashboard stats |
|
||||||
|
| `workers.ts` | `/api/workers` | Worker monitoring |
|
||||||
|
| `pipeline.ts` | `/api/pipeline` | Crawl triggers |
|
||||||
|
| `discovery.ts` | `/api/discovery` | Discovery management |
|
||||||
|
| `analytics.ts` | `/api/analytics` | Analytics queries |
|
||||||
|
| `wordpress.ts` | `/api/v1/wordpress` | WordPress plugin API |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Documentation Files
|
||||||
|
|
||||||
|
### Current Docs (in `backend/docs/`)
|
||||||
|
| Doc | Purpose | Currency |
|
||||||
|
|-----|---------|----------|
|
||||||
|
| `TASK_WORKFLOW_2024-12-10.md` | Task system architecture | CURRENT |
|
||||||
|
| `WORKER_TASK_ARCHITECTURE.md` | Worker/task design | CURRENT |
|
||||||
|
| `CRAWL_PIPELINE.md` | Crawl pipeline overview | CURRENT |
|
||||||
|
| `ORGANIC_SCRAPING_GUIDE.md` | Browser-based scraping | CURRENT |
|
||||||
|
| `CODEBASE_MAP.md` | This file | CURRENT |
|
||||||
|
| `ANALYTICS_V2_EXAMPLES.md` | Analytics API examples | CURRENT |
|
||||||
|
| `BRAND_INTELLIGENCE_API.md` | Brand API docs | CURRENT |
|
||||||
|
|
||||||
|
### Root Docs
|
||||||
|
| Doc | Purpose | Currency |
|
||||||
|
|-----|---------|----------|
|
||||||
|
| `CLAUDE.md` | Claude instructions | **PRIMARY** |
|
||||||
|
| `README.md` | Project overview | NEEDS UPDATE |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Common Mistakes to Avoid
|
||||||
|
|
||||||
|
1. **Don't use `src/hydration/`** - It's an old approach that was superseded by the task system
|
||||||
|
|
||||||
|
2. **Don't use `src/dutchie-az/db/connection.ts`** - Use `src/db/pool.ts` instead
|
||||||
|
|
||||||
|
3. **Don't import `src/db/migrate.ts` at runtime** - It will crash. Only use for CLI migrations.
|
||||||
|
|
||||||
|
4. **Don't query `stores` table** - It's empty. Use `dispensaries`.
|
||||||
|
|
||||||
|
5. **Don't query `products` table** - It's empty. Use `store_products`.
|
||||||
|
|
||||||
|
6. **Don't use wrong GraphQL hash** - Always get hash from `GRAPHQL_HASHES` in client.ts
|
||||||
|
|
||||||
|
7. **Don't use `Status: null`** - It returns 0 products. Use `Status: 'Active'`.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## When in Doubt
|
||||||
|
|
||||||
|
1. Check if the file is imported in `src/index.ts` - if not, it may be deprecated
|
||||||
|
2. Check the last modified date - older files may be stale
|
||||||
|
3. Look for `DEPRECATED` comments in the code
|
||||||
|
4. Ask: "Is there a newer version of this in `src/tasks/` or `src/platforms/`?"
|
||||||
|
5. Read the relevant doc in `docs/` before modifying code
|
||||||
343
backend/docs/QUERY_API.md
Normal file
343
backend/docs/QUERY_API.md
Normal file
@@ -0,0 +1,343 @@
|
|||||||
|
# CannaiQ Query API
|
||||||
|
|
||||||
|
Query raw crawl payload data with flexible filters, sorting, and aggregation.
|
||||||
|
|
||||||
|
## Base URL
|
||||||
|
|
||||||
|
```
|
||||||
|
https://cannaiq.co/api/payloads
|
||||||
|
```
|
||||||
|
|
||||||
|
## Authentication
|
||||||
|
|
||||||
|
Include your API key in the header:
|
||||||
|
```
|
||||||
|
X-API-Key: your-api-key
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Endpoints
|
||||||
|
|
||||||
|
### 1. Query Products
|
||||||
|
|
||||||
|
Filter and search products from a store's latest crawl data.
|
||||||
|
|
||||||
|
```
|
||||||
|
GET /api/payloads/store/{dispensaryId}/query
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Query Parameters
|
||||||
|
|
||||||
|
| Parameter | Type | Description |
|
||||||
|
|-----------|------|-------------|
|
||||||
|
| `brand` | string | Filter by brand name (partial match) |
|
||||||
|
| `category` | string | Filter by category (flower, vape, edible, etc.) |
|
||||||
|
| `subcategory` | string | Filter by subcategory |
|
||||||
|
| `strain_type` | string | Filter by strain (indica, sativa, hybrid, cbd) |
|
||||||
|
| `in_stock` | boolean | Filter by stock status (true/false) |
|
||||||
|
| `price_min` | number | Minimum price |
|
||||||
|
| `price_max` | number | Maximum price |
|
||||||
|
| `thc_min` | number | Minimum THC percentage |
|
||||||
|
| `thc_max` | number | Maximum THC percentage |
|
||||||
|
| `search` | string | Search product name (partial match) |
|
||||||
|
| `fields` | string | Comma-separated fields to return |
|
||||||
|
| `limit` | number | Max results (default 100, max 1000) |
|
||||||
|
| `offset` | number | Skip results for pagination |
|
||||||
|
| `sort` | string | Sort by: name, price, thc, brand |
|
||||||
|
| `order` | string | Sort order: asc, desc |
|
||||||
|
|
||||||
|
#### Available Fields
|
||||||
|
|
||||||
|
When using `fields` parameter, you can request:
|
||||||
|
- `id` - Product ID
|
||||||
|
- `name` - Product name
|
||||||
|
- `brand` - Brand name
|
||||||
|
- `category` - Product category
|
||||||
|
- `subcategory` - Product subcategory
|
||||||
|
- `strain_type` - Indica/Sativa/Hybrid/CBD
|
||||||
|
- `price` - Current price
|
||||||
|
- `price_med` - Medical price
|
||||||
|
- `price_rec` - Recreational price
|
||||||
|
- `thc` - THC percentage
|
||||||
|
- `cbd` - CBD percentage
|
||||||
|
- `weight` - Product weight/size
|
||||||
|
- `status` - Stock status
|
||||||
|
- `in_stock` - Boolean in-stock flag
|
||||||
|
- `image_url` - Product image
|
||||||
|
- `description` - Product description
|
||||||
|
|
||||||
|
#### Examples
|
||||||
|
|
||||||
|
**Get all flower products under $40:**
|
||||||
|
```
|
||||||
|
GET /api/payloads/store/112/query?category=flower&price_max=40
|
||||||
|
```
|
||||||
|
|
||||||
|
**Search for "Blue Dream" with high THC:**
|
||||||
|
```
|
||||||
|
GET /api/payloads/store/112/query?search=blue+dream&thc_min=20
|
||||||
|
```
|
||||||
|
|
||||||
|
**Get only name, price, and THC for Alien Labs products:**
|
||||||
|
```
|
||||||
|
GET /api/payloads/store/112/query?brand=Alien+Labs&fields=name,price,thc
|
||||||
|
```
|
||||||
|
|
||||||
|
**Get top 10 highest THC products:**
|
||||||
|
```
|
||||||
|
GET /api/payloads/store/112/query?sort=thc&order=desc&limit=10
|
||||||
|
```
|
||||||
|
|
||||||
|
**Paginate through in-stock products:**
|
||||||
|
```
|
||||||
|
GET /api/payloads/store/112/query?in_stock=true&limit=50&offset=0
|
||||||
|
GET /api/payloads/store/112/query?in_stock=true&limit=50&offset=50
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Response
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"success": true,
|
||||||
|
"dispensaryId": 112,
|
||||||
|
"payloadId": 45,
|
||||||
|
"fetchedAt": "2025-12-11T10:30:00Z",
|
||||||
|
"query": {
|
||||||
|
"filters": {
|
||||||
|
"brand": "Alien Labs",
|
||||||
|
"category": null,
|
||||||
|
"price_max": null
|
||||||
|
},
|
||||||
|
"sort": "price",
|
||||||
|
"order": "asc",
|
||||||
|
"limit": 100,
|
||||||
|
"offset": 0
|
||||||
|
},
|
||||||
|
"pagination": {
|
||||||
|
"total": 15,
|
||||||
|
"returned": 15,
|
||||||
|
"limit": 100,
|
||||||
|
"offset": 0,
|
||||||
|
"has_more": false
|
||||||
|
},
|
||||||
|
"products": [
|
||||||
|
{
|
||||||
|
"id": "507f1f77bcf86cd799439011",
|
||||||
|
"name": "Alien Labs - Baklava 3.5g",
|
||||||
|
"brand": "Alien Labs",
|
||||||
|
"category": "flower",
|
||||||
|
"strain_type": "hybrid",
|
||||||
|
"price": 55,
|
||||||
|
"thc": "28.5",
|
||||||
|
"in_stock": true
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 2. Aggregate Data
|
||||||
|
|
||||||
|
Group products and calculate metrics.
|
||||||
|
|
||||||
|
```
|
||||||
|
GET /api/payloads/store/{dispensaryId}/aggregate
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Query Parameters
|
||||||
|
|
||||||
|
| Parameter | Type | Description |
|
||||||
|
|-----------|------|-------------|
|
||||||
|
| `group_by` | string | **Required.** Field to group by: brand, category, subcategory, strain_type |
|
||||||
|
| `metrics` | string | Comma-separated metrics (default: count) |
|
||||||
|
|
||||||
|
#### Available Metrics
|
||||||
|
|
||||||
|
- `count` - Number of products
|
||||||
|
- `avg_price` - Average price
|
||||||
|
- `min_price` - Lowest price
|
||||||
|
- `max_price` - Highest price
|
||||||
|
- `avg_thc` - Average THC percentage
|
||||||
|
- `in_stock_count` - Number of in-stock products
|
||||||
|
|
||||||
|
#### Examples
|
||||||
|
|
||||||
|
**Count products by brand:**
|
||||||
|
```
|
||||||
|
GET /api/payloads/store/112/aggregate?group_by=brand
|
||||||
|
```
|
||||||
|
|
||||||
|
**Get price stats by category:**
|
||||||
|
```
|
||||||
|
GET /api/payloads/store/112/aggregate?group_by=category&metrics=count,avg_price,min_price,max_price
|
||||||
|
```
|
||||||
|
|
||||||
|
**Get THC averages by strain type:**
|
||||||
|
```
|
||||||
|
GET /api/payloads/store/112/aggregate?group_by=strain_type&metrics=count,avg_thc
|
||||||
|
```
|
||||||
|
|
||||||
|
**Brand analysis with stock info:**
|
||||||
|
```
|
||||||
|
GET /api/payloads/store/112/aggregate?group_by=brand&metrics=count,avg_price,in_stock_count
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Response
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"success": true,
|
||||||
|
"dispensaryId": 112,
|
||||||
|
"payloadId": 45,
|
||||||
|
"fetchedAt": "2025-12-11T10:30:00Z",
|
||||||
|
"groupBy": "brand",
|
||||||
|
"metrics": ["count", "avg_price"],
|
||||||
|
"totalProducts": 450,
|
||||||
|
"groupCount": 85,
|
||||||
|
"aggregations": [
|
||||||
|
{
|
||||||
|
"brand": "Alien Labs",
|
||||||
|
"count": 15,
|
||||||
|
"avg_price": 52.33
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"brand": "Connected",
|
||||||
|
"count": 12,
|
||||||
|
"avg_price": 48.50
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 3. Compare Stores (Price Comparison)
|
||||||
|
|
||||||
|
Query the same data from multiple stores and compare in your app:
|
||||||
|
|
||||||
|
```javascript
|
||||||
|
// Get flower prices from Store A
|
||||||
|
const storeA = await fetch('/api/payloads/store/112/query?category=flower&fields=name,brand,price');
|
||||||
|
|
||||||
|
// Get flower prices from Store B
|
||||||
|
const storeB = await fetch('/api/payloads/store/115/query?category=flower&fields=name,brand,price');
|
||||||
|
|
||||||
|
// Compare in your app
|
||||||
|
const dataA = await storeA.json();
|
||||||
|
const dataB = await storeB.json();
|
||||||
|
|
||||||
|
// Find matching products and compare prices
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 4. Price History
|
||||||
|
|
||||||
|
For historical price data, use the snapshots endpoint:
|
||||||
|
|
||||||
|
```
|
||||||
|
GET /api/v1/products/{productId}/history?days=30
|
||||||
|
```
|
||||||
|
|
||||||
|
Or compare payloads over time:
|
||||||
|
|
||||||
|
```
|
||||||
|
GET /api/payloads/store/{dispensaryId}/diff?from={payloadId1}&to={payloadId2}
|
||||||
|
```
|
||||||
|
|
||||||
|
The diff endpoint shows:
|
||||||
|
- Products added
|
||||||
|
- Products removed
|
||||||
|
- Price changes
|
||||||
|
- Stock changes
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 5. List Stores
|
||||||
|
|
||||||
|
Get available dispensaries to query:
|
||||||
|
|
||||||
|
```
|
||||||
|
GET /api/stores
|
||||||
|
```
|
||||||
|
|
||||||
|
Returns all stores with their IDs, names, and locations.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Use Cases
|
||||||
|
|
||||||
|
### Price Comparison App
|
||||||
|
|
||||||
|
```javascript
|
||||||
|
// 1. Get stores in Arizona
|
||||||
|
const stores = await fetch('/api/stores?state=AZ').then(r => r.json());
|
||||||
|
|
||||||
|
// 2. Query flower prices from each store
|
||||||
|
const prices = await Promise.all(
|
||||||
|
stores.map(store =>
|
||||||
|
fetch(`/api/payloads/store/${store.id}/query?category=flower&fields=name,brand,price`)
|
||||||
|
.then(r => r.json())
|
||||||
|
)
|
||||||
|
);
|
||||||
|
|
||||||
|
// 3. Build comparison matrix in your app
|
||||||
|
```
|
||||||
|
|
||||||
|
### Brand Analytics Dashboard
|
||||||
|
|
||||||
|
```javascript
|
||||||
|
// Get brand presence across stores
|
||||||
|
const brandData = await Promise.all(
|
||||||
|
storeIds.map(id =>
|
||||||
|
fetch(`/api/payloads/store/${id}/aggregate?group_by=brand&metrics=count,avg_price`)
|
||||||
|
.then(r => r.json())
|
||||||
|
)
|
||||||
|
);
|
||||||
|
|
||||||
|
// Aggregate brand presence across all stores
|
||||||
|
```
|
||||||
|
|
||||||
|
### Deal Finder
|
||||||
|
|
||||||
|
```javascript
|
||||||
|
// Find high-THC flower under $30
|
||||||
|
const deals = await fetch(
|
||||||
|
'/api/payloads/store/112/query?category=flower&price_max=30&thc_min=20&in_stock=true&sort=thc&order=desc'
|
||||||
|
).then(r => r.json());
|
||||||
|
```
|
||||||
|
|
||||||
|
### Inventory Tracker
|
||||||
|
|
||||||
|
```javascript
|
||||||
|
// Get products that went out of stock
|
||||||
|
const diff = await fetch('/api/payloads/store/112/diff').then(r => r.json());
|
||||||
|
|
||||||
|
const outOfStock = diff.details.stockChanges.filter(
|
||||||
|
p => p.newStatus !== 'Active'
|
||||||
|
);
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Rate Limits
|
||||||
|
|
||||||
|
- Default: 100 requests/minute per API key
|
||||||
|
- Contact support for higher limits
|
||||||
|
|
||||||
|
## Error Responses
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"success": false,
|
||||||
|
"error": "Error message here"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Common errors:
|
||||||
|
- `404` - Store or payload not found
|
||||||
|
- `400` - Missing required parameter
|
||||||
|
- `401` - Invalid or missing API key
|
||||||
|
- `429` - Rate limit exceeded
|
||||||
297
backend/docs/_archive/ORGANIC_SCRAPING_GUIDE.md
Normal file
297
backend/docs/_archive/ORGANIC_SCRAPING_GUIDE.md
Normal file
@@ -0,0 +1,297 @@
|
|||||||
|
# Organic Browser-Based Scraping Guide
|
||||||
|
|
||||||
|
**Last Updated:** 2025-12-12
|
||||||
|
**Status:** Production-ready proof of concept
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
|
||||||
|
This document describes the "organic" browser-based approach to scraping Dutchie dispensary menus. Unlike direct curl/axios requests, this method uses a real browser session to make API calls, making requests appear natural and reducing detection risk.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Why Organic Scraping?
|
||||||
|
|
||||||
|
| Approach | Detection Risk | Speed | Complexity |
|
||||||
|
|----------|---------------|-------|------------|
|
||||||
|
| Direct curl | Higher | Fast | Low |
|
||||||
|
| curl-impersonate | Medium | Fast | Medium |
|
||||||
|
| **Browser-based (organic)** | **Lowest** | Slower | Higher |
|
||||||
|
|
||||||
|
Direct curl requests can be fingerprinted via:
|
||||||
|
- TLS fingerprint (cipher suites, extensions)
|
||||||
|
- Header order and values
|
||||||
|
- Missing cookies/session data
|
||||||
|
- Request patterns
|
||||||
|
|
||||||
|
Browser-based requests inherit:
|
||||||
|
- Real Chrome TLS fingerprint
|
||||||
|
- Session cookies from page visit
|
||||||
|
- Natural header order
|
||||||
|
- JavaScript execution environment
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Implementation
|
||||||
|
|
||||||
|
### Dependencies
|
||||||
|
|
||||||
|
```bash
|
||||||
|
npm install puppeteer puppeteer-extra puppeteer-extra-plugin-stealth
|
||||||
|
```
|
||||||
|
|
||||||
|
### Core Script: `test-intercept.js`
|
||||||
|
|
||||||
|
Located at: `backend/test-intercept.js`
|
||||||
|
|
||||||
|
```javascript
|
||||||
|
const puppeteer = require('puppeteer-extra');
|
||||||
|
const StealthPlugin = require('puppeteer-extra-plugin-stealth');
|
||||||
|
const fs = require('fs');
|
||||||
|
|
||||||
|
puppeteer.use(StealthPlugin());
|
||||||
|
|
||||||
|
async function capturePayload(config) {
|
||||||
|
const { dispensaryId, platformId, cName, outputPath } = config;
|
||||||
|
|
||||||
|
const browser = await puppeteer.launch({
|
||||||
|
headless: 'new',
|
||||||
|
args: ['--no-sandbox', '--disable-setuid-sandbox']
|
||||||
|
});
|
||||||
|
|
||||||
|
const page = await browser.newPage();
|
||||||
|
|
||||||
|
// STEP 1: Establish session by visiting the menu
|
||||||
|
const embedUrl = `https://dutchie.com/embedded-menu/${cName}?menuType=rec`;
|
||||||
|
await page.goto(embedUrl, { waitUntil: 'networkidle2', timeout: 60000 });
|
||||||
|
|
||||||
|
// STEP 2: Fetch ALL products using GraphQL from browser context
|
||||||
|
const result = await page.evaluate(async (platformId) => {
|
||||||
|
const allProducts = [];
|
||||||
|
let pageNum = 0;
|
||||||
|
const perPage = 100;
|
||||||
|
let totalCount = 0;
|
||||||
|
const sessionId = 'browser-session-' + Date.now();
|
||||||
|
|
||||||
|
while (pageNum < 30) {
|
||||||
|
const variables = {
|
||||||
|
includeEnterpriseSpecials: false,
|
||||||
|
productsFilter: {
|
||||||
|
dispensaryId: platformId,
|
||||||
|
pricingType: 'rec',
|
||||||
|
Status: 'Active', // CRITICAL: Must be 'Active', not null
|
||||||
|
types: [],
|
||||||
|
useCache: true,
|
||||||
|
isDefaultSort: true,
|
||||||
|
sortBy: 'popularSortIdx',
|
||||||
|
sortDirection: 1,
|
||||||
|
bypassOnlineThresholds: true,
|
||||||
|
isKioskMenu: false,
|
||||||
|
removeProductsBelowOptionThresholds: false,
|
||||||
|
},
|
||||||
|
page: pageNum,
|
||||||
|
perPage: perPage,
|
||||||
|
};
|
||||||
|
|
||||||
|
const extensions = {
|
||||||
|
persistedQuery: {
|
||||||
|
version: 1,
|
||||||
|
sha256Hash: 'ee29c060826dc41c527e470e9ae502c9b2c169720faa0a9f5d25e1b9a530a4a0'
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
const qs = new URLSearchParams({
|
||||||
|
operationName: 'FilteredProducts',
|
||||||
|
variables: JSON.stringify(variables),
|
||||||
|
extensions: JSON.stringify(extensions)
|
||||||
|
});
|
||||||
|
|
||||||
|
const response = await fetch(`https://dutchie.com/api-3/graphql?${qs}`, {
|
||||||
|
method: 'GET',
|
||||||
|
headers: {
|
||||||
|
'Accept': 'application/json',
|
||||||
|
'content-type': 'application/json',
|
||||||
|
'x-dutchie-session': sessionId,
|
||||||
|
'apollographql-client-name': 'Marketplace (production)',
|
||||||
|
},
|
||||||
|
credentials: 'include'
|
||||||
|
});
|
||||||
|
|
||||||
|
const json = await response.json();
|
||||||
|
const data = json?.data?.filteredProducts;
|
||||||
|
if (!data?.products) break;
|
||||||
|
|
||||||
|
allProducts.push(...data.products);
|
||||||
|
if (pageNum === 0) totalCount = data.queryInfo?.totalCount || 0;
|
||||||
|
if (allProducts.length >= totalCount) break;
|
||||||
|
|
||||||
|
pageNum++;
|
||||||
|
await new Promise(r => setTimeout(r, 200)); // Polite delay
|
||||||
|
}
|
||||||
|
|
||||||
|
return { products: allProducts, totalCount };
|
||||||
|
}, platformId);
|
||||||
|
|
||||||
|
await browser.close();
|
||||||
|
|
||||||
|
// STEP 3: Save payload
|
||||||
|
const payload = {
|
||||||
|
dispensaryId,
|
||||||
|
platformId,
|
||||||
|
cName,
|
||||||
|
fetchedAt: new Date().toISOString(),
|
||||||
|
productCount: result.products.length,
|
||||||
|
products: result.products,
|
||||||
|
};
|
||||||
|
|
||||||
|
fs.writeFileSync(outputPath, JSON.stringify(payload, null, 2));
|
||||||
|
return payload;
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Critical Parameters
|
||||||
|
|
||||||
|
### GraphQL Hash (FilteredProducts)
|
||||||
|
|
||||||
|
```
|
||||||
|
ee29c060826dc41c527e470e9ae502c9b2c169720faa0a9f5d25e1b9a530a4a0
|
||||||
|
```
|
||||||
|
|
||||||
|
**WARNING:** Using the wrong hash returns HTTP 400.
|
||||||
|
|
||||||
|
### Status Parameter
|
||||||
|
|
||||||
|
| Value | Result |
|
||||||
|
|-------|--------|
|
||||||
|
| `'Active'` | Returns in-stock products (1019 in test) |
|
||||||
|
| `null` | Returns 0 products |
|
||||||
|
| `'All'` | Returns HTTP 400 |
|
||||||
|
|
||||||
|
**ALWAYS use `Status: 'Active'`**
|
||||||
|
|
||||||
|
### Required Headers
|
||||||
|
|
||||||
|
```javascript
|
||||||
|
{
|
||||||
|
'Accept': 'application/json',
|
||||||
|
'content-type': 'application/json',
|
||||||
|
'x-dutchie-session': 'unique-session-id',
|
||||||
|
'apollographql-client-name': 'Marketplace (production)',
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Endpoint
|
||||||
|
|
||||||
|
```
|
||||||
|
https://dutchie.com/api-3/graphql
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Performance Benchmarks
|
||||||
|
|
||||||
|
Test store: AZ-Deeply-Rooted (1019 products)
|
||||||
|
|
||||||
|
| Metric | Value |
|
||||||
|
|--------|-------|
|
||||||
|
| Total products | 1019 |
|
||||||
|
| Time | 18.5 seconds |
|
||||||
|
| Payload size | 11.8 MB |
|
||||||
|
| Pages fetched | 11 (100 per page) |
|
||||||
|
| Success rate | 100% |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Payload Format
|
||||||
|
|
||||||
|
The output matches the existing `payload-fetch.ts` handler format:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"dispensaryId": 123,
|
||||||
|
"platformId": "6405ef617056e8014d79101b",
|
||||||
|
"cName": "AZ-Deeply-Rooted",
|
||||||
|
"fetchedAt": "2025-12-12T05:05:19.837Z",
|
||||||
|
"productCount": 1019,
|
||||||
|
"products": [
|
||||||
|
{
|
||||||
|
"id": "6927508db4851262f629a869",
|
||||||
|
"Name": "Product Name",
|
||||||
|
"brand": { "name": "Brand Name", ... },
|
||||||
|
"type": "Flower",
|
||||||
|
"THC": "25%",
|
||||||
|
"Prices": [...],
|
||||||
|
"Options": [...],
|
||||||
|
...
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Integration Points
|
||||||
|
|
||||||
|
### As a Task Handler
|
||||||
|
|
||||||
|
The organic approach can be integrated as an alternative to curl-based fetching:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
// In src/tasks/handlers/organic-payload-fetch.ts
|
||||||
|
export async function handleOrganicPayloadFetch(ctx: TaskContext): Promise<TaskResult> {
|
||||||
|
// Use puppeteer-based capture
|
||||||
|
// Save to same payload storage
|
||||||
|
// Queue product_refresh task
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Worker Configuration
|
||||||
|
|
||||||
|
Add to job_schedules:
|
||||||
|
```sql
|
||||||
|
INSERT INTO job_schedules (name, role, cron_expression)
|
||||||
|
VALUES ('organic_product_crawl', 'organic_payload_fetch', '0 */6 * * *');
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Troubleshooting
|
||||||
|
|
||||||
|
### HTTP 400 Bad Request
|
||||||
|
- Check hash is correct: `ee29c060...`
|
||||||
|
- Verify Status is the string `'Active'` - `'All'` returns HTTP 400 (`null` does not error, but returns 0 products)
|
||||||
|
|
||||||
|
### 0 Products Returned
|
||||||
|
- Status was likely `null` - use `'Active'` (note: `'All'` fails with HTTP 400 rather than returning 0 products)
|
||||||
|
- Check platformId is valid MongoDB ObjectId
|
||||||
|
|
||||||
|
### Session Not Established
|
||||||
|
- Increase timeout on initial page.goto()
|
||||||
|
- Check cName is valid (matches embedded-menu URL)
|
||||||
|
|
||||||
|
### Detection/Blocking
|
||||||
|
- StealthPlugin should handle most cases
|
||||||
|
- Add random delays between pages
|
||||||
|
- Use headless: 'new' (not true/false)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Files Reference
|
||||||
|
|
||||||
|
| File | Purpose |
|
||||||
|
|------|---------|
|
||||||
|
| `backend/test-intercept.js` | Proof of concept script |
|
||||||
|
| `backend/src/platforms/dutchie/client.ts` | GraphQL hashes, curl implementation |
|
||||||
|
| `backend/src/tasks/handlers/payload-fetch.ts` | Current curl-based handler |
|
||||||
|
| `backend/src/utils/payload-storage.ts` | Payload save/load utilities |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## See Also
|
||||||
|
|
||||||
|
- `DUTCHIE_CRAWL_WORKFLOW.md` - Full crawl pipeline documentation
|
||||||
|
- `TASK_WORKFLOW_2024-12-10.md` - Task system architecture
|
||||||
|
- `CLAUDE.md` - Project rules and constraints
|
||||||
25
backend/docs/_archive/README.md
Normal file
25
backend/docs/_archive/README.md
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
# ARCHIVED DOCUMENTATION
|
||||||
|
|
||||||
|
**WARNING: These docs may be outdated or inaccurate.**
|
||||||
|
|
||||||
|
The code has evolved significantly. These docs are kept for historical reference only.
|
||||||
|
|
||||||
|
## What to Use Instead
|
||||||
|
|
||||||
|
**The single source of truth is:**
|
||||||
|
- `CLAUDE.md` (root) - Essential rules and quick reference
|
||||||
|
- `docs/CODEBASE_MAP.md` - Current file/directory reference
|
||||||
|
|
||||||
|
## Why Archive?
|
||||||
|
|
||||||
|
These docs were written during development iterations and may reference:
|
||||||
|
- Old file paths that no longer exist
|
||||||
|
- Deprecated approaches (hydration, scraper-v2)
|
||||||
|
- APIs that have changed
|
||||||
|
- Database schemas that evolved
|
||||||
|
|
||||||
|
## If You Need Details
|
||||||
|
|
||||||
|
1. First check CODEBASE_MAP.md for current file locations
|
||||||
|
2. Then read the actual source code
|
||||||
|
3. Only use archive docs as a last resort for historical context
|
||||||
@@ -504,6 +504,103 @@ The Workers Dashboard shows:
|
|||||||
| `src/routes/worker-registry.ts:148-195` | Heartbeat endpoint handling |
|
| `src/routes/worker-registry.ts:148-195` | Heartbeat endpoint handling |
|
||||||
| `cannaiq/src/pages/WorkersDashboard.tsx:233-305` | UI components for resources |
|
| `cannaiq/src/pages/WorkersDashboard.tsx:233-305` | UI components for resources |
|
||||||
|
|
||||||
|
## Browser Task Memory Limits (Updated 2025-12)
|
||||||
|
|
||||||
|
Browser-based tasks (Puppeteer/Chrome) have strict memory constraints that limit concurrency.
|
||||||
|
|
||||||
|
### Why Browser Tasks Are Different
|
||||||
|
|
||||||
|
Each browser task launches a Chrome process. Unlike I/O-bound API calls, browsers consume significant RAM:
|
||||||
|
|
||||||
|
| Component | RAM Usage |
|
||||||
|
|-----------|-----------|
|
||||||
|
| Node.js runtime | ~150 MB |
|
||||||
|
| Chrome browser (base) | ~200-250 MB |
|
||||||
|
| Dutchie menu page (loaded) | ~100-150 MB |
|
||||||
|
| **Per browser total** | **~350-450 MB** |
|
||||||
|
|
||||||
|
### Memory Math for Pod Limits
|
||||||
|
|
||||||
|
```
|
||||||
|
Pod memory limit: 2 GB (2000 MB)
|
||||||
|
Node.js runtime: -150 MB
|
||||||
|
Safety buffer: -100 MB
|
||||||
|
────────────────────────────────
|
||||||
|
Available for browsers: 1750 MB
|
||||||
|
|
||||||
|
Per browser + page: ~400 MB
|
||||||
|
|
||||||
|
Max browsers: 1750 ÷ 400 = ~4 browsers
|
||||||
|
|
||||||
|
Recommended: 3 browsers (leaves headroom for spikes)
|
||||||
|
```
|
||||||
|
|
||||||
|
### MAX_CONCURRENT_TASKS for Browser Tasks
|
||||||
|
|
||||||
|
| Browsers per Pod | RAM Used | Risk Level |
|
||||||
|
|------------------|----------|------------|
|
||||||
|
| 1 | ~500 MB | Very safe |
|
||||||
|
| 2 | ~900 MB | Safe |
|
||||||
|
| **3** | **~1.3 GB** | **Recommended** |
|
||||||
|
| 4 | ~1.7 GB | Tight (may OOM) |
|
||||||
|
| 5+ | >2 GB | Will OOM crash |
|
||||||
|
|
||||||
|
**CRITICAL**: `MAX_CONCURRENT_TASKS=3` is the maximum safe value for browser tasks with current pod limits.
|
||||||
|
|
||||||
|
### Scaling Strategy
|
||||||
|
|
||||||
|
Scale **horizontally** (more pods) rather than vertically (more concurrency per pod):
|
||||||
|
|
||||||
|
```
|
||||||
|
┌─────────────────────────────────────────────────────────────────────────┐
|
||||||
|
│ Cluster: 8 pods × 3 browsers = 24 concurrent tasks │
|
||||||
|
│ │
|
||||||
|
│ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │
|
||||||
|
│ │ Pod 0 │ │ Pod 1 │ │ Pod 2 │ │ Pod 3 │ │
|
||||||
|
│ │ 3 browsers │ │ 3 browsers │ │ 3 browsers │ │ 3 browsers │ │
|
||||||
|
│ └─────────────┘ └─────────────┘ └─────────────┘ └─────────────┘ │
|
||||||
|
│ │
|
||||||
|
│ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │
|
||||||
|
│ │ Pod 4 │ │ Pod 5 │ │ Pod 6 │ │ Pod 7 │ │
|
||||||
|
│ │ 3 browsers │ │ 3 browsers │ │ 3 browsers │ │ 3 browsers │ │
|
||||||
|
│ └─────────────┘ └─────────────┘ └─────────────┘ └─────────────┘ │
|
||||||
|
└─────────────────────────────────────────────────────────────────────────┘
|
||||||
|
```
|
||||||
|
|
||||||
|
### Browser Lifecycle Per Task
|
||||||
|
|
||||||
|
Each task gets a fresh browser with fresh IP/identity:
|
||||||
|
|
||||||
|
```
|
||||||
|
1. Claim task from queue
|
||||||
|
2. Get fresh proxy from pool
|
||||||
|
3. Launch browser with proxy
|
||||||
|
4. Run preflight (verify IP)
|
||||||
|
5. Execute scrape
|
||||||
|
6. Close browser
|
||||||
|
7. Repeat
|
||||||
|
```
|
||||||
|
|
||||||
|
This ensures:
|
||||||
|
- Fresh IP per task (proxy rotation)
|
||||||
|
- Fresh fingerprint per task (UA rotation)
|
||||||
|
- No cookie/session bleed between tasks
|
||||||
|
- Predictable memory usage
|
||||||
|
|
||||||
|
### Increasing Capacity
|
||||||
|
|
||||||
|
To handle more concurrent tasks:
|
||||||
|
|
||||||
|
1. **Add more pods** (up to 8 per CLAUDE.md limit)
|
||||||
|
2. **Increase pod memory** (allows 4 browsers per pod):
|
||||||
|
```yaml
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
memory: "2.5Gi" # from 2Gi
|
||||||
|
```
|
||||||
|
|
||||||
|
**DO NOT** simply increase `MAX_CONCURRENT_TASKS` without also increasing pod memory limits.
|
||||||
|
|
||||||
## Monitoring
|
## Monitoring
|
||||||
|
|
||||||
### Logs
|
### Logs
|
||||||
77
backend/k8s/scraper-worker-statefulset.yaml
Normal file
77
backend/k8s/scraper-worker-statefulset.yaml
Normal file
@@ -0,0 +1,77 @@
|
|||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: scraper-worker
|
||||||
|
namespace: cannaiq
|
||||||
|
labels:
|
||||||
|
app: scraper-worker
|
||||||
|
spec:
|
||||||
|
clusterIP: None # Headless service required for StatefulSet
|
||||||
|
selector:
|
||||||
|
app: scraper-worker
|
||||||
|
ports:
|
||||||
|
- port: 3010
|
||||||
|
name: http
|
||||||
|
---
|
||||||
|
# StatefulSet running the task worker (node dist/tasks/task-worker.js).
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: scraper-worker
  namespace: cannaiq
spec:
  serviceName: scraper-worker
  replicas: 8
  podManagementPolicy: Parallel  # Start all pods at once
  updateStrategy:
    type: OnDelete  # Pods only update when manually deleted - no automatic restarts
  selector:
    matchLabels:
      app: scraper-worker
  template:
    metadata:
      labels:
        app: scraper-worker
    spec:
      terminationGracePeriodSeconds: 60
      imagePullSecrets:
        - name: regcred
      containers:
        - name: worker
          image: git.spdy.io/creationshop/cannaiq:latest
          imagePullPolicy: Always
          command: ["node"]
          args: ["dist/tasks/task-worker.js"]
          env:
            - name: WORKER_MODE
              value: "true"
            # Pod name injected through the downward API.
            - name: POD_NAME
              valueFrom:
                fieldRef:
                  fieldPath: metadata.name
            # NOTE(review): 50 concurrent tasks against a 2Gi memory limit looks
            # high if each task launches its own browser - confirm this is intended.
            - name: MAX_CONCURRENT_TASKS
              value: "50"
            - name: API_BASE_URL
              value: http://scraper
            - name: NODE_OPTIONS
              value: --max-old-space-size=1500
          envFrom:
            - configMapRef:
                name: scraper-config
            - secretRef:
                name: scraper-secrets
          resources:
            requests:
              cpu: 100m
              memory: 1Gi
            limits:
              cpu: 500m
              memory: 2Gi
          livenessProbe:
            exec:
              command:
                - /bin/sh
                - -c
                # The [t] bracket keeps the pattern from matching the probe's own
                # "sh -c ..." command line, which contains the pattern text;
                # without it the probe can succeed after the worker has exited.
                - pgrep -f '[t]ask-worker' > /dev/null
            initialDelaySeconds: 10
            periodSeconds: 30
            failureThreshold: 3
|
||||||
88
backend/migrations/083_discovery_runs.sql
Normal file
88
backend/migrations/083_discovery_runs.sql
Normal file
@@ -0,0 +1,88 @@
|
|||||||
|
-- Migration 083: Discovery Run Tracking
|
||||||
|
-- Tracks progress of store discovery runs step-by-step
|
||||||
|
|
||||||
|
-- discovery_runs: one row per discovery run, carrying run-level totals.
CREATE TABLE IF NOT EXISTS discovery_runs (
    id                    serial        PRIMARY KEY,
    platform              varchar(50)   NOT NULL DEFAULT 'dutchie',
    -- Expected values (no CHECK constraint): running, completed, failed
    status                varchar(20)   NOT NULL DEFAULT 'running',
    started_at            timestamptz   NOT NULL DEFAULT now(),
    finished_at           timestamptz,
    task_id               int           REFERENCES worker_task_queue (id),

    -- Run totals
    states_total          int           DEFAULT 0,
    states_completed      int           DEFAULT 0,
    locations_discovered  int           DEFAULT 0,
    locations_promoted    int           DEFAULT 0,
    new_store_ids         int[]         DEFAULT '{}',

    -- Failure details
    error_message         text,

    created_at            timestamptz   NOT NULL DEFAULT now()
);
|
||||||
|
|
||||||
|
-- discovery_run_states: progress of a single state inside a run.
-- Rows are removed together with their parent run (ON DELETE CASCADE).
CREATE TABLE IF NOT EXISTS discovery_run_states (
    id                  serial        PRIMARY KEY,
    run_id              int           NOT NULL REFERENCES discovery_runs (id) ON DELETE CASCADE,
    state_code          varchar(2)    NOT NULL,
    -- Expected values (no CHECK constraint): pending, running, completed, failed
    status              varchar(20)   NOT NULL DEFAULT 'pending',
    started_at          timestamptz,
    finished_at         timestamptz,

    -- Per-state results
    cities_found        int           DEFAULT 0,
    locations_found     int           DEFAULT 0,
    locations_upserted  int           DEFAULT 0,
    new_dispensary_ids  int[]         DEFAULT '{}',

    -- Failure details
    error_message       text,

    created_at          timestamptz   NOT NULL DEFAULT now(),

    -- At most one row per state within a run
    UNIQUE (run_id, state_code)
);
|
||||||
|
|
||||||
|
-- discovery_run_steps: fine-grained step log for a run.
-- state_code is nullable, so a step does not have to belong to a state.
CREATE TABLE IF NOT EXISTS discovery_run_steps (
    id           serial         PRIMARY KEY,
    run_id       int            NOT NULL REFERENCES discovery_runs (id) ON DELETE CASCADE,
    state_code   varchar(2),
    step_name    varchar(100)   NOT NULL,
    -- Expected values (no CHECK constraint): started, completed, failed
    status       varchar(20)    NOT NULL DEFAULT 'started',
    started_at   timestamptz    NOT NULL DEFAULT now(),
    finished_at  timestamptz,

    -- Free-form step details
    details      jsonb          DEFAULT '{}',

    created_at   timestamptz    NOT NULL DEFAULT now()
);
|
||||||
|
|
||||||
|
-- Lookup indexes on discovery_runs (status, platform, newest-first start time)
CREATE INDEX IF NOT EXISTS idx_discovery_runs_status
    ON discovery_runs (status);
CREATE INDEX IF NOT EXISTS idx_discovery_runs_platform
    ON discovery_runs (platform);
CREATE INDEX IF NOT EXISTS idx_discovery_runs_started_at
    ON discovery_runs (started_at DESC);

-- Child-table indexes on the run foreign key
CREATE INDEX IF NOT EXISTS idx_discovery_run_states_run_id
    ON discovery_run_states (run_id);
CREATE INDEX IF NOT EXISTS idx_discovery_run_steps_run_id
    ON discovery_run_steps (run_id);
|
||||||
|
|
||||||
|
-- View: the most recently started run for each platform.
-- DISTINCT ON (platform) combined with ORDER BY platform, started_at DESC
-- keeps only the newest row per platform.
CREATE OR REPLACE VIEW v_latest_discovery_runs AS
SELECT DISTINCT ON (platform)
    id,
    platform,
    status,
    started_at,
    finished_at,
    states_total,
    states_completed,
    locations_discovered,
    locations_promoted,
    -- array_length() yields NULL for an empty (or NULL) array; report 0 instead
    COALESCE(array_length(new_store_ids, 1), 0) AS new_stores_count,
    error_message,
    -- Runs that have not finished are measured up to the current time
    EXTRACT(EPOCH FROM (COALESCE(finished_at, NOW()) - started_at)) AS duration_seconds
FROM discovery_runs
ORDER BY platform, started_at DESC;
|
||||||
253
backend/migrations/084_dual_transport_preflight.sql
Normal file
253
backend/migrations/084_dual_transport_preflight.sql
Normal file
@@ -0,0 +1,253 @@
|
|||||||
|
-- Migration 084: Dual Transport Preflight System
|
||||||
|
-- Workers run both curl and http (Puppeteer) preflights on startup
|
||||||
|
-- Tasks can require a specific transport method
|
||||||
|
|
||||||
|
-- ===================================================================
|
||||||
|
-- PART 1: Add preflight columns to worker_registry
|
||||||
|
-- ===================================================================
|
||||||
|
|
||||||
|
-- Per-transport preflight bookkeeping on worker_registry.
--   curl = curl/axios transport (proxy-based)
--   http = http/Puppeteer transport (browser-based)
ALTER TABLE worker_registry
    -- status of each preflight
    ADD COLUMN IF NOT EXISTS preflight_curl_status VARCHAR(20) DEFAULT 'pending',
    ADD COLUMN IF NOT EXISTS preflight_http_status VARCHAR(20) DEFAULT 'pending',
    -- when each preflight completed
    ADD COLUMN IF NOT EXISTS preflight_curl_at TIMESTAMPTZ,
    ADD COLUMN IF NOT EXISTS preflight_http_at TIMESTAMPTZ,
    -- error text for failed preflights
    ADD COLUMN IF NOT EXISTS preflight_curl_error TEXT,
    ADD COLUMN IF NOT EXISTS preflight_http_error TEXT,
    -- response time of successful preflights (ms)
    ADD COLUMN IF NOT EXISTS preflight_curl_ms INTEGER,
    ADD COLUMN IF NOT EXISTS preflight_http_ms INTEGER;

-- Allowed status values; each constraint is dropped and re-added so the
-- statement can be run again.
ALTER TABLE worker_registry
    DROP CONSTRAINT IF EXISTS valid_preflight_curl_status,
    ADD CONSTRAINT valid_preflight_curl_status
        CHECK (preflight_curl_status IN ('pending', 'passed', 'failed', 'skipped'));

ALTER TABLE worker_registry
    DROP CONSTRAINT IF EXISTS valid_preflight_http_status,
    ADD CONSTRAINT valid_preflight_http_status
        CHECK (preflight_http_status IN ('pending', 'passed', 'failed', 'skipped'));
|
||||||
|
|
||||||
|
-- ===================================================================
|
||||||
|
-- PART 2: Add method column to worker_tasks
|
||||||
|
-- ===================================================================
|
||||||
|
|
||||||
|
-- worker_tasks.method: transport a task requires.
--   NULL   = no preference (any worker can claim)
--   'curl' = curl/axios transport (proxy-based, fast)
--   'http' = http/Puppeteer transport (browser-based, anti-detect)
ALTER TABLE worker_tasks
    ADD COLUMN IF NOT EXISTS method VARCHAR(10);

-- Restrict method to the known transports (re-runnable: drop, then add)
ALTER TABLE worker_tasks
    DROP CONSTRAINT IF EXISTS valid_task_method,
    ADD CONSTRAINT valid_task_method
        CHECK (method IS NULL OR method IN ('curl', 'http'));

-- Partial index covering pending tasks only, for method-based claiming
CREATE INDEX IF NOT EXISTS idx_worker_tasks_method
    ON worker_tasks (method)
    WHERE status = 'pending';

-- Backfill: every existing row without a method (whatever its status)
-- becomes 'http', since all current tasks need the browser-based transport.
UPDATE worker_tasks
   SET method = 'http'
 WHERE method IS NULL;
|
||||||
|
|
||||||
|
-- ===================================================================
|
||||||
|
-- PART 3: Update claim_task function for method compatibility
|
||||||
|
-- ===================================================================
|
||||||
|
|
||||||
|
-- claim_task: atomically claim the next eligible pending task for a worker.
--
-- Parameters:
--   p_role         role of tasks to claim
--   p_worker_id    id recorded on the claimed task
--   p_curl_passed  worker passed the curl preflight (default TRUE)
--   p_http_passed  worker passed the http preflight (default FALSE)
-- Returns the claimed worker_tasks row, or a NULL row when nothing is eligible.
--
-- CREATE OR REPLACE only replaces a function with the identical argument
-- list; with a different list it adds an overload instead. If an earlier
-- two-argument claim_task exists (presumably from a prior migration - TODO
-- confirm), two-argument calls would become ambiguous, so drop it first.
-- The statement is a no-op when no such function exists.
DROP FUNCTION IF EXISTS claim_task(VARCHAR, VARCHAR);

CREATE OR REPLACE FUNCTION claim_task(
    p_role VARCHAR(50),
    p_worker_id VARCHAR(100),
    p_curl_passed BOOLEAN DEFAULT TRUE,
    p_http_passed BOOLEAN DEFAULT FALSE
) RETURNS worker_tasks AS $$
DECLARE
    claimed_task worker_tasks;
BEGIN
    UPDATE worker_tasks
    SET
        status = 'claimed',
        worker_id = p_worker_id,
        claimed_at = NOW(),
        updated_at = NOW()
    WHERE id = (
        SELECT id FROM worker_tasks
        WHERE role = p_role
          AND status = 'pending'
          AND (scheduled_for IS NULL OR scheduled_for <= NOW())
          -- Method compatibility: worker must have passed the required preflight
          AND (
              method IS NULL  -- No preference, any worker can claim
              OR (method = 'curl' AND p_curl_passed = TRUE)
              OR (method = 'http' AND p_http_passed = TRUE)
          )
          -- Exclude stores that already have an active task
          AND (dispensary_id IS NULL OR dispensary_id NOT IN (
              SELECT dispensary_id FROM worker_tasks
              WHERE status IN ('claimed', 'running')
                AND dispensary_id IS NOT NULL
          ))
        -- Highest priority first, oldest first within a priority
        ORDER BY priority DESC, created_at ASC
        LIMIT 1
        -- Rows locked by another transaction are skipped instead of waited on
        FOR UPDATE SKIP LOCKED
    )
    RETURNING * INTO claimed_task;

    RETURN claimed_task;
END;
$$ LANGUAGE plpgsql;
|
||||||
|
|
||||||
|
-- ===================================================================
|
||||||
|
-- PART 4: Update v_active_workers view
|
||||||
|
-- ===================================================================
|
||||||
|
|
||||||
|
-- Rebuild v_active_workers (drop, then create) with the per-transport
-- preflight columns. Workers whose status is 'terminated' are excluded.
DROP VIEW IF EXISTS v_active_workers;

CREATE VIEW v_active_workers AS
SELECT
    reg.id,
    reg.worker_id,
    reg.friendly_name,
    reg.role,
    reg.status,
    reg.pod_name,
    reg.hostname,
    reg.started_at,
    reg.last_heartbeat_at,
    reg.last_task_at,
    reg.tasks_completed,
    reg.tasks_failed,
    reg.current_task_id,
    -- per-transport preflight results
    reg.preflight_curl_status,
    reg.preflight_http_status,
    reg.preflight_curl_at,
    reg.preflight_http_at,
    reg.preflight_curl_error,
    reg.preflight_http_error,
    reg.preflight_curl_ms,
    reg.preflight_http_ms,
    -- derived columns
    EXTRACT(EPOCH FROM (NOW() - reg.last_heartbeat_at)) AS seconds_since_heartbeat,
    CASE
        WHEN reg.status = 'offline' THEN 'offline'
        WHEN reg.last_heartbeat_at < NOW() - INTERVAL '2 minutes' THEN 'stale'
        WHEN reg.current_task_id IS NOT NULL THEN 'busy'
        ELSE 'ready'
    END AS health_status,
    -- capability flags: true only when the matching preflight passed
    (reg.preflight_curl_status = 'passed') AS can_curl,
    (reg.preflight_http_status = 'passed') AS can_http
FROM worker_registry AS reg
WHERE reg.status <> 'terminated'
ORDER BY (reg.status = 'active') DESC, reg.last_heartbeat_at DESC;
|
||||||
|
|
||||||
|
-- ===================================================================
|
||||||
|
-- PART 5: View for task queue with method info
|
||||||
|
-- ===================================================================
|
||||||
|
|
||||||
|
-- Rebuild v_task_history (drop, then create) so it includes the task method.
-- Tasks are listed newest first; dispensary_name is NULL when the task has
-- no matching dispensary (LEFT JOIN).
DROP VIEW IF EXISTS v_task_history;

CREATE VIEW v_task_history AS
SELECT
    task.id,
    task.role,
    task.dispensary_id,
    disp.name AS dispensary_name,
    task.platform,
    task.status,
    task.priority,
    task.method,
    task.worker_id,
    task.scheduled_for,
    task.claimed_at,
    task.started_at,
    task.completed_at,
    task.error_message,
    task.retry_count,
    task.created_at,
    -- seconds between start and completion; NULL while either is unset
    EXTRACT(EPOCH FROM (task.completed_at - task.started_at)) AS duration_sec
FROM worker_tasks AS task
LEFT JOIN dispensaries AS disp
       ON disp.id = task.dispensary_id
ORDER BY task.created_at DESC;
|
||||||
|
|
||||||
|
-- ===================================================================
|
||||||
|
-- PART 6: Helper function to update worker preflight status
|
||||||
|
-- ===================================================================
|
||||||
|
|
||||||
|
-- update_worker_preflight: record the outcome of one transport's preflight
-- on the worker_registry row(s) matching p_worker_id.
--
--   p_transport    'curl' or 'http'; any other value leaves the table untouched
--   p_status       'passed', 'failed', 'skipped'
--   p_response_ms  response time in ms (optional)
--   p_error        error text (optional)
CREATE OR REPLACE FUNCTION update_worker_preflight(
    p_worker_id VARCHAR(100),
    p_transport VARCHAR(10),
    p_status VARCHAR(20),
    p_response_ms INTEGER DEFAULT NULL,
    p_error TEXT DEFAULT NULL
) RETURNS VOID AS $$
BEGIN
    CASE p_transport
        WHEN 'curl' THEN
            UPDATE worker_registry
               SET preflight_curl_status = p_status,
                   preflight_curl_at     = NOW(),
                   preflight_curl_ms     = p_response_ms,
                   preflight_curl_error  = p_error,
                   updated_at            = NOW()
             WHERE worker_id = p_worker_id;
        WHEN 'http' THEN
            UPDATE worker_registry
               SET preflight_http_status = p_status,
                   preflight_http_at     = NOW(),
                   preflight_http_ms     = p_response_ms,
                   preflight_http_error  = p_error,
                   updated_at            = NOW()
             WHERE worker_id = p_worker_id;
        ELSE
            -- Unknown or NULL transport: nothing to record
            NULL;
    END CASE;
END;
$$ LANGUAGE plpgsql;
|
||||||
|
|
||||||
|
-- ===================================================================
|
||||||
|
-- Comments
|
||||||
|
-- ===================================================================
|
||||||
|
|
||||||
|
-- Catalog comments for the columns and functions introduced by this migration.
COMMENT ON COLUMN worker_registry.preflight_curl_status IS 'Status of curl/axios preflight: pending, passed, failed, skipped';
COMMENT ON COLUMN worker_registry.preflight_http_status IS 'Status of http/Puppeteer preflight: pending, passed, failed, skipped';
COMMENT ON COLUMN worker_registry.preflight_curl_at IS 'When curl preflight completed';
COMMENT ON COLUMN worker_registry.preflight_http_at IS 'When http preflight completed';
COMMENT ON COLUMN worker_registry.preflight_curl_error IS 'Error message if curl preflight failed';
COMMENT ON COLUMN worker_registry.preflight_http_error IS 'Error message if http preflight failed';
COMMENT ON COLUMN worker_registry.preflight_curl_ms IS 'Response time of successful curl preflight (ms)';
COMMENT ON COLUMN worker_registry.preflight_http_ms IS 'Response time of successful http preflight (ms)';

COMMENT ON COLUMN worker_tasks.method IS 'Transport method required: NULL=any, curl=proxy-based, http=browser-based';

-- Functions are named with their full argument lists: COMMENT ON FUNCTION
-- without one only works while the function name is unique, and fails as
-- soon as another overload with the same name exists.
COMMENT ON FUNCTION claim_task(VARCHAR, VARCHAR, BOOLEAN, BOOLEAN) IS 'Atomically claim a task, respecting method requirements and per-store locking';
COMMENT ON FUNCTION update_worker_preflight(VARCHAR, VARCHAR, VARCHAR, INTEGER, TEXT) IS 'Update a workers preflight status for a given transport';
|
||||||
168
backend/migrations/085_preflight_ip_fingerprint.sql
Normal file
168
backend/migrations/085_preflight_ip_fingerprint.sql
Normal file
@@ -0,0 +1,168 @@
|
|||||||
|
-- Migration 085: Add IP and fingerprint columns for preflight reporting
|
||||||
|
-- These columns were missing from migration 084
|
||||||
|
|
||||||
|
-- ===================================================================
|
||||||
|
-- PART 1: Add IP address columns to worker_registry
|
||||||
|
-- ===================================================================
|
||||||
|
|
||||||
|
-- IP address observed by each preflight:
--   curl_ip = curl/axios preflight, http_ip = http/Puppeteer preflight
ALTER TABLE worker_registry
    ADD COLUMN IF NOT EXISTS curl_ip VARCHAR(45),
    ADD COLUMN IF NOT EXISTS http_ip VARCHAR(45);
|
||||||
|
|
||||||
|
-- ===================================================================
|
||||||
|
-- PART 2: Add fingerprint data column
|
||||||
|
-- ===================================================================
|
||||||
|
|
||||||
|
-- JSONB blob holding the browser fingerprint captured by the Puppeteer preflight
ALTER TABLE worker_registry
    ADD COLUMN IF NOT EXISTS fingerprint_data JSONB;
|
||||||
|
|
||||||
|
-- ===================================================================
|
||||||
|
-- PART 3: Add combined preflight status/timestamp for convenience
|
||||||
|
-- ===================================================================
|
||||||
|
|
||||||
|
-- Combined preflight summary, maintained by update_worker_preflight():
--   'passed'  : curl and http both passed
--   'partial' : exactly one of the two passed
--   'failed'  : neither passed and at least one failed
--   'pending' : anything else (the column default)
-- preflight_at holds the time of the most recent preflight update.
ALTER TABLE worker_registry
    ADD COLUMN IF NOT EXISTS preflight_status VARCHAR(20) DEFAULT 'pending',
    ADD COLUMN IF NOT EXISTS preflight_at TIMESTAMPTZ;
|
||||||
|
|
||||||
|
-- ===================================================================
|
||||||
|
-- PART 4: Update function to set preflight status
|
||||||
|
-- ===================================================================
|
||||||
|
|
||||||
|
-- update_worker_preflight: record one transport's preflight result (status,
-- IP, timing, error, fingerprint) and refresh the combined preflight_status.
--
-- Parameters:
--   p_worker_id    worker_registry.worker_id to update
--   p_transport    'curl' or 'http'; other values skip the per-transport update
--   p_status       'passed', 'failed', 'skipped'
--   p_ip           IP address seen by the preflight (optional)
--   p_response_ms  response time in ms (optional)
--   p_error        error text (optional)
--   p_fingerprint  browser fingerprint, http only; NULL keeps the stored value
--
-- The argument list differs from the five-argument version created in
-- migration 084, so CREATE OR REPLACE alone would add a second overload and
-- leave the old one in place, making three-argument calls ambiguous. Drop the
-- old signature first. Callers still passing (id, transport, status, ms, error)
-- positionally must switch to the new argument order.
DROP FUNCTION IF EXISTS update_worker_preflight(VARCHAR, VARCHAR, VARCHAR, INTEGER, TEXT);

CREATE OR REPLACE FUNCTION update_worker_preflight(
    p_worker_id VARCHAR(100),
    p_transport VARCHAR(10),
    p_status VARCHAR(20),
    p_ip VARCHAR(45) DEFAULT NULL,
    p_response_ms INTEGER DEFAULT NULL,
    p_error TEXT DEFAULT NULL,
    p_fingerprint JSONB DEFAULT NULL
) RETURNS VOID AS $$
BEGIN
    IF p_transport = 'curl' THEN
        UPDATE worker_registry
        SET
            preflight_curl_status = p_status,
            preflight_curl_at = NOW(),
            preflight_curl_ms = p_response_ms,
            preflight_curl_error = p_error,
            curl_ip = p_ip,
            updated_at = NOW()
        WHERE worker_id = p_worker_id;
    ELSIF p_transport = 'http' THEN
        UPDATE worker_registry
        SET
            preflight_http_status = p_status,
            preflight_http_at = NOW(),
            preflight_http_ms = p_response_ms,
            preflight_http_error = p_error,
            http_ip = p_ip,
            -- keep the previously stored fingerprint when none is supplied
            fingerprint_data = COALESCE(p_fingerprint, fingerprint_data),
            updated_at = NOW()
        WHERE worker_id = p_worker_id;
    END IF;

    -- Recompute the combined status from the per-transport columns as they
    -- stand after the update above (single statement, no intermediate read).
    UPDATE worker_registry
    SET
        preflight_status = CASE
            WHEN preflight_curl_status = 'passed' AND preflight_http_status = 'passed' THEN 'passed'
            WHEN preflight_curl_status = 'passed' OR preflight_http_status = 'passed' THEN 'partial'
            WHEN preflight_curl_status = 'failed' OR preflight_http_status = 'failed' THEN 'failed'
            ELSE 'pending'
        END,
        preflight_at = NOW()
    WHERE worker_id = p_worker_id;
END;
$$ LANGUAGE plpgsql;
|
||||||
|
|
||||||
|
-- ===================================================================
|
||||||
|
-- PART 5: Update v_active_workers view
|
||||||
|
-- ===================================================================
|
||||||
|
|
||||||
|
-- Rebuild v_active_workers (drop, then create), adding the preflight IPs, the
-- combined preflight status and the fingerprint data. Workers whose status
-- is 'terminated' are excluded.
DROP VIEW IF EXISTS v_active_workers;

CREATE VIEW v_active_workers AS
SELECT
    reg.id,
    reg.worker_id,
    reg.friendly_name,
    reg.role,
    reg.status,
    reg.pod_name,
    reg.hostname,
    reg.started_at,
    reg.last_heartbeat_at,
    reg.last_task_at,
    reg.tasks_completed,
    reg.tasks_failed,
    reg.current_task_id,
    -- IPs seen by the preflights
    reg.curl_ip,
    reg.http_ip,
    -- combined preflight summary
    reg.preflight_status,
    reg.preflight_at,
    -- per-transport preflight results
    reg.preflight_curl_status,
    reg.preflight_http_status,
    reg.preflight_curl_at,
    reg.preflight_http_at,
    reg.preflight_curl_error,
    reg.preflight_http_error,
    reg.preflight_curl_ms,
    reg.preflight_http_ms,
    -- browser fingerprint
    reg.fingerprint_data,
    -- derived columns
    EXTRACT(EPOCH FROM (NOW() - reg.last_heartbeat_at)) AS seconds_since_heartbeat,
    CASE
        WHEN reg.status = 'offline' THEN 'offline'
        WHEN reg.last_heartbeat_at < NOW() - INTERVAL '2 minutes' THEN 'stale'
        WHEN reg.current_task_id IS NOT NULL THEN 'busy'
        ELSE 'ready'
    END AS health_status,
    -- capability flags: true only when the matching preflight passed
    (reg.preflight_curl_status = 'passed') AS can_curl,
    (reg.preflight_http_status = 'passed') AS can_http
FROM worker_registry AS reg
WHERE reg.status <> 'terminated'
ORDER BY (reg.status = 'active') DESC, reg.last_heartbeat_at DESC;
|
||||||
|
|
||||||
|
-- ===================================================================
|
||||||
|
-- Comments
|
||||||
|
-- ===================================================================
|
||||||
|
|
||||||
|
-- Catalog comments for the columns added by this migration
COMMENT ON COLUMN worker_registry.curl_ip
    IS 'IP address detected during curl/axios preflight';
COMMENT ON COLUMN worker_registry.http_ip
    IS 'IP address detected during Puppeteer preflight';
COMMENT ON COLUMN worker_registry.fingerprint_data
    IS 'Browser fingerprint captured during Puppeteer preflight';
COMMENT ON COLUMN worker_registry.preflight_status
    IS 'Overall preflight status: pending, passed, partial, failed';
COMMENT ON COLUMN worker_registry.preflight_at
    IS 'Most recent preflight completion timestamp';
|
||||||
59
backend/migrations/085_trusted_origins.sql
Normal file
59
backend/migrations/085_trusted_origins.sql
Normal file
@@ -0,0 +1,59 @@
|
|||||||
|
-- Migration 085: Trusted Origins Management
|
||||||
|
-- Allows admin to manage trusted IPs and domains via UI instead of hardcoded values
|
||||||
|
|
||||||
|
-- trusted_origins: IPs and domains allowed to bypass API key auth.
CREATE TABLE IF NOT EXISTS trusted_origins (
    id            serial         PRIMARY KEY,

    -- Kind of entry: 'ip', 'domain' or 'pattern'
    origin_type   varchar(20)    NOT NULL CHECK (origin_type IN ('ip', 'domain', 'pattern')),

    -- The entry itself, e.g.
    --   ip:      '127.0.0.1', '::1', '192.168.1.0/24'
    --   domain:  'cannaiq.co', 'findadispo.com'
    --   pattern: '^https://.*\.cannabrands\.app$' (regex)
    origin_value  varchar(255)   NOT NULL,

    -- Free-text note for admins
    description   text,

    -- Whether the entry is active
    active        boolean        DEFAULT true,

    -- Audit columns
    created_at    timestamptz    DEFAULT now(),
    created_by    int            REFERENCES users (id),
    updated_at    timestamptz    DEFAULT now(),

    -- A given value may appear only once per type
    UNIQUE (origin_type, origin_value)
);
|
||||||
|
|
||||||
|
-- Partial index over active rows, plus a (type, active) index for lookups
CREATE INDEX IF NOT EXISTS idx_trusted_origins_active
    ON trusted_origins (active)
    WHERE active = true;
CREATE INDEX IF NOT EXISTS idx_trusted_origins_type
    ON trusted_origins (origin_type, active);
|
||||||
|
|
||||||
|
-- Seed rows carrying over the previously hardcoded values. Rows are inserted
-- in the same order as before; entries that already exist are left untouched.

-- Localhost IPs
INSERT INTO trusted_origins (origin_type, origin_value, description) VALUES
    ('ip', '127.0.0.1', 'Localhost IPv4'),
    ('ip', '::1', 'Localhost IPv6'),
    ('ip', '::ffff:127.0.0.1', 'Localhost IPv4-mapped IPv6')
ON CONFLICT (origin_type, origin_value) DO NOTHING;

-- Domains
INSERT INTO trusted_origins (origin_type, origin_value, description) VALUES
    ('domain', 'cannaiq.co', 'CannaiQ production'),
    ('domain', 'www.cannaiq.co', 'CannaiQ production (www)'),
    ('domain', 'findadispo.com', 'FindADispo production'),
    ('domain', 'www.findadispo.com', 'FindADispo production (www)'),
    ('domain', 'findagram.co', 'Findagram production'),
    ('domain', 'www.findagram.co', 'Findagram production (www)'),
    ('domain', 'localhost:3010', 'Local backend dev'),
    ('domain', 'localhost:8080', 'Local admin dev'),
    ('domain', 'localhost:5173', 'Local Vite dev')
ON CONFLICT (origin_type, origin_value) DO NOTHING;

-- Regex patterns
INSERT INTO trusted_origins (origin_type, origin_value, description) VALUES
    ('pattern', '^https://.*\.cannabrands\.app$', 'All cannabrands.app subdomains'),
    ('pattern', '^https://.*\.cannaiq\.co$', 'All cannaiq.co subdomains')
ON CONFLICT (origin_type, origin_value) DO NOTHING;

-- Table-level catalog comment
COMMENT ON TABLE trusted_origins
    IS 'IPs and domains that bypass API key authentication. Managed via /admin.';
|
||||||
10
backend/migrations/086_proxy_url_column.sql
Normal file
10
backend/migrations/086_proxy_url_column.sql
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
-- Migration 086: Add proxy_url column for alternative URL formats
|
||||||
|
-- Some proxy providers use non-standard URL formats (e.g., host:port:user:pass)
|
||||||
|
-- This column allows storing the raw URL directly
|
||||||
|
|
||||||
|
-- proxies.proxy_url: optional raw proxy URL; when set it is meant to be used
-- as-is instead of a URL assembled from the individual parts.
ALTER TABLE proxies
    ADD COLUMN IF NOT EXISTS proxy_url TEXT;

-- Catalog comment for the new column
COMMENT ON COLUMN proxies.proxy_url
    IS 'Raw proxy URL (if provider uses non-standard format). Takes precedence over constructed URL from host/port/user/pass.';
|
||||||
30
backend/migrations/088_discovery_payloads.sql
Normal file
30
backend/migrations/088_discovery_payloads.sql
Normal file
@@ -0,0 +1,30 @@
|
|||||||
|
-- Migration 088: Extend raw_crawl_payloads for discovery payloads
|
||||||
|
--
|
||||||
|
-- Enables saving raw store data from Dutchie discovery crawls.
|
||||||
|
-- Store discovery returns raw dispensary objects - save them for historical analysis.
|
||||||
|
|
||||||
|
-- Extend raw_crawl_payloads so it can also hold store-discovery payloads:
--   payload_type   distinguishes product crawls (default) from discovery crawls
--   state_code     state of a discovery payload (NULL for product payloads)
--   store_count    store total of a discovery payload (counterpart of product_count)
--   dispensary_id  becomes nullable, as discovery payloads have no dispensary
ALTER TABLE raw_crawl_payloads
    ADD COLUMN IF NOT EXISTS payload_type VARCHAR(32) NOT NULL DEFAULT 'product',
    ADD COLUMN IF NOT EXISTS state_code VARCHAR(10),
    ADD COLUMN IF NOT EXISTS store_count INTEGER,
    ALTER COLUMN dispensary_id DROP NOT NULL;

-- Partial index restricted to discovery payloads
CREATE INDEX IF NOT EXISTS idx_raw_crawl_payloads_type_state
    ON raw_crawl_payloads (payload_type, state_code)
    WHERE payload_type = 'store_discovery';

-- Catalog comments for the new columns
COMMENT ON COLUMN raw_crawl_payloads.payload_type
    IS 'Type: product (default), store_discovery';
COMMENT ON COLUMN raw_crawl_payloads.state_code
    IS 'State code for discovery payloads (e.g., AZ, MI)';
COMMENT ON COLUMN raw_crawl_payloads.store_count
    IS 'Number of stores in discovery payload';
|
||||||
105
backend/migrations/089_immutable_schedules.sql
Normal file
105
backend/migrations/089_immutable_schedules.sql
Normal file
@@ -0,0 +1,105 @@
|
|||||||
|
-- Migration 089: Immutable Schedules with Per-State Product Discovery
|
||||||
|
--
|
||||||
|
-- Key changes:
|
||||||
|
-- 1. Add is_immutable column - schedules can be edited but not deleted
|
||||||
|
-- 2. Add method column - all tasks use 'http' (Puppeteer transport)
|
||||||
|
-- 3. Store discovery weekly (168h)
|
||||||
|
-- 4. Per-state product_discovery schedules (4h default)
|
||||||
|
-- 5. Remove old payload_fetch_all and product_refresh_all schedules
|
||||||
|
|
||||||
|
-- =====================================================
|
||||||
|
-- 1) Add new columns to task_schedules
|
||||||
|
-- =====================================================
|
||||||
|
ALTER TABLE task_schedules
|
||||||
|
ADD COLUMN IF NOT EXISTS is_immutable BOOLEAN DEFAULT FALSE;
|
||||||
|
|
||||||
|
ALTER TABLE task_schedules
|
||||||
|
ADD COLUMN IF NOT EXISTS method VARCHAR(10) DEFAULT 'http';
|
||||||
|
|
||||||
|
-- =====================================================
|
||||||
|
-- 2) Update store_discovery to weekly and immutable
|
||||||
|
-- =====================================================
|
||||||
|
UPDATE task_schedules
|
||||||
|
SET interval_hours = 168, -- 7 days
|
||||||
|
is_immutable = TRUE,
|
||||||
|
method = 'http',
|
||||||
|
description = 'Discover new Dutchie stores weekly (HTTP transport)'
|
||||||
|
WHERE name IN ('store_discovery_dutchie', 'Store Discovery');
|
||||||
|
|
||||||
|
-- Insert if doesn't exist
|
||||||
|
INSERT INTO task_schedules (name, role, interval_hours, priority, description, is_immutable, method, platform, next_run_at)
|
||||||
|
VALUES ('Store Discovery', 'store_discovery', 168, 5, 'Discover new Dutchie stores weekly (HTTP transport)', TRUE, 'http', 'dutchie', NOW())
|
||||||
|
ON CONFLICT (name) DO UPDATE SET
|
||||||
|
interval_hours = 168,
|
||||||
|
is_immutable = TRUE,
|
||||||
|
method = 'http',
|
||||||
|
description = 'Discover new Dutchie stores weekly (HTTP transport)';
|
||||||
|
|
||||||
|
-- =====================================================
|
||||||
|
-- 3) Remove old payload_fetch and product_refresh_all schedules
|
||||||
|
-- =====================================================
|
||||||
|
DELETE FROM task_schedules WHERE name IN ('payload_fetch_all', 'product_refresh_all');
|
||||||
|
|
||||||
|
-- =====================================================
|
||||||
|
-- 4) Create per-state product_discovery schedules
|
||||||
|
-- =====================================================
|
||||||
|
-- One schedule per state that has at least one crawl-enabled dispensary; the schedule is enabled only when the state itself is active (s.is_active)
|
||||||
|
INSERT INTO task_schedules (name, role, state_code, interval_hours, priority, description, is_immutable, method, enabled, next_run_at)
|
||||||
|
SELECT
|
||||||
|
'product_discovery_' || lower(s.code) AS name,
|
||||||
|
'product_discovery' AS role,
|
||||||
|
s.code AS state_code,
|
||||||
|
4 AS interval_hours, -- 4 hours default, editable
|
||||||
|
10 AS priority,
|
||||||
|
'Product discovery for ' || s.name || ' dispensaries (HTTP transport)' AS description,
|
||||||
|
TRUE AS is_immutable, -- Can edit but not delete
|
||||||
|
'http' AS method,
|
||||||
|
CASE WHEN s.is_active THEN TRUE ELSE FALSE END AS enabled,
|
||||||
|
-- Stagger start times: each state starts 5 minutes after the previous (the first state starts 5 minutes from now)
|
||||||
|
NOW() + (ROW_NUMBER() OVER (ORDER BY s.code) * INTERVAL '5 minutes') AS next_run_at
|
||||||
|
FROM states s
|
||||||
|
WHERE EXISTS (
|
||||||
|
SELECT 1 FROM dispensaries d
|
||||||
|
WHERE d.state_id = s.id AND d.crawl_enabled = true
|
||||||
|
)
|
||||||
|
ON CONFLICT (name) DO UPDATE SET
|
||||||
|
is_immutable = TRUE,
|
||||||
|
method = 'http',
|
||||||
|
description = EXCLUDED.description;
|
||||||
|
|
||||||
|
-- Also create disabled placeholder schedules for every remaining state, so a schedule already exists if stores are discovered there later
|
||||||
|
INSERT INTO task_schedules (name, role, state_code, interval_hours, priority, description, is_immutable, method, enabled, next_run_at)
|
||||||
|
SELECT
|
||||||
|
'product_discovery_' || lower(s.code) AS name,
|
||||||
|
'product_discovery' AS role,
|
||||||
|
s.code AS state_code,
|
||||||
|
4 AS interval_hours,
|
||||||
|
10 AS priority,
|
||||||
|
'Product discovery for ' || s.name || ' dispensaries (HTTP transport)' AS description,
|
||||||
|
TRUE AS is_immutable,
|
||||||
|
'http' AS method,
|
||||||
|
FALSE AS enabled, -- Disabled until stores exist
|
||||||
|
NOW() + INTERVAL '1 hour'
|
||||||
|
FROM states s
|
||||||
|
WHERE NOT EXISTS (
|
||||||
|
SELECT 1 FROM task_schedules ts WHERE ts.name = 'product_discovery_' || lower(s.code)
|
||||||
|
)
|
||||||
|
ON CONFLICT (name) DO NOTHING;
|
||||||
|
|
||||||
|
-- =====================================================
|
||||||
|
-- 5) Make analytics_refresh immutable
|
||||||
|
-- =====================================================
|
||||||
|
UPDATE task_schedules
|
||||||
|
SET is_immutable = TRUE, method = 'http'
|
||||||
|
WHERE name = 'analytics_refresh';
|
||||||
|
|
||||||
|
-- =====================================================
|
||||||
|
-- 6) Add index for schedule lookups
|
||||||
|
-- =====================================================
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_task_schedules_state_code
|
||||||
|
ON task_schedules(state_code)
|
||||||
|
WHERE state_code IS NOT NULL;
|
||||||
|
|
||||||
|
-- Comments
|
||||||
|
COMMENT ON COLUMN task_schedules.is_immutable IS 'If TRUE, schedule cannot be deleted (only edited)';
|
||||||
|
COMMENT ON COLUMN task_schedules.method IS 'Transport method: http (Puppeteer/browser) or curl (axios)';
|
||||||
66
backend/migrations/090_modification_tracking.sql
Normal file
66
backend/migrations/090_modification_tracking.sql
Normal file
@@ -0,0 +1,66 @@
|
|||||||
|
-- Migration 090: Add modification tracking columns
|
||||||
|
--
|
||||||
|
-- Tracks when records were last modified and by which task.
|
||||||
|
-- Enables debugging, auditing, and understanding data freshness.
|
||||||
|
--
|
||||||
|
-- Columns added:
|
||||||
|
-- last_modified_at - When the record was last modified by a task
|
||||||
|
-- last_modified_by_task - Which task role modified it (e.g., 'product_refresh')
|
||||||
|
-- last_modified_task_id - The specific task ID that modified it
|
||||||
|
|
||||||
|
-- ============================================================
|
||||||
|
-- dispensaries table
|
||||||
|
-- ============================================================
|
||||||
|
ALTER TABLE dispensaries
|
||||||
|
ADD COLUMN IF NOT EXISTS last_modified_at TIMESTAMPTZ;
|
||||||
|
|
||||||
|
ALTER TABLE dispensaries
|
||||||
|
ADD COLUMN IF NOT EXISTS last_modified_by_task VARCHAR(50);
|
||||||
|
|
||||||
|
ALTER TABLE dispensaries
|
||||||
|
ADD COLUMN IF NOT EXISTS last_modified_task_id INTEGER;
|
||||||
|
|
||||||
|
-- Index for querying recently modified records
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_dispensaries_last_modified
|
||||||
|
ON dispensaries(last_modified_at DESC)
|
||||||
|
WHERE last_modified_at IS NOT NULL;
|
||||||
|
|
||||||
|
-- Index for querying by task type
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_dispensaries_modified_by_task
|
||||||
|
ON dispensaries(last_modified_by_task)
|
||||||
|
WHERE last_modified_by_task IS NOT NULL;
|
||||||
|
|
||||||
|
COMMENT ON COLUMN dispensaries.last_modified_at IS 'Timestamp when this record was last modified by a task';
|
||||||
|
COMMENT ON COLUMN dispensaries.last_modified_by_task IS 'Task role that last modified this record (e.g., store_discovery_state, entry_point_discovery)';
|
||||||
|
COMMENT ON COLUMN dispensaries.last_modified_task_id IS 'ID of the worker_tasks record that last modified this';
|
||||||
|
|
||||||
|
-- ============================================================
|
||||||
|
-- store_products table
|
||||||
|
-- ============================================================
|
||||||
|
ALTER TABLE store_products
|
||||||
|
ADD COLUMN IF NOT EXISTS last_modified_at TIMESTAMPTZ;
|
||||||
|
|
||||||
|
ALTER TABLE store_products
|
||||||
|
ADD COLUMN IF NOT EXISTS last_modified_by_task VARCHAR(50);
|
||||||
|
|
||||||
|
ALTER TABLE store_products
|
||||||
|
ADD COLUMN IF NOT EXISTS last_modified_task_id INTEGER;
|
||||||
|
|
||||||
|
-- Index for querying recently modified products
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_store_products_last_modified
|
||||||
|
ON store_products(last_modified_at DESC)
|
||||||
|
WHERE last_modified_at IS NOT NULL;
|
||||||
|
|
||||||
|
-- Index for querying by task type
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_store_products_modified_by_task
|
||||||
|
ON store_products(last_modified_by_task)
|
||||||
|
WHERE last_modified_by_task IS NOT NULL;
|
||||||
|
|
||||||
|
-- Composite index for finding a dispensary's most recently modified products
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_store_products_task_modified
|
||||||
|
ON store_products(dispensary_id, last_modified_at DESC)
|
||||||
|
WHERE last_modified_at IS NOT NULL;
|
||||||
|
|
||||||
|
COMMENT ON COLUMN store_products.last_modified_at IS 'Timestamp when this record was last modified by a task';
|
||||||
|
COMMENT ON COLUMN store_products.last_modified_by_task IS 'Task role that last modified this record (e.g., product_refresh, product_discovery)';
|
||||||
|
COMMENT ON COLUMN store_products.last_modified_task_id IS 'ID of the worker_tasks record that last modified this';
|
||||||
26
backend/migrations/091_store_discovery_tracking.sql
Normal file
26
backend/migrations/091_store_discovery_tracking.sql
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
-- Migration 091: Add store discovery tracking columns
|
||||||
|
-- Per auto-healing scheme (2025-12-12):
|
||||||
|
-- Track when store_discovery last updated each dispensary
|
||||||
|
-- Track when last payload was saved
|
||||||
|
|
||||||
|
-- Add last_store_discovery_at to track when store_discovery updated this record
|
||||||
|
ALTER TABLE dispensaries
|
||||||
|
ADD COLUMN IF NOT EXISTS last_store_discovery_at TIMESTAMPTZ;
|
||||||
|
|
||||||
|
-- Add last_payload_at to track when last product payload was saved
|
||||||
|
-- (Complements last_fetch_at which tracks API fetch time)
|
||||||
|
ALTER TABLE dispensaries
|
||||||
|
ADD COLUMN IF NOT EXISTS last_payload_at TIMESTAMPTZ;
|
||||||
|
|
||||||
|
-- Add index for finding stale discovery data
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_dispensaries_store_discovery_at
|
||||||
|
ON dispensaries (last_store_discovery_at DESC NULLS LAST)
|
||||||
|
WHERE crawl_enabled = true;
|
||||||
|
|
||||||
|
-- Add index for finding dispensaries without recent payloads
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_dispensaries_payload_at
|
||||||
|
ON dispensaries (last_payload_at DESC NULLS LAST)
|
||||||
|
WHERE crawl_enabled = true;
|
||||||
|
|
||||||
|
COMMENT ON COLUMN dispensaries.last_store_discovery_at IS 'When store_discovery task last updated this record';
|
||||||
|
COMMENT ON COLUMN dispensaries.last_payload_at IS 'When last product payload was saved for this dispensary';
|
||||||
30
backend/migrations/092_fix_trulieve_urls.sql
Normal file
30
backend/migrations/092_fix_trulieve_urls.sql
Normal file
@@ -0,0 +1,30 @@
|
|||||||
|
-- Fix 3 Trulieve/Harvest stores with incorrect menu URLs
|
||||||
|
-- These records have NULL or mismatched platform_dispensary_id so store_discovery
|
||||||
|
-- ON CONFLICT can't update them automatically
|
||||||
|
|
||||||
|
UPDATE dispensaries
|
||||||
|
SET
|
||||||
|
menu_url = 'https://dutchie.com/dispensary/svaccha-llc-nirvana-center-apache-junction',
|
||||||
|
updated_at = NOW()
|
||||||
|
WHERE id = 224;
|
||||||
|
|
||||||
|
UPDATE dispensaries
|
||||||
|
SET
|
||||||
|
menu_url = 'https://dutchie.com/dispensary/trulieve-of-phoenix-tatum',
|
||||||
|
updated_at = NOW()
|
||||||
|
WHERE id = 76;
|
||||||
|
|
||||||
|
UPDATE dispensaries
|
||||||
|
SET
|
||||||
|
menu_url = 'https://dutchie.com/dispensary/harvest-of-havasu',
|
||||||
|
updated_at = NOW()
|
||||||
|
WHERE id = 403;
|
||||||
|
|
||||||
|
-- Queue entry_point_discovery tasks to resolve their platform_dispensary_id
|
||||||
|
-- method='http' ensures only workers that passed http preflight can claim these
|
||||||
|
INSERT INTO worker_tasks (role, dispensary_id, priority, scheduled_for, method)
|
||||||
|
VALUES
|
||||||
|
('entry_point_discovery', 224, 5, NOW(), 'http'),
|
||||||
|
('entry_point_discovery', 76, 5, NOW(), 'http'),
|
||||||
|
('entry_point_discovery', 403, 5, NOW(), 'http')
|
||||||
|
ON CONFLICT DO NOTHING;
|
||||||
35
backend/migrations/092_store_intelligence_cache.sql
Normal file
35
backend/migrations/092_store_intelligence_cache.sql
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
-- Migration 092: Store Intelligence Cache
|
||||||
|
-- Pre-computed store intelligence data refreshed by analytics_refresh task
|
||||||
|
-- Eliminates costly aggregation queries on /intelligence/stores endpoint
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS store_intelligence_cache (
|
||||||
|
dispensary_id INTEGER PRIMARY KEY REFERENCES dispensaries(id) ON DELETE CASCADE,
|
||||||
|
|
||||||
|
-- Basic counts
|
||||||
|
sku_count INTEGER NOT NULL DEFAULT 0,
|
||||||
|
brand_count INTEGER NOT NULL DEFAULT 0,
|
||||||
|
snapshot_count INTEGER NOT NULL DEFAULT 0,
|
||||||
|
|
||||||
|
-- Pricing
|
||||||
|
avg_price_rec NUMERIC(10,2),
|
||||||
|
avg_price_med NUMERIC(10,2),
|
||||||
|
min_price NUMERIC(10,2),
|
||||||
|
max_price NUMERIC(10,2),
|
||||||
|
|
||||||
|
-- Category breakdown (JSONB for flexibility)
|
||||||
|
category_counts JSONB DEFAULT '{}',
|
||||||
|
|
||||||
|
-- Timestamps
|
||||||
|
last_crawl_at TIMESTAMPTZ,
|
||||||
|
last_refresh_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
||||||
|
|
||||||
|
-- Metadata
|
||||||
|
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
|
||||||
|
);
|
||||||
|
|
||||||
|
-- Index for ordering or filtering cache rows by last_refresh_at (e.g., finding stale entries)
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_store_intelligence_cache_refresh
|
||||||
|
ON store_intelligence_cache (last_refresh_at DESC);
|
||||||
|
|
||||||
|
COMMENT ON TABLE store_intelligence_cache IS 'Pre-computed store intelligence metrics, refreshed by analytics_refresh task';
|
||||||
|
COMMENT ON COLUMN store_intelligence_cache.category_counts IS 'JSON object mapping category_raw to product count';
|
||||||
43
backend/migrations/093_fix_mv_state_metrics.sql
Normal file
43
backend/migrations/093_fix_mv_state_metrics.sql
Normal file
@@ -0,0 +1,43 @@
|
|||||||
|
-- Migration: 093_fix_mv_state_metrics.sql
|
||||||
|
-- Purpose: Fix mv_state_metrics to use brand_name_raw and show correct store counts
|
||||||
|
-- Issues fixed:
|
||||||
|
-- 1. unique_brands used brand_id (often NULL), now uses brand_name_raw
|
||||||
|
-- 2. Added out_of_stock_products column
|
||||||
|
-- 3. dispensary_count now correctly named
|
||||||
|
|
||||||
|
-- Drop and recreate the materialized view with correct definition
|
||||||
|
DROP MATERIALIZED VIEW IF EXISTS mv_state_metrics;
|
||||||
|
|
||||||
|
CREATE MATERIALIZED VIEW mv_state_metrics AS -- one row per dispensary state with store and product aggregates
|
||||||
|
SELECT
|
||||||
|
d.state,
|
||||||
|
s.name AS state_name,
|
||||||
|
COUNT(DISTINCT d.id) AS dispensary_count, -- DISTINCT because the store_products join repeats each dispensary once per product
|
||||||
|
COUNT(DISTINCT CASE WHEN d.menu_type = 'dutchie' THEN d.id END) AS dutchie_stores,
|
||||||
|
COUNT(DISTINCT CASE WHEN d.crawl_enabled = true THEN d.id END) AS active_stores,
|
||||||
|
COUNT(sp.id) AS total_products, -- counts joined product rows only; dispensaries without products contribute 0
|
||||||
|
COUNT(CASE WHEN COALESCE(sp.is_in_stock, true) THEN sp.id END) AS in_stock_products, -- products with NULL is_in_stock are counted as in stock
|
||||||
|
COUNT(CASE WHEN sp.is_in_stock = false THEN sp.id END) AS out_of_stock_products,
|
||||||
|
COUNT(CASE WHEN sp.is_on_special THEN sp.id END) AS on_special_products,
|
||||||
|
COUNT(DISTINCT sp.brand_name_raw) FILTER (WHERE sp.brand_name_raw IS NOT NULL AND sp.brand_name_raw != '') AS unique_brands, -- NULL and empty brand names are ignored
|
||||||
|
COUNT(DISTINCT sp.category_raw) FILTER (WHERE sp.category_raw IS NOT NULL) AS unique_categories,
|
||||||
|
ROUND(AVG(sp.price_rec) FILTER (WHERE sp.price_rec > 0)::NUMERIC, 2) AS avg_price_rec, -- price statistics ignore zero, negative and NULL prices
|
||||||
|
MIN(sp.price_rec) FILTER (WHERE sp.price_rec > 0) AS min_price_rec,
|
||||||
|
MAX(sp.price_rec) FILTER (WHERE sp.price_rec > 0) AS max_price_rec,
|
||||||
|
NOW() AS refreshed_at -- evaluated each time the view is created or refreshed
|
||||||
|
FROM dispensaries d
|
||||||
|
LEFT JOIN states s ON d.state = s.code -- LEFT JOIN: state_name is NULL when d.state has no matching states.code
|
||||||
|
LEFT JOIN store_products sp ON d.id = sp.dispensary_id -- LEFT JOIN keeps dispensaries that have no products
|
||||||
|
WHERE d.state IS NOT NULL
|
||||||
|
GROUP BY d.state, s.name;
|
||||||
|
|
||||||
|
-- Create unique index for CONCURRENTLY refresh support
|
||||||
|
CREATE UNIQUE INDEX idx_mv_state_metrics_state ON mv_state_metrics(state);
|
||||||
|
|
||||||
|
-- Update refresh function
|
||||||
|
CREATE OR REPLACE FUNCTION refresh_state_metrics() -- recomputes mv_state_metrics; takes no arguments and returns nothing
|
||||||
|
RETURNS void AS $$
|
||||||
|
BEGIN
|
||||||
|
REFRESH MATERIALIZED VIEW CONCURRENTLY mv_state_metrics; -- CONCURRENTLY needs a unique index on the view; idx_mv_state_metrics_state provides it
|
||||||
|
END;
|
||||||
|
$$ LANGUAGE plpgsql;
|
||||||
516
backend/migrations/094_import_evomi_proxies.sql
Normal file
516
backend/migrations/094_import_evomi_proxies.sql
Normal file
@@ -0,0 +1,516 @@
|
|||||||
|
-- Migration: Import 500 Evomi residential proxies
|
||||||
|
-- These are sticky-session rotating proxies where password contains session ID
|
||||||
|
-- Active is set to false - run Test All to verify and activate
|
||||||
|
|
||||||
|
-- Remove the old unique constraint, which does not take username/password
-- into account. IF EXISTS makes this a no-op when the constraint is already gone.
ALTER TABLE proxies
    DROP CONSTRAINT IF EXISTS proxies_host_port_protocol_key;
|
||||||
|
|
||||||
|
-- Add new unique constraint that includes username and password.
-- This allows multiple entries for the same host:port with different
-- credentials (sessions).
--
-- ADD CONSTRAINT has no IF NOT EXISTS form, so the constraint is dropped first
-- (a no-op on the first run) to keep this step safe to re-run.
ALTER TABLE proxies
    DROP CONSTRAINT IF EXISTS proxies_host_port_protocol_username_password_key;

ALTER TABLE proxies
    ADD CONSTRAINT proxies_host_port_protocol_username_password_key
    UNIQUE (host, port, protocol, username, password);
-- NOTE(review): if username/password are nullable, rows with NULL credentials
-- are not deduplicated by this constraint (NULLs are treated as distinct by
-- default) — confirm whether credential-less proxies need a separate guard.
|
||||||
|
|
||||||
|
-- Now insert all 500 proxies
|
||||||
|
INSERT INTO proxies (host, port, protocol, username, password, active, max_connections)
|
||||||
|
VALUES
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-4XRRPF1UQ', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-5UNGX7N7K', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-9PSKYP1GU', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-GZBKKYL2S', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-YHJHM0XZU', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ESDYQ34CJ', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-GAXUMFKQI', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-2FF66K4CI', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-SUYM0R49B', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-A8VHZMEFP', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-WNRLH6NXR', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-SPSB3IUX6', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-85N76UU5Q', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-189P3LH2F', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-47DQOAGWY', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-IBT0QO7M2', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-UPXOUOH8X', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-BFQ1PH75D', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-KNTFKRY1J', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-5L8IG6DZX', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-9YE13X0BA', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-6KBHCHF0I', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-CETHHFHZ6', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-A06J8ST3I', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-YFS93P1YR', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-RB74B3R6C', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-2JW27O3EU', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-KCUX84BL0', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-1A2KSG6HO', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-4QW8ILV0E', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-0Q09GH2VL', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-16BRXBCYC', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-9W02B3R4L', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-CVAEH76YT', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-CATOG0Q5I', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-F81625L74', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-DO4AVTPK4', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-SBZPXORD5', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-JA1AWOX03', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-0FUJTRSYT', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-CM1R2RSTB', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-EHPJZCK1S', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ZYLKORNAF', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-05A8BUD25', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-RHM1Q6O4M', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ES5VPCE6Z', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-P0JEGLP4O', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-OC4AX88D0', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-3BN54IEBV', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ABSC7S550', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-LNIJU6R2V', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-OYGQPPCOV', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-32YBOHQWR', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-7KGEMK4SL', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-FAW8T2EBW', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-GPV69KI9T', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-JPBHSN8M2', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-VZ1JQOF15', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-7DJXXPK1E', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-JXKQ7JVZ1', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-88Q5UQX3B', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-HAI5K0JFO', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-65SUKG0QH', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-1XFJETX1F', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-7ZNUCVCBW', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-O1DCK15LA', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-WLTEA65WB', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-KCHAFNK2P', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-6ODSZ6CUT', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-SZ8R2EFH4', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-9EPPYQREC', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-MPCBES7UI', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-FCCPL0XWZ', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-GJ23UYEGI', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-RQT80689I', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-TDQO2AP5E', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-D5Q5SEUEO', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-DZN4ZTENM', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-4HVQ33VK9', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-F1HJ7GPHA', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-RM708QD2Y', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-K36N27GM5', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-O73TS0DAE', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-54QXRWEA8', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-1P6LP0365', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-WMZ2ST34E', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-175UYF58T', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-W0HTK6F28', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-D5275CTIM', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-IH2IWVZOH', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-C4VFW7GSA', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-O9XGULSNA', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-PJ1W1P5L9', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-MQQU30KPC', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-BNPIBZTYV', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-7BNRCH922', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-5AZLU117B', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-3PPJ49VJC', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-FMC8CQO74', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-VCHW23CXJ', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-1S4749PCB', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-0T9DJFZPK', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-L0RMV65W3', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-FZ1ZZUQNA', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-6IFJD23DI', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ZKUEP5XM0', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-Z8KU62CLT', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-LO77J78X1', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-27FBKYRJ4', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-0TDQTESGW', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-IMKI89WQ1', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ANS65MIJS', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-O3T2OTT0Y', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-MWW6Z1QVM', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-TT47MX0BB', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-59CFKTM14', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-DOD61TVZN', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-RH9Y298WS', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-X98AATJ7B', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-C3UMES1W8', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-8O3J7G3PT', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-3K4OH78OJ', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-N4A3JMVL1', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-HK1SRLAC9', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-Y9VLJJXVU', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-KTTH7R0EC', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-JKVX01E8T', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-HW2VPAHJO', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-7WZ9UHBH8', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-JTKFK0CP7', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-G3F27NXG5', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-K7I2JWYSP', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-CTUU8UQ0T', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ISHMAP6RQ', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-LVWNZ1LHP', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-N5CQ1YG2Z', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-XL2XY2SLZ', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-UCRZVFIV1', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-VLGQFYNEL', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-YPCDM9O5Y', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-R6VA2S25E', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-4W8X8BBUL', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-5INDC8M80', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-Q8RKKOF29', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-B5ED3EFBC', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-8IC5ZXAX1', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-KCGM25D75', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-1MO06IRID', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-4QWGUGN6W', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-5T9M5KEHT', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-9KG7W7NZF', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-NYGN5R2CL', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-H61OXFCJ2', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-30WSQ4EFH', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-J36NG6MY2', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-TZU34ZA7A', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-LPWNYL74G', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-DDJTXOS4Z', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-HFOS4S185', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-2MLGIFL1M', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-CI5AHX0TC', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-WSXVCH1WN', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-8F0C3D06T', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-3YZR0664F', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-1L2VMWTM0', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-KPMCB57O7', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-N6QXQDZV3', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-35FAYFWDP', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-TVZWE2JR8', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-0WK86IKLF', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-8WBU6ESHJ', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-XGU6UNM01', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-86CXNEQZC', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-NZ4LFCHE3', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ZKB6D72RF', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-BKXNG77NS', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-3MJ332POD', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-SL9VEYNJ0', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-LY8KO43Z8', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-8KGF1XR1L', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-WT6FB54HW', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-7UQ9JMG5E', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-KX3L2040U', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-HL809F9WU', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-T9GU40ERH', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-I5O2NX3G9', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-RVOUYU3NO', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-2T3ETNUKS', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-SW0B93DZZ', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-PQ55UF3K6', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-VNRWWHHJB', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-8Q26FZ7EP', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ZWD9FA90J', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-QSGMQX3RZ', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-83NZ9MEAC', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-Q9QQ4AL37', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-QBE9KD60Z', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-NRNUXUO44', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-8F0XKQ9P8', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-095JV1CJN', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-WRRSIRUTZ', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-DTUD7IDQI', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ASCEAI9LD', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-YOUM7BJZH', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-PEG2ZH9J3', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-WAUW31F78', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-GIBZ6U7AQ', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-63TD9LFBG', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-0MH1N9MJB', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-YFP9RNQIK', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-SW4N5162D', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-53MWFB2MP', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-QWLUKBMIN', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-JHS6QIX9G', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-6R04HZ5UD', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-OUJLT31VN', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-6BMKW933S', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-R4GG84E4Q', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-00XAP630X', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-AK97MC2A0', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-NBS2GKGO5', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-NVFEWK4S5', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-MTV3WSYS1', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-JS8RM4JGW', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-6NL4QR1XN', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-4BUUQVSN6', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-56WEAAU3M', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-WCA56PFTF', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-TK1QAZP0B', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-SYZ5ADFXP', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-S3VLOUW6G', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-V2K1V1JWJ', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-MZ6VHV5PQ', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-DRZDQDPN3', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-231VVRYYA', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-06G3MC88G', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-WS52I2ZVD', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-3QTNQD55U', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-EX7ALECU3', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-DQN8TVQY6', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-FJT54OQFI', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-BLTYUF7QR', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-8DL2JXDSO', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-KBAOXIJ4Y', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ZYL28R5UW', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-NCRDA8LYB', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-BQYKXQLXU', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-PSHCS65MR', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-90Y1WFVYZ', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-4GG33NUPW', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-5Y0A79GED', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-RMZHTAD6J', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-XBSOJ5I36', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-AAJW53VNE', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-9NYSPSEL6', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-94WMY337S', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-35Y3BJQFW', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-R7WY3TMRC', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-RXAQVH0F3', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-EFQ2AVFSB', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-XPOUJSAVD', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-RSHPF5NTT', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-Z9402336V', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-OI36C5WOJ', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-XEOGV1LVS', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-QIQDXG9NC', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-9IY242GGT', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-PQTEUT52E', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-18NKI3WPS', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-34U3QAA49', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-S05TYKBBF', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-B4J8WCWDD', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-HR377WC28', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-PNRR7S1T2', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-UNR0N0KJ9', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-NARQQANBE', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-8PUL1MYUU', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-KJPCT1FP3', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-XGC80N0AM', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-8Y1JN8DH3', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-Y56M31T07', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-NHYHXQSV1', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-V30RZVG7L', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-CR6V2GSOU', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-VSAF5O0LJ', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-4F4BF2LFH', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ERSMQHXNX', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-Q0TFLZQWS', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ZXCS6SMHD', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-JHXYAUGRA', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-IT2XYWES2', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-22UCD94OG', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-VGDLQ3K35', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-O8AFL8RGX', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-9RBIZ8G9X', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-9JIU0SVBV', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-PWRBG0GWU', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ZME1MX12T', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-A7LWRKSJP', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-5XISX0HD4', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-5T6EXKD3Z', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-10ILV351B', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-FDULBZDIY', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-SFVR6I980', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-FKV8DCZGT', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ECRK3M3IZ', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-WMKSLOF39', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-HGE60O6AL', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-RGCWDJOT8', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-DESWK5KVN', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-RD593HJ92', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-XWNCAO39B', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-AQ4XGDLX8', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-2ZOVEA1PL', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-JF4FUX83X', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-CQ228GK3B', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-XCTMU9I7U', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-M3F37T22W', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ASZUXM9M9', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-CJVHX24WW', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-KZT4T898V', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-RI128R5TE', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-HCAG6X9MJ', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-XOQENWBP7', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-1LTQGM497', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ZLVZT4O1G', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-FTIXTXCIA', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-O2YE6QNHY', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-0JPDDBF47', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-H1FP1IFJI', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-FYBPBMY5B', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-F7BWDVC97', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-MLENB1LQ4', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-FT9YNU8UP', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-5W21Q2O5L', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-YM61QWPR3', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-XXFQJJHZM', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-H52YKCM9X', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-NT56ZNZ54', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-DRJY7BMB5', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-P6886RPXX', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-PBXW2EY5K', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-5VQCJTM36', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-NMM3GGM1J', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-1JQQ0CDSA', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-R89YI91K4', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-7L7L9MXOT', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-50Z7MXKZS', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-EGADRZTIB', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-1DR7H46H6', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-O28QZL994', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-EYTRWVERM', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-HAJZAUWJV', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-AGYO3AB89', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-V224329ZM', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-4YTMSFWYK', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-QP40RL1N1', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-CB1BVAMAH', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-9VGXUY02O', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-BCPVVKCZ3', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-VDC3CWZX7', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-7HWLI21FA', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-5QWIUJEFM', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-4C3PBMAIZ', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-3QC7DM7PH', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-A6R5G3FWV', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-3A6WDE12Z', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-0F2LZA9RU', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-XGBJXMXRX', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-5YOGR8PQ1', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-LPBFBUF3N', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-TUSPGR2AY', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-G05I8M2FQ', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-H5NDXJIAQ', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-X8FJL8WQZ', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-KIB2FQRUP', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-VNV0OYWR7', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-GKBPM3PB2', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-XVPI30KE7', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-Y3PRMJP51', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-KEPP5SBML', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-0PDUZ6QEQ', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-1GHWWFLLE', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-149S2TO8O', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-1ZB6FSIGE', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-VCRQTXDZL', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-645JVC3XL', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-2HJ00JBSR', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-7FZDG2W65', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-HD6ANE3LN', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-2HS1B1J8V', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-IHOHYMDF5', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ZYZMAFEKF', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-JO85WX5JE', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-RURJDCURW', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-FZC3BLXPJ', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-B0YR2LOZ1', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-6ZFP58ZRK', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-UMZDLHQ78', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-8A2IHDXY3', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-EDYEPWUMT', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-X3TM99R12', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-DLV0UTQ72', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-SFU0ZYIM0', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-YAJ6A66NH', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-X8CFU41AU', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-CJ3Z4WP32', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-UJBLRQKXA', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-T78R8EBGH', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-DDIH55GNZ', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-F1SSD4NWF', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-4BE55FKRD', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-BG2DFBL46', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-MKVMNR7W4', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-C3Z4JUGU5', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-NVP8EEEGQ', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-MQFWP2LU7', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-BH873JG6H', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-3D76651SM', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-KZ7V6KWMP', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-CD8NEJFJN', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-PWXE9L30H', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-1RT95F5LR', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-Q7CEEROE5', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-Q08APOAEG', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-NNKREGLXE', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-YQEG33MKX', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-VRD9G7H5K', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-68R86GQ1G', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-BXZUKQL2M', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-QM13UD73C', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-I7OOGJLNS', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-GXDBO1IQJ', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-JJZPRFMWN', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-DBTDFITGW', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-VYHL6ASIJ', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-F61NNU332', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-6Z9H72KMC', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-WVOONDMA9', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-CXTSTBXN3', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-CSMZLC921', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-3FTBSARZJ', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ESHGKBXLY', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-E0YLXW5H4', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-3QFI6UMWE', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-23VOWHO88', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-02Q9U5QCH', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-3POMNSMB0', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-NTT8OWUFQ', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-MT5XEHJWX', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ILDOY0PCQ', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-MN9HU4DGO', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-1YOPU7GLL', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ZC5BM5MYB', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-UD3FXK3I9', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-LMDJOV52Y', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-N45X16BSL', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-1CBY3Z7QC', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-F0D3AO9E6', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-YQA8GUOD1', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-2EE999233', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-D6GD5WT2Y', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-7DFBMLTMY', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-J6TJKC6VJ', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-2AWQ3ZRF4', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-4KOVIF5W3', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-3489SXI1U', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-F37VKUHVE', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-GHBMAVCE4', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-W64U46547', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-1GUJV1MGQ', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-M13IOZVI9', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-TX7EVZN1Z', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-2PTS2ML8J', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-VTG83RVX7', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-2IOE6BR66', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-I68XZMR23', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-Q940UN6MU', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-8Y9NFR0N0', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-MYP341DZ8', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-WJ68VGKAZ', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-819MSDR9H', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-27CGND4VG', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-YYDOD47BF', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-YU7F6J8G5', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-HMY16WTCA', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-FPWEBRLG2', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-FGE79X0DE', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-551LMZ84R', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-UWMBDCTX4', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-BNHQXW9HY', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-WB0P5LCN6', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-Z4P9E1SVG', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-UVW2G9IRN', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-OO93WVLB0', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-NTRIK82TG', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-8TXV42S74', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-Z74LKL50G', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-QQEXNIPTR', false, 1),
|
||||||
|
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-WGK2VD34L', false, 1)
|
||||||
|
ON CONFLICT DO NOTHING;
|
||||||
81
backend/migrations/095_proxy_auto_retry.sql
Normal file
81
backend/migrations/095_proxy_auto_retry.sql
Normal file
@@ -0,0 +1,81 @@
|
|||||||
|
-- Migration: Auto-retry failed proxies after cooldown period
|
||||||
|
-- Proxies that fail will be retried after a configurable interval
|
||||||
|
|
||||||
|
-- Add last_failed_at column to track when proxy last failed
|
||||||
|
ALTER TABLE proxies ADD COLUMN IF NOT EXISTS last_failed_at TIMESTAMP;
|
||||||
|
|
||||||
|
-- Add retry settings
|
||||||
|
INSERT INTO settings (key, value, description)
|
||||||
|
VALUES
|
||||||
|
('proxy_retry_interval_hours', '4', 'Hours to wait before retrying a failed proxy'),
|
||||||
|
('proxy_max_failures_before_permanent', '10', 'Max failures before proxy is permanently disabled')
|
||||||
|
ON CONFLICT (key) DO NOTHING;
|
||||||
|
|
||||||
|
-- Create function to get eligible proxies (active OR failed but past retry interval).
--
-- Returns the ids of proxies that may be used right now:
--   * every proxy with active = true, and
--   * inactive proxies whose last failure is older than the configured retry
--     interval and whose failure_count is below the configured failure limit.
-- Rows are ordered: active proxies first, then fewest failures, then random.
CREATE OR REPLACE FUNCTION get_eligible_proxy_ids()
RETURNS TABLE(proxy_id INT) AS $$
DECLARE
    retry_hours INT;
    max_failures INT;
BEGIN
    -- Read settings through scalar subqueries wrapped in COALESCE.
    -- A plain "SELECT COALESCE(value::int, 4) INTO ..." yields NULL when the
    -- settings row does not exist (no row => COALESCE is never evaluated),
    -- which would silently make every failed proxy ineligible.
    retry_hours := COALESCE(
        (SELECT s.value::int FROM settings s WHERE s.key = 'proxy_retry_interval_hours'),
        4
    );

    -- Same limit that auto_reenable_proxies() uses, instead of a hard-coded 10.
    max_failures := COALESCE(
        (SELECT s.value::int FROM settings s WHERE s.key = 'proxy_max_failures_before_permanent'),
        10
    );

    RETURN QUERY
    SELECT p.id
    FROM proxies p
    WHERE p.active = true
       OR (
            p.active = false
            AND p.last_failed_at IS NOT NULL
            AND p.last_failed_at < NOW() - (retry_hours || ' hours')::interval
            AND p.failure_count < max_failures  -- Don't retry if too many failures
       )
    ORDER BY
        p.active DESC,         -- Prefer active proxies
        p.failure_count ASC,   -- Then prefer proxies with fewer failures
        RANDOM();
END;
$$ LANGUAGE plpgsql;
|
||||||
|
|
||||||
|
-- Create function that re-enables proxies past their retry window.
-- NOTE: this migration only defines the function; nothing here schedules it.
-- It must be called periodically (e.g. hourly) by an external scheduler.
--
-- Marks a proxy active again when it is currently inactive, has a recorded
-- last_failed_at older than the retry interval, and has fewer failures than
-- the permanent-failure limit.
-- Returns the number of proxies that were re-enabled.
CREATE OR REPLACE FUNCTION auto_reenable_proxies()
RETURNS INT AS $$
DECLARE
    retry_hours INT;
    max_failures INT;
    reenabled_count INT;
BEGIN
    -- Get settings. Scalar subquery + COALESCE so the default also applies
    -- when the settings row is missing (SELECT ... INTO would leave NULL,
    -- and a NULL interval/limit makes the UPDATE below match nothing).
    retry_hours := COALESCE(
        (SELECT s.value::int FROM settings s WHERE s.key = 'proxy_retry_interval_hours'),
        4
    );

    max_failures := COALESCE(
        (SELECT s.value::int FROM settings s WHERE s.key = 'proxy_max_failures_before_permanent'),
        10
    );

    -- Re-enable proxies that have cooled down
    UPDATE proxies
    SET active = true,
        updated_at = NOW()
    WHERE active = false
      AND last_failed_at IS NOT NULL
      AND last_failed_at < NOW() - (retry_hours || ' hours')::interval
      AND failure_count < max_failures;

    GET DIAGNOSTICS reenabled_count = ROW_COUNT;

    IF reenabled_count > 0 THEN
        RAISE NOTICE 'Auto-reenabled % proxies after % hour cooldown', reenabled_count, retry_hours;
    END IF;

    RETURN reenabled_count;
END;
$$ LANGUAGE plpgsql;
|
||||||
|
|
||||||
|
-- Add index for efficient querying
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_proxies_retry
|
||||||
|
ON proxies(active, last_failed_at, failure_count);
|
||||||
|
|
||||||
|
COMMENT ON COLUMN proxies.last_failed_at IS 'Timestamp of last failure - used for auto-retry logic';
|
||||||
|
COMMENT ON FUNCTION auto_reenable_proxies() IS 'Call periodically to re-enable failed proxies that have cooled down';
|
||||||
20
backend/migrations/096_product_search_indexes.sql
Normal file
20
backend/migrations/096_product_search_indexes.sql
Normal file
@@ -0,0 +1,20 @@
|
|||||||
|
-- Migration: Add trigram indexes for fast ILIKE product searches
|
||||||
|
-- Enables fast searches on name_raw, brand_name_raw, and description
|
||||||
|
|
||||||
|
-- Enable pg_trgm extension if not already enabled
|
||||||
|
CREATE EXTENSION IF NOT EXISTS pg_trgm;
|
||||||
|
|
||||||
|
-- Create GIN trigram indexes for fast ILIKE searches
|
||||||
|
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_store_products_name_trgm
|
||||||
|
ON store_products USING gin (name_raw gin_trgm_ops);
|
||||||
|
|
||||||
|
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_store_products_brand_name_trgm
|
||||||
|
ON store_products USING gin (brand_name_raw gin_trgm_ops);
|
||||||
|
|
||||||
|
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_store_products_description_trgm
|
||||||
|
ON store_products USING gin (description gin_trgm_ops);
|
||||||
|
|
||||||
|
-- Add comment
|
||||||
|
COMMENT ON INDEX idx_store_products_name_trgm IS 'Trigram index for fast ILIKE searches on product name';
|
||||||
|
COMMENT ON INDEX idx_store_products_brand_name_trgm IS 'Trigram index for fast ILIKE searches on brand name';
|
||||||
|
COMMENT ON INDEX idx_store_products_description_trgm IS 'Trigram index for fast ILIKE searches on description';
|
||||||
11
backend/migrations/097_worker_tasks_worker_id_index.sql
Normal file
11
backend/migrations/097_worker_tasks_worker_id_index.sql
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
-- Migration: Add indexes for dashboard performance
|
||||||
|
-- Speeds up the tasks listing query with ORDER BY and JOIN
|
||||||
|
|
||||||
|
-- Index for JOIN with worker_registry
|
||||||
|
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_worker_tasks_worker_id
|
||||||
|
ON worker_tasks(worker_id)
|
||||||
|
WHERE worker_id IS NOT NULL;
|
||||||
|
|
||||||
|
-- Index for ORDER BY created_at DESC (dashboard listing)
|
||||||
|
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_worker_tasks_created_at_desc
|
||||||
|
ON worker_tasks(created_at DESC);
|
||||||
13
backend/migrations/098_dispensary_stage_tracking.sql
Normal file
13
backend/migrations/098_dispensary_stage_tracking.sql
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
-- Migration: Add stage tracking columns to dispensaries table
|
||||||
|
-- Required for stage checkpoint feature in task handlers
|
||||||
|
|
||||||
|
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS consecutive_successes INTEGER DEFAULT 0;
|
||||||
|
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS consecutive_failures INTEGER DEFAULT 0;
|
||||||
|
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS last_successful_crawl_at TIMESTAMPTZ;
|
||||||
|
|
||||||
|
-- Indexes for finding stores by status
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_dispensaries_consecutive_successes
|
||||||
|
ON dispensaries(consecutive_successes) WHERE consecutive_successes > 0;
|
||||||
|
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_dispensaries_consecutive_failures
|
||||||
|
ON dispensaries(consecutive_failures) WHERE consecutive_failures > 0;
|
||||||
68
backend/migrations/099_working_hours.sql
Normal file
68
backend/migrations/099_working_hours.sql
Normal file
@@ -0,0 +1,68 @@
|
|||||||
|
-- Migration: 099_working_hours.sql
|
||||||
|
-- Description: Working hours profiles for natural traffic pattern simulation
|
||||||
|
-- Created: 2024-12-13
|
||||||
|
|
||||||
|
-- Working hours table: defines hourly activity weights to mimic natural traffic
|
||||||
|
CREATE TABLE IF NOT EXISTS working_hours (
|
||||||
|
id SERIAL PRIMARY KEY,
|
||||||
|
name VARCHAR(50) UNIQUE NOT NULL,
|
||||||
|
description TEXT,
|
||||||
|
|
||||||
|
-- Hour weights: {"0": 15, "1": 5, ..., "18": 100, ...}
|
||||||
|
-- Value = percent chance to trigger activity that hour (0-100)
|
||||||
|
hour_weights JSONB NOT NULL,
|
||||||
|
|
||||||
|
-- Day-of-week multipliers (0=Sunday, 6=Saturday)
|
||||||
|
-- Optional adjustment for weekend vs weekday patterns
|
||||||
|
dow_weights JSONB DEFAULT '{"0": 90, "1": 100, "2": 100, "3": 100, "4": 100, "5": 110, "6": 95}',
|
||||||
|
|
||||||
|
timezone VARCHAR(50) DEFAULT 'America/Phoenix',
|
||||||
|
enabled BOOLEAN DEFAULT true,
|
||||||
|
created_at TIMESTAMPTZ DEFAULT NOW(),
|
||||||
|
updated_at TIMESTAMPTZ DEFAULT NOW()
|
||||||
|
);
|
||||||
|
|
||||||
|
-- Seed: Natural traffic pattern based on internet usage research
|
||||||
|
-- Optimized for cannabis dispensary browsing (lunch + after-work peaks)
|
||||||
|
INSERT INTO working_hours (name, description, timezone, hour_weights) VALUES (
|
||||||
|
'natural_traffic',
|
||||||
|
'Mimics natural user browsing patterns - peaks at lunch and 5-7 PM',
|
||||||
|
'America/Phoenix',
|
||||||
|
'{
|
||||||
|
"0": 15,
|
||||||
|
"1": 5,
|
||||||
|
"2": 5,
|
||||||
|
"3": 5,
|
||||||
|
"4": 5,
|
||||||
|
"5": 10,
|
||||||
|
"6": 20,
|
||||||
|
"7": 30,
|
||||||
|
"8": 35,
|
||||||
|
"9": 45,
|
||||||
|
"10": 50,
|
||||||
|
"11": 60,
|
||||||
|
"12": 75,
|
||||||
|
"13": 65,
|
||||||
|
"14": 60,
|
||||||
|
"15": 70,
|
||||||
|
"16": 80,
|
||||||
|
"17": 95,
|
||||||
|
"18": 100,
|
||||||
|
"19": 100,
|
||||||
|
"20": 90,
|
||||||
|
"21": 70,
|
||||||
|
"22": 45,
|
||||||
|
"23": 25
|
||||||
|
}'::jsonb
|
||||||
|
) ON CONFLICT (name) DO UPDATE SET
|
||||||
|
hour_weights = EXCLUDED.hour_weights,
|
||||||
|
description = EXCLUDED.description,
|
||||||
|
updated_at = NOW();
|
||||||
|
|
||||||
|
-- Index for quick lookups
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_working_hours_name ON working_hours(name);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_working_hours_enabled ON working_hours(enabled);
|
||||||
|
|
||||||
|
COMMENT ON TABLE working_hours IS 'Activity profiles for natural traffic simulation. Hour weights are percent chance (0-100) to trigger activity.';
|
||||||
|
COMMENT ON COLUMN working_hours.hour_weights IS 'JSON object mapping hour (0-23) to percent chance (0-100). 100 = always run, 0 = never run.';
|
||||||
|
COMMENT ON COLUMN working_hours.dow_weights IS 'Optional day-of-week multipliers. 0=Sunday. Applied as (hour_weight * dow_weight / 100).';
|
||||||
19
backend/migrations/100_worker_timezone.sql
Normal file
19
backend/migrations/100_worker_timezone.sql
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
-- Migration: 100_worker_timezone.sql
|
||||||
|
-- Description: Add timezone column to worker_registry for working hours support
|
||||||
|
-- Created: 2024-12-13
|
||||||
|
|
||||||
|
-- Add timezone column to worker_registry
|
||||||
|
-- Populated from preflight IP geolocation (e.g., 'America/New_York')
|
||||||
|
ALTER TABLE worker_registry
|
||||||
|
ADD COLUMN IF NOT EXISTS timezone VARCHAR(50);
|
||||||
|
|
||||||
|
-- Add working_hours_id to link worker to a specific working hours profile
|
||||||
|
-- NULL means use default 'natural_traffic' profile
|
||||||
|
ALTER TABLE worker_registry
|
||||||
|
ADD COLUMN IF NOT EXISTS working_hours_id INTEGER REFERENCES working_hours(id);
|
||||||
|
|
||||||
|
-- Index for workers by timezone (useful for capacity planning)
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_worker_registry_timezone ON worker_registry(timezone);
|
||||||
|
|
||||||
|
COMMENT ON COLUMN worker_registry.timezone IS 'IANA timezone from preflight IP geolocation (e.g., America/New_York)';
|
||||||
|
COMMENT ON COLUMN worker_registry.working_hours_id IS 'Reference to working_hours profile. NULL uses default natural_traffic.';
|
||||||
78
backend/migrations/101_worker_preflight_timezone.sql
Normal file
78
backend/migrations/101_worker_preflight_timezone.sql
Normal file
@@ -0,0 +1,78 @@
|
|||||||
|
-- Migration: 101_worker_preflight_timezone.sql
|
||||||
|
-- Description: Update update_worker_preflight to extract timezone from fingerprint
|
||||||
|
-- Created: 2024-12-13
|
||||||
|
|
||||||
|
-- Records the result of one preflight check ('curl' or 'http') for a worker
-- and then recomputes the worker's overall preflight_status.
--
-- Parameters:
--   p_worker_id    worker_registry.worker_id of the row to update
--   p_transport    'curl' or 'http'; any other value skips the per-transport update
--   p_status       status stored for that transport
--   p_ip           IP stored in curl_ip / http_ip
--   p_response_ms  response time stored for that transport
--   p_error        error text stored for that transport
--   p_fingerprint  http only: stored in fingerprint_data when not NULL; its
--                  'detectedTimezone' key, when present, is stored in timezone
CREATE OR REPLACE FUNCTION public.update_worker_preflight(
    p_worker_id character varying,
    p_transport character varying,
    p_status character varying,
    p_ip character varying DEFAULT NULL,
    p_response_ms integer DEFAULT NULL,
    p_error text DEFAULT NULL,
    p_fingerprint jsonb DEFAULT NULL
)
RETURNS void
LANGUAGE plpgsql
AS $function$
DECLARE
    v_curl    VARCHAR(20);
    v_http    VARCHAR(20);
    v_overall VARCHAR(20);
    v_tz      VARCHAR(50);
BEGIN
    CASE p_transport
        WHEN 'curl' THEN
            UPDATE worker_registry
            SET preflight_curl_status = p_status,
                preflight_curl_at     = NOW(),
                preflight_curl_ms     = p_response_ms,
                preflight_curl_error  = p_error,
                curl_ip               = p_ip,
                updated_at            = NOW()
            WHERE worker_id = p_worker_id;

        WHEN 'http' THEN
            -- Pull the timezone out of the fingerprint JSON (NULL if absent)
            v_tz := p_fingerprint->>'detectedTimezone';

            UPDATE worker_registry
            SET preflight_http_status = p_status,
                preflight_http_at     = NOW(),
                preflight_http_ms     = p_response_ms,
                preflight_http_error  = p_error,
                http_ip               = p_ip,
                -- Keep the previous values when nothing new was supplied
                fingerprint_data      = COALESCE(p_fingerprint, fingerprint_data),
                timezone              = COALESCE(v_tz, timezone),
                updated_at            = NOW()
            WHERE worker_id = p_worker_id;

        ELSE
            -- Unknown or NULL transport: no per-transport columns to touch
            NULL;
    END CASE;

    -- Re-read both transport statuses as they are now stored
    SELECT wr.preflight_curl_status, wr.preflight_http_status
    INTO v_curl, v_http
    FROM worker_registry wr
    WHERE wr.worker_id = p_worker_id;

    -- Overall status: both passed > one passed > any failed > pending
    v_overall := CASE
        WHEN v_curl = 'passed' AND v_http = 'passed' THEN 'passed'
        WHEN v_curl = 'passed' OR  v_http = 'passed' THEN 'partial'
        WHEN v_curl = 'failed' OR  v_http = 'failed' THEN 'failed'
        ELSE 'pending'
    END;

    UPDATE worker_registry
    SET preflight_status = v_overall,
        preflight_at     = NOW()
    WHERE worker_id = p_worker_id;
END;
$function$;
|
||||||
|
|
||||||
|
COMMENT ON FUNCTION update_worker_preflight(varchar, varchar, varchar, varchar, integer, text, jsonb)
|
||||||
|
IS 'Updates worker preflight status and extracts timezone from fingerprint for working hours';
|
||||||
114
backend/migrations/102_check_working_hours.sql
Normal file
114
backend/migrations/102_check_working_hours.sql
Normal file
@@ -0,0 +1,114 @@
|
|||||||
|
-- Migration: 102_check_working_hours.sql
|
||||||
|
-- Description: Function to check if worker should be available based on working hours
|
||||||
|
-- Created: 2024-12-13
|
||||||
|
|
||||||
|
-- Function to check if a worker should be available for work.
--
-- Looks up the worker's timezone, takes the current hour and day of week in
-- that timezone, reads the matching weights from the named working_hours
-- profile and rolls a random number 0-99 against the combined weight.
--
-- Parameters:
--   p_worker_id     worker_registry.worker_id whose timezone is used
--   p_profile_name  working_hours.name of the profile (default 'natural_traffic')
--
-- Returns exactly one row:
--   is_available    TRUE if the roll is below the final weight, or if the
--                   profile is missing/disabled
--   current_hour    hour (0-23) in the worker's timezone
--   hour_weight     final weight = hour weight * day-of-week weight / 100
--                   (100 when the profile is missing/disabled)
--   worker_timezone timezone actually used for the calculation
--   roll            the random roll (0 when the profile is missing/disabled)
--   reason          human-readable explanation
CREATE OR REPLACE FUNCTION check_working_hours(
    p_worker_id VARCHAR,
    p_profile_name VARCHAR DEFAULT 'natural_traffic'
)
RETURNS TABLE (
    is_available BOOLEAN,
    current_hour INTEGER,
    hour_weight INTEGER,
    worker_timezone VARCHAR,
    roll INTEGER,
    reason TEXT
)
LANGUAGE plpgsql
AS $function$
DECLARE
    v_timezone VARCHAR(50);
    v_local_now TIMESTAMP;
    v_hour INTEGER;
    v_weight INTEGER;
    v_dow INTEGER;
    v_dow_weight INTEGER;
    v_final_weight INTEGER;
    v_roll INTEGER;
    v_hour_weights JSONB;
    v_dow_weights JSONB;
BEGIN
    -- Get worker's timezone (from preflight)
    SELECT wr.timezone INTO v_timezone
    FROM worker_registry wr
    WHERE wr.worker_id = p_worker_id;

    -- Default to America/Phoenix if no timezone set
    v_timezone := COALESCE(v_timezone, 'America/Phoenix');

    -- Local wall-clock time in the worker's timezone. The stored timezone is
    -- taken from fingerprint data, so it may not be a name PostgreSQL
    -- recognizes; fall back to the default instead of failing the check.
    BEGIN
        v_local_now := NOW() AT TIME ZONE v_timezone;
    EXCEPTION WHEN invalid_parameter_value THEN
        v_timezone := 'America/Phoenix';
        v_local_now := NOW() AT TIME ZONE v_timezone;
    END;

    -- Current hour and day of week (0=Sunday) in that timezone
    v_hour := EXTRACT(HOUR FROM v_local_now)::INTEGER;
    v_dow := EXTRACT(DOW FROM v_local_now)::INTEGER;

    -- Get working hours profile (enabled profiles only)
    SELECT wh.hour_weights, wh.dow_weights
    INTO v_hour_weights, v_dow_weights
    FROM working_hours wh
    WHERE wh.name = p_profile_name AND wh.enabled = true;

    -- If profile not found or disabled, always available
    IF v_hour_weights IS NULL THEN
        RETURN QUERY SELECT
            TRUE::BOOLEAN,
            v_hour,
            100::INTEGER,
            v_timezone,
            0::INTEGER,
            'Profile not found or disabled - defaulting to available'::TEXT;
        RETURN;
    END IF;

    -- Get hour weight (default to 50 if hour not specified)
    v_weight := COALESCE((v_hour_weights->>v_hour::TEXT)::INTEGER, 50);

    -- Get day-of-week weight (default to 100)
    v_dow_weight := COALESCE((v_dow_weights->>v_dow::TEXT)::INTEGER, 100);

    -- Calculate final weight (hour_weight * dow_weight / 100), integer division
    v_final_weight := (v_weight * v_dow_weight / 100);

    -- Roll the dice (0-99)
    v_roll := floor(random() * 100)::INTEGER;

    -- Return result
    RETURN QUERY SELECT
        (v_roll < v_final_weight)::BOOLEAN AS is_available,
        v_hour AS current_hour,
        v_final_weight AS hour_weight,
        v_timezone AS worker_timezone,
        v_roll AS roll,
        CASE
            WHEN v_roll < v_final_weight THEN
                format('Available: rolled %s < %s%% threshold', v_roll, v_final_weight)
            ELSE
                format('Sleeping: rolled %s >= %s%% threshold', v_roll, v_final_weight)
        END AS reason;
END;
$function$;
|
||||||
|
|
||||||
|
-- Boolean-only wrapper around check_working_hours().
-- Returns that function's is_available value for the given worker and
-- profile, or TRUE when no value is produced.
CREATE OR REPLACE FUNCTION is_worker_available(
    p_worker_id VARCHAR,
    p_profile_name VARCHAR DEFAULT 'natural_traffic'
)
RETURNS BOOLEAN
LANGUAGE plpgsql
AS $function$
BEGIN
    RETURN COALESCE(
        (
            SELECT cwh.is_available
            FROM check_working_hours(p_worker_id, p_profile_name) AS cwh
            LIMIT 1
        ),
        TRUE
    );
END;
$function$;
|
||||||
|
|
||||||
|
COMMENT ON FUNCTION check_working_hours(VARCHAR, VARCHAR) IS
|
||||||
|
'Check if worker should be available based on working hours profile. Returns detailed info.';
|
||||||
|
COMMENT ON FUNCTION is_worker_available(VARCHAR, VARCHAR) IS
|
||||||
|
'Simple boolean check if worker passes working hours probability roll.';
|
||||||
12
backend/migrations/103_schedule_dispensary_id.sql
Normal file
12
backend/migrations/103_schedule_dispensary_id.sql
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
-- Migration: 103_schedule_dispensary_id.sql
|
||||||
|
-- Description: Add dispensary_id to task_schedules for per-store schedules
|
||||||
|
-- Created: 2025-12-13
|
||||||
|
|
||||||
|
-- Add dispensary_id column for single-store schedules
|
||||||
|
ALTER TABLE task_schedules
|
||||||
|
ADD COLUMN IF NOT EXISTS dispensary_id INTEGER REFERENCES dispensaries(id);
|
||||||
|
|
||||||
|
-- Index for quick lookups
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_task_schedules_dispensary_id ON task_schedules(dispensary_id);
|
||||||
|
|
||||||
|
COMMENT ON COLUMN task_schedules.dispensary_id IS 'For single-store schedules. If set, only this store is refreshed. If NULL, uses state_code for all stores in state.';
|
||||||
25
backend/migrations/104_task_source_tracking.sql
Normal file
25
backend/migrations/104_task_source_tracking.sql
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
-- Migration 104: Add source tracking to worker_tasks
|
||||||
|
-- Purpose: Track WHERE tasks are created from (schedule vs API endpoint)
|
||||||
|
--
|
||||||
|
-- All automated task creation should be visible in task_schedules.
|
||||||
|
-- This column helps identify "phantom" tasks created outside the schedule system.
|
||||||
|
|
||||||
|
-- Add source column to worker_tasks
|
||||||
|
ALTER TABLE worker_tasks
|
||||||
|
ADD COLUMN IF NOT EXISTS source VARCHAR(100);
|
||||||
|
|
||||||
|
-- Add source_schedule_id column (references task_schedules.id when the task was created from a schedule)
|
||||||
|
ALTER TABLE worker_tasks
|
||||||
|
ADD COLUMN IF NOT EXISTS source_schedule_id INTEGER REFERENCES task_schedules(id);
|
||||||
|
|
||||||
|
-- Add request metadata (IP, user agent) for debugging
|
||||||
|
ALTER TABLE worker_tasks
|
||||||
|
ADD COLUMN IF NOT EXISTS source_metadata JSONB;
|
||||||
|
|
||||||
|
-- Create index for querying by source
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_worker_tasks_source ON worker_tasks(source);
|
||||||
|
|
||||||
|
-- Comment explaining source values
|
||||||
|
COMMENT ON COLUMN worker_tasks.source IS 'Task creation source: schedule, api_run_now, api_crawl_state, api_batch_staggered, api_batch_az_stores, task_chain, manual';
|
||||||
|
COMMENT ON COLUMN worker_tasks.source_schedule_id IS 'ID of the schedule that created this task (if source=schedule or source=api_run_now)';
|
||||||
|
COMMENT ON COLUMN worker_tasks.source_metadata IS 'Request metadata: {ip, user_agent, endpoint, timestamp}';
|
||||||
25
backend/migrations/105_dashboard_performance_indexes.sql
Normal file
25
backend/migrations/105_dashboard_performance_indexes.sql
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
-- Migration 105: Add indexes for dashboard performance
|
||||||
|
-- Purpose: Speed up the /dashboard and /national/summary endpoints
|
||||||
|
--
|
||||||
|
-- These queries were identified as slow:
|
||||||
|
-- 1. COUNT(*) FROM store_product_snapshots WHERE captured_at >= NOW() - INTERVAL '24 hours'
|
||||||
|
-- 2. National summary aggregate queries
|
||||||
|
|
||||||
|
-- Index for snapshot counts by time (used in dashboard)
|
||||||
|
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_store_product_snapshots_captured_at
|
||||||
|
ON store_product_snapshots(captured_at DESC);
|
||||||
|
|
||||||
|
-- Index for crawl traces by time and success (used in dashboard)
|
||||||
|
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_crawl_traces_started_success
|
||||||
|
ON crawl_orchestration_traces(started_at DESC, success);
|
||||||
|
|
||||||
|
-- Partial index for recent failed crawls (faster for dashboard alerts)
|
||||||
|
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_crawl_traces_recent_failures
|
||||||
|
ON crawl_orchestration_traces(started_at DESC)
|
||||||
|
WHERE success = false;
|
||||||
|
|
||||||
|
-- Composite index for store_products aggregations by dispensary
|
||||||
|
-- Helps with national summary state metrics query
|
||||||
|
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_store_products_dispensary_brand
|
||||||
|
ON store_products(dispensary_id, brand_name_raw)
|
||||||
|
WHERE brand_name_raw IS NOT NULL;
|
||||||
10
backend/migrations/106_rename_store_discovery_schedule.sql
Normal file
10
backend/migrations/106_rename_store_discovery_schedule.sql
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
-- Migration: 106_rename_store_discovery_schedule.sql
|
||||||
|
-- Description: Rename store_discovery_dutchie to 'Store Discovery'
|
||||||
|
-- Created: 2025-12-13
|
||||||
|
|
||||||
|
-- Update the schedule name for better display
|
||||||
|
-- The platform='dutchie' field is preserved for badge display in UI
|
||||||
|
UPDATE task_schedules
|
||||||
|
SET name = 'Store Discovery',
|
||||||
|
updated_at = NOW()
|
||||||
|
WHERE name = 'store_discovery_dutchie';
|
||||||
23
backend/migrations/107_proxy_tracking.sql
Normal file
23
backend/migrations/107_proxy_tracking.sql
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
-- Migration: 107_proxy_tracking.sql
|
||||||
|
-- Description: Add proxy tracking columns to worker_tasks for geo-targeting visibility
|
||||||
|
-- Created: 2025-12-13
|
||||||
|
|
||||||
|
-- Proxy tracking columns on worker_tasks, added in a single ALTER statement.
-- Each column is skipped if it already exists.
ALTER TABLE worker_tasks
    ADD COLUMN IF NOT EXISTS proxy_ip     VARCHAR(45),
    ADD COLUMN IF NOT EXISTS proxy_geo    VARCHAR(100),
    ADD COLUMN IF NOT EXISTS proxy_source VARCHAR(10);
|
||||||
|
|
||||||
|
-- Comments
|
||||||
|
COMMENT ON COLUMN worker_tasks.proxy_ip IS 'IP address of proxy used for this task';
|
||||||
|
COMMENT ON COLUMN worker_tasks.proxy_geo IS 'Geo target used (e.g., "arizona", "phoenix, arizona")';
|
||||||
|
COMMENT ON COLUMN worker_tasks.proxy_source IS 'Source of proxy: "api" (Evomi dynamic) or "static" (fallback table)';
|
||||||
|
|
||||||
|
-- Index for proxy analysis
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_worker_tasks_proxy_ip
|
||||||
|
ON worker_tasks(proxy_ip)
|
||||||
|
WHERE proxy_ip IS NOT NULL;
|
||||||
231
backend/migrations/108_worker_geo_sessions.sql
Normal file
231
backend/migrations/108_worker_geo_sessions.sql
Normal file
@@ -0,0 +1,231 @@
|
|||||||
|
-- Migration: 108_worker_geo_sessions.sql
|
||||||
|
-- Description: Add geo session tracking to worker_registry for state-based task assignment
|
||||||
|
-- Created: 2025-12-13
|
||||||
|
|
||||||
|
-- Geo session columns on worker_registry, added in a single ALTER statement.
-- A worker qualifies with a geo (state/city) and then only claims tasks
-- matching that geo. Each column is skipped if it already exists.
ALTER TABLE worker_registry
    ADD COLUMN IF NOT EXISTS current_state          VARCHAR(2),
    ADD COLUMN IF NOT EXISTS current_city           VARCHAR(100),
    ADD COLUMN IF NOT EXISTS geo_session_started_at TIMESTAMPTZ,
    ADD COLUMN IF NOT EXISTS session_task_count     INT DEFAULT 0,
    ADD COLUMN IF NOT EXISTS session_max_tasks      INT DEFAULT 7,
    ADD COLUMN IF NOT EXISTS proxy_geo              VARCHAR(100);
|
||||||
|
|
||||||
|
-- Comments
|
||||||
|
COMMENT ON COLUMN worker_registry.current_state IS 'Worker''s current geo assignment (US state code, e.g., AZ)';
|
||||||
|
COMMENT ON COLUMN worker_registry.current_city IS 'Worker''s current city assignment (optional, e.g., phoenix)';
|
||||||
|
COMMENT ON COLUMN worker_registry.geo_session_started_at IS 'When worker''s current geo session started';
|
||||||
|
COMMENT ON COLUMN worker_registry.session_task_count IS 'Number of tasks completed in current geo session';
|
||||||
|
COMMENT ON COLUMN worker_registry.session_max_tasks IS 'Max tasks per geo session before re-qualification (default 7)';
|
||||||
|
COMMENT ON COLUMN worker_registry.proxy_geo IS 'Geo target string used for proxy (e.g., "arizona" or "phoenix, arizona")';
|
||||||
|
|
||||||
|
-- Index for finding workers by state
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_worker_registry_current_state
|
||||||
|
ON worker_registry(current_state)
|
||||||
|
WHERE current_state IS NOT NULL;
|
||||||
|
|
||||||
|
-- ============================================================
-- UPDATED claim_task FUNCTION
-- Now filters by worker's geo session state
--
-- Atomically claims one pending task of the given role for the worker.
--
-- Parameters:
--   p_role         task role to claim
--   p_worker_id    worker_registry.worker_id of the claiming worker
--   p_curl_passed  TRUE if the worker may take tasks with method = 'curl'
--   p_http_passed  TRUE if the worker may take tasks with method = 'http'
--
-- Returns the claimed worker_tasks row, or NULL when the worker has no geo
-- state, its geo session is older than 60 minutes, its session task budget
-- is used up, or no matching task is available.
-- ============================================================
CREATE OR REPLACE FUNCTION claim_task(
    p_role VARCHAR(50),
    p_worker_id VARCHAR(100),
    p_curl_passed BOOLEAN DEFAULT TRUE,
    p_http_passed BOOLEAN DEFAULT FALSE
) RETURNS worker_tasks AS $$
DECLARE
    claimed_task worker_tasks;
    worker_state VARCHAR(2);
    session_valid BOOLEAN;
    session_tasks INT;
    max_tasks INT;
BEGIN
    -- Get worker's current geo session info
    SELECT
        wr.current_state,
        wr.session_task_count,
        wr.session_max_tasks,
        (wr.geo_session_started_at IS NOT NULL
            AND wr.geo_session_started_at > NOW() - INTERVAL '60 minutes')
    INTO worker_state, session_tasks, max_tasks, session_valid
    FROM worker_registry wr
    WHERE wr.worker_id = p_worker_id;

    -- If no valid geo session, or session exhausted, worker can't claim tasks.
    -- Worker must re-qualify first. (An unknown worker leaves worker_state NULL.)
    IF worker_state IS NULL
       OR NOT session_valid
       OR COALESCE(session_tasks, 0) >= COALESCE(max_tasks, 7) THEN
        RETURN NULL;
    END IF;

    -- Claim task matching worker's state
    UPDATE worker_tasks
    SET
        status = 'claimed',
        worker_id = p_worker_id,
        claimed_at = NOW(),
        updated_at = NOW()
    WHERE id = (
        SELECT wt.id
        FROM worker_tasks wt
        -- Inner join: only tasks that reference a dispensary can be claimed
        JOIN dispensaries d ON wt.dispensary_id = d.id
        WHERE wt.role = p_role
          AND wt.status = 'pending'
          AND (wt.scheduled_for IS NULL OR wt.scheduled_for <= NOW())
          -- GEO FILTER: Task's dispensary must match worker's state
          AND d.state = worker_state
          -- Method compatibility: worker must have passed the required preflight
          AND (
              wt.method IS NULL  -- No preference, any worker can claim
              OR (wt.method = 'curl' AND p_curl_passed = TRUE)
              OR (wt.method = 'http' AND p_http_passed = TRUE)
          )
          -- Exclude stores that already have an active task
          AND NOT EXISTS (
              SELECT 1
              FROM worker_tasks busy
              WHERE busy.dispensary_id = wt.dispensary_id
                AND busy.status IN ('claimed', 'running')
          )
        ORDER BY wt.priority DESC, wt.created_at ASC
        LIMIT 1
        -- Lock only the task row. A bare FOR UPDATE on a join also locks the
        -- joined dispensaries row, so a task would be skipped (and the store
        -- row blocked) whenever another transaction holds that dispensary.
        FOR UPDATE OF wt SKIP LOCKED
    )
    RETURNING * INTO claimed_task;

    -- If task claimed, increment session task count
    -- Note: Use claimed_task.id IS NOT NULL (not claimed_task IS NOT NULL)
    -- PostgreSQL composite type NULL check quirk
    IF claimed_task.id IS NOT NULL THEN
        UPDATE worker_registry
        SET session_task_count = COALESCE(session_task_count, 0) + 1
        WHERE worker_id = p_worker_id;
    END IF;

    RETURN claimed_task;
END;
$$ LANGUAGE plpgsql;
|
||||||
|
|
||||||
|
-- ============================================================
-- FUNCTION: assign_worker_geo
-- Assigns a geo session to a worker based on demand
--
-- Picks the state with the most pending tasks (ties broken by the fewest
-- active workers with a geo session younger than 60 minutes), stores it on
-- the worker's registry row and starts a fresh session (task count reset).
--
-- Parameters:
--   p_worker_id  worker_registry.worker_id to assign
--
-- Returns the assigned state, or NULL if no tasks available
-- ============================================================
CREATE OR REPLACE FUNCTION assign_worker_geo(
    p_worker_id VARCHAR(100)
) RETURNS VARCHAR(2) AS $$
DECLARE
    assigned_state VARCHAR(2);
BEGIN
    -- Find state with highest demand (pending tasks) and lowest coverage (workers)
    SELECT d.state INTO assigned_state
    FROM dispensaries d
    JOIN worker_tasks wt ON wt.dispensary_id = d.id
    LEFT JOIN worker_registry wr ON wr.current_state = d.state
        AND wr.status = 'active'
        AND wr.geo_session_started_at > NOW() - INTERVAL '60 minutes'
    WHERE wt.status = 'pending'
      AND d.platform_dispensary_id IS NOT NULL
      -- A NULL-state group must not win the ordering and be mistaken for
      -- "no pending tasks anywhere" by the NULL check below
      AND d.state IS NOT NULL
    GROUP BY d.state
    ORDER BY
        -- DISTINCT: the LEFT JOIN repeats each task once per matching worker,
        -- so a plain COUNT would inflate demand for already-covered states
        COUNT(DISTINCT wt.id) DESC,        -- Most pending tasks first
        COUNT(DISTINCT wr.worker_id) ASC   -- Fewest workers second
    LIMIT 1;

    -- If no pending tasks anywhere, return NULL
    IF assigned_state IS NULL THEN
        RETURN NULL;
    END IF;

    -- Assign the state to this worker
    UPDATE worker_registry
    SET
        current_state = assigned_state,
        current_city = NULL,  -- City assigned later if available
        geo_session_started_at = NOW(),
        session_task_count = 0
    WHERE worker_id = p_worker_id;

    RETURN assigned_state;
END;
$$ LANGUAGE plpgsql;
|
||||||
|
|
||||||
|
-- ============================================================
|
||||||
|
-- FUNCTION: check_worker_geo_session
|
||||||
|
-- Returns info about worker's current geo session
|
||||||
|
-- ============================================================
|
||||||
|
-- Reports the state of a worker's geo session.
--
-- Parameters:
--   p_worker_id  worker_registry.worker_id to inspect
--
-- Returns one row per matching registry row (none if the worker is unknown):
--   current_state / current_city  the worker's stored geo assignment
--   session_valid                 TRUE when the session started less than
--                                 60 minutes ago
--   session_tasks_remaining       session_max_tasks - session_task_count,
--                                 floored at 0
--   session_minutes_remaining     minutes until the 60-minute window ends,
--                                 floored at 0
CREATE OR REPLACE FUNCTION check_worker_geo_session(
    p_worker_id VARCHAR(100)
) RETURNS TABLE (
    current_state VARCHAR(2),
    current_city VARCHAR(100),
    session_valid BOOLEAN,
    session_tasks_remaining INT,
    session_minutes_remaining INT
) AS $$
BEGIN
    RETURN QUERY
    SELECT
        reg.current_state,
        reg.current_city,
        -- NULL start time makes the comparison NULL, which counts as not valid
        COALESCE(reg.geo_session_started_at > NOW() - INTERVAL '60 minutes', FALSE)
            AS session_valid,
        GREATEST(0, reg.session_max_tasks - reg.session_task_count)
            AS session_tasks_remaining,
        GREATEST(
            0,
            EXTRACT(EPOCH FROM (reg.geo_session_started_at + INTERVAL '60 minutes' - NOW())) / 60
        )::INT
            AS session_minutes_remaining
    FROM worker_registry reg
    WHERE reg.worker_id = p_worker_id;
END;
$$ LANGUAGE plpgsql;
|
||||||
|
|
||||||
|
-- View: per-state comparison of pending work against worker capacity.
-- States come from the dispensaries table itself (no separate states table).
--
-- Columns:
--   state, state_name   state code (the code doubles as the name)
--   pending_tasks       pending worker_tasks for dispensaries in the state
--   workers_on_state    workers currently assigned to the state (see filter below)
--   remaining_capacity  sum of tasks those workers may still take this session
--   status              'no_coverage' | 'thin' | 'ok'
CREATE OR REPLACE VIEW worker_state_capacity AS
WITH known_states AS (
    -- Distinct states of dispensaries that have a platform ID
    SELECT DISTINCT disp.state AS code
    FROM dispensaries disp
    WHERE disp.state IS NOT NULL
      AND disp.platform_dispensary_id IS NOT NULL
),
pending_totals AS (
    SELECT disp.state, COUNT(*) AS count
    FROM worker_tasks task
    JOIN dispensaries disp ON task.dispensary_id = disp.id
    WHERE task.status = 'pending'
      AND disp.state IS NOT NULL
    GROUP BY disp.state
),
worker_totals AS (
    SELECT
        reg.current_state,
        COUNT(*) AS count,
        SUM(GREATEST(0, reg.session_max_tasks - reg.session_task_count)) AS remaining_capacity
    FROM worker_registry reg
    WHERE reg.status IN ('active', 'idle')  -- both active and idle workers count
      AND reg.preflight_http_status = 'passed'
      AND reg.current_state IS NOT NULL
      AND reg.geo_session_started_at > NOW() - INTERVAL '60 minutes'
    GROUP BY reg.current_state
)
SELECT
    ks.code AS state,
    ks.code AS state_name,  -- no states lookup table, so the code is the name
    COALESCE(pt.count, 0) AS pending_tasks,
    COALESCE(wt.count, 0) AS workers_on_state,
    COALESCE(wt.remaining_capacity, 0) AS remaining_capacity,
    CASE
        WHEN COALESCE(wt.remaining_capacity, 0) = 0 AND COALESCE(pt.count, 0) > 0 THEN 'no_coverage'
        WHEN COALESCE(wt.remaining_capacity, 0) < COALESCE(pt.count, 0) THEN 'thin'
        ELSE 'ok'
    END AS status
FROM known_states ks
LEFT JOIN pending_totals pt ON pt.state = ks.code
LEFT JOIN worker_totals wt ON wt.current_state = ks.code
ORDER BY COALESCE(pt.count, 0) DESC;
|
||||||
354
backend/migrations/109_worker_identity_pool.sql
Normal file
354
backend/migrations/109_worker_identity_pool.sql
Normal file
@@ -0,0 +1,354 @@
|
|||||||
|
-- Migration: 109_worker_identity_pool.sql
|
||||||
|
-- Description: Identity pool for diverse IP/fingerprint rotation
|
||||||
|
-- Created: 2025-12-14
|
||||||
|
--
|
||||||
|
-- Workers claim identities (IP + fingerprint) from pool.
|
||||||
|
-- Each identity used for 3-5 tasks, then cools down 2-3 hours.
|
||||||
|
-- This creates natural browsing patterns - same person doesn't hit 20 stores.
|
||||||
|
|
||||||
|
-- ============================================================
|
||||||
|
-- IDENTITY POOL TABLE
|
||||||
|
-- ============================================================
|
||||||
|
-- Pool of reusable worker identities (proxy session + fingerprint).
CREATE TABLE IF NOT EXISTS worker_identities (
    id                    SERIAL PRIMARY KEY,

    -- Evomi session identifier; the session controls the IP
    session_id            CHARACTER VARYING(100) NOT NULL UNIQUE,

    -- IP detected for this session
    ip_address            INET,

    -- Geo targeting (city gives city-level diversity)
    state_code            CHARACTER VARYING(2) NOT NULL,
    city                  CHARACTER VARYING(100),

    -- Fingerprint data (UA, timezone, locale, device, etc.)
    fingerprint           JSONB NOT NULL,

    -- Timestamps
    created_at            TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    last_used_at          TIMESTAMP WITH TIME ZONE,
    cooldown_until        TIMESTAMP WITH TIME ZONE,  -- Not reusable before this time

    -- Usage stats
    total_tasks_completed INTEGER DEFAULT 0,
    total_sessions        INTEGER DEFAULT 1,         -- How many times this identity has been used

    -- Current claim state
    is_active             BOOLEAN DEFAULT FALSE,     -- Currently claimed by a worker
    active_worker_id      CHARACTER VARYING(100),    -- Which worker holds it

    -- Health tracking
    consecutive_failures  INTEGER DEFAULT 0,
    is_healthy            BOOLEAN DEFAULT TRUE       -- Set false if the IP gets blocked
);

-- Lookup by geo
CREATE INDEX IF NOT EXISTS idx_worker_identities_state_city
    ON worker_identities USING btree (state_code, city);

-- Availability lookup, healthy identities only
CREATE INDEX IF NOT EXISTS idx_worker_identities_available
    ON worker_identities USING btree (state_code, is_active, cooldown_until)
    WHERE is_healthy = TRUE;

-- Cooldown expiry scan over healthy, unclaimed identities
CREATE INDEX IF NOT EXISTS idx_worker_identities_cooldown
    ON worker_identities USING btree (cooldown_until)
    WHERE is_healthy = TRUE AND is_active = FALSE;
|
||||||
|
|
||||||
|
-- ============================================================
|
||||||
|
-- METRO AREA MAPPING
|
||||||
|
-- For fallback when exact city not available
|
||||||
|
-- ============================================================
|
||||||
|
-- Maps each (state, city) to a named metro area; a city belongs to at most
-- one metro because (state_code, city) is unique.
CREATE TABLE IF NOT EXISTS metro_areas (
    id         SERIAL PRIMARY KEY,
    metro_name CHARACTER VARYING(100) NOT NULL,
    state_code CHARACTER VARYING(2)   NOT NULL,
    city       CHARACTER VARYING(100) NOT NULL,
    is_primary BOOLEAN DEFAULT FALSE,  -- TRUE for the primary city of the metro
    UNIQUE (state_code, city)
);
|
||||||
|
|
||||||
|
-- Phoenix Metro Area
|
||||||
|
-- Seed Arizona metro areas. Rows that already exist are left untouched.
INSERT INTO metro_areas (metro_name, state_code, city, is_primary)
VALUES
    -- Phoenix Metro Area
    ('Phoenix Metro', 'AZ', 'Phoenix', TRUE),
    ('Phoenix Metro', 'AZ', 'Mesa', FALSE),
    ('Phoenix Metro', 'AZ', 'Glendale', FALSE),
    ('Phoenix Metro', 'AZ', 'Tempe', FALSE),
    ('Phoenix Metro', 'AZ', 'Scottsdale', FALSE),
    ('Phoenix Metro', 'AZ', 'Chandler', FALSE),
    ('Phoenix Metro', 'AZ', 'Peoria', FALSE),
    ('Phoenix Metro', 'AZ', 'El Mirage', FALSE),
    ('Phoenix Metro', 'AZ', 'Tolleson', FALSE),
    ('Phoenix Metro', 'AZ', 'Sun City', FALSE),
    ('Phoenix Metro', 'AZ', 'Apache Junction', FALSE),
    ('Phoenix Metro', 'AZ', 'Cave Creek', FALSE),
    ('Phoenix Metro', 'AZ', 'Gilbert', FALSE),
    ('Phoenix Metro', 'AZ', 'Surprise', FALSE),
    ('Phoenix Metro', 'AZ', 'Avondale', FALSE),
    ('Phoenix Metro', 'AZ', 'Goodyear', FALSE),
    ('Phoenix Metro', 'AZ', 'Buckeye', FALSE),
    ('Phoenix Metro', 'AZ', 'Queen Creek', FALSE),
    -- Tucson Metro Area
    ('Tucson Metro', 'AZ', 'Tucson', TRUE),
    ('Tucson Metro', 'AZ', 'Oro Valley', FALSE),
    ('Tucson Metro', 'AZ', 'Marana', FALSE),
    ('Tucson Metro', 'AZ', 'Sahuarita', FALSE),
    ('Tucson Metro', 'AZ', 'South Tucson', FALSE),
    -- Flagstaff Area
    ('Flagstaff Area', 'AZ', 'Flagstaff', TRUE),
    ('Flagstaff Area', 'AZ', 'Sedona', FALSE),
    -- Prescott Area
    ('Prescott Area', 'AZ', 'Prescott', TRUE),
    ('Prescott Area', 'AZ', 'Prescott Valley', FALSE)
ON CONFLICT (state_code, city) DO NOTHING;
|
||||||
|
|
||||||
|
-- ============================================================
|
||||||
|
-- FUNCTION: claim_identity
|
||||||
|
-- Claims an available identity for a worker
|
||||||
|
-- Tries: exact city -> metro area -> any in state -> create new
|
||||||
|
-- ============================================================
|
||||||
|
-- Claims one available identity for a worker and marks it active.
--
-- Parameters:
--   p_worker_id   worker that will own the identity
--   p_state_code  state the identity must belong to
--   p_city        optional preferred city
--
-- Search order: exact city -> any city in the same metro area -> any
-- identity in the state. An identity is claimable when it is not active,
-- is healthy, and has no cooldown or an expired one. Within each tier the
-- least recently used identity (never-used first) wins.
--
-- Returns the claimed row, or a row whose fields are all NULL when nothing
-- is available (the caller is expected to create a new identity).
CREATE OR REPLACE FUNCTION claim_identity(
    p_worker_id VARCHAR(100),
    p_state_code VARCHAR(2),
    p_city VARCHAR(100) DEFAULT NULL
) RETURNS worker_identities AS $$
DECLARE
    claimed_identity worker_identities;
    metro_name_val VARCHAR(100);
BEGIN
    IF p_city IS NOT NULL THEN
        -- 1. Try exact city match
        UPDATE worker_identities
        SET is_active = TRUE,
            active_worker_id = p_worker_id,
            last_used_at = NOW()
        WHERE id = (
            SELECT id FROM worker_identities
            WHERE state_code = p_state_code
              AND city = p_city
              AND is_active = FALSE
              AND is_healthy = TRUE
              AND (cooldown_until IS NULL OR cooldown_until < NOW())
            ORDER BY last_used_at ASC NULLS FIRST
            LIMIT 1
            FOR UPDATE SKIP LOCKED
        )
        RETURNING * INTO claimed_identity;

        IF claimed_identity.id IS NOT NULL THEN
            RETURN claimed_identity;
        END IF;

        -- 2. Metro area fallback: find the metro this city belongs to
        SELECT ma.metro_name INTO metro_name_val
        FROM metro_areas ma
        WHERE ma.state_code = p_state_code AND ma.city = p_city;

        IF metro_name_val IS NOT NULL THEN
            -- Try any city in the same metro (restricted to the requested state)
            UPDATE worker_identities wi
            SET is_active = TRUE,
                active_worker_id = p_worker_id,
                last_used_at = NOW()
            WHERE wi.id = (
                SELECT wi2.id FROM worker_identities wi2
                JOIN metro_areas ma ON wi2.city = ma.city AND wi2.state_code = ma.state_code
                WHERE ma.metro_name = metro_name_val
                  AND ma.state_code = p_state_code
                  AND wi2.is_active = FALSE
                  AND wi2.is_healthy = TRUE
                  AND (wi2.cooldown_until IS NULL OR wi2.cooldown_until < NOW())
                ORDER BY wi2.last_used_at ASC NULLS FIRST
                LIMIT 1
                -- Lock only the identity row. A bare FOR UPDATE would also lock
                -- the joined metro_areas row, and with SKIP LOCKED concurrent
                -- claimers would then skip every identity in that city.
                FOR UPDATE OF wi2 SKIP LOCKED
            )
            RETURNING * INTO claimed_identity;

            IF claimed_identity.id IS NOT NULL THEN
                RETURN claimed_identity;
            END IF;
        END IF;
    END IF;

    -- 3. Try any identity in the state
    UPDATE worker_identities
    SET is_active = TRUE,
        active_worker_id = p_worker_id,
        last_used_at = NOW()
    WHERE id = (
        SELECT id FROM worker_identities
        WHERE state_code = p_state_code
          AND is_active = FALSE
          AND is_healthy = TRUE
          AND (cooldown_until IS NULL OR cooldown_until < NOW())
        ORDER BY last_used_at ASC NULLS FIRST
        LIMIT 1
        FOR UPDATE SKIP LOCKED
    )
    RETURNING * INTO claimed_identity;

    -- Return whatever we got (all-NULL row if nothing available - caller should create new)
    RETURN claimed_identity;
END;
$$ LANGUAGE plpgsql;
|
||||||
|
|
||||||
|
-- ============================================================
|
||||||
|
-- FUNCTION: release_identity
|
||||||
|
-- Releases an identity back to pool with cooldown
|
||||||
|
-- ============================================================
|
||||||
|
-- Releases an identity back to the pool and starts a random 2-3 hour cooldown.
--
-- Parameters:
--   p_identity_id      worker_identities.id to release
--   p_tasks_completed  tasks finished during this session (NULL counts as 0)
--   p_failed           TRUE when the session ended in failure
--
-- A failed release increments consecutive_failures; any other release resets
-- it to 0. The identity is marked unhealthy once the NEW failure count
-- reaches 3.
CREATE OR REPLACE FUNCTION release_identity(
    p_identity_id INT,
    p_tasks_completed INT DEFAULT 0,
    p_failed BOOLEAN DEFAULT FALSE
) RETURNS VOID AS $$
DECLARE
    cooldown_interval INTERVAL;
BEGIN
    -- random() is in [0, 1), so the cooldown falls in [2h, 3h)
    cooldown_interval := (2 + random()) * INTERVAL '1 hour';

    UPDATE worker_identities
    SET is_active = FALSE,
        active_worker_id = NULL,
        total_tasks_completed = total_tasks_completed + COALESCE(p_tasks_completed, 0),
        total_sessions = total_sessions + 1,
        cooldown_until = NOW() + cooldown_interval,
        consecutive_failures = CASE WHEN p_failed THEN consecutive_failures + 1 ELSE 0 END,
        -- Column references on the right-hand side of SET see the row as it was
        -- BEFORE this UPDATE, so the health check must use the incremented
        -- count explicitly. Testing the old value flipped the flag one release
        -- late and marked an identity unhealthy on the success that followed
        -- three failures.
        is_healthy = CASE
            WHEN p_failed AND consecutive_failures + 1 >= 3 THEN FALSE
            ELSE TRUE
        END
    WHERE id = p_identity_id;
END;
$$ LANGUAGE plpgsql;
|
||||||
|
|
||||||
|
-- ============================================================
|
||||||
|
-- FUNCTION: get_pending_tasks_by_geo
|
||||||
|
-- Gets pending tasks grouped by state/city for identity assignment
|
||||||
|
-- ============================================================
|
||||||
|
-- Lists the (state, city) groups with the most pending tasks, together with
-- the number of identities currently claimable for each group.
--
-- Parameters:
--   p_limit  maximum number of groups to return (default 10)
--
-- An identity counts as available for a group when it is in the same state,
-- its city matches the group city or is NULL, and it is inactive, healthy
-- and not cooling down.
CREATE OR REPLACE FUNCTION get_pending_tasks_by_geo(
    p_limit INT DEFAULT 10
) RETURNS TABLE (
    state_code VARCHAR(2),
    city VARCHAR(100),
    pending_count BIGINT,
    available_identities BIGINT
) AS $$
BEGIN
    -- Every column reference below is table-qualified so it cannot clash
    -- with the OUT columns declared in RETURNS TABLE.
    RETURN QUERY
    WITH backlog AS (
        SELECT disp.state     AS geo_state,
               disp.city      AS geo_city,
               COUNT(task.id) AS open_tasks
        FROM worker_tasks AS task
        INNER JOIN dispensaries AS disp ON disp.id = task.dispensary_id
        WHERE task.status = 'pending'
          AND disp.state IS NOT NULL
        GROUP BY disp.state, disp.city
        ORDER BY COUNT(task.id) DESC
        LIMIT p_limit
    )
    SELECT
        b.geo_state,
        b.geo_city,
        b.open_tasks,
        (
            SELECT COUNT(*)
            FROM worker_identities AS ident
            WHERE ident.state_code = b.geo_state
              AND (ident.city = b.geo_city OR ident.city IS NULL)
              AND ident.is_active = FALSE
              AND ident.is_healthy = TRUE
              AND (ident.cooldown_until IS NULL OR ident.cooldown_until < NOW())
        )
    FROM backlog AS b
    ORDER BY b.open_tasks DESC;
END;
$$ LANGUAGE plpgsql;
|
||||||
|
|
||||||
|
-- ============================================================
|
||||||
|
-- FUNCTION: get_tasks_for_identity
|
||||||
|
-- Gets tasks matching an identity's geo (same city or metro)
|
||||||
|
-- ============================================================
|
||||||
|
-- Returns pending tasks that suit an identity located in the given state/city.
--
-- Parameters:
--   p_state_code  state of the identity
--   p_city        city of the identity
--   p_limit       maximum number of tasks to return (default 5)
--
-- Matching: if the city maps to a metro area, tasks from the exact city or
-- any city of that metro qualify; if it maps to no metro, any pending task
-- in the state qualifies. Exact-city tasks sort first, then by priority
-- (highest first) and age (oldest first).
CREATE OR REPLACE FUNCTION get_tasks_for_identity(
    p_state_code VARCHAR(2),
    p_city VARCHAR(100),
    p_limit INT DEFAULT 5
) RETURNS TABLE (
    task_id INT,
    dispensary_id INT,
    dispensary_name VARCHAR(255),
    dispensary_city VARCHAR(100),
    role VARCHAR(50)
) AS $$
DECLARE
    v_metro VARCHAR(100);
BEGIN
    -- Resolve the metro area of the identity's city (stays NULL when unmapped)
    SELECT area.metro_name INTO v_metro
    FROM metro_areas AS area
    WHERE area.city = p_city
      AND area.state_code = p_state_code;

    RETURN QUERY
    SELECT
        task.id,
        disp.id,
        disp.name,
        disp.city,
        task.role
    FROM worker_tasks AS task
    INNER JOIN dispensaries AS disp ON disp.id = task.dispensary_id
    WHERE task.status = 'pending'
      AND disp.state = p_state_code
      AND (
          -- No metro known: anything in the state
          v_metro IS NULL
          -- Exact city match
          OR disp.city = p_city
          -- Or a city in the same metro area
          OR EXISTS (
              SELECT 1
              FROM metro_areas AS sibling
              WHERE sibling.metro_name = v_metro
                AND sibling.city = disp.city
          )
      )
    ORDER BY
        CASE disp.city WHEN p_city THEN 0 ELSE 1 END,  -- Prefer exact city
        task.priority DESC,
        task.created_at ASC
    LIMIT p_limit;
END;
$$ LANGUAGE plpgsql;
|
||||||
|
|
||||||
|
-- ============================================================
|
||||||
|
-- VIEW: identity_pool_status
|
||||||
|
-- Overview of identity pool health and availability
|
||||||
|
-- ============================================================
|
||||||
|
-- Identity pool health and availability, one row per (state_code, city).
CREATE OR REPLACE VIEW identity_pool_status AS
SELECT
    wi.state_code,
    wi.city,
    COUNT(*) AS total_identities,
    -- COUNT over a CASE without ELSE counts only the rows where the condition is true
    COUNT(CASE WHEN wi.is_active THEN 1 END) AS active,
    COUNT(CASE WHEN NOT wi.is_active
                AND wi.is_healthy
                AND (wi.cooldown_until IS NULL OR wi.cooldown_until < NOW())
               THEN 1 END) AS available,
    COUNT(CASE WHEN NOT wi.is_active AND wi.cooldown_until > NOW() THEN 1 END) AS cooling_down,
    COUNT(CASE WHEN NOT wi.is_healthy THEN 1 END) AS unhealthy,
    SUM(wi.total_tasks_completed) AS total_tasks,
    CAST(AVG(wi.total_tasks_completed) AS INT) AS avg_tasks_per_identity
FROM worker_identities AS wi
GROUP BY wi.state_code, wi.city
ORDER BY wi.state_code, wi.city;
|
||||||
|
|
||||||
|
-- ============================================================
|
||||||
|
-- Comments
|
||||||
|
-- ============================================================
|
||||||
|
-- Catalog descriptions for the identity pool objects. Functions are named
-- with their argument types so each comment targets one specific signature.
COMMENT ON TABLE worker_identities
    IS 'Pool of IP/fingerprint identities for worker rotation';
COMMENT ON TABLE metro_areas
    IS 'City groupings for geographic fallback matching';
COMMENT ON FUNCTION claim_identity(VARCHAR, VARCHAR, VARCHAR)
    IS 'Claim an available identity: exact city -> metro -> state -> NULL (create new)';
COMMENT ON FUNCTION release_identity(INT, INT, BOOLEAN)
    IS 'Release identity with 2-3 hour random cooldown';
COMMENT ON FUNCTION get_pending_tasks_by_geo(INT)
    IS 'Get pending task counts by state/city';
COMMENT ON FUNCTION get_tasks_for_identity(VARCHAR, VARCHAR, INT)
    IS 'Get tasks matching identity geo (city or metro area)';
|
||||||
92
backend/migrations/110_trusted_origins.sql
Normal file
92
backend/migrations/110_trusted_origins.sql
Normal file
@@ -0,0 +1,92 @@
|
|||||||
|
-- Migration: 110_trusted_origins.sql
|
||||||
|
-- Description: Trusted origins for API access without token
|
||||||
|
-- Created: 2024-12-14
|
||||||
|
--
|
||||||
|
-- Manages which domains, IPs, and patterns can access the API without a Bearer token.
|
||||||
|
-- Used by auth middleware to grant 'internal' role to trusted requests.
|
||||||
|
|
||||||
|
-- ============================================================
|
||||||
|
-- TRUSTED ORIGINS TABLE
|
||||||
|
-- ============================================================
|
||||||
|
-- Origins (domains, IPs, regex patterns) allowed to call the API without a token.
CREATE TABLE IF NOT EXISTS trusted_origins (
    id           SERIAL PRIMARY KEY,

    -- Origin identification
    name         CHARACTER VARYING(100) NOT NULL,  -- Friendly name (e.g., "CannaIQ Production")
    origin_type  CHARACTER VARYING(20)  NOT NULL,  -- 'domain', 'ip', or 'pattern'
    origin_value CHARACTER VARYING(255) NOT NULL,  -- The actual value to match

    -- Metadata
    description  TEXT,                              -- Optional notes
    active       BOOLEAN DEFAULT TRUE,

    -- Tracking
    created_at   TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    updated_at   TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    created_by   INTEGER REFERENCES users (id),

    -- Constraints
    CONSTRAINT valid_origin_type CHECK (origin_type IN ('domain', 'ip', 'pattern')),
    UNIQUE (origin_type, origin_value)
);

-- Partial index over active rows (lookup used by auth middleware)
CREATE INDEX IF NOT EXISTS idx_trusted_origins_active
    ON trusted_origins USING btree (active)
    WHERE active = TRUE;
|
||||||
|
|
||||||
|
-- Updated at trigger
|
||||||
|
-- Trigger function: stamps updated_at with the current transaction time.
CREATE OR REPLACE FUNCTION update_trusted_origins_updated_at()
RETURNS TRIGGER
LANGUAGE plpgsql
AS $fn$
BEGIN
    NEW.updated_at := CURRENT_TIMESTAMP;
    RETURN NEW;
END;
$fn$;

-- Recreate the trigger so the migration can be re-run safely
DROP TRIGGER IF EXISTS trusted_origins_updated_at ON trusted_origins;

CREATE TRIGGER trusted_origins_updated_at
    BEFORE UPDATE ON trusted_origins
    FOR EACH ROW
    EXECUTE FUNCTION update_trusted_origins_updated_at();
|
||||||
|
|
||||||
|
-- ============================================================
|
||||||
|
-- SEED DEFAULT TRUSTED ORIGINS
|
||||||
|
-- These match the hardcoded fallbacks in middleware.ts
|
||||||
|
-- ============================================================
|
||||||
|
|
||||||
|
-- Production domains
|
||||||
|
-- Seed rows are inserted idempotently: an existing (origin_type, origin_value)
-- pair is left untouched.

-- Production domains
INSERT INTO trusted_origins (name, origin_type, origin_value, description) VALUES
    ('CannaIQ Production', 'domain', 'https://cannaiq.co', 'Main CannaIQ dashboard'),
    ('CannaIQ Production (www)', 'domain', 'https://www.cannaiq.co', 'Main CannaIQ dashboard with www'),
    ('FindADispo Production', 'domain', 'https://findadispo.com', 'Consumer dispensary finder'),
    ('FindADispo Production (www)', 'domain', 'https://www.findadispo.com', 'Consumer dispensary finder with www'),
    ('Findagram Production', 'domain', 'https://findagram.co', 'Instagram-style cannabis discovery'),
    ('Findagram Production (www)', 'domain', 'https://www.findagram.co', 'Instagram-style cannabis discovery with www')
ON CONFLICT (origin_type, origin_value) DO NOTHING;

-- Wildcard patterns
-- With standard_conforming_strings (the PostgreSQL default) a backslash in a
-- '...' literal is stored as is, so the regex escape for a literal dot is a
-- SINGLE backslash. The JavaScript-style "\\." stored two backslashes, which
-- a regex engine reads as "literal backslash, then any character", so the
-- pattern could never match a real origin.
-- NOTE(review): databases that already ran this migration keep the old
-- double-backslash rows; they need a follow-up UPDATE.
INSERT INTO trusted_origins (name, origin_type, origin_value, description) VALUES
    ('CannaBrands Subdomains', 'pattern', '^https://.*\.cannabrands\.app$', 'All *.cannabrands.app subdomains'),
    ('CannaIQ Subdomains', 'pattern', '^https://.*\.cannaiq\.co$', 'All *.cannaiq.co subdomains')
ON CONFLICT (origin_type, origin_value) DO NOTHING;

-- Local development
-- NOTE(review): these localhost origins are seeded in every environment,
-- including production; confirm that is intended for a token-less bypass list.
INSERT INTO trusted_origins (name, origin_type, origin_value, description) VALUES
    ('Local API', 'domain', 'http://localhost:3010', 'Local backend API'),
    ('Local Admin', 'domain', 'http://localhost:8080', 'Local admin dashboard'),
    ('Local Vite Dev', 'domain', 'http://localhost:5173', 'Vite dev server')
ON CONFLICT (origin_type, origin_value) DO NOTHING;

-- Trusted IPs (localhost)
INSERT INTO trusted_origins (name, origin_type, origin_value, description) VALUES
    ('Localhost IPv4', 'ip', '127.0.0.1', 'Local machine'),
    ('Localhost IPv6', 'ip', '::1', 'Local machine IPv6'),
    ('Localhost IPv6 Mapped', 'ip', '::ffff:127.0.0.1', 'IPv6-mapped IPv4 localhost')
ON CONFLICT (origin_type, origin_value) DO NOTHING;

-- ============================================================
-- COMMENTS
-- ============================================================
COMMENT ON TABLE trusted_origins IS 'Domains, IPs, and patterns that can access API without token';
COMMENT ON COLUMN trusted_origins.origin_type IS 'domain = exact URL match, ip = IP address, pattern = regex pattern';
COMMENT ON COLUMN trusted_origins.origin_value IS 'For domain: full URL. For ip: IP address. For pattern: regex string';
|
||||||
35
backend/migrations/111_system_settings.sql
Normal file
35
backend/migrations/111_system_settings.sql
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
-- Migration: 111_system_settings.sql
|
||||||
|
-- Description: System settings table for runtime configuration
|
||||||
|
-- Created: 2024-12-14
|
||||||
|
|
||||||
|
-- Key/value store for runtime configuration.
CREATE TABLE IF NOT EXISTS system_settings (
    key         CHARACTER VARYING(100) PRIMARY KEY,
    value       TEXT NOT NULL,
    description TEXT,
    updated_at  TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    updated_by  INTEGER REFERENCES users (id)
);

-- Task pool gate: controls whether workers can claim tasks.
-- An existing row is left untouched.
INSERT INTO system_settings (key, value, description)
VALUES (
    'task_pool_open',
    'true',
    'When false, workers cannot claim new tasks from the pool'
)
ON CONFLICT (key) DO NOTHING;
|
||||||
|
|
||||||
|
-- Updated at trigger
|
||||||
|
-- Trigger function: stamps updated_at with the current transaction time.
CREATE OR REPLACE FUNCTION update_system_settings_updated_at()
RETURNS TRIGGER
LANGUAGE plpgsql
AS $fn$
BEGIN
    NEW.updated_at := CURRENT_TIMESTAMP;
    RETURN NEW;
END;
$fn$;

-- Recreate the trigger so the migration can be re-run safely
DROP TRIGGER IF EXISTS system_settings_updated_at ON system_settings;

CREATE TRIGGER system_settings_updated_at
    BEFORE UPDATE ON system_settings
    FOR EACH ROW
    EXECUTE FUNCTION update_system_settings_updated_at();

COMMENT ON TABLE system_settings
    IS 'Runtime configuration settings';
COMMENT ON COLUMN system_settings.key
    IS 'Setting name (e.g., task_pool_open)';
COMMENT ON COLUMN system_settings.value
    IS 'Setting value as string';
|
||||||
390
backend/migrations/112_worker_session_pool.sql
Normal file
390
backend/migrations/112_worker_session_pool.sql
Normal file
@@ -0,0 +1,390 @@
|
|||||||
|
-- Migration 112: Worker Session Pool
|
||||||
|
-- Tracks IP/fingerprint sessions with exclusive locks and cooldowns
|
||||||
|
-- Each worker claims up to 6 tasks, uses one IP/fingerprint for those tasks,
|
||||||
|
-- then retires the session (8hr cooldown before IP can be reused)
|
||||||
|
|
||||||
|
-- Drop old identity pool tables if they exist (replacing with simpler session model)
|
||||||
|
-- Remove the identity pool from migration 109.
-- CASCADE drops objects that PostgreSQL tracks as dependent on the table
-- (the identity_pool_status view, and claim_identity through its
-- worker_identities return type).
DROP TABLE IF EXISTS worker_identity_claims CASCADE;
DROP TABLE IF EXISTS worker_identities CASCADE;

-- PL/pgSQL bodies are not dependency-tracked, so these two functions from
-- migration 109 would survive the drop and fail at call time with
-- "relation worker_identities does not exist". Drop them explicitly.
DROP FUNCTION IF EXISTS release_identity(INT, INT, BOOLEAN);
DROP FUNCTION IF EXISTS get_pending_tasks_by_geo(INT);
|
||||||
|
|
||||||
|
-- Worker sessions: tracks active and cooling down IP/fingerprint pairs
|
||||||
|
-- Worker sessions: active and cooling-down IP/fingerprint pairs.
CREATE TABLE IF NOT EXISTS worker_sessions (
    id               SERIAL PRIMARY KEY,

    -- IP and fingerprint for this session
    ip_address       CHARACTER VARYING(45) NOT NULL,
    fingerprint_hash CHARACTER VARYING(64) NOT NULL,
    fingerprint_data JSONB,

    -- Geo this session is locked to
    state_code       CHARACTER VARYING(2) NOT NULL,
    city             CHARACTER VARYING(100),

    -- Ownership
    worker_id        CHARACTER VARYING(255),  -- NULL if in cooldown

    -- Status: 'active' (locked to worker), 'cooldown' (8hr wait), 'available'
    status           CHARACTER VARYING(20) NOT NULL DEFAULT 'available',

    -- Task tracking
    tasks_claimed    INTEGER NOT NULL DEFAULT 0,
    tasks_completed  INTEGER NOT NULL DEFAULT 0,
    tasks_failed     INTEGER NOT NULL DEFAULT 0,
    max_tasks        INTEGER NOT NULL DEFAULT 6,

    -- Timestamps
    created_at       TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT CURRENT_TIMESTAMP,
    locked_at        TIMESTAMP WITH TIME ZONE,  -- When a worker locked this session
    retired_at       TIMESTAMP WITH TIME ZONE,  -- When the session was retired (cooldown starts)
    cooldown_until   TIMESTAMP WITH TIME ZONE,  -- When the session becomes available again

    -- Constraints
    CONSTRAINT valid_status CHECK (status IN ('active', 'cooldown', 'available'))
);

-- Indexes for fast lookups
CREATE INDEX IF NOT EXISTS idx_worker_sessions_ip
    ON worker_sessions USING btree (ip_address);
CREATE INDEX IF NOT EXISTS idx_worker_sessions_status
    ON worker_sessions USING btree (status);
CREATE INDEX IF NOT EXISTS idx_worker_sessions_worker
    ON worker_sessions USING btree (worker_id)
    WHERE worker_id IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_worker_sessions_geo
    ON worker_sessions USING btree (state_code, city);
CREATE INDEX IF NOT EXISTS idx_worker_sessions_cooldown
    ON worker_sessions USING btree (cooldown_until)
    WHERE status = 'cooldown';

-- Unique constraint: only one active session per IP
CREATE UNIQUE INDEX IF NOT EXISTS idx_worker_sessions_active_ip
    ON worker_sessions USING btree (ip_address)
    WHERE status = 'active';
|
||||||
|
|
||||||
|
-- Function: Check if IP is available (not active, not in cooldown)
|
||||||
|
-- Reports whether an IP may be used for a new session.
-- Returns FALSE when any session row for the IP is 'active', or is in
-- 'cooldown' with a cooldown_until still in the future; TRUE otherwise.
CREATE OR REPLACE FUNCTION is_ip_available(check_ip VARCHAR(45))
RETURNS BOOLEAN
LANGUAGE plpgsql
AS $fn$
DECLARE
    v_in_use BOOLEAN;
BEGIN
    SELECT EXISTS (
        SELECT 1
        FROM worker_sessions AS ws
        WHERE ws.ip_address = check_ip
          AND CASE ws.status
                  WHEN 'active'   THEN TRUE
                  WHEN 'cooldown' THEN ws.cooldown_until > NOW()
                  ELSE FALSE
              END
    ) INTO v_in_use;

    RETURN NOT v_in_use;
END;
$fn$;
|
||||||
|
|
||||||
|
-- Function: Lock a session to a worker
|
||||||
|
-- Returns the session if successful, NULL if IP not available
|
||||||
|
-- Locks an IP/fingerprint session to a worker.
--
-- Parameters:
--   p_worker_id         worker taking the session
--   p_ip_address        IP the session runs on
--   p_state_code        state the session is locked to
--   p_city              optional city
--   p_fingerprint_hash  optional hash; on reuse the stored hash is kept when
--                       NULL, on insert a random md5 placeholder is generated
--   p_fingerprint_data  optional fingerprint payload
--
-- Returns the active session row, or NULL when the IP is not available.
-- That includes losing a race to another worker claiming the same IP: the
-- partial unique index on active IPs rejects the second claim, and the
-- error is converted into the documented NULL result.
CREATE OR REPLACE FUNCTION lock_worker_session(
    p_worker_id VARCHAR(255),
    p_ip_address VARCHAR(45),
    p_state_code VARCHAR(2),
    p_city VARCHAR(100) DEFAULT NULL,
    p_fingerprint_hash VARCHAR(64) DEFAULT NULL,
    p_fingerprint_data JSONB DEFAULT NULL
) RETURNS worker_sessions AS $$
DECLARE
    v_session worker_sessions;
BEGIN
    -- First check if IP is available (not active, not cooling down)
    IF NOT is_ip_available(p_ip_address) THEN
        RETURN NULL;
    END IF;

    -- Try to find an existing available session for this IP
    SELECT * INTO v_session
    FROM worker_sessions
    WHERE ip_address = p_ip_address
      AND status = 'available'
    LIMIT 1
    FOR UPDATE SKIP LOCKED;

    BEGIN
        IF v_session.id IS NOT NULL THEN
            -- Reuse existing session, resetting its counters and timestamps
            UPDATE worker_sessions SET
                worker_id = p_worker_id,
                status = 'active',
                state_code = p_state_code,
                city = p_city,
                fingerprint_hash = COALESCE(p_fingerprint_hash, fingerprint_hash),
                fingerprint_data = COALESCE(p_fingerprint_data, fingerprint_data),
                tasks_claimed = 0,
                tasks_completed = 0,
                tasks_failed = 0,
                locked_at = NOW(),
                retired_at = NULL,
                cooldown_until = NULL
            WHERE id = v_session.id
            RETURNING * INTO v_session;
        ELSE
            -- Create new session
            INSERT INTO worker_sessions (
                ip_address, fingerprint_hash, fingerprint_data,
                state_code, city, worker_id, status, locked_at
            ) VALUES (
                p_ip_address, COALESCE(p_fingerprint_hash, md5(random()::text)),
                p_fingerprint_data, p_state_code, p_city, p_worker_id, 'active', NOW()
            )
            RETURNING * INTO v_session;
        END IF;
    EXCEPTION
        WHEN unique_violation THEN
            -- Another transaction activated a session for this IP between the
            -- availability check and our write.
            RETURN NULL;
    END;

    RETURN v_session;
END;
$$ LANGUAGE plpgsql;
|
||||||
|
|
||||||
|
-- Function: Retire a session (start 8hr cooldown)
|
||||||
|
-- Retires the worker's active session(s) and starts an 8 hour cooldown.
-- Returns TRUE when at least one active session was retired.
CREATE OR REPLACE FUNCTION retire_worker_session(p_worker_id VARCHAR(255))
RETURNS BOOLEAN
LANGUAGE plpgsql
AS $fn$
BEGIN
    UPDATE worker_sessions AS ws
    SET status         = 'cooldown',
        worker_id      = NULL,
        retired_at     = NOW(),
        cooldown_until = NOW() + INTERVAL '8 hours'
    WHERE ws.status = 'active'
      AND ws.worker_id = p_worker_id;

    -- FOUND is TRUE exactly when the UPDATE touched one or more rows
    RETURN FOUND;
END;
$fn$;
|
||||||
|
|
||||||
|
-- Function: Release expired cooldowns
|
||||||
|
-- Moves sessions whose cooldown has elapsed back to 'available'.
-- Returns the number of sessions released.
CREATE OR REPLACE FUNCTION release_expired_sessions()
RETURNS INTEGER
LANGUAGE plpgsql
AS $fn$
DECLARE
    v_released INTEGER;
BEGIN
    WITH freed AS (
        UPDATE worker_sessions AS ws
        SET status = 'available'
        WHERE ws.status = 'cooldown'
          AND ws.cooldown_until <= NOW()
        RETURNING ws.id
    )
    SELECT COUNT(*)::INTEGER INTO v_released
    FROM freed;

    RETURN v_released;
END;
$fn$;
|
||||||
|
|
||||||
|
-- Function: Get session for worker
|
||||||
|
-- Returns the active session held by a worker, or NULL when it has none.
-- If a worker ever holds several active sessions, the most recently locked
-- one is returned; without an ORDER BY, LIMIT 1 would pick an arbitrary row.
CREATE OR REPLACE FUNCTION get_worker_session(p_worker_id VARCHAR(255))
RETURNS worker_sessions AS $$
    SELECT * FROM worker_sessions
    WHERE worker_id = p_worker_id AND status = 'active'
    ORDER BY locked_at DESC NULLS LAST, id DESC
    LIMIT 1;
$$ LANGUAGE sql;
|
||||||
|
|
||||||
|
-- Function: Increment task counters
|
||||||
|
-- Adds one to tasks_completed on the worker's active session.
-- Returns TRUE when an active session was updated, FALSE otherwise.
CREATE OR REPLACE FUNCTION session_task_completed(p_worker_id VARCHAR(255))
RETURNS BOOLEAN
LANGUAGE plpgsql
AS $fn$
DECLARE
    v_rows INTEGER;
BEGIN
    UPDATE worker_sessions AS ws
    SET tasks_completed = ws.tasks_completed + 1
    WHERE ws.status = 'active'
      AND ws.worker_id = p_worker_id;

    GET DIAGNOSTICS v_rows = ROW_COUNT;
    RETURN v_rows > 0;
END;
$fn$;
|
||||||
|
|
||||||
|
-- Adds one to tasks_failed on the worker's active session.
-- Returns TRUE when an active session was updated, FALSE otherwise.
CREATE OR REPLACE FUNCTION session_task_failed(p_worker_id VARCHAR(255))
RETURNS BOOLEAN
LANGUAGE plpgsql
AS $fn$
DECLARE
    v_rows INTEGER;
BEGIN
    UPDATE worker_sessions AS ws
    SET tasks_failed = ws.tasks_failed + 1
    WHERE ws.status = 'active'
      AND ws.worker_id = p_worker_id;

    GET DIAGNOSTICS v_rows = ROW_COUNT;
    RETURN v_rows > 0;
END;
$fn$;
|
||||||
|
|
||||||
|
-- Adds p_count (default 1) to tasks_claimed on the worker's active session.
-- Returns TRUE when an active session was updated, FALSE otherwise.
CREATE OR REPLACE FUNCTION session_task_claimed(p_worker_id VARCHAR(255), p_count INTEGER DEFAULT 1)
RETURNS BOOLEAN
LANGUAGE plpgsql
AS $fn$
DECLARE
    v_rows INTEGER;
BEGIN
    UPDATE worker_sessions AS ws
    SET tasks_claimed = ws.tasks_claimed + p_count
    WHERE ws.status = 'active'
      AND ws.worker_id = p_worker_id;

    GET DIAGNOSTICS v_rows = ROW_COUNT;
    RETURN v_rows > 0;
END;
$fn$;
|
||||||
|
|
||||||
|
-- Scheduled job hint: Run release_expired_sessions() every 5 minutes
|
||||||
|
COMMENT ON FUNCTION release_expired_sessions()
    IS $doc$Run periodically to release sessions from cooldown. Suggest: every 5 minutes.$doc$;
|
||||||
|
|
||||||
|
-- =============================================================================
|
||||||
|
-- ATOMIC TASK CLAIMING
|
||||||
|
-- Worker claims up to 6 tasks for same geo in one transaction
|
||||||
|
-- =============================================================================
|
||||||
|
|
||||||
|
-- Function: Claim up to N tasks for same geo
|
||||||
|
-- Returns claimed tasks with dispensary geo info
|
||||||
|
-- Atomically claims up to p_max_tasks pending tasks that share one geo.
--
-- Parameters:
--   p_worker_id  worker that will own the claimed tasks
--   p_max_tasks  maximum tasks to claim (default 6; NULL falls back to 6,
--                negative values claim nothing)
--   p_role       optional role filter; NULL matches every role
--
-- The target geo is the (state, city) group with the most pending tasks.
-- If that group has a NULL city, any city in the state is eligible.
-- Claimed tasks get status 'claimed', worker_id and claimed_at set.
--
-- Returns one row per claimed task with its dispensary geo info; no rows
-- when there is nothing to claim.
CREATE OR REPLACE FUNCTION claim_tasks_batch(
    p_worker_id VARCHAR(255),
    p_max_tasks INTEGER DEFAULT 6,
    p_role VARCHAR(50) DEFAULT NULL -- Optional role filter
) RETURNS TABLE (
    task_id INTEGER,
    role VARCHAR(50),
    dispensary_id INTEGER,
    dispensary_name VARCHAR(255),
    city VARCHAR(100),
    state_code VARCHAR(2),
    platform VARCHAR(50),
    method VARCHAR(20)
) AS $$
DECLARE
    v_target_state VARCHAR(2);
    v_target_city VARCHAR(100);
BEGIN
    -- First, find the geo with most pending tasks to target.
    -- Dispensaries without a state are excluded: a NULL-state group could
    -- otherwise win and be mistaken for "no pending tasks" below.
    SELECT d.state, d.city INTO v_target_state, v_target_city
    FROM worker_tasks t
    JOIN dispensaries d ON t.dispensary_id = d.id
    WHERE t.status = 'pending'
      AND d.state IS NOT NULL
      AND (p_role IS NULL OR t.role = p_role)
    GROUP BY d.state, d.city
    ORDER BY COUNT(*) DESC
    LIMIT 1;

    -- No pending tasks
    IF v_target_state IS NULL THEN
        RETURN;
    END IF;

    -- Claim up to p_max_tasks for this geo
    RETURN QUERY
    WITH claimed AS (
        UPDATE worker_tasks t SET
            status = 'claimed',
            worker_id = p_worker_id,
            claimed_at = NOW()
        FROM (
            SELECT t2.id
            FROM worker_tasks t2
            JOIN dispensaries d ON t2.dispensary_id = d.id
            WHERE t2.status = 'pending'
              AND d.state = v_target_state
              AND (v_target_city IS NULL OR d.city = v_target_city)
              AND (p_role IS NULL OR t2.role = p_role)
            ORDER BY t2.priority DESC, t2.created_at ASC
            -- LIMIT NULL means "no limit" and a negative LIMIT is an error,
            -- so normalise the argument first.
            LIMIT GREATEST(COALESCE(p_max_tasks, 6), 0)
            -- Lock only the task rows. A bare FOR UPDATE would also lock the
            -- joined dispensaries rows, and with SKIP LOCKED concurrent
            -- workers would then skip claimable tasks of the same dispensary.
            FOR UPDATE OF t2 SKIP LOCKED
        ) sub
        WHERE t.id = sub.id
        RETURNING t.id, t.role, t.dispensary_id, t.method
    )
    SELECT
        c.id as task_id,
        c.role,
        c.dispensary_id,
        d.name as dispensary_name,
        d.city,
        d.state as state_code,
        d.platform,
        c.method
    FROM claimed c
    JOIN dispensaries d ON c.dispensary_id = d.id;
END;
$$ LANGUAGE plpgsql;
|
||||||
|
|
||||||
|
-- Function: Release claimed tasks back to pending (for failed worker or cleanup)
-- Resets every task owned by p_worker_id that is 'claimed' or 'running' to
-- 'pending' and clears its ownership/timing fields.
-- Returns the number of tasks released.
CREATE OR REPLACE FUNCTION release_claimed_tasks(p_worker_id VARCHAR(255))
RETURNS INTEGER AS $$
DECLARE
    v_released INTEGER;
BEGIN
    UPDATE worker_tasks SET
        status = 'pending',
        worker_id = NULL,
        claimed_at = NULL,
        -- A released 'running' task has not started from the next claimer's
        -- point of view; clear the stale timestamp (fail_task does the same).
        started_at = NULL
    WHERE worker_id = p_worker_id
      AND status IN ('claimed', 'running');

    GET DIAGNOSTICS v_released = ROW_COUNT;
    RETURN v_released;
END;
$$ LANGUAGE plpgsql;
|
||||||
|
|
||||||
|
-- Function: Mark task as running
-- Transitions the task to 'running' and stamps started_at, but only when the
-- task is currently 'claimed' by p_worker_id.
-- Returns TRUE when the task row was updated, FALSE otherwise.
CREATE OR REPLACE FUNCTION start_task(p_task_id INTEGER, p_worker_id VARCHAR(255))
RETURNS BOOLEAN
LANGUAGE plpgsql
AS $$
DECLARE
    v_rows INTEGER;
BEGIN
    UPDATE worker_tasks AS wt
       SET started_at = NOW(),
           status = 'running'
     WHERE wt.status = 'claimed'
       AND wt.worker_id = p_worker_id
       AND wt.id = p_task_id;

    GET DIAGNOSTICS v_rows = ROW_COUNT;
    RETURN v_rows > 0;
END;
$$;
|
||||||
|
|
||||||
|
-- Function: Mark task as completed (leaves pool)
-- Transitions the task to 'completed', stamps completed_at and stores p_result,
-- but only when the task is currently 'running' under p_worker_id.
-- Returns TRUE when the task row was updated, FALSE otherwise.
CREATE OR REPLACE FUNCTION complete_task(
    p_task_id INTEGER,
    p_worker_id VARCHAR(255),
    p_result JSONB DEFAULT NULL
) RETURNS BOOLEAN
LANGUAGE plpgsql
AS $$
DECLARE
    v_rows INTEGER;
BEGIN
    UPDATE worker_tasks AS wt
       SET result = p_result,
           completed_at = NOW(),
           status = 'completed'
     WHERE wt.status = 'running'
       AND wt.worker_id = p_worker_id
       AND wt.id = p_task_id;

    GET DIAGNOSTICS v_rows = ROW_COUNT;
    RETURN v_rows > 0;
END;
$$;
|
||||||
|
|
||||||
|
-- Function: Mark task as failed (returns to pending for retry)
-- If the task has already used p_max_retries retries it is marked 'failed'
-- permanently; otherwise it goes back to 'pending' with ownership cleared.
-- In both cases error_message is recorded and retry_count is incremented.
-- Returns TRUE when a task owned by p_worker_id was updated, FALSE otherwise.
CREATE OR REPLACE FUNCTION fail_task(
    p_task_id INTEGER,
    p_worker_id VARCHAR(255),
    p_error TEXT DEFAULT NULL,
    p_max_retries INTEGER DEFAULT 3
) RETURNS BOOLEAN AS $$
DECLARE
    v_retry_count INTEGER;
BEGIN
    -- Get current retry count. The row is locked and filtered by owner so the
    -- value cannot change (or belong to another worker's claim) between this
    -- read and the UPDATE below.
    SELECT COALESCE(retry_count, 0) INTO v_retry_count
    FROM worker_tasks
    WHERE id = p_task_id
      AND worker_id = p_worker_id
    FOR UPDATE;

    -- Task does not exist or is not owned by this worker
    IF NOT FOUND THEN
        RETURN FALSE;
    END IF;

    IF v_retry_count >= p_max_retries THEN
        -- Max retries exceeded - mark as permanently failed
        UPDATE worker_tasks SET
            status = 'failed',
            completed_at = NOW(),
            error_message = p_error,
            retry_count = v_retry_count + 1
        WHERE id = p_task_id
          AND worker_id = p_worker_id;
    ELSE
        -- Return to pending for retry
        UPDATE worker_tasks SET
            status = 'pending',
            worker_id = NULL,
            claimed_at = NULL,
            started_at = NULL,
            error_message = p_error,
            retry_count = v_retry_count + 1
        WHERE id = p_task_id
          AND worker_id = p_worker_id;
    END IF;

    RETURN FOUND;
END;
$$ LANGUAGE plpgsql;
|
||||||
|
|
||||||
|
-- Add retry_count and claimed_at columns if they do not exist.
-- ADD COLUMN IF NOT EXISTS checks the table actually being altered; the previous
-- information_schema lookup had no schema filter, so a worker_tasks table in any
-- other schema could make it skip the ALTER.
ALTER TABLE worker_tasks ADD COLUMN IF NOT EXISTS retry_count INTEGER NOT NULL DEFAULT 0;
ALTER TABLE worker_tasks ADD COLUMN IF NOT EXISTS claimed_at TIMESTAMPTZ;
|
||||||
381
backend/migrations/113_task_pools.sql
Normal file
381
backend/migrations/113_task_pools.sql
Normal file
@@ -0,0 +1,381 @@
|
|||||||
|
-- Task Pools: Group tasks by geo area for worker assignment
|
||||||
|
-- Workers claim a pool, get proxy for that geo, then pull tasks from pool
|
||||||
|
|
||||||
|
-- ============================================================================
|
||||||
|
-- TASK POOLS TABLE
|
||||||
|
-- ============================================================================
|
||||||
|
-- Each pool represents a metro area (e.g., Phoenix AZ = 100mi radius)
|
||||||
|
-- Dispensaries are assigned to pools based on location
|
||||||
|
-- Workers claim a pool, not individual tasks
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS task_pools (
|
||||||
|
id SERIAL PRIMARY KEY,
|
||||||
|
name VARCHAR(100) NOT NULL UNIQUE, -- e.g., 'phoenix_az'
|
||||||
|
display_name VARCHAR(100) NOT NULL, -- e.g., 'Phoenix, AZ'
|
||||||
|
state_code VARCHAR(2) NOT NULL, -- e.g., 'AZ'
|
||||||
|
city VARCHAR(100) NOT NULL, -- e.g., 'Phoenix'
|
||||||
|
latitude DECIMAL(10, 6) NOT NULL, -- pool center lat
|
||||||
|
longitude DECIMAL(10, 6) NOT NULL, -- pool center lng
|
||||||
|
radius_miles INTEGER DEFAULT 100, -- pool radius (100mi default)
|
||||||
|
timezone VARCHAR(50) NOT NULL, -- e.g., 'America/Phoenix'
|
||||||
|
is_active BOOLEAN DEFAULT true,
|
||||||
|
created_at TIMESTAMPTZ DEFAULT NOW(),
|
||||||
|
updated_at TIMESTAMPTZ DEFAULT NOW()
|
||||||
|
);
|
||||||
|
|
||||||
|
-- Index for active pools
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_task_pools_active ON task_pools(is_active) WHERE is_active = true;
|
||||||
|
|
||||||
|
-- ============================================================================
|
||||||
|
-- LINK DISPENSARIES TO POOLS
|
||||||
|
-- ============================================================================
|
||||||
|
-- Add pool_id to dispensaries table
|
||||||
|
|
||||||
|
ALTER TABLE dispensaries
|
||||||
|
ADD COLUMN IF NOT EXISTS pool_id INTEGER REFERENCES task_pools(id);
|
||||||
|
|
||||||
|
-- Index for pool membership
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_dispensaries_pool ON dispensaries(pool_id) WHERE pool_id IS NOT NULL;
|
||||||
|
|
||||||
|
-- ============================================================================
|
||||||
|
-- WORKER POOL ASSIGNMENT
|
||||||
|
-- ============================================================================
|
||||||
|
-- Track which pool a worker is currently assigned to
|
||||||
|
|
||||||
|
ALTER TABLE worker_registry
|
||||||
|
ADD COLUMN IF NOT EXISTS current_pool_id INTEGER REFERENCES task_pools(id),
|
||||||
|
ADD COLUMN IF NOT EXISTS pool_claimed_at TIMESTAMPTZ,
|
||||||
|
ADD COLUMN IF NOT EXISTS pool_stores_visited INTEGER DEFAULT 0,
|
||||||
|
ADD COLUMN IF NOT EXISTS pool_max_stores INTEGER DEFAULT 6;
|
||||||
|
|
||||||
|
-- ============================================================================
|
||||||
|
-- SEED INITIAL POOLS
|
||||||
|
-- ============================================================================
|
||||||
|
-- Major cannabis markets with approximate center coordinates
|
||||||
|
|
||||||
|
INSERT INTO task_pools (name, display_name, state_code, city, latitude, longitude, timezone, radius_miles) VALUES
|
||||||
|
-- Arizona
|
||||||
|
('phoenix_az', 'Phoenix, AZ', 'AZ', 'Phoenix', 33.4484, -112.0740, 'America/Phoenix', 100),
|
||||||
|
('tucson_az', 'Tucson, AZ', 'AZ', 'Tucson', 32.2226, -110.9747, 'America/Phoenix', 75),
|
||||||
|
|
||||||
|
-- California
|
||||||
|
('los_angeles_ca', 'Los Angeles, CA', 'CA', 'Los Angeles', 34.0522, -118.2437, 'America/Los_Angeles', 100),
|
||||||
|
('san_francisco_ca', 'San Francisco, CA', 'CA', 'San Francisco', 37.7749, -122.4194, 'America/Los_Angeles', 75),
|
||||||
|
('san_diego_ca', 'San Diego, CA', 'CA', 'San Diego', 32.7157, -117.1611, 'America/Los_Angeles', 75),
|
||||||
|
('sacramento_ca', 'Sacramento, CA', 'CA', 'Sacramento', 38.5816, -121.4944, 'America/Los_Angeles', 75),
|
||||||
|
|
||||||
|
-- Colorado
|
||||||
|
('denver_co', 'Denver, CO', 'CO', 'Denver', 39.7392, -104.9903, 'America/Denver', 100),
|
||||||
|
|
||||||
|
-- Illinois
|
||||||
|
('chicago_il', 'Chicago, IL', 'IL', 'Chicago', 41.8781, -87.6298, 'America/Chicago', 100),
|
||||||
|
|
||||||
|
-- Massachusetts
|
||||||
|
('boston_ma', 'Boston, MA', 'MA', 'Boston', 42.3601, -71.0589, 'America/New_York', 75),
|
||||||
|
|
||||||
|
-- Michigan
|
||||||
|
('detroit_mi', 'Detroit, MI', 'MI', 'Detroit', 42.3314, -83.0458, 'America/Detroit', 100),
|
||||||
|
|
||||||
|
-- Nevada
|
||||||
|
('las_vegas_nv', 'Las Vegas, NV', 'NV', 'Las Vegas', 36.1699, -115.1398, 'America/Los_Angeles', 75),
|
||||||
|
('reno_nv', 'Reno, NV', 'NV', 'Reno', 39.5296, -119.8138, 'America/Los_Angeles', 50),
|
||||||
|
|
||||||
|
-- New Jersey
|
||||||
|
('newark_nj', 'Newark, NJ', 'NJ', 'Newark', 40.7357, -74.1724, 'America/New_York', 75),
|
||||||
|
|
||||||
|
-- New York
|
||||||
|
('new_york_ny', 'New York, NY', 'NY', 'New York', 40.7128, -74.0060, 'America/New_York', 75),
|
||||||
|
|
||||||
|
-- Oklahoma
|
||||||
|
('oklahoma_city_ok', 'Oklahoma City, OK', 'OK', 'Oklahoma City', 35.4676, -97.5164, 'America/Chicago', 100),
|
||||||
|
('tulsa_ok', 'Tulsa, OK', 'OK', 'Tulsa', 36.1540, -95.9928, 'America/Chicago', 75),
|
||||||
|
|
||||||
|
-- Oregon
|
||||||
|
('portland_or', 'Portland, OR', 'OR', 'Portland', 45.5152, -122.6784, 'America/Los_Angeles', 75),
|
||||||
|
|
||||||
|
-- Washington
|
||||||
|
('seattle_wa', 'Seattle, WA', 'WA', 'Seattle', 47.6062, -122.3321, 'America/Los_Angeles', 100)
|
||||||
|
|
||||||
|
ON CONFLICT (name) DO NOTHING;
|
||||||
|
|
||||||
|
-- ============================================================================
|
||||||
|
-- FUNCTION: Assign dispensary to nearest pool
|
||||||
|
-- ============================================================================
|
||||||
|
-- Assigns one dispensary to the nearest active pool whose radius covers it.
--   disp_id: dispensaries.id of the store to assign.
-- Returns the chosen task_pools.id, or NULL when the dispensary has no
-- coordinates or no active pool is within range (pool_id is then left unchanged).
CREATE OR REPLACE FUNCTION assign_dispensary_to_pool(disp_id INTEGER)
RETURNS INTEGER AS $$
DECLARE
    disp_lat DECIMAL(10,6);
    disp_lng DECIMAL(10,6);
    nearest_pool_id INTEGER;
BEGIN
    -- Get dispensary coordinates
    SELECT d.latitude, d.longitude INTO disp_lat, disp_lng
    FROM dispensaries d WHERE d.id = disp_id;

    IF disp_lat IS NULL OR disp_lng IS NULL THEN
        RETURN NULL;
    END IF;

    -- Find nearest active pool within radius.
    -- Great-circle distance via the spherical law of cosines (3959 = Earth
    -- radius in miles). The acos() argument is clamped to [-1, 1] because
    -- floating-point rounding can push it slightly past 1 when the dispensary
    -- sits on (or very near) the pool center, which would raise
    -- "input is out of range".
    SELECT p.id INTO nearest_pool_id
    FROM (
        SELECT
            tp.id,
            tp.radius_miles,
            3959 * acos(
                LEAST(1.0, GREATEST(-1.0,
                    cos(radians(tp.latitude)) * cos(radians(disp_lat)) *
                    cos(radians(disp_lng) - radians(tp.longitude)) +
                    sin(radians(tp.latitude)) * sin(radians(disp_lat))
                ))
            ) AS distance_miles
        FROM task_pools tp
        WHERE tp.is_active = true
    ) p
    WHERE p.distance_miles <= p.radius_miles
    ORDER BY p.distance_miles
    LIMIT 1;

    -- Update dispensary
    IF nearest_pool_id IS NOT NULL THEN
        UPDATE dispensaries SET pool_id = nearest_pool_id WHERE id = disp_id;
    END IF;

    RETURN nearest_pool_id;
END;
$$ LANGUAGE plpgsql;
|
||||||
|
|
||||||
|
-- ============================================================================
|
||||||
|
-- FUNCTION: Assign all dispensaries to pools (batch)
|
||||||
|
-- ============================================================================
|
||||||
|
-- Runs assign_dispensary_to_pool() for every dispensary that has a latitude but
-- no pool yet. Returns a single row: how many were assigned to a pool and how
-- many could not be assigned.
CREATE OR REPLACE FUNCTION assign_all_dispensaries_to_pools()
RETURNS TABLE(assigned INTEGER, unassigned INTEGER) AS $$
DECLARE
    assigned_count INTEGER := 0;
    unassigned_count INTEGER := 0;
    disp RECORD;
    -- Must not be named pool_id: that collides with dispensaries.pool_id in the
    -- loop query below and PL/pgSQL rejects the reference as ambiguous.
    v_pool_id INTEGER;
BEGIN
    FOR disp IN
        SELECT d.id
        FROM dispensaries d
        WHERE d.pool_id IS NULL AND d.latitude IS NOT NULL
    LOOP
        v_pool_id := assign_dispensary_to_pool(disp.id);
        IF v_pool_id IS NOT NULL THEN
            assigned_count := assigned_count + 1;
        ELSE
            unassigned_count := unassigned_count + 1;
        END IF;
    END LOOP;

    RETURN QUERY SELECT assigned_count, unassigned_count;
END;
$$ LANGUAGE plpgsql;
|
||||||
|
|
||||||
|
-- ============================================================================
|
||||||
|
-- FUNCTION: Get pools with pending tasks
|
||||||
|
-- ============================================================================
|
||||||
|
-- Lists every active pool that has at least one pending task, busiest first.
-- pending_count = distinct pending tasks in the pool,
-- store_count   = distinct dispensaries in the pool that have a pending task.
CREATE OR REPLACE FUNCTION get_pools_with_pending_tasks()
RETURNS TABLE(
    pool_id INTEGER,
    pool_name VARCHAR(100),
    display_name VARCHAR(100),
    state_code VARCHAR(2),
    city VARCHAR(100),
    timezone VARCHAR(50),
    pending_count BIGINT,
    store_count BIGINT
)
LANGUAGE plpgsql
AS $$
BEGIN
    RETURN QUERY
    SELECT
        pool.id              AS pool_id,
        pool.name            AS pool_name,
        pool.display_name,
        pool.state_code,
        pool.city,
        pool.timezone,
        COUNT(DISTINCT task.id)  AS pending_count,
        COUNT(DISTINCT store.id) AS store_count
    FROM task_pools pool
    JOIN dispensaries store
      ON store.pool_id = pool.id
    JOIN tasks task
      ON task.dispensary_id = store.id
     AND task.status = 'pending'
    WHERE pool.is_active = true
    GROUP BY
        pool.id,
        pool.name,
        pool.display_name,
        pool.state_code,
        pool.city,
        pool.timezone
    HAVING COUNT(DISTINCT task.id) > 0
    ORDER BY COUNT(DISTINCT task.id) DESC;
END;
$$;
|
||||||
|
|
||||||
|
-- ============================================================================
|
||||||
|
-- FUNCTION: Worker claims a pool
|
||||||
|
-- ============================================================================
|
||||||
|
-- Assigns a pool to a worker and returns that pool's details.
--   p_worker_id: worker_registry.worker_id of the claiming worker.
--   p_pool_id:   pool to claim; when NULL the active pool with the most pending
--                tasks is chosen.
-- Resets the worker's per-pool counters (stores visited = 0, max stores = 6).
-- Returns no rows when no pool could be chosen.
CREATE OR REPLACE FUNCTION worker_claim_pool(
    p_worker_id VARCHAR(100),
    p_pool_id INTEGER DEFAULT NULL
)
RETURNS TABLE(
    pool_id INTEGER,
    pool_name VARCHAR(100),
    display_name VARCHAR(100),
    state_code VARCHAR(2),
    city VARCHAR(100),
    latitude DECIMAL(10,6),
    longitude DECIMAL(10,6),
    timezone VARCHAR(50)
)
LANGUAGE plpgsql
AS $$
DECLARE
    v_pool_id INTEGER := p_pool_id;
BEGIN
    -- No explicit pool requested: fall back to the busiest active pool
    IF v_pool_id IS NULL THEN
        SELECT pool.id INTO v_pool_id
        FROM task_pools pool
        JOIN dispensaries store
          ON store.pool_id = pool.id
        JOIN tasks task
          ON task.dispensary_id = store.id
         AND task.status = 'pending'
        WHERE pool.is_active = true
        GROUP BY pool.id
        ORDER BY COUNT(DISTINCT task.id) DESC
        LIMIT 1;
    END IF;

    -- Nothing to claim
    IF v_pool_id IS NULL THEN
        RETURN;
    END IF;

    -- Record the assignment and start a fresh store budget for this pool
    UPDATE worker_registry
       SET current_pool_id     = v_pool_id,
           pool_claimed_at     = NOW(),
           pool_stores_visited = 0,
           pool_max_stores     = 6,
           updated_at          = NOW()
     WHERE worker_id = p_worker_id;

    -- Hand the pool details back to the caller
    RETURN QUERY
    SELECT
        pool.id,
        pool.name,
        pool.display_name,
        pool.state_code,
        pool.city,
        pool.latitude,
        pool.longitude,
        pool.timezone
    FROM task_pools pool
    WHERE pool.id = v_pool_id;
END;
$$;
|
||||||
|
|
||||||
|
-- ============================================================================
|
||||||
|
-- FUNCTION: Pull tasks from worker's pool (up to 6 stores)
|
||||||
|
-- ============================================================================
|
||||||
|
-- Claims pending, due tasks from the worker's current pool: at most one task per
-- store, limited to the worker's remaining store budget
-- (pool_max_stores - pool_stores_visited), then adds the number of stores
-- claimed by this call to pool_stores_visited.
--   p_worker_id:  worker_registry.worker_id of the pulling worker.
--   p_max_stores: NOTE(review): currently unused; the budget comes from
--                 worker_registry.pool_max_stores. Confirm the intended meaning
--                 before wiring it in.
-- Returns the claimed tasks; returns no rows when the budget is exhausted.
-- Raises an exception when the worker has no pool assigned.
CREATE OR REPLACE FUNCTION pull_tasks_from_pool(
    p_worker_id VARCHAR(100),
    p_max_stores INTEGER DEFAULT 6
)
RETURNS TABLE(
    task_id INTEGER,
    dispensary_id INTEGER,
    dispensary_name VARCHAR(255),
    role VARCHAR(50),
    platform VARCHAR(50),
    method VARCHAR(20)
) AS $$
DECLARE
    worker_pool_id INTEGER;
    stores_visited INTEGER;
    max_stores INTEGER;
    stores_remaining INTEGER;
    newly_claimed INTEGER;
BEGIN
    -- Get worker's current pool and store count
    SELECT wr.current_pool_id, wr.pool_stores_visited, wr.pool_max_stores
    INTO worker_pool_id, stores_visited, max_stores
    FROM worker_registry wr
    WHERE wr.worker_id = p_worker_id;

    IF worker_pool_id IS NULL THEN
        RAISE EXCEPTION 'Worker % has no pool assigned', p_worker_id;
    END IF;

    stores_remaining := max_stores - stores_visited;
    IF stores_remaining <= 0 THEN
        RETURN; -- Worker exhausted
    END IF;

    -- Claim tasks from pool (one task per store, up to remaining capacity).
    -- Every column reference is table-qualified: the OUT columns (task_id,
    -- dispensary_id, ...) are PL/pgSQL variables, so unqualified uses of those
    -- names inside the query are rejected as ambiguous.
    RETURN QUERY
    WITH available_stores AS (
        SELECT DISTINCT ON (d.id)
            t.id as task_id,
            d.id as dispensary_id,
            d.name as dispensary_name,
            t.role,
            t.platform,
            t.method
        FROM tasks t
        JOIN dispensaries d ON d.id = t.dispensary_id
        WHERE d.pool_id = worker_pool_id
          AND t.status = 'pending'
          AND t.scheduled_for <= NOW()
        ORDER BY d.id, t.priority DESC, t.created_at ASC
        LIMIT stores_remaining
    ),
    claimed AS (
        UPDATE tasks tk
        SET
            status = 'claimed',
            claimed_by = p_worker_id,
            claimed_at = NOW()
        WHERE tk.id IN (SELECT av.task_id FROM available_stores av)
          -- Re-check status so a task claimed by a concurrent worker after the
          -- candidate list was built is skipped instead of overwritten.
          AND tk.status = 'pending'
        RETURNING tk.id
    )
    SELECT
        av.task_id,
        av.dispensary_id,
        av.dispensary_name,
        av.role,
        av.platform,
        av.method
    FROM available_stores av
    WHERE av.task_id IN (SELECT c.id FROM claimed c);

    -- Rows returned above = stores claimed by this call (one task per store).
    GET DIAGNOSTICS newly_claimed = ROW_COUNT;

    -- Update worker store count with only the stores claimed in this call;
    -- counting all of the worker's currently claimed tasks would re-add stores
    -- from earlier pulls.
    UPDATE worker_registry wr
    SET
        pool_stores_visited = wr.pool_stores_visited + newly_claimed,
        updated_at = NOW()
    WHERE wr.worker_id = p_worker_id;
END;
$$ LANGUAGE plpgsql;
|
||||||
|
|
||||||
|
-- ============================================================================
|
||||||
|
-- FUNCTION: Worker releases pool (exhausted or done)
|
||||||
|
-- ============================================================================
|
||||||
|
-- Detaches a worker from its pool: clears the pool assignment, the worker's
-- current state/city, and resets the visited-store counter.
-- Always returns TRUE, whether or not a worker_registry row matched.
CREATE OR REPLACE FUNCTION worker_release_pool(p_worker_id VARCHAR(100))
RETURNS BOOLEAN
LANGUAGE plpgsql
AS $$
BEGIN
    UPDATE worker_registry AS wr
       SET pool_stores_visited = 0,
           pool_claimed_at     = NULL,
           current_pool_id     = NULL,
           current_city        = NULL,
           current_state       = NULL,
           updated_at          = NOW()
     WHERE wr.worker_id = p_worker_id;

    RETURN true;
END;
$$;
|
||||||
|
|
||||||
|
-- ============================================================================
|
||||||
|
-- RUN: Assign existing dispensaries to pools
|
||||||
|
-- ============================================================================
|
||||||
|
SELECT * FROM assign_all_dispensaries_to_pools();
|
||||||
10
backend/migrations/114_schedule_pool_id.sql
Normal file
10
backend/migrations/114_schedule_pool_id.sql
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
-- Migration 114: Add pool_id to task_schedules
|
||||||
|
-- Allows schedules to target specific geo pools
|
||||||
|
|
||||||
|
ALTER TABLE task_schedules
|
||||||
|
ADD COLUMN IF NOT EXISTS pool_id INTEGER REFERENCES task_pools(id);
|
||||||
|
|
||||||
|
-- Index for pool-based schedule queries
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_task_schedules_pool ON task_schedules(pool_id) WHERE pool_id IS NOT NULL;
|
||||||
|
|
||||||
|
COMMENT ON COLUMN task_schedules.pool_id IS 'Optional geo pool filter. NULL = all pools/dispensaries matching state_code';
|
||||||
17
backend/migrations/115_task_proxy_ip.sql
Normal file
17
backend/migrations/115_task_proxy_ip.sql
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
-- Migration: Add proxy_ip tracking to worker_tasks
|
||||||
|
-- Purpose: Prevent same IP from hitting multiple stores on same platform simultaneously
|
||||||
|
--
|
||||||
|
-- Anti-detection measure: Dutchie/Jane may flag if same IP makes requests
|
||||||
|
-- for multiple different stores. This column lets us track and prevent that.
|
||||||
|
|
||||||
|
-- Add proxy_ip column to track which proxy IP is being used for each task
|
||||||
|
ALTER TABLE worker_tasks ADD COLUMN IF NOT EXISTS proxy_ip VARCHAR(45);
|
||||||
|
|
||||||
|
-- Index for quick lookup of active tasks by proxy IP
|
||||||
|
-- Used to check: "Is this IP already hitting another store?"
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_worker_tasks_proxy_ip_active
|
||||||
|
ON worker_tasks (proxy_ip, platform)
|
||||||
|
WHERE status IN ('claimed', 'running') AND proxy_ip IS NOT NULL;
|
||||||
|
|
||||||
|
-- Comment
|
||||||
|
COMMENT ON COLUMN worker_tasks.proxy_ip IS 'Proxy IP assigned to this task. Used to prevent same IP hitting multiple stores on same platform.';
|
||||||
16
backend/migrations/116_task_source_tracking.sql
Normal file
16
backend/migrations/116_task_source_tracking.sql
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
-- Migration: Add source tracking columns to worker_tasks
|
||||||
|
-- Purpose: Track where tasks originated from (schedule, API, manual)
|
||||||
|
|
||||||
|
-- Add source tracking columns
|
||||||
|
ALTER TABLE worker_tasks ADD COLUMN IF NOT EXISTS source VARCHAR(50);
|
||||||
|
ALTER TABLE worker_tasks ADD COLUMN IF NOT EXISTS source_schedule_id INTEGER REFERENCES task_schedules(id);
|
||||||
|
ALTER TABLE worker_tasks ADD COLUMN IF NOT EXISTS source_metadata JSONB;
|
||||||
|
|
||||||
|
-- Index for tracking tasks by schedule
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_worker_tasks_source_schedule
|
||||||
|
ON worker_tasks (source_schedule_id) WHERE source_schedule_id IS NOT NULL;
|
||||||
|
|
||||||
|
-- Comments
|
||||||
|
COMMENT ON COLUMN worker_tasks.source IS 'Origin of task: schedule, api, manual, chain';
|
||||||
|
COMMENT ON COLUMN worker_tasks.source_schedule_id IS 'ID of schedule that created this task';
|
||||||
|
COMMENT ON COLUMN worker_tasks.source_metadata IS 'Additional metadata about task origin';
|
||||||
32
backend/migrations/117_per_store_crawl_interval.sql
Normal file
32
backend/migrations/117_per_store_crawl_interval.sql
Normal file
@@ -0,0 +1,32 @@
|
|||||||
|
-- Migration 117: Per-store crawl interval scheduling
|
||||||
|
-- Adds columns for configurable per-store crawl intervals
|
||||||
|
-- Part of Real-Time Inventory Tracking feature
|
||||||
|
|
||||||
|
-- Per-store crawl interval (NULL = use state schedule default 4h)
|
||||||
|
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS crawl_interval_minutes INT DEFAULT NULL;
|
||||||
|
|
||||||
|
-- When this store should next be crawled (used by high-frequency scheduler)
|
||||||
|
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS next_crawl_at TIMESTAMPTZ DEFAULT NULL;
|
||||||
|
|
||||||
|
-- Track last request time to enforce minimum spacing
|
||||||
|
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS last_crawl_started_at TIMESTAMPTZ DEFAULT NULL;
|
||||||
|
|
||||||
|
-- Change tracking for optimization
|
||||||
|
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS last_inventory_hash TEXT DEFAULT NULL;
|
||||||
|
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS last_price_hash TEXT DEFAULT NULL;
|
||||||
|
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS inventory_changes_24h INT DEFAULT 0;
|
||||||
|
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS price_changes_24h INT DEFAULT 0;
|
||||||
|
|
||||||
|
-- Index for scheduler query: find stores due for high-frequency crawl
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_dispensaries_next_crawl
|
||||||
|
ON dispensaries(next_crawl_at)
|
||||||
|
WHERE crawl_interval_minutes IS NOT NULL AND crawl_enabled = TRUE;
|
||||||
|
|
||||||
|
-- Comment for documentation
|
||||||
|
COMMENT ON COLUMN dispensaries.crawl_interval_minutes IS 'Custom crawl interval in minutes. NULL = use state schedule (4h default). Set to 15/30/60 for high-frequency tracking.';
|
||||||
|
COMMENT ON COLUMN dispensaries.next_crawl_at IS 'When this store should next be crawled. Updated after each crawl with interval + jitter.';
|
||||||
|
COMMENT ON COLUMN dispensaries.last_crawl_started_at IS 'When the last crawl task was created. Used to enforce minimum spacing.';
|
||||||
|
COMMENT ON COLUMN dispensaries.last_inventory_hash IS 'Hash of inventory state from last crawl. Used to detect changes and skip unchanged payloads.';
|
||||||
|
COMMENT ON COLUMN dispensaries.last_price_hash IS 'Hash of price state from last crawl. Used to detect price changes.';
|
||||||
|
COMMENT ON COLUMN dispensaries.inventory_changes_24h IS 'Number of inventory changes detected in last 24h. Indicates store volatility.';
|
||||||
|
COMMENT ON COLUMN dispensaries.price_changes_24h IS 'Number of price changes detected in last 24h.';
|
||||||
48
backend/migrations/118_inventory_snapshots.sql
Normal file
48
backend/migrations/118_inventory_snapshots.sql
Normal file
@@ -0,0 +1,48 @@
|
|||||||
|
-- Migration 118: Inventory snapshots table
|
||||||
|
-- Lightweight per-product tracking for sales velocity estimation
|
||||||
|
-- Part of Real-Time Inventory Tracking feature
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS inventory_snapshots (
|
||||||
|
id BIGSERIAL PRIMARY KEY,
|
||||||
|
dispensary_id INT NOT NULL REFERENCES dispensaries(id) ON DELETE CASCADE,
|
||||||
|
product_id TEXT NOT NULL, -- provider_product_id (normalized across platforms)
|
||||||
|
captured_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
||||||
|
|
||||||
|
-- Platform (for debugging/filtering)
|
||||||
|
platform TEXT NOT NULL, -- 'dutchie' | 'jane' | 'treez'
|
||||||
|
|
||||||
|
-- Inventory fields (normalized from all platforms)
|
||||||
|
quantity_available INT, -- Dutchie: quantityAvailable, Jane: quantity, Treez: quantityAvailable
|
||||||
|
is_below_threshold BOOLEAN, -- Dutchie: isBelowThreshold, Jane: computed, Treez: lowInventory
|
||||||
|
status TEXT, -- Active/Inactive/available
|
||||||
|
|
||||||
|
-- Price fields (normalized)
|
||||||
|
price_rec NUMERIC(10,2), -- recreational price
|
||||||
|
price_med NUMERIC(10,2), -- medical price (if different)
|
||||||
|
|
||||||
|
-- Denormalized for fast queries
|
||||||
|
brand_name TEXT,
|
||||||
|
category TEXT,
|
||||||
|
product_name TEXT
|
||||||
|
);
|
||||||
|
|
||||||
|
-- All indexes use IF NOT EXISTS so the migration can be re-run safely, matching
-- the CREATE TABLE IF NOT EXISTS above.

-- Primary query: get snapshots for a store over time
CREATE INDEX IF NOT EXISTS idx_inv_snap_store_time ON inventory_snapshots(dispensary_id, captured_at DESC);

-- Delta calculation: get consecutive snapshots for a product
CREATE INDEX IF NOT EXISTS idx_inv_snap_product_time ON inventory_snapshots(dispensary_id, product_id, captured_at DESC);

-- Brand-level analytics
CREATE INDEX IF NOT EXISTS idx_inv_snap_brand_time ON inventory_snapshots(brand_name, captured_at DESC) WHERE brand_name IS NOT NULL;

-- Platform filtering
CREATE INDEX IF NOT EXISTS idx_inv_snap_platform ON inventory_snapshots(platform, captured_at DESC);

-- Retention cleanup (30 days) - simple index, cleanup job handles the WHERE
CREATE INDEX IF NOT EXISTS idx_inv_snap_cleanup ON inventory_snapshots(captured_at);
|
||||||
|
|
||||||
|
-- Comments
|
||||||
|
COMMENT ON TABLE inventory_snapshots IS 'Lightweight inventory snapshots for sales velocity tracking. Retained 30 days.';
|
||||||
|
COMMENT ON COLUMN inventory_snapshots.product_id IS 'Provider product ID, normalized across platforms';
|
||||||
|
COMMENT ON COLUMN inventory_snapshots.platform IS 'Menu platform: dutchie, jane, or treez';
|
||||||
|
COMMENT ON COLUMN inventory_snapshots.quantity_available IS 'Current quantity in stock (Dutchie: quantityAvailable, Jane: quantity)';
|
||||||
53
backend/migrations/119_product_visibility_events.sql
Normal file
53
backend/migrations/119_product_visibility_events.sql
Normal file
@@ -0,0 +1,53 @@
|
|||||||
|
-- Migration 119: Product visibility events table
|
||||||
|
-- Tracks OOS, brand drops, and other notable events for alerts
|
||||||
|
-- Part of Real-Time Inventory Tracking feature
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS product_visibility_events (
|
||||||
|
id SERIAL PRIMARY KEY,
|
||||||
|
dispensary_id INT NOT NULL REFERENCES dispensaries(id) ON DELETE CASCADE,
|
||||||
|
|
||||||
|
-- Product identification (null for brand-level events)
|
||||||
|
product_id TEXT, -- provider_product_id
|
||||||
|
product_name TEXT, -- For display in alerts
|
||||||
|
|
||||||
|
-- Brand (always populated)
|
||||||
|
brand_name TEXT,
|
||||||
|
|
||||||
|
-- Event details
|
||||||
|
event_type TEXT NOT NULL, -- 'oos', 'back_in_stock', 'brand_dropped', 'brand_added', 'price_change'
|
||||||
|
detected_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
||||||
|
|
||||||
|
-- Context
|
||||||
|
previous_quantity INT, -- For OOS events: what quantity was before
|
||||||
|
previous_price NUMERIC(10,2), -- For price change events
|
||||||
|
new_price NUMERIC(10,2), -- For price change events
|
||||||
|
price_change_pct NUMERIC(5,2), -- Percentage change (e.g., -15.5 for 15.5% decrease)
|
||||||
|
|
||||||
|
-- Platform
|
||||||
|
platform TEXT, -- 'dutchie' | 'jane' | 'treez'
|
||||||
|
|
||||||
|
-- Alert status
|
||||||
|
notified BOOLEAN DEFAULT FALSE, -- Has external system been notified?
|
||||||
|
acknowledged_at TIMESTAMPTZ, -- When user acknowledged the alert
|
||||||
|
acknowledged_by TEXT -- User who acknowledged
|
||||||
|
);
|
||||||
|
|
||||||
|
-- All indexes use IF NOT EXISTS so the migration can be re-run safely, matching
-- the CREATE TABLE IF NOT EXISTS above.

-- Primary query: recent events by store
CREATE INDEX IF NOT EXISTS idx_vis_events_store_time ON product_visibility_events(dispensary_id, detected_at DESC);

-- Alert queries: unnotified events
CREATE INDEX IF NOT EXISTS idx_vis_events_unnotified ON product_visibility_events(notified, detected_at DESC) WHERE notified = FALSE;

-- Event type filtering
CREATE INDEX IF NOT EXISTS idx_vis_events_type ON product_visibility_events(event_type, detected_at DESC);

-- Brand-level queries
CREATE INDEX IF NOT EXISTS idx_vis_events_brand ON product_visibility_events(brand_name, event_type, detected_at DESC) WHERE brand_name IS NOT NULL;

-- Cleanup (90 days retention) - simple index, cleanup job handles the WHERE
CREATE INDEX IF NOT EXISTS idx_vis_events_cleanup ON product_visibility_events(detected_at);
|
||||||
|
|
||||||
|
-- Comments
|
||||||
|
COMMENT ON TABLE product_visibility_events IS 'Notable inventory events for alerting. OOS, brand drops, significant price changes. Retained 90 days.';
|
||||||
|
COMMENT ON COLUMN product_visibility_events.event_type IS 'Event type: oos (out of stock), back_in_stock, brand_dropped, brand_added, price_change';
|
||||||
|
COMMENT ON COLUMN product_visibility_events.notified IS 'Whether external systems (other apps) have been notified of this event';
|
||||||
13
backend/migrations/120_daily_baseline_tracking.sql
Normal file
13
backend/migrations/120_daily_baseline_tracking.sql
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
-- Migration 120: Daily baseline tracking
|
||||||
|
-- Track when each store's daily baseline payload was last saved
|
||||||
|
-- Part of Real-Time Inventory Tracking feature
|
||||||
|
|
||||||
|
-- Add column to track last baseline save time
|
||||||
|
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS last_baseline_at TIMESTAMPTZ DEFAULT NULL;
|
||||||
|
|
||||||
|
-- Index for finding stores that need baselines
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_dispensaries_baseline ON dispensaries(last_baseline_at)
|
||||||
|
WHERE crawl_enabled = TRUE;
|
||||||
|
|
||||||
|
-- Comment
|
||||||
|
COMMENT ON COLUMN dispensaries.last_baseline_at IS 'Timestamp of last daily baseline payload save. Baselines saved once per day between 12:01 AM - 3:00 AM.';
|
||||||
239
backend/package-lock.json
generated
239
backend/package-lock.json
generated
@@ -35,6 +35,8 @@
|
|||||||
"puppeteer-extra-plugin-stealth": "^2.11.2",
|
"puppeteer-extra-plugin-stealth": "^2.11.2",
|
||||||
"sharp": "^0.32.0",
|
"sharp": "^0.32.0",
|
||||||
"socks-proxy-agent": "^8.0.2",
|
"socks-proxy-agent": "^8.0.2",
|
||||||
|
"swagger-jsdoc": "^6.2.8",
|
||||||
|
"swagger-ui-express": "^5.0.1",
|
||||||
"user-agents": "^1.1.669",
|
"user-agents": "^1.1.669",
|
||||||
"uuid": "^9.0.1",
|
"uuid": "^9.0.1",
|
||||||
"zod": "^3.22.4"
|
"zod": "^3.22.4"
|
||||||
@@ -47,11 +49,53 @@
|
|||||||
"@types/node": "^20.10.5",
|
"@types/node": "^20.10.5",
|
||||||
"@types/node-cron": "^3.0.11",
|
"@types/node-cron": "^3.0.11",
|
||||||
"@types/pg": "^8.15.6",
|
"@types/pg": "^8.15.6",
|
||||||
|
"@types/swagger-jsdoc": "^6.0.4",
|
||||||
|
"@types/swagger-ui-express": "^4.1.8",
|
||||||
"@types/uuid": "^9.0.7",
|
"@types/uuid": "^9.0.7",
|
||||||
"tsx": "^4.7.0",
|
"tsx": "^4.7.0",
|
||||||
"typescript": "^5.3.3"
|
"typescript": "^5.3.3"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/@apidevtools/json-schema-ref-parser": {
|
||||||
|
"version": "9.1.2",
|
||||||
|
"resolved": "https://registry.npmjs.org/@apidevtools/json-schema-ref-parser/-/json-schema-ref-parser-9.1.2.tgz",
|
||||||
|
"integrity": "sha512-r1w81DpR+KyRWd3f+rk6TNqMgedmAxZP5v5KWlXQWlgMUUtyEJch0DKEci1SorPMiSeM8XPl7MZ3miJ60JIpQg==",
|
||||||
|
"dependencies": {
|
||||||
|
"@jsdevtools/ono": "^7.1.3",
|
||||||
|
"@types/json-schema": "^7.0.6",
|
||||||
|
"call-me-maybe": "^1.0.1",
|
||||||
|
"js-yaml": "^4.1.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/@apidevtools/openapi-schemas": {
|
||||||
|
"version": "2.1.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/@apidevtools/openapi-schemas/-/openapi-schemas-2.1.0.tgz",
|
||||||
|
"integrity": "sha512-Zc1AlqrJlX3SlpupFGpiLi2EbteyP7fXmUOGup6/DnkRgjP9bgMM/ag+n91rsv0U1Gpz0H3VILA/o3bW7Ua6BQ==",
|
||||||
|
"engines": {
|
||||||
|
"node": ">=10"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/@apidevtools/swagger-methods": {
|
||||||
|
"version": "3.0.2",
|
||||||
|
"resolved": "https://registry.npmjs.org/@apidevtools/swagger-methods/-/swagger-methods-3.0.2.tgz",
|
||||||
|
"integrity": "sha512-QAkD5kK2b1WfjDS/UQn/qQkbwF31uqRjPTrsCs5ZG9BQGAkjwvqGFjjPqAuzac/IYzpPtRzjCP1WrTuAIjMrXg=="
|
||||||
|
},
|
||||||
|
"node_modules/@apidevtools/swagger-parser": {
|
||||||
|
"version": "10.0.3",
|
||||||
|
"resolved": "https://registry.npmjs.org/@apidevtools/swagger-parser/-/swagger-parser-10.0.3.tgz",
|
||||||
|
"integrity": "sha512-sNiLY51vZOmSPFZA5TF35KZ2HbgYklQnTSDnkghamzLb3EkNtcQnrBQEj5AOCxHpTtXpqMCRM1CrmV2rG6nw4g==",
|
||||||
|
"dependencies": {
|
||||||
|
"@apidevtools/json-schema-ref-parser": "^9.0.6",
|
||||||
|
"@apidevtools/openapi-schemas": "^2.0.4",
|
||||||
|
"@apidevtools/swagger-methods": "^3.0.2",
|
||||||
|
"@jsdevtools/ono": "^7.1.3",
|
||||||
|
"call-me-maybe": "^1.0.1",
|
||||||
|
"z-schema": "^5.0.1"
|
||||||
|
},
|
||||||
|
"peerDependencies": {
|
||||||
|
"openapi-types": ">=7"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/@babel/code-frame": {
|
"node_modules/@babel/code-frame": {
|
||||||
"version": "7.27.1",
|
"version": "7.27.1",
|
||||||
"resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.27.1.tgz",
|
"resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.27.1.tgz",
|
||||||
@@ -494,6 +538,11 @@
|
|||||||
"resolved": "https://registry.npmjs.org/@ioredis/commands/-/commands-1.4.0.tgz",
|
"resolved": "https://registry.npmjs.org/@ioredis/commands/-/commands-1.4.0.tgz",
|
||||||
"integrity": "sha512-aFT2yemJJo+TZCmieA7qnYGQooOS7QfNmYrzGtsYd3g9j5iDP8AimYYAesf79ohjbLG12XxC4nG5DyEnC88AsQ=="
|
"integrity": "sha512-aFT2yemJJo+TZCmieA7qnYGQooOS7QfNmYrzGtsYd3g9j5iDP8AimYYAesf79ohjbLG12XxC4nG5DyEnC88AsQ=="
|
||||||
},
|
},
|
||||||
|
"node_modules/@jsdevtools/ono": {
|
||||||
|
"version": "7.1.3",
|
||||||
|
"resolved": "https://registry.npmjs.org/@jsdevtools/ono/-/ono-7.1.3.tgz",
|
||||||
|
"integrity": "sha512-4JQNk+3mVzK3xh2rqd6RB4J46qUR19azEHBneZyTZM+c456qOrbbM/5xcR8huNCCcbVt7+UmizG6GuUvPvKUYg=="
|
||||||
|
},
|
||||||
"node_modules/@jsep-plugin/assignment": {
|
"node_modules/@jsep-plugin/assignment": {
|
||||||
"version": "1.3.0",
|
"version": "1.3.0",
|
||||||
"resolved": "https://registry.npmjs.org/@jsep-plugin/assignment/-/assignment-1.3.0.tgz",
|
"resolved": "https://registry.npmjs.org/@jsep-plugin/assignment/-/assignment-1.3.0.tgz",
|
||||||
@@ -761,6 +810,12 @@
|
|||||||
"resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz",
|
"resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz",
|
||||||
"integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w=="
|
"integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w=="
|
||||||
},
|
},
|
||||||
|
"node_modules/@scarf/scarf": {
|
||||||
|
"version": "1.4.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/@scarf/scarf/-/scarf-1.4.0.tgz",
|
||||||
|
"integrity": "sha512-xxeapPiUXdZAE3che6f3xogoJPeZgig6omHEy1rIY5WVsB3H2BHNnZH+gHG6x91SCWyQCzWGsuL2Hh3ClO5/qQ==",
|
||||||
|
"hasInstallScript": true
|
||||||
|
},
|
||||||
"node_modules/@tootallnate/quickjs-emscripten": {
|
"node_modules/@tootallnate/quickjs-emscripten": {
|
||||||
"version": "0.23.0",
|
"version": "0.23.0",
|
||||||
"resolved": "https://registry.npmjs.org/@tootallnate/quickjs-emscripten/-/quickjs-emscripten-0.23.0.tgz",
|
"resolved": "https://registry.npmjs.org/@tootallnate/quickjs-emscripten/-/quickjs-emscripten-0.23.0.tgz",
|
||||||
@@ -855,6 +910,11 @@
|
|||||||
"resolved": "https://registry.npmjs.org/@types/js-yaml/-/js-yaml-4.0.9.tgz",
|
"resolved": "https://registry.npmjs.org/@types/js-yaml/-/js-yaml-4.0.9.tgz",
|
||||||
"integrity": "sha512-k4MGaQl5TGo/iipqb2UDG2UwjXziSWkh0uysQelTlJpX1qGlpUZYm8PnO4DxG1qBomtJUdYJ6qR6xdIah10JLg=="
|
"integrity": "sha512-k4MGaQl5TGo/iipqb2UDG2UwjXziSWkh0uysQelTlJpX1qGlpUZYm8PnO4DxG1qBomtJUdYJ6qR6xdIah10JLg=="
|
||||||
},
|
},
|
||||||
|
"node_modules/@types/json-schema": {
|
||||||
|
"version": "7.0.15",
|
||||||
|
"resolved": "https://registry.npmjs.org/@types/json-schema/-/json-schema-7.0.15.tgz",
|
||||||
|
"integrity": "sha512-5+fP8P8MFNC+AyZCDxrB2pkZFPGzqQWUzpSeuuVLvm8VMcorNYavBqoFcxK8bQz4Qsbn4oUEEem4wDLfcysGHA=="
|
||||||
|
},
|
||||||
"node_modules/@types/jsonwebtoken": {
|
"node_modules/@types/jsonwebtoken": {
|
||||||
"version": "9.0.10",
|
"version": "9.0.10",
|
||||||
"resolved": "https://registry.npmjs.org/@types/jsonwebtoken/-/jsonwebtoken-9.0.10.tgz",
|
"resolved": "https://registry.npmjs.org/@types/jsonwebtoken/-/jsonwebtoken-9.0.10.tgz",
|
||||||
@@ -960,6 +1020,22 @@
|
|||||||
"@types/node": "*"
|
"@types/node": "*"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/@types/swagger-jsdoc": {
|
||||||
|
"version": "6.0.4",
|
||||||
|
"resolved": "https://registry.npmjs.org/@types/swagger-jsdoc/-/swagger-jsdoc-6.0.4.tgz",
|
||||||
|
"integrity": "sha512-W+Xw5epcOZrF/AooUM/PccNMSAFOKWZA5dasNyMujTwsBkU74njSJBpvCCJhHAJ95XRMzQrrW844Btu0uoetwQ==",
|
||||||
|
"dev": true
|
||||||
|
},
|
||||||
|
"node_modules/@types/swagger-ui-express": {
|
||||||
|
"version": "4.1.8",
|
||||||
|
"resolved": "https://registry.npmjs.org/@types/swagger-ui-express/-/swagger-ui-express-4.1.8.tgz",
|
||||||
|
"integrity": "sha512-AhZV8/EIreHFmBV5wAs0gzJUNq9JbbSXgJLQubCC0jtIo6prnI9MIRRxnU4MZX9RB9yXxF1V4R7jtLl/Wcj31g==",
|
||||||
|
"dev": true,
|
||||||
|
"dependencies": {
|
||||||
|
"@types/express": "*",
|
||||||
|
"@types/serve-static": "*"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/@types/uuid": {
|
"node_modules/@types/uuid": {
|
||||||
"version": "9.0.8",
|
"version": "9.0.8",
|
||||||
"resolved": "https://registry.npmjs.org/@types/uuid/-/uuid-9.0.8.tgz",
|
"resolved": "https://registry.npmjs.org/@types/uuid/-/uuid-9.0.8.tgz",
|
||||||
@@ -1434,6 +1510,11 @@
|
|||||||
"url": "https://github.com/sponsors/ljharb"
|
"url": "https://github.com/sponsors/ljharb"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/call-me-maybe": {
|
||||||
|
"version": "1.0.2",
|
||||||
|
"resolved": "https://registry.npmjs.org/call-me-maybe/-/call-me-maybe-1.0.2.tgz",
|
||||||
|
"integrity": "sha512-HpX65o1Hnr9HH25ojC1YGs7HCQLq0GCOibSaWER0eNpgJ/Z1MZv2mTc7+xh6WOPxbRVcmgbv4hGU+uSQ/2xFZQ=="
|
||||||
|
},
|
||||||
"node_modules/callsites": {
|
"node_modules/callsites": {
|
||||||
"version": "3.1.0",
|
"version": "3.1.0",
|
||||||
"resolved": "https://registry.npmjs.org/callsites/-/callsites-3.1.0.tgz",
|
"resolved": "https://registry.npmjs.org/callsites/-/callsites-3.1.0.tgz",
|
||||||
@@ -1594,6 +1675,14 @@
|
|||||||
"node": ">= 0.8"
|
"node": ">= 0.8"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/commander": {
|
||||||
|
"version": "6.2.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/commander/-/commander-6.2.0.tgz",
|
||||||
|
"integrity": "sha512-zP4jEKbe8SHzKJYQmq8Y9gYjtO/POJLgIdKgV7B9qNmABVFVc+ctqSX6iXh4mCpJfRBOabiZ2YKPg8ciDw6C+Q==",
|
||||||
|
"engines": {
|
||||||
|
"node": ">= 6"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/concat-map": {
|
"node_modules/concat-map": {
|
||||||
"version": "0.0.1",
|
"version": "0.0.1",
|
||||||
"resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz",
|
"resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz",
|
||||||
@@ -1863,6 +1952,17 @@
|
|||||||
"resolved": "https://registry.npmjs.org/devtools-protocol/-/devtools-protocol-0.0.1232444.tgz",
|
"resolved": "https://registry.npmjs.org/devtools-protocol/-/devtools-protocol-0.0.1232444.tgz",
|
||||||
"integrity": "sha512-pM27vqEfxSxRkTMnF+XCmxSEb6duO5R+t8A9DEEJgy4Wz2RVanje2mmj99B6A3zv2r/qGfYlOvYznUhuokizmg=="
|
"integrity": "sha512-pM27vqEfxSxRkTMnF+XCmxSEb6duO5R+t8A9DEEJgy4Wz2RVanje2mmj99B6A3zv2r/qGfYlOvYznUhuokizmg=="
|
||||||
},
|
},
|
||||||
|
"node_modules/doctrine": {
|
||||||
|
"version": "3.0.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/doctrine/-/doctrine-3.0.0.tgz",
|
||||||
|
"integrity": "sha512-yS+Q5i3hBf7GBkd4KG8a7eBNNWNGLTaEwwYWUijIYM7zrlYDM0BFXHjjPWlWZ1Rg7UaddZeIDmi9jF3HmqiQ2w==",
|
||||||
|
"dependencies": {
|
||||||
|
"esutils": "^2.0.2"
|
||||||
|
},
|
||||||
|
"engines": {
|
||||||
|
"node": ">=6.0.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/dom-serializer": {
|
"node_modules/dom-serializer": {
|
||||||
"version": "2.0.0",
|
"version": "2.0.0",
|
||||||
"resolved": "https://registry.npmjs.org/dom-serializer/-/dom-serializer-2.0.0.tgz",
|
"resolved": "https://registry.npmjs.org/dom-serializer/-/dom-serializer-2.0.0.tgz",
|
||||||
@@ -3258,6 +3358,12 @@
|
|||||||
"resolved": "https://registry.npmjs.org/lodash.defaults/-/lodash.defaults-4.2.0.tgz",
|
"resolved": "https://registry.npmjs.org/lodash.defaults/-/lodash.defaults-4.2.0.tgz",
|
||||||
"integrity": "sha512-qjxPLHd3r5DnsdGacqOMU6pb/avJzdh9tFX2ymgoZE27BmjXrNy/y4LoaiTeAb+O3gL8AfpJGtqfX/ae2leYYQ=="
|
"integrity": "sha512-qjxPLHd3r5DnsdGacqOMU6pb/avJzdh9tFX2ymgoZE27BmjXrNy/y4LoaiTeAb+O3gL8AfpJGtqfX/ae2leYYQ=="
|
||||||
},
|
},
|
||||||
|
"node_modules/lodash.get": {
|
||||||
|
"version": "4.4.2",
|
||||||
|
"resolved": "https://registry.npmjs.org/lodash.get/-/lodash.get-4.4.2.tgz",
|
||||||
|
"integrity": "sha512-z+Uw/vLuy6gQe8cfaFWD7p0wVv8fJl3mbzXh33RS+0oW2wvUqiRXiQ69gLWSLpgB5/6sU+r6BlQR0MBILadqTQ==",
|
||||||
|
"deprecated": "This package is deprecated. Use the optional chaining (?.) operator instead."
|
||||||
|
},
|
||||||
"node_modules/lodash.includes": {
|
"node_modules/lodash.includes": {
|
||||||
"version": "4.3.0",
|
"version": "4.3.0",
|
||||||
"resolved": "https://registry.npmjs.org/lodash.includes/-/lodash.includes-4.3.0.tgz",
|
"resolved": "https://registry.npmjs.org/lodash.includes/-/lodash.includes-4.3.0.tgz",
|
||||||
@@ -3273,6 +3379,12 @@
|
|||||||
"resolved": "https://registry.npmjs.org/lodash.isboolean/-/lodash.isboolean-3.0.3.tgz",
|
"resolved": "https://registry.npmjs.org/lodash.isboolean/-/lodash.isboolean-3.0.3.tgz",
|
||||||
"integrity": "sha512-Bz5mupy2SVbPHURB98VAcw+aHh4vRV5IPNhILUCsOzRmsTmSQ17jIuqopAentWoehktxGd9e/hbIXq980/1QJg=="
|
"integrity": "sha512-Bz5mupy2SVbPHURB98VAcw+aHh4vRV5IPNhILUCsOzRmsTmSQ17jIuqopAentWoehktxGd9e/hbIXq980/1QJg=="
|
||||||
},
|
},
|
||||||
|
"node_modules/lodash.isequal": {
|
||||||
|
"version": "4.5.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/lodash.isequal/-/lodash.isequal-4.5.0.tgz",
|
||||||
|
"integrity": "sha512-pDo3lu8Jhfjqls6GkMgpahsF9kCyayhgykjyLMNFTKWrpVdAQtYyB4muAMWozBB4ig/dtWAmsMxLEI8wuz+DYQ==",
|
||||||
|
"deprecated": "This package is deprecated. Use require('node:util').isDeepStrictEqual instead."
|
||||||
|
},
|
||||||
"node_modules/lodash.isinteger": {
|
"node_modules/lodash.isinteger": {
|
||||||
"version": "4.0.4",
|
"version": "4.0.4",
|
||||||
"resolved": "https://registry.npmjs.org/lodash.isinteger/-/lodash.isinteger-4.0.4.tgz",
|
"resolved": "https://registry.npmjs.org/lodash.isinteger/-/lodash.isinteger-4.0.4.tgz",
|
||||||
@@ -3293,6 +3405,11 @@
|
|||||||
"resolved": "https://registry.npmjs.org/lodash.isstring/-/lodash.isstring-4.0.1.tgz",
|
"resolved": "https://registry.npmjs.org/lodash.isstring/-/lodash.isstring-4.0.1.tgz",
|
||||||
"integrity": "sha512-0wJxfxH1wgO3GrbuP+dTTk7op+6L41QCXbGINEmD+ny/G/eCqGzxyCsh7159S+mgDDcoarnBw6PC1PS5+wUGgw=="
|
"integrity": "sha512-0wJxfxH1wgO3GrbuP+dTTk7op+6L41QCXbGINEmD+ny/G/eCqGzxyCsh7159S+mgDDcoarnBw6PC1PS5+wUGgw=="
|
||||||
},
|
},
|
||||||
|
"node_modules/lodash.mergewith": {
|
||||||
|
"version": "4.6.2",
|
||||||
|
"resolved": "https://registry.npmjs.org/lodash.mergewith/-/lodash.mergewith-4.6.2.tgz",
|
||||||
|
"integrity": "sha512-GK3g5RPZWTRSeLSpgP8Xhra+pnjBC56q9FZYe1d5RN3TJ35dbkGy3YqBSMbyCrlbi+CM9Z3Jk5yTL7RCsqboyQ=="
|
||||||
|
},
|
||||||
"node_modules/lodash.once": {
|
"node_modules/lodash.once": {
|
||||||
"version": "4.1.1",
|
"version": "4.1.1",
|
||||||
"resolved": "https://registry.npmjs.org/lodash.once/-/lodash.once-4.1.1.tgz",
|
"resolved": "https://registry.npmjs.org/lodash.once/-/lodash.once-4.1.1.tgz",
|
||||||
@@ -3748,6 +3865,12 @@
|
|||||||
"wrappy": "1"
|
"wrappy": "1"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/openapi-types": {
|
||||||
|
"version": "12.1.3",
|
||||||
|
"resolved": "https://registry.npmjs.org/openapi-types/-/openapi-types-12.1.3.tgz",
|
||||||
|
"integrity": "sha512-N4YtSYJqghVu4iek2ZUvcN/0aqH1kRDuNqzcycDxhOUpg7GdvLa2F3DgS6yBNhInhv2r/6I0Flkn7CqL8+nIcw==",
|
||||||
|
"peer": true
|
||||||
|
},
|
||||||
"node_modules/openid-client": {
|
"node_modules/openid-client": {
|
||||||
"version": "6.8.1",
|
"version": "6.8.1",
|
||||||
"resolved": "https://registry.npmjs.org/openid-client/-/openid-client-6.8.1.tgz",
|
"resolved": "https://registry.npmjs.org/openid-client/-/openid-client-6.8.1.tgz",
|
||||||
@@ -5188,6 +5311,78 @@
|
|||||||
}
|
}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
"node_modules/swagger-jsdoc": {
|
||||||
|
"version": "6.2.8",
|
||||||
|
"resolved": "https://registry.npmjs.org/swagger-jsdoc/-/swagger-jsdoc-6.2.8.tgz",
|
||||||
|
"integrity": "sha512-VPvil1+JRpmJ55CgAtn8DIcpBs0bL5L3q5bVQvF4tAW/k/9JYSj7dCpaYCAv5rufe0vcCbBRQXGvzpkWjvLklQ==",
|
||||||
|
"dependencies": {
|
||||||
|
"commander": "6.2.0",
|
||||||
|
"doctrine": "3.0.0",
|
||||||
|
"glob": "7.1.6",
|
||||||
|
"lodash.mergewith": "^4.6.2",
|
||||||
|
"swagger-parser": "^10.0.3",
|
||||||
|
"yaml": "2.0.0-1"
|
||||||
|
},
|
||||||
|
"bin": {
|
||||||
|
"swagger-jsdoc": "bin/swagger-jsdoc.js"
|
||||||
|
},
|
||||||
|
"engines": {
|
||||||
|
"node": ">=12.0.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/swagger-jsdoc/node_modules/glob": {
|
||||||
|
"version": "7.1.6",
|
||||||
|
"resolved": "https://registry.npmjs.org/glob/-/glob-7.1.6.tgz",
|
||||||
|
"integrity": "sha512-LwaxwyZ72Lk7vZINtNNrywX0ZuLyStrdDtabefZKAY5ZGJhVtgdznluResxNmPitE0SAO+O26sWTHeKSI2wMBA==",
|
||||||
|
"deprecated": "Glob versions prior to v9 are no longer supported",
|
||||||
|
"dependencies": {
|
||||||
|
"fs.realpath": "^1.0.0",
|
||||||
|
"inflight": "^1.0.4",
|
||||||
|
"inherits": "2",
|
||||||
|
"minimatch": "^3.0.4",
|
||||||
|
"once": "^1.3.0",
|
||||||
|
"path-is-absolute": "^1.0.0"
|
||||||
|
},
|
||||||
|
"engines": {
|
||||||
|
"node": "*"
|
||||||
|
},
|
||||||
|
"funding": {
|
||||||
|
"url": "https://github.com/sponsors/isaacs"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/swagger-parser": {
|
||||||
|
"version": "10.0.3",
|
||||||
|
"resolved": "https://registry.npmjs.org/swagger-parser/-/swagger-parser-10.0.3.tgz",
|
||||||
|
"integrity": "sha512-nF7oMeL4KypldrQhac8RyHerJeGPD1p2xDh900GPvc+Nk7nWP6jX2FcC7WmkinMoAmoO774+AFXcWsW8gMWEIg==",
|
||||||
|
"dependencies": {
|
||||||
|
"@apidevtools/swagger-parser": "10.0.3"
|
||||||
|
},
|
||||||
|
"engines": {
|
||||||
|
"node": ">=10"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/swagger-ui-dist": {
|
||||||
|
"version": "5.31.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/swagger-ui-dist/-/swagger-ui-dist-5.31.0.tgz",
|
||||||
|
"integrity": "sha512-zSUTIck02fSga6rc0RZP3b7J7wgHXwLea8ZjgLA3Vgnb8QeOl3Wou2/j5QkzSGeoz6HusP/coYuJl33aQxQZpg==",
|
||||||
|
"dependencies": {
|
||||||
|
"@scarf/scarf": "=1.4.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/swagger-ui-express": {
|
||||||
|
"version": "5.0.1",
|
||||||
|
"resolved": "https://registry.npmjs.org/swagger-ui-express/-/swagger-ui-express-5.0.1.tgz",
|
||||||
|
"integrity": "sha512-SrNU3RiBGTLLmFU8GIJdOdanJTl4TOmT27tt3bWWHppqYmAZ6IDuEuBvMU6nZq0zLEe6b/1rACXCgLZqO6ZfrA==",
|
||||||
|
"dependencies": {
|
||||||
|
"swagger-ui-dist": ">=5.0.0"
|
||||||
|
},
|
||||||
|
"engines": {
|
||||||
|
"node": ">= v0.10.32"
|
||||||
|
},
|
||||||
|
"peerDependencies": {
|
||||||
|
"express": ">=4.0.0 || >=5.0.0-beta"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/tar": {
|
"node_modules/tar": {
|
||||||
"version": "6.2.1",
|
"version": "6.2.1",
|
||||||
"resolved": "https://registry.npmjs.org/tar/-/tar-6.2.1.tgz",
|
"resolved": "https://registry.npmjs.org/tar/-/tar-6.2.1.tgz",
|
||||||
@@ -5406,6 +5601,14 @@
|
|||||||
"uuid": "dist/bin/uuid"
|
"uuid": "dist/bin/uuid"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/validator": {
|
||||||
|
"version": "13.15.23",
|
||||||
|
"resolved": "https://registry.npmjs.org/validator/-/validator-13.15.23.tgz",
|
||||||
|
"integrity": "sha512-4yoz1kEWqUjzi5zsPbAS/903QXSYp0UOtHsPpp7p9rHAw/W+dkInskAE386Fat3oKRROwO98d9ZB0G4cObgUyw==",
|
||||||
|
"engines": {
|
||||||
|
"node": ">= 0.10"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/vary": {
|
"node_modules/vary": {
|
||||||
"version": "1.1.2",
|
"version": "1.1.2",
|
||||||
"resolved": "https://registry.npmjs.org/vary/-/vary-1.1.2.tgz",
|
"resolved": "https://registry.npmjs.org/vary/-/vary-1.1.2.tgz",
|
||||||
@@ -5584,6 +5787,14 @@
|
|||||||
"resolved": "https://registry.npmjs.org/yallist/-/yallist-4.0.0.tgz",
|
"resolved": "https://registry.npmjs.org/yallist/-/yallist-4.0.0.tgz",
|
||||||
"integrity": "sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A=="
|
"integrity": "sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A=="
|
||||||
},
|
},
|
||||||
|
"node_modules/yaml": {
|
||||||
|
"version": "2.0.0-1",
|
||||||
|
"resolved": "https://registry.npmjs.org/yaml/-/yaml-2.0.0-1.tgz",
|
||||||
|
"integrity": "sha512-W7h5dEhywMKenDJh2iX/LABkbFnBxasD27oyXWDS/feDsxiw0dD5ncXdYXgkvAsXIY2MpW/ZKkr9IU30DBdMNQ==",
|
||||||
|
"engines": {
|
||||||
|
"node": ">= 6"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/yargs": {
|
"node_modules/yargs": {
|
||||||
"version": "17.7.2",
|
"version": "17.7.2",
|
||||||
"resolved": "https://registry.npmjs.org/yargs/-/yargs-17.7.2.tgz",
|
"resolved": "https://registry.npmjs.org/yargs/-/yargs-17.7.2.tgz",
|
||||||
@@ -5618,6 +5829,34 @@
|
|||||||
"fd-slicer": "~1.1.0"
|
"fd-slicer": "~1.1.0"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/z-schema": {
|
||||||
|
"version": "5.0.5",
|
||||||
|
"resolved": "https://registry.npmjs.org/z-schema/-/z-schema-5.0.5.tgz",
|
||||||
|
"integrity": "sha512-D7eujBWkLa3p2sIpJA0d1pr7es+a7m0vFAnZLlCEKq/Ij2k0MLi9Br2UPxoxdYystm5K1yeBGzub0FlYUEWj2Q==",
|
||||||
|
"dependencies": {
|
||||||
|
"lodash.get": "^4.4.2",
|
||||||
|
"lodash.isequal": "^4.5.0",
|
||||||
|
"validator": "^13.7.0"
|
||||||
|
},
|
||||||
|
"bin": {
|
||||||
|
"z-schema": "bin/z-schema"
|
||||||
|
},
|
||||||
|
"engines": {
|
||||||
|
"node": ">=8.0.0"
|
||||||
|
},
|
||||||
|
"optionalDependencies": {
|
||||||
|
"commander": "^9.4.1"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/z-schema/node_modules/commander": {
|
||||||
|
"version": "9.5.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/commander/-/commander-9.5.0.tgz",
|
||||||
|
"integrity": "sha512-KRs7WVDKg86PWiuAqhDrAQnTXZKraVcCc6vFdL14qrZ/DcWwuRo7VoiYXalXO7S5GKpqYiVEwCbgFDfxNHKJBQ==",
|
||||||
|
"optional": true,
|
||||||
|
"engines": {
|
||||||
|
"node": "^12.20.0 || >=14"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/zod": {
|
"node_modules/zod": {
|
||||||
"version": "3.25.76",
|
"version": "3.25.76",
|
||||||
"resolved": "https://registry.npmjs.org/zod/-/zod-3.25.76.tgz",
|
"resolved": "https://registry.npmjs.org/zod/-/zod-3.25.76.tgz",
|
||||||
|
|||||||
@@ -49,6 +49,8 @@
|
|||||||
"puppeteer-extra-plugin-stealth": "^2.11.2",
|
"puppeteer-extra-plugin-stealth": "^2.11.2",
|
||||||
"sharp": "^0.32.0",
|
"sharp": "^0.32.0",
|
||||||
"socks-proxy-agent": "^8.0.2",
|
"socks-proxy-agent": "^8.0.2",
|
||||||
|
"swagger-jsdoc": "^6.2.8",
|
||||||
|
"swagger-ui-express": "^5.0.1",
|
||||||
"user-agents": "^1.1.669",
|
"user-agents": "^1.1.669",
|
||||||
"uuid": "^9.0.1",
|
"uuid": "^9.0.1",
|
||||||
"zod": "^3.22.4"
|
"zod": "^3.22.4"
|
||||||
@@ -61,6 +63,8 @@
|
|||||||
"@types/node": "^20.10.5",
|
"@types/node": "^20.10.5",
|
||||||
"@types/node-cron": "^3.0.11",
|
"@types/node-cron": "^3.0.11",
|
||||||
"@types/pg": "^8.15.6",
|
"@types/pg": "^8.15.6",
|
||||||
|
"@types/swagger-jsdoc": "^6.0.4",
|
||||||
|
"@types/swagger-ui-express": "^4.1.8",
|
||||||
"@types/uuid": "^9.0.7",
|
"@types/uuid": "^9.0.7",
|
||||||
"tsx": "^4.7.0",
|
"tsx": "^4.7.0",
|
||||||
"typescript": "^5.3.3"
|
"typescript": "^5.3.3"
|
||||||
|
|||||||
BIN
backend/public/downloads/cannaiq-menus-1.7.0.zip
Normal file
BIN
backend/public/downloads/cannaiq-menus-1.7.0.zip
Normal file
Binary file not shown.
@@ -1 +1 @@
|
|||||||
cannaiq-menus-1.6.0.zip
|
cannaiq-menus-1.7.0.zip
|
||||||
130
backend/scripts/count-jane-stores-v2.ts
Normal file
130
backend/scripts/count-jane-stores-v2.ts
Normal file
@@ -0,0 +1,130 @@
|
|||||||
|
/**
|
||||||
|
* Count Jane stores - v2: Try Algolia store search
|
||||||
|
* Usage: npx ts-node scripts/count-jane-stores-v2.ts
|
||||||
|
*/
|
||||||
|
|
||||||
|
import puppeteer from 'puppeteer-extra';
|
||||||
|
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
|
||||||
|
|
||||||
|
puppeteer.use(StealthPlugin());
|
||||||
|
|
||||||
|
const STATES = [
|
||||||
|
'AZ', 'CA', 'CO', 'FL', 'IL', 'MA', 'MI', 'NV', 'NJ', 'NY', 'OH', 'PA', 'WA', 'OR'
|
||||||
|
];
|
||||||
|
|
||||||
|
/**
 * Load the public Jane store directory in a headless browser, collect every
 * store object found in JSON responses from iheartjane.com, and print the
 * number of unique stores overall, per state, and for Arizona.
 *
 * Stores are keyed by `id`, so a store returned by several responses is
 * counted once. The browser is closed in a `finally` block so a navigation
 * timeout does not leave a Chromium process behind.
 */
async function main() {
  console.log('Counting Jane stores by exploring state pages...\n');

  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  const allStores: Map<number, any> = new Map();

  try {
    const page = await browser.newPage();

    // Drop heavy assets; only the JSON traffic matters for this script.
    await page.setRequestInterception(true);
    page.on('request', (req) => {
      const type = req.resourceType();
      if (['image', 'font', 'media', 'stylesheet'].includes(type)) {
        req.abort();
      } else {
        req.continue();
      }
    });

    page.on('response', async (response) => {
      const url = response.url();
      const contentType = response.headers()['content-type'] || '';
      if (!url.includes('iheartjane.com') || !contentType.includes('json')) {
        return;
      }

      let json: any;
      try {
        json = await response.json();
      } catch {
        // Best-effort capture: a body that cannot be read or parsed as JSON
        // simply contributes no stores.
        return;
      }

      // `stores` is the plain list shape; `hits` is the Algolia format.
      for (const list of [json?.stores, json?.hits]) {
        if (!Array.isArray(list)) continue;
        for (const s of list) {
          if (s?.id != null) allStores.set(s.id, s);
        }
      }
    });

    // First visit the main stores page
    console.log('Visiting main stores page...');
    await page.goto('https://www.iheartjane.com/stores', {
      waitUntil: 'networkidle0',
      timeout: 60000,
    });
    await new Promise(r => setTimeout(r, 3000));

    // Try to scroll to load more stores
    console.log('Scrolling to load more...');
    for (let i = 0; i < 5; i++) {
      await page.evaluate(() => window.scrollBy(0, 1000));
      await new Promise(r => setTimeout(r, 1000));
    }

    // Try clicking "Load More" if it exists.
    // `:has-text()` is a Playwright pseudo-class, not valid CSS, so the
    // previous `page.$` lookup never found a button. Match the button by its
    // text inside the page instead.
    try {
      const clicked = await page.evaluate(() => {
        const candidates = Array.from(
          document.querySelectorAll<HTMLElement>('button, [class*="load-more"]')
        );
        const target = candidates.find(
          (el) =>
            el.matches('[class*="load-more"]') ||
            /load more/i.test(el.textContent ?? '')
        );
        if (!target) return false;
        target.click();
        return true;
      });
      if (clicked) {
        console.log('Clicking Load More...');
        await new Promise(r => setTimeout(r, 3000));
      }
    } catch (err) {
      // Optional step: report the problem and carry on with what was captured.
      console.warn('Load More step skipped:', err);
    }

    // Extract stores from DOM as fallback
    const domStores = await page.evaluate(() => {
      const storeElements = document.querySelectorAll('[data-store-id], [class*="StoreCard"], [class*="store-card"]');
      return storeElements.length;
    });

    console.log(`\nStores from DOM elements: ${domStores}`);
  } finally {
    await browser.close();
  }

  // Count by state
  const byState: Record<string, number> = {};
  for (const store of allStores.values()) {
    const state = store.state || 'Unknown';
    byState[state] = (byState[state] || 0) + 1;
  }

  console.log('\n=== JANE STORE COUNTS ===\n');
  console.log(`Unique stores captured: ${allStores.size}`);

  if (allStores.size > 0) {
    console.log('\nBy State:');
    const sorted = Object.entries(byState).sort((a, b) => b[1] - a[1]);
    for (const [state, count] of sorted.slice(0, 20)) {
      console.log(` ${state}: ${count}`);
    }

    // Check Arizona specifically
    const azStores = Array.from(allStores.values()).filter(s =>
      s.state === 'Arizona' || s.state === 'AZ'
    );
    console.log(`\nArizona stores: ${azStores.length}`);
    if (azStores.length > 0) {
      console.log('AZ stores:');
      for (const s of azStores.slice(0, 10)) {
        console.log(` - ${s.name} (ID: ${s.id}) - ${s.city}`);
      }
    }
  }

  // Note about total
  console.log('\n--- Note ---');
  console.log('Jane uses server-side rendering. To get full store count,');
  console.log('you may need to check their public marketing materials or');
  console.log('iterate through known store IDs.');
}

main().catch((err) => {
  console.error(err);
  // Report failure to the calling shell instead of exiting with status 0.
  process.exitCode = 1;
});
|
||||||
98
backend/scripts/count-jane-stores.ts
Normal file
98
backend/scripts/count-jane-stores.ts
Normal file
@@ -0,0 +1,98 @@
|
|||||||
|
/**
|
||||||
|
* Count Jane stores by state
|
||||||
|
* Usage: npx ts-node scripts/count-jane-stores.ts
|
||||||
|
*/
|
||||||
|
|
||||||
|
import puppeteer from 'puppeteer-extra';
|
||||||
|
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
|
||||||
|
|
||||||
|
puppeteer.use(StealthPlugin());
|
||||||
|
|
||||||
|
/**
 * Load the public Jane store directory in a headless browser, collect the
 * `stores` arrays found in JSON responses whose URL contains "stores", and
 * print totals overall, per state, and for Arizona.
 *
 * A store that appears in more than one response is recorded once (matched
 * by `id`). The browser is closed in a `finally` block so a navigation
 * timeout does not leave a Chromium process behind.
 */
async function main() {
  console.log('Counting Jane stores...\n');

  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  // Capture store data from API
  const stores: any[] = [];
  const seenIds = new Set<unknown>();
  let pageStoreCount: number | null = null;

  try {
    const page = await browser.newPage();

    // Drop heavy assets; only the JSON traffic matters for this script.
    await page.setRequestInterception(true);
    page.on('request', (req) => {
      const type = req.resourceType();
      if (['image', 'font', 'media', 'stylesheet'].includes(type)) {
        req.abort();
      } else {
        req.continue();
      }
    });

    page.on('response', async (response) => {
      const url = response.url();
      if (!url.includes('iheartjane.com') || !url.includes('stores')) {
        return;
      }

      let json: any;
      try {
        json = await response.json();
      } catch {
        // Best-effort capture: the filter also matches non-JSON responses
        // (e.g. the HTML page itself); those contribute no stores.
        return;
      }
      if (!Array.isArray(json?.stores)) {
        return;
      }

      for (const store of json.stores) {
        // A null entry would crash the per-state loop below.
        if (store == null) continue;
        if (store.id != null) {
          if (seenIds.has(store.id)) continue;
          seenIds.add(store.id);
        }
        stores.push(store);
      }
    });

    // Visit the store directory
    console.log('Loading Jane store directory...');
    await page.goto('https://www.iheartjane.com/stores', {
      waitUntil: 'networkidle2',
      timeout: 60000,
    });

    // Wait for stores to load
    await new Promise(r => setTimeout(r, 5000));

    // Also try to get store count from page content
    pageStoreCount = await page.evaluate(() => {
      // Look for store count in page text
      const text = document.body.innerText;
      const match = text.match(/(\d+)\s*stores?/i);
      return match ? Number.parseInt(match[1], 10) : null;
    });
  } finally {
    await browser.close();
  }

  // Count by state
  const byState: Record<string, number> = {};
  for (const store of stores) {
    const state = store.state || 'Unknown';
    byState[state] = (byState[state] || 0) + 1;
  }

  console.log('\n=== JANE STORE COUNTS ===\n');
  console.log(`Total stores captured from API: ${stores.length}`);
  // Compare against null so a page that literally says "0 stores" is reported.
  if (pageStoreCount !== null) {
    console.log(`Page claims: ${pageStoreCount} stores`);
  }

  console.log('\nBy State:');
  const sorted = Object.entries(byState).sort((a, b) => b[1] - a[1]);
  for (const [state, count] of sorted) {
    console.log(` ${state}: ${count}`);
  }

  // Check Arizona specifically
  const azStores = stores.filter(s =>
    s.state === 'Arizona' || s.state === 'AZ'
  );
  console.log(`\nArizona stores: ${azStores.length}`);
  if (azStores.length > 0) {
    console.log('Sample AZ stores:');
    for (const s of azStores.slice(0, 5)) {
      console.log(` - ${s.name} (ID: ${s.id}) - ${s.city}`);
    }
  }
}

main().catch((err) => {
  console.error(err);
  // Report failure to the calling shell instead of exiting with status 0.
  process.exitCode = 1;
});
|
||||||
184
backend/scripts/explore-treez-pages.ts
Normal file
184
backend/scripts/explore-treez-pages.ts
Normal file
@@ -0,0 +1,184 @@
|
|||||||
|
/**
|
||||||
|
* Explore all Treez page URLs to find the full product catalog
|
||||||
|
*/
|
||||||
|
|
||||||
|
import puppeteer, { Page } from 'puppeteer';
|
||||||
|
|
||||||
|
const STORE_ID = 'best';
|
||||||
|
|
||||||
|
/**
 * Pause the calling async function.
 *
 * @param ms - Delay in milliseconds before the returned promise resolves.
 * @returns A promise that resolves (with no value) once the timer fires.
 */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
|
||||||
|
|
||||||
|
/**
 * Dismiss the age-gate modal if the page is currently showing one.
 *
 * Looks for the modal by its `data-testid`; when present, clicks the submit
 * button (if it can be found) and then waits two seconds before returning.
 * Does nothing when no modal is found.
 *
 * @param page - Puppeteer page to inspect.
 */
async function bypassAgeGate(page: Page): Promise<void> {
  const modal = await page.$('[data-testid="age-gate-modal"]');
  if (!modal) {
    return;
  }

  console.log(' Age gate detected, bypassing...');

  const submitButton = await page.$('[data-testid="age-gate-submit-button"]');
  if (submitButton) {
    await submitButton.click();
  }

  // The wait happens whether or not the button was found, as long as the modal was.
  await sleep(2000);
}
|
||||||
|
|
||||||
|
/**
 * Count the elements on the page whose class attribute contains
 * `product_product__`.
 *
 * @param page - Puppeteer page to query.
 * @returns Number of matching elements currently in the DOM.
 */
async function countProducts(page: Page): Promise<number> {
  const total = await page.evaluate(() => {
    const cards = document.querySelectorAll('[class*="product_product__"]');
    return cards.length;
  });
  return total;
}
|
||||||
|
|
||||||
|
/**
 * Scroll to the bottom of the page repeatedly, then count the products.
 *
 * Before each scroll the document height is compared with the height seen on
 * the previous iteration. Scrolling stops once the height has been unchanged
 * on three consecutive checks, or after `maxScrolls` scrolls.
 *
 * @param page - Puppeteer page to scroll.
 * @param maxScrolls - Upper bound on the number of scrolls (default 30).
 * @returns The product count after scrolling and the number of scrolls performed.
 */
async function scrollAndCount(page: Page, maxScrolls: number = 30): Promise<{ products: number; scrolls: number }> {
  let lastHeight = 0;
  let unchangedChecks = 0;
  let performed = 0;

  for (; performed < maxScrolls; performed += 1) {
    const height = await page.evaluate(() => document.body.scrollHeight);

    unchangedChecks = height === lastHeight ? unchangedChecks + 1 : 0;
    if (unchangedChecks >= 3) {
      // Leaving here skips the increment, so this iteration is not counted as a scroll.
      break;
    }

    await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
    await sleep(1500);

    lastHeight = height;
  }

  return { products: await countProducts(page), scrolls: performed };
}
|
||||||
|
|
||||||
|
/**
 * Load one path on the store's Treez site and report how many products it shows.
 *
 * After navigation and the age-gate bypass, the initial product count is logged.
 * If any products are present the page is scrolled via `scrollAndCount` and that
 * result is returned. Otherwise the number of brand/category/card-like elements
 * is logged for information and the (zero) initial count is returned.
 *
 * @param page - Puppeteer page reused across probes.
 * @param path - Path and query string appended to `https://<STORE_ID>.treez.io`.
 * @returns Product and scroll counts; on any thrown error, zeros plus the error message.
 */
async function testUrl(page: Page, path: string): Promise<{ products: number; scrolls: number; error?: string }> {
  const url = `https://${STORE_ID}.treez.io${path}`;
  console.log(`\nTesting: ${url}`);

  try {
    await page.goto(url, { waitUntil: 'networkidle2', timeout: 30000 });
    await sleep(2000);
    await bypassAgeGate(page);
    await sleep(1000);

    const initialCount = await countProducts(page);
    console.log(` Initial products: ${initialCount}`);

    if (initialCount > 0) {
      const scrolled = await scrollAndCount(page);
      console.log(` After scroll: ${scrolled.products} products (${scrolled.scrolls} scrolls)`);
      return scrolled;
    }

    // No products: see whether the page shows brand/category cards instead.
    // An element matching several selectors is counted once per selector.
    const cardCount = await page.evaluate(() => {
      const cardSelectors = [
        '[class*="brand"]',
        '[class*="Brand"]',
        '[class*="category"]',
        '[class*="Category"]',
        '[class*="card"]',
        'a[href*="/brand/"]',
        'a[href*="/category/"]',
      ];
      return cardSelectors.reduce(
        (total, selector) => total + document.querySelectorAll(selector).length,
        0
      );
    });
    console.log(` Cards/links found: ${cardCount}`);

    return { products: initialCount, scrolls: 0 };
  } catch (error: any) {
    const message = error.message;
    console.log(` Error: ${message}`);
    return { products: 0, scrolls: 0, error: message };
  }
}
|
||||||
|
|
||||||
|
/**
 * Entry point: probe a fixed list of Treez menu paths for product counts,
 * list the `/onlinemenu/` links found on the homepage, and print a summary
 * sorted by product count (highest first).
 *
 * All page work runs inside try/finally so the browser is closed even when
 * page setup or the final homepage navigation throws (for example on a
 * navigation timeout); the error still propagates to the caller.
 */
async function main() {
  console.log('='.repeat(60));
  console.log('Exploring Treez Page URLs');
  console.log('='.repeat(60));

  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // Block images, fonts, media and stylesheets to speed up page loads
    await page.setRequestInterception(true);
    page.on('request', (req) => {
      if (['image', 'font', 'media', 'stylesheet'].includes(req.resourceType())) {
        req.abort();
      } else {
        req.continue();
      }
    });

    const urlsToTest = [
      '/onlinemenu/?customerType=ADULT', // Homepage
      '/onlinemenu/brands?customerType=ADULT', // Brands page
      '/onlinemenu/shop?customerType=ADULT', // Shop page?
      '/onlinemenu/products?customerType=ADULT', // Products page?
      '/onlinemenu/menu?customerType=ADULT', // Menu page?
      '/onlinemenu/all?customerType=ADULT', // All products?
      '/onlinemenu/flower?customerType=ADULT', // Flower category
      '/onlinemenu/vapes?customerType=ADULT', // Vapes category
      '/onlinemenu/edibles?customerType=ADULT', // Edibles category
      '/onlinemenu/concentrates?customerType=ADULT', // Concentrates category
    ];

    const results: { path: string; products: number; scrolls: number }[] = [];

    // Probes run one after another because they share a single page.
    for (const path of urlsToTest) {
      const result = await testUrl(page, path);
      results.push({ path, ...result });
    }

    // Look for navigation links on the main page
    console.log('\n' + '='.repeat(60));
    console.log('Checking navigation structure on homepage...');
    console.log('='.repeat(60));

    // Unlike testUrl, this navigation is not wrapped in its own try/catch,
    // so a timeout here is what the surrounding finally protects against.
    await page.goto(`https://${STORE_ID}.treez.io/onlinemenu/?customerType=ADULT`, {
      waitUntil: 'networkidle2',
      timeout: 30000,
    });
    await sleep(2000);
    await bypassAgeGate(page);
    await sleep(1000);

    const navLinks = await page.evaluate(() => {
      const links: { text: string; href: string }[] = [];
      document.querySelectorAll('a[href*="/onlinemenu/"]').forEach(el => {
        const text = el.textContent?.trim() || '';
        const href = el.getAttribute('href') || '';
        // Keep only links with visible text, de-duplicated by href.
        if (text && !links.some(l => l.href === href)) {
          links.push({ text: text.slice(0, 50), href });
        }
      });
      return links;
    });

    console.log('\nNavigation links found:');
    navLinks.forEach(l => console.log(` "${l.text}" → ${l.href}`));

    // Summary
    console.log('\n' + '='.repeat(60));
    console.log('Summary');
    console.log('='.repeat(60));

    results.sort((a, b) => b.products - a.products);
    results.forEach(r => {
      console.log(`${r.products.toString().padStart(4)} products | ${r.path}`);
    });
  } finally {
    await browser.close();
  }
}
|
||||||
|
|
||||||
|
main().catch(console.error);
|
||||||
247
backend/scripts/explore-treez-structure.ts
Normal file
247
backend/scripts/explore-treez-structure.ts
Normal file
@@ -0,0 +1,247 @@
|
|||||||
|
/**
|
||||||
|
* Explore Treez site structure to find full product catalog
|
||||||
|
*
|
||||||
|
* Usage: npx ts-node scripts/explore-treez-structure.ts
|
||||||
|
*/
|
||||||
|
|
||||||
|
import puppeteer from 'puppeteer';
|
||||||
|
|
||||||
|
const STORE_ID = 'best';
|
||||||
|
|
||||||
|
/**
 * Wait for a fixed amount of time.
 *
 * @param ms - Delay in milliseconds before the returned promise resolves.
 * @returns A promise that resolves (with no value) once the timer fires.
 */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((wake) => {
    setTimeout(wake, ms);
  });
}
|
||||||
|
|
||||||
|
/**
 * Entry point: explore the Treez online menu for the configured store.
 *
 * Steps, each announced on the console:
 *   [1] load the base menu URL and dismiss the age gate if shown,
 *   [2] list navigation links and category-like tabs,
 *   [3] list elements that look like category navigation,
 *   [4] click the first of a few candidate labels ("Flower", "All", ...) that exists,
 *   [5] scroll the current page and watch height / product count,
 *   [6] try a handful of direct URL patterns,
 *   [7] save a full-page screenshot to /tmp/treez-explore.png.
 *
 * Page creation happens inside the try block so the browser is always closed,
 * and each URL pattern in step 6 is probed in its own try/catch so one failed
 * navigation does not skip the remaining patterns or the screenshot.
 */
async function main() {
  console.log('='.repeat(60));
  console.log('Exploring Treez Site Structure');
  console.log('='.repeat(60));

  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // Counts the product elements currently in the DOM.
    const countProducts = (): Promise<number> =>
      page.evaluate(() =>
        document.querySelectorAll('[class*="product_product__"]').length
      );

    // Clicks through the age gate when it is showing; optionally logs first.
    const dismissAgeGate = async (logMessage?: string): Promise<void> => {
      const gate = await page.$('[data-testid="age-gate-modal"]');
      if (!gate) return;
      if (logMessage) console.log(logMessage);
      const btn = await page.$('[data-testid="age-gate-submit-button"]');
      if (btn) await btn.click();
      await sleep(2000);
    };

    // Navigate to base menu URL
    const baseUrl = `https://${STORE_ID}.treez.io/onlinemenu/?customerType=ADULT`;
    console.log(`\n[1] Navigating to: ${baseUrl}`);
    await page.goto(baseUrl, { waitUntil: 'networkidle2', timeout: 60000 });
    await sleep(3000);

    // Bypass age gate if present
    await dismissAgeGate('[1] Age gate detected, bypassing...');

    // Get all navigation links
    console.log('\n[2] Extracting navigation structure...');
    const navInfo = await page.evaluate(() => {
      const links: { text: string; href: string }[] = [];

      // Look for nav links (de-duplicated by href)
      document.querySelectorAll('nav a, [class*="nav"] a, [class*="menu"] a, header a').forEach(el => {
        const text = el.textContent?.trim() || '';
        const href = el.getAttribute('href') || '';
        if (text && href && !links.some(l => l.href === href)) {
          links.push({ text, href });
        }
      });

      // Look for category tabs/buttons (de-duplicated by text; these may have no href)
      document.querySelectorAll('[class*="category"], [class*="tab"], [role="tab"]').forEach(el => {
        const text = el.textContent?.trim() || '';
        const href = el.getAttribute('href') || el.getAttribute('data-href') || '';
        if (text && !links.some(l => l.text === text)) {
          links.push({ text, href: href || `(click: ${el.className})` });
        }
      });

      // Get current URL
      const currentUrl = window.location.href;

      // Count products on page
      const productCount = document.querySelectorAll('[class*="product_product__"]').length;

      return { links, currentUrl, productCount };
    });

    console.log(`Current URL: ${navInfo.currentUrl}`);
    console.log(`Products on homepage: ${navInfo.productCount}`);
    console.log('\nNavigation links found:');
    navInfo.links.forEach(l => {
      console.log(` "${l.text}" → ${l.href}`);
    });

    // Look for category buttons/tabs specifically
    console.log('\n[3] Looking for category navigation...');
    const categories = await page.evaluate(() => {
      const cats: { text: string; className: string; tagName: string }[] = [];

      // Find all clickable elements that might be categories
      const selectors = [
        '[class*="CategoryNav"]',
        '[class*="category"]',
        '[class*="Category"]',
        '[class*="nav"] button',
        '[class*="tab"]',
        '[role="tablist"] *',
        '.MuiTab-root',
        '[class*="filter"]',
      ];

      selectors.forEach(sel => {
        document.querySelectorAll(sel).forEach(el => {
          const text = el.textContent?.trim() || '';
          // Short, not-yet-seen labels only; long text is unlikely to be a category name.
          if (text && text.length < 50 && !cats.some(c => c.text === text)) {
            cats.push({
              text,
              className: el.className?.toString().slice(0, 80) || '',
              tagName: el.tagName,
            });
          }
        });
      });

      return cats;
    });

    console.log('Category-like elements:');
    categories.forEach(c => {
      console.log(` [${c.tagName}] "${c.text}" (class: ${c.className})`);
    });

    // Try clicking on "Flower" or "All" if found
    console.log('\n[4] Looking for "Flower" or "All Products" link...');
    const clickTargets = ['Flower', 'All', 'All Products', 'Shop All', 'View All'];

    for (const target of clickTargets) {
      const element = await page.evaluate((targetText) => {
        const els = Array.from(document.querySelectorAll('a, button, [role="tab"], [class*="category"]'));
        const match = els.find(el =>
          el.textContent?.trim().toLowerCase() === targetText.toLowerCase()
        );
        if (match) {
          return {
            found: true,
            text: match.textContent?.trim(),
            tag: match.tagName,
          };
        }
        return { found: false };
      }, target);

      if (!element.found) continue;

      console.log(`Found "${element.text}" (${element.tag}), clicking...`);

      await page.evaluate((targetText) => {
        const els = Array.from(document.querySelectorAll('a, button, [role="tab"], [class*="category"]'));
        const match = els.find(el =>
          el.textContent?.trim().toLowerCase() === targetText.toLowerCase()
        );
        if (match) (match as HTMLElement).click();
      }, target);

      await sleep(3000);

      const newUrl = page.url();
      const newCount = await countProducts();

      console.log(` New URL: ${newUrl}`);
      console.log(` Products after click: ${newCount}`);

      if (newCount > navInfo.productCount) {
        console.log(` ✓ Found more products! (${navInfo.productCount} → ${newCount})`);
      }

      // Only the first matching label is clicked.
      break;
    }

    // Check page height and scroll behavior
    console.log('\n[5] Checking scroll behavior on current page...');
    let previousHeight = 0;
    let scrollCount = 0;
    let previousProductCount = await countProducts();

    while (scrollCount < 10) {
      const currentHeight = await page.evaluate(() => document.body.scrollHeight);

      if (currentHeight === previousHeight) {
        console.log(` Scroll ${scrollCount + 1}: No height change, stopping`);
        break;
      }

      await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
      await sleep(1500);

      const currentProductCount = await countProducts();

      console.log(` Scroll ${scrollCount + 1}: height=${currentHeight}, products=${currentProductCount}`);

      // Allow a few scrolls before concluding that nothing more is loading.
      if (currentProductCount === previousProductCount && scrollCount > 2) {
        console.log(' No new products loading, stopping');
        break;
      }

      previousHeight = currentHeight;
      previousProductCount = currentProductCount;
      scrollCount++;
    }

    // Try direct URL patterns
    console.log('\n[6] Testing URL patterns...');
    const urlPatterns = [
      '/onlinemenu/flower?customerType=ADULT',
      '/onlinemenu/all?customerType=ADULT',
      '/onlinemenu?category=flower&customerType=ADULT',
      '/onlinemenu?view=all&customerType=ADULT',
    ];

    for (const pattern of urlPatterns) {
      const testUrl = `https://${STORE_ID}.treez.io${pattern}`;
      console.log(`\nTrying: ${testUrl}`);

      try {
        await page.goto(testUrl, { waitUntil: 'networkidle2', timeout: 30000 });
        await sleep(2000);

        // Bypass age gate again if needed
        await dismissAgeGate();

        const productCount = await countProducts();
        console.log(` Products found: ${productCount}`);
      } catch (error: any) {
        // Keep probing the remaining patterns; a single bad URL is expected here.
        console.log(` Error: ${error.message}`);
      }
    }

    // Screenshot the final state
    await page.screenshot({ path: '/tmp/treez-explore.png', fullPage: true });
    console.log('\n[7] Screenshot saved to /tmp/treez-explore.png');

  } catch (error: any) {
    console.error('Error:', error.message);
  } finally {
    await browser.close();
  }
}
|
||||||
|
|
||||||
|
main().catch(console.error);
|
||||||
138
backend/scripts/run-jane-product-discovery.ts
Normal file
138
backend/scripts/run-jane-product-discovery.ts
Normal file
@@ -0,0 +1,138 @@
|
|||||||
|
/**
|
||||||
|
* Run Jane product discovery for stores in database
|
||||||
|
* Usage: npx ts-node scripts/run-jane-product-discovery.ts [DISPENSARY_ID]
|
||||||
|
* Example: npx ts-node scripts/run-jane-product-discovery.ts 4220
|
||||||
|
* Or run for all Jane stores: npx ts-node scripts/run-jane-product-discovery.ts all
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { Pool } from 'pg';
|
||||||
|
import { fetchProductsByStoreIdDirect } from '../src/platforms/jane';
|
||||||
|
import { saveRawPayload } from '../src/utils/payload-storage';
|
||||||
|
|
||||||
|
/**
 * Entry point: fetch Jane products for one or more dispensaries and store the
 * raw payload.
 *
 * The first CLI argument selects the work:
 *   - `all`      every dispensary with platform 'jane' and a menu URL,
 *   - a number   the dispensary with that database ID,
 *   - (omitted)  the first Jane dispensary by ID.
 *
 * For each dispensary the products are fetched by its `platform_dispensary_id`,
 * the raw payload is saved via `saveRawPayload`, and the dispensary row is
 * updated (stage, product count, success counters). A failure for one
 * dispensary is logged and counted; processing continues with the next.
 *
 * On a fatal error the pool is closed first and the process then exits with
 * code 1. A non-numeric ID argument is rejected with a clear message instead
 * of being sent to the database as NaN.
 */
async function main() {
  const arg = process.argv[2];

  console.log('='.repeat(60));
  console.log('Jane Product Discovery');
  console.log('='.repeat(60));

  const pool = new Pool({
    connectionString: process.env.DATABASE_URL,
  });

  // Set in the catch block; acted on only after the pool has been closed.
  let fatal = false;

  try {
    // Get dispensaries to process
    let dispensaries: any[];

    if (arg === 'all') {
      const result = await pool.query(
        `SELECT id, name, menu_url, platform_dispensary_id
         FROM dispensaries
         WHERE platform = 'jane' AND menu_url IS NOT NULL
         ORDER BY id`
      );
      dispensaries = result.rows;
    } else if (arg) {
      const dispensaryId = Number.parseInt(arg, 10);
      if (Number.isNaN(dispensaryId)) {
        throw new Error(`Invalid dispensary ID "${arg}" (expected a number or "all")`);
      }
      const result = await pool.query(
        `SELECT id, name, menu_url, platform_dispensary_id
         FROM dispensaries
         WHERE id = $1`,
        [dispensaryId]
      );
      dispensaries = result.rows;
    } else {
      // Default: get first Jane store
      const result = await pool.query(
        `SELECT id, name, menu_url, platform_dispensary_id
         FROM dispensaries
         WHERE platform = 'jane' AND menu_url IS NOT NULL
         ORDER BY id LIMIT 1`
      );
      dispensaries = result.rows;
    }

    if (dispensaries.length === 0) {
      console.log('No Jane dispensaries found');
      return;
    }

    console.log(`Processing ${dispensaries.length} dispensary(ies)...\n`);

    let successCount = 0;
    let failCount = 0;

    for (const disp of dispensaries) {
      console.log(`\n${'─'.repeat(60)}`);
      console.log(`${disp.name} (ID: ${disp.id}, Jane ID: ${disp.platform_dispensary_id})`);
      console.log('─'.repeat(60));

      try {
        const result = await fetchProductsByStoreIdDirect(disp.platform_dispensary_id);

        // An empty capture counts as a failure and nothing is written.
        if (result.products.length === 0) {
          console.log(' ✗ No products captured');
          failCount++;
          continue;
        }

        console.log(` ✓ Captured ${result.products.length} products`);

        // Build payload
        const rawPayload = {
          hits: result.products.map(p => p.raw),
          store: result.store?.raw || null,
          capturedAt: new Date().toISOString(),
          platform: 'jane',
          dispensaryId: disp.id,
          storeId: disp.platform_dispensary_id,
        };

        // Save payload
        const { id: payloadId, sizeBytes } = await saveRawPayload(
          pool,
          disp.id,
          rawPayload,
          null,
          result.products.length,
          'jane'
        );

        console.log(` ✓ Saved payload ${payloadId} (${Math.round(sizeBytes / 1024)}KB)`);

        // Update dispensary
        await pool.query(
          `UPDATE dispensaries
           SET stage = 'hydrating',
               last_fetch_at = NOW(),
               product_count = $2,
               consecutive_successes = consecutive_successes + 1,
               consecutive_failures = 0,
               updated_at = NOW()
           WHERE id = $1`,
          [disp.id, result.products.length]
        );

        console.log(` ✓ Updated dispensary (product_count: ${result.products.length})`);
        successCount++;

      } catch (error: any) {
        console.log(` ✗ Error: ${error.message}`);
        failCount++;
      }
    }

    console.log('\n' + '='.repeat(60));
    console.log('RESULTS');
    console.log('='.repeat(60));
    console.log(`Success: ${successCount}`);
    console.log(`Failed: ${failCount}`);

  } catch (error: any) {
    console.error('Error:', error.message);
    fatal = true;
  } finally {
    await pool.end();
  }

  // Exit only after the finally block has run; calling process.exit inside
  // the catch would terminate before pool.end() is reached.
  if (fatal) {
    process.exit(1);
  }
}
|
||||||
|
|
||||||
|
main();
|
||||||
137
backend/scripts/run-jane-store-discovery.ts
Normal file
137
backend/scripts/run-jane-store-discovery.ts
Normal file
@@ -0,0 +1,137 @@
|
|||||||
|
/**
|
||||||
|
* Run Jane store discovery and insert into database
|
||||||
|
* Usage: npx ts-node scripts/run-jane-store-discovery.ts [STATE_CODE]
|
||||||
|
* Example: npx ts-node scripts/run-jane-store-discovery.ts AZ
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { Pool } from 'pg';
|
||||||
|
import { discoverStoresByState } from '../src/platforms/jane';
|
||||||
|
|
||||||
|
/**
 * Build a URL-friendly slug from a store name.
 *
 * The name is lower-cased; parentheses and any character other than
 * a-z, 0-9, whitespace or hyphen are dropped; whitespace runs become a
 * hyphen; repeated hyphens collapse to one; a leading or trailing hyphen
 * is removed.
 *
 * Example: "Hana Meds - Phoenix (REC)" -> "hana-meds-phoenix-rec"
 *
 * @param name - Store name as displayed.
 * @returns The slug, which may be empty if nothing usable remains.
 */
function generateSlug(name: string): string {
  const lowered = name.toLowerCase();

  // Strip parentheses, then every other unsupported character.
  const withoutParens = lowered.replace(/[()]/g, '');
  const allowedOnly = withoutParens.replace(/[^a-z0-9\s-]/g, '');

  // Turn whitespace into hyphens and squeeze hyphen runs.
  const hyphenated = allowedOnly.replace(/\s+/g, '-');
  const collapsed = hyphenated.replace(/-+/g, '-');

  // After collapsing, at most one hyphen remains at each end.
  return collapsed.replace(/^-|-$/g, '');
}
|
||||||
|
|
||||||
|
/**
 * Entry point: discover Jane stores for one state and upsert them into the
 * `dispensaries` table.
 *
 * The state code comes from the first CLI argument and defaults to 'AZ'.
 * Each discovered store is inserted, or updated when a row with the same
 * `platform_dispensary_id` already exists. Whether a row was newly inserted
 * is read from `(xmax = 0)` in the RETURNING clause. A failure for one store
 * is logged and counted; the remaining stores are still processed.
 *
 * On a fatal error the pool is closed first and the process then exits with
 * code 1.
 */
async function main() {
  const stateCode = process.argv[2] || 'AZ';

  console.log('='.repeat(60));
  console.log(`Jane Store Discovery - ${stateCode}`);
  console.log('='.repeat(60));

  // Connect to database
  const pool = new Pool({
    connectionString: process.env.DATABASE_URL,
  });

  // Set in the catch block; acted on only after the pool has been closed.
  let fatal = false;

  try {
    // Test connection
    const testResult = await pool.query('SELECT COUNT(*) FROM dispensaries WHERE platform = $1', ['jane']);
    console.log(`Current Jane stores in DB: ${testResult.rows[0].count}`);

    // Discover stores
    console.log(`\nDiscovering Jane stores in ${stateCode}...`);
    const stores = await discoverStoresByState(stateCode);

    if (stores.length === 0) {
      console.log(`No stores found in ${stateCode}`);
      return;
    }

    console.log(`\nFound ${stores.length} stores. Inserting into database...`);

    // Insert stores
    let inserted = 0;
    let updated = 0;
    let failed = 0;
    const newIds: number[] = [];

    for (const store of stores) {
      const menuUrl = `https://www.iheartjane.com/stores/${store.storeId}/${store.urlSlug || 'menu'}`;
      const slug = generateSlug(store.name);

      try {
        const result = await pool.query(
          `INSERT INTO dispensaries (
             name, slug, address1, city, state, zipcode,
             latitude, longitude, menu_url, menu_type, platform,
             platform_dispensary_id, is_medical, is_recreational,
             stage, created_at, updated_at
           ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, NOW(), NOW())
           ON CONFLICT (platform_dispensary_id) WHERE platform_dispensary_id IS NOT NULL
           DO UPDATE SET
             name = EXCLUDED.name,
             slug = EXCLUDED.slug,
             address1 = EXCLUDED.address1,
             city = EXCLUDED.city,
             latitude = EXCLUDED.latitude,
             longitude = EXCLUDED.longitude,
             menu_url = EXCLUDED.menu_url,
             is_medical = EXCLUDED.is_medical,
             is_recreational = EXCLUDED.is_recreational,
             updated_at = NOW()
           RETURNING id, (xmax = 0) AS is_new`,
          [
            store.name,
            slug,
            store.address,
            store.city,
            stateCode,
            store.zip,
            store.lat,
            store.long,
            menuUrl,
            'embedded', // menu_type: how it's displayed
            'jane', // platform: who provides the menu
            store.storeId,
            store.medical,
            store.recreational,
            'discovered',
          ]
        );

        if (result.rows.length > 0) {
          const { id, is_new } = result.rows[0];
          if (is_new) {
            inserted++;
            newIds.push(id);
            console.log(` + Inserted: ${store.name} (DB ID: ${id}, Jane ID: ${store.storeId})`);
          } else {
            updated++;
            console.log(` ~ Updated: ${store.name} (DB ID: ${id})`);
          }
        }
      } catch (error: any) {
        failed++;
        console.error(` ! Error inserting ${store.name}: ${error.message}`);
      }
    }

    console.log('\n' + '='.repeat(60));
    console.log('RESULTS');
    console.log('='.repeat(60));
    console.log(`Stores discovered: ${stores.length}`);
    console.log(`New stores inserted: ${inserted}`);
    console.log(`Existing stores updated: ${updated}`);
    console.log(`Stores failed: ${failed}`);
    console.log(`New dispensary IDs: ${newIds.join(', ') || '(none)'}`);

    // Show final count
    const finalResult = await pool.query('SELECT COUNT(*) FROM dispensaries WHERE platform = $1', ['jane']);
    console.log(`\nTotal Jane stores in DB: ${finalResult.rows[0].count}`);

  } catch (error: any) {
    console.error('Error:', error.message);
    fatal = true;
  } finally {
    await pool.end();
  }

  // Exit only after the finally block has run; calling process.exit inside
  // the catch would terminate before pool.end() is reached.
  if (fatal) {
    process.exit(1);
  }
}
|
||||||
|
|
||||||
|
main();
|
||||||
179
backend/scripts/test-all-brands-scroll.ts
Normal file
179
backend/scripts/test-all-brands-scroll.ts
Normal file
@@ -0,0 +1,179 @@
|
|||||||
|
import puppeteer from 'puppeteer';
|
||||||
|
|
||||||
|
/**
 * Delay helper for pacing page interactions.
 *
 * @param ms - Delay in milliseconds before the returned promise resolves.
 * @returns A promise that resolves (with no value) once the timer fires.
 */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((finish) => {
    setTimeout(finish, ms);
  });
}
|
||||||
|
|
||||||
|
async function main() {
|
||||||
|
console.log('Loading ALL brands from https://shop.bestdispensary.com/brands');
|
||||||
|
|
||||||
|
const browser = await puppeteer.launch({
|
||||||
|
headless: true,
|
||||||
|
args: ['--no-sandbox', '--disable-setuid-sandbox'],
|
||||||
|
});
|
||||||
|
|
||||||
|
const page = await browser.newPage();
|
||||||
|
await page.setViewport({ width: 1920, height: 1080 });
|
||||||
|
|
||||||
|
await page.setRequestInterception(true);
|
||||||
|
page.on('request', (req) => {
|
||||||
|
if (['image', 'font', 'media'].includes(req.resourceType())) {
|
||||||
|
req.abort();
|
||||||
|
} else {
|
||||||
|
req.continue();
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
await page.goto('https://shop.bestdispensary.com/brands', {
|
||||||
|
waitUntil: 'networkidle2',
|
||||||
|
timeout: 60000
|
||||||
|
});
|
||||||
|
await sleep(3000);
|
||||||
|
|
||||||
|
// Bypass age gate
|
||||||
|
const ageGate = await page.$('[data-testid="age-gate-modal"]');
|
||||||
|
if (ageGate) {
|
||||||
|
console.log('Age gate detected, bypassing...');
|
||||||
|
const btn = await page.$('[data-testid="age-gate-submit-button"]');
|
||||||
|
if (btn) await btn.click();
|
||||||
|
await sleep(2000);
|
||||||
|
}
|
||||||
|
|
||||||
|
console.log('Current URL:', page.url());
|
||||||
|
|
||||||
|
// Get initial brand count
|
||||||
|
let brandCount = await page.evaluate(() => {
|
||||||
|
const seen = new Set<string>();
|
||||||
|
document.querySelectorAll('a[href*="/brand/"]').forEach((a: Element) => {
|
||||||
|
const href = a.getAttribute('href');
|
||||||
|
if (href) seen.add(href);
|
||||||
|
});
|
||||||
|
return seen.size;
|
||||||
|
});
|
||||||
|
console.log(`Initial brand count: ${brandCount}`);
|
||||||
|
|
||||||
|
// Aggressive scrolling
|
||||||
|
console.log('\nScrolling to load ALL brands...');
|
||||||
|
let previousCount = 0;
|
||||||
|
let sameCount = 0;
|
||||||
|
|
||||||
|
for (let i = 0; i < 50; i++) {
|
||||||
|
// Scroll to bottom
|
||||||
|
await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
|
||||||
|
await sleep(1000);
|
||||||
|
|
||||||
|
brandCount = await page.evaluate(() => {
|
||||||
|
const seen = new Set<string>();
|
||||||
|
document.querySelectorAll('a[href*="/brand/"]').forEach((a: Element) => {
|
||||||
|
const href = a.getAttribute('href');
|
||||||
|
if (href) seen.add(href);
|
||||||
|
});
|
||||||
|
return seen.size;
|
||||||
|
});
|
||||||
|
|
||||||
|
if (brandCount === previousCount) {
|
||||||
|
sameCount++;
|
||||||
|
if (sameCount >= 5) {
|
||||||
|
console.log(` Scroll ${i+1}: ${brandCount} brands (stopping - no change)`);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
sameCount = 0;
|
||||||
|
console.log(` Scroll ${i+1}: ${brandCount} brands`);
|
||||||
|
}
|
||||||
|
previousCount = brandCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get all unique brands
|
||||||
|
const brands = await page.evaluate(() => {
|
||||||
|
const results: { name: string; href: string }[] = [];
|
||||||
|
const seen = new Set<string>();
|
||||||
|
|
||||||
|
document.querySelectorAll('a[href*="/brand/"]').forEach((a: Element) => {
|
||||||
|
const href = a.getAttribute('href') || '';
|
||||||
|
const normalizedHref = href.toLowerCase();
|
||||||
|
if (seen.has(normalizedHref)) return;
|
||||||
|
seen.add(normalizedHref);
|
||||||
|
|
||||||
|
// Get brand name
|
||||||
|
let name = '';
|
||||||
|
const heading = a.querySelector('h3, h4, h5, [class*="name"]');
|
||||||
|
if (heading) {
|
||||||
|
name = heading.textContent?.trim() || '';
|
||||||
|
}
|
||||||
|
if (!name) {
|
||||||
|
name = a.textContent?.trim().split('\n')[0] || '';
|
||||||
|
}
|
||||||
|
if (!name) {
|
||||||
|
name = href.split('/brand/')[1]?.replace(/-/g, ' ') || '';
|
||||||
|
}
|
||||||
|
|
||||||
|
results.push({ name: name.slice(0, 50), href });
|
||||||
|
});
|
||||||
|
|
||||||
|
return results.sort((a, b) => a.name.localeCompare(b.name));
|
||||||
|
});
|
||||||
|
|
||||||
|
console.log('\n' + '='.repeat(60));
|
||||||
|
console.log('TOTAL BRANDS FOUND: ' + brands.length);
|
||||||
|
console.log('='.repeat(60));
|
||||||
|
|
||||||
|
brands.forEach((b, i) => {
|
||||||
|
const num = (i + 1).toString().padStart(3, ' ');
|
||||||
|
console.log(`${num}. ${b.name} (${b.href})`);
|
||||||
|
});
|
||||||
|
|
||||||
|
// Now visit each brand page and count products
|
||||||
|
console.log('\n' + '='.repeat(60));
|
||||||
|
console.log('PRODUCTS PER BRAND');
|
||||||
|
console.log('='.repeat(60));
|
||||||
|
|
||||||
|
const brandProducts: { brand: string; products: number }[] = [];
|
||||||
|
|
||||||
|
for (let i = 0; i < brands.length; i++) {
|
||||||
|
const brand = brands[i];
|
||||||
|
try {
|
||||||
|
const brandUrl = brand.href.startsWith('http')
|
||||||
|
? brand.href
|
||||||
|
: `https://shop.bestdispensary.com${brand.href}`;
|
||||||
|
|
||||||
|
await page.goto(brandUrl, { waitUntil: 'networkidle2', timeout: 30000 });
|
||||||
|
await sleep(1500);
|
||||||
|
|
||||||
|
// Scroll to load products
|
||||||
|
for (let j = 0; j < 10; j++) {
|
||||||
|
await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
|
||||||
|
await sleep(800);
|
||||||
|
}
|
||||||
|
|
||||||
|
const productCount = await page.evaluate(() => {
|
||||||
|
const seen = new Set<string>();
|
||||||
|
document.querySelectorAll('a[href*="/product/"]').forEach((a: Element) => {
|
||||||
|
const img = a.querySelector('img');
|
||||||
|
const name = img?.getAttribute('alt') || a.textContent?.trim() || '';
|
||||||
|
if (name) seen.add(name);
|
||||||
|
});
|
||||||
|
return seen.size;
|
||||||
|
});
|
||||||
|
|
||||||
|
brandProducts.push({ brand: brand.name, products: productCount });
|
||||||
|
console.log(`${(i+1).toString().padStart(3)}. ${brand.name}: ${productCount} products`);
|
||||||
|
|
||||||
|
} catch (err: any) {
|
||||||
|
console.log(`${(i+1).toString().padStart(3)}. ${brand.name}: ERROR - ${err.message?.slice(0, 30)}`);
|
||||||
|
brandProducts.push({ brand: brand.name, products: 0 });
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Summary
|
||||||
|
const totalProducts = brandProducts.reduce((sum, b) => sum + b.products, 0);
|
||||||
|
console.log('\n' + '='.repeat(60));
|
||||||
|
console.log('SUMMARY');
|
||||||
|
console.log('='.repeat(60));
|
||||||
|
console.log(`Total brands: ${brands.length}`);
|
||||||
|
console.log(`Total products: ${totalProducts}`);
|
||||||
|
|
||||||
|
await browser.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Script entry point: run main() and print any rejection via console.error.
main().catch(console.error);
|
||||||
92
backend/scripts/test-bestdispensary-brands.ts
Normal file
92
backend/scripts/test-bestdispensary-brands.ts
Normal file
@@ -0,0 +1,92 @@
|
|||||||
|
import puppeteer from 'puppeteer';
|
||||||
|
|
||||||
|
/**
 * Pause the calling async flow for a fixed duration.
 *
 * @param ms - Delay in milliseconds, handed directly to setTimeout.
 * @returns A promise that settles with no value once the timer fires.
 */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
|
||||||
|
|
||||||
|
/**
 * Opens the bestdispensary brands page in a headless browser, scrolls to the
 * bottom repeatedly (at most 20 passes, stopping early once the document
 * height no longer changes), prints every distinct brand link found, and
 * saves a full-page screenshot under /tmp.
 *
 * The browser is closed in a `finally` block so that a navigation timeout or
 * an evaluation error does not leave a browser process behind.
 */
async function main() {
  console.log('Navigating to https://shop.bestdispensary.com/brands');

  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // Abort image, font and media requests; every other request continues.
    await page.setRequestInterception(true);
    page.on('request', (req) => {
      if (['image', 'font', 'media'].includes(req.resourceType())) {
        req.abort();
      } else {
        req.continue();
      }
    });

    // Go directly to the brands page
    await page.goto('https://shop.bestdispensary.com/brands', {
      waitUntil: 'networkidle2',
      timeout: 60000,
    });
    await sleep(3000);

    // Bypass age gate if present
    const ageGate = await page.$('[data-testid="age-gate-modal"]');
    if (ageGate) {
      console.log('Age gate detected, bypassing...');
      const btn = await page.$('[data-testid="age-gate-submit-button"]');
      if (btn) await btn.click();
      await sleep(2000);
    }

    console.log('Current URL:', page.url());

    // Scroll to load all content; an unchanged height means nothing new loaded.
    console.log('\nScrolling to load all brands...');
    let previousHeight = 0;
    for (let i = 0; i < 20; i++) {
      await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
      await sleep(1500);

      const currentHeight = await page.evaluate(() => document.body.scrollHeight);
      if (currentHeight === previousHeight) {
        console.log(` Scroll ${i+1}: No new content`);
        break;
      }
      previousHeight = currentHeight;

      const brandCount = await page.evaluate(() =>
        document.querySelectorAll('a[href*="/brand/"]').length
      );
      console.log(` Scroll ${i+1}: height=${currentHeight}, brand links=${brandCount}`);
    }

    // Get all brand links, keeping only the first anchor seen for each href.
    const brands = await page.evaluate(() => {
      const results: { name: string; href: string }[] = [];
      const seen = new Set<string>();

      document.querySelectorAll('a[href*="/brand/"]').forEach((a: Element) => {
        const href = a.getAttribute('href') || '';
        if (seen.has(href)) return;
        seen.add(href);

        // Fall back to the slug after "/brand/" when the anchor has no text.
        const name = a.textContent?.trim() || href.split('/brand/')[1] || '';
        results.push({ name, href });
      });

      return results;
    });

    console.log(`\nFound ${brands.length} brands:`);
    brands.forEach(b => console.log(` - ${b.name} (${b.href})`));

    // Take screenshot
    await page.screenshot({ path: '/tmp/bestdispensary-brands.png', fullPage: true });
    console.log('\nScreenshot saved to /tmp/bestdispensary-brands.png');
  } finally {
    // Always release the browser, including when a step above throws.
    await browser.close();
  }
}
|
||||||
|
|
||||||
|
// Script entry point: run main() and print any rejection via console.error.
main().catch(console.error);
|
||||||
108
backend/scripts/test-brands-debug.ts
Normal file
108
backend/scripts/test-brands-debug.ts
Normal file
@@ -0,0 +1,108 @@
|
|||||||
|
import puppeteer from 'puppeteer';
|
||||||
|
|
||||||
|
/**
 * Wait for the given number of milliseconds before continuing.
 *
 * @param ms - Delay in milliseconds, handed directly to setTimeout.
 * @returns A promise that settles with no value once the timer fires.
 */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
|
||||||
|
|
||||||
|
/**
 * Debug helper for the "Load More" button on the bestdispensary brands page.
 *
 * Prints the button's state (text, bounding-box visibility, position,
 * disabled flag, class), then scrolls it into view and clicks it up to 10
 * times, logging the number of brand links after each click, and finally
 * prints the text of every brand link inside `.brands-page__list`.
 *
 * The browser is closed in a `finally` block so an error in any step does
 * not leave a browser process behind.
 */
async function main() {
  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // Abort image, font and media requests; every other request continues.
    await page.setRequestInterception(true);
    page.on('request', (req) => {
      if (['image', 'font', 'media'].includes(req.resourceType())) {
        req.abort();
      } else {
        req.continue();
      }
    });

    await page.goto('https://shop.bestdispensary.com/brands', {
      waitUntil: 'networkidle2',
      timeout: 60000,
    });
    await sleep(3000);

    // Bypass age gate
    const ageGate = await page.$('[data-testid="age-gate-modal"]');
    if (ageGate) {
      const btn = await page.$('[data-testid="age-gate-submit-button"]');
      if (btn) await btn.click();
      await sleep(2000);
    }

    // Check Load More button
    const btnInfo = await page.evaluate(() => {
      const btn = document.querySelector('button.collection__load-more');
      if (!btn) return { found: false };

      const rect = btn.getBoundingClientRect();
      return {
        found: true,
        text: btn.textContent?.trim(),
        // A zero-sized box is treated as "not visible".
        visible: rect.width > 0 && rect.height > 0,
        top: rect.top,
        disabled: (btn as HTMLButtonElement).disabled,
        class: btn.className,
      };
    });

    console.log('Load More button:', btnInfo);

    // Scroll to button and click
    console.log('\nScrolling to button and clicking...');

    for (let i = 0; i < 10; i++) {
      const btn = await page.$('button.collection__load-more');
      if (!btn) {
        console.log('Button not found');
        break;
      }

      // Scroll button into view
      await page.evaluate((b) => b.scrollIntoView({ behavior: 'smooth', block: 'center' }), btn);
      await sleep(500);

      // Re-query after scrolling: a missing button or empty label ends the loop.
      const stillThere = await page.evaluate(() => {
        const b = document.querySelector('button.collection__load-more');
        return b ? b.textContent?.trim() : null;
      });

      if (!stillThere) {
        console.log('Button disappeared - all loaded');
        break;
      }

      // Click it
      await btn.click();
      console.log(`Click ${i+1}...`);
      await sleep(2000);

      const count = await page.evaluate(() =>
        document.querySelectorAll('.brands-page__list a[href*="/brand/"]').length
      );
      console.log(` Brands: ${count}`);
    }

    // Final count
    const brands = await page.evaluate(() => {
      const list: string[] = [];
      document.querySelectorAll('.brands-page__list a[href*="/brand/"]').forEach((a: Element) => {
        list.push(a.textContent?.trim() || '');
      });
      return list;
    });

    console.log(`\nTotal brands: ${brands.length}`);
    console.log(brands.join(', '));
  } finally {
    // Always release the browser, including when a step above throws.
    await browser.close();
  }
}
|
||||||
|
|
||||||
|
// Script entry point: run main() and print any rejection via console.error.
main().catch(console.error);
|
||||||
157
backend/scripts/test-brands-load-all.ts
Normal file
157
backend/scripts/test-brands-load-all.ts
Normal file
@@ -0,0 +1,157 @@
|
|||||||
|
import puppeteer from 'puppeteer';
|
||||||
|
|
||||||
|
/**
 * Suspend the caller for a fixed delay.
 *
 * @param ms - Delay in milliseconds, handed directly to setTimeout.
 * @returns A promise that settles with no value once the timer fires.
 */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
|
||||||
|
|
||||||
|
/**
 * Loads every brand on the bestdispensary brands page by clicking the
 * "Load More" button until it is missing or has a zero-sized box, then visits
 * each brand page, expands it the same way (at most 10 clicks), and counts
 * the distinct `/product/` hrefs. Prints per-brand counts, a summary and the
 * top 20 brands by product count.
 *
 * Changes from the earlier revision:
 * - the browser is closed in a `finally` block, so errors no longer leak it;
 * - brand URLs are resolved with `new URL(href, origin)`, so absolute hrefs
 *   and hrefs without a leading slash produce a valid URL;
 * - per-brand failures log the error message instead of a bare "ERROR".
 */
async function main() {
  const SITE_ORIGIN = 'https://shop.bestdispensary.com';

  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // Abort image, font and media requests; every other request continues.
    await page.setRequestInterception(true);
    page.on('request', (req) => {
      if (['image', 'font', 'media'].includes(req.resourceType())) {
        req.abort();
      } else {
        req.continue();
      }
    });

    // Clicks the "Load More" button when it exists and has a non-empty box.
    const clickLoadMore = async (): Promise<'missing' | 'hidden' | 'clicked'> => {
      const button = await page.$('button.collection__load-more');
      if (!button) return 'missing';

      const isVisible = await page.evaluate((btn) => {
        const rect = btn.getBoundingClientRect();
        return rect.width > 0 && rect.height > 0;
      }, button);
      if (!isVisible) return 'hidden';

      await button.click();
      return 'clicked';
    };

    await page.goto(`${SITE_ORIGIN}/brands`, {
      waitUntil: 'networkidle2',
      timeout: 60000,
    });
    await sleep(3000);

    // Bypass age gate
    const ageGate = await page.$('[data-testid="age-gate-modal"]');
    if (ageGate) {
      console.log('Bypassing age gate...');
      const btn = await page.$('[data-testid="age-gate-submit-button"]');
      if (btn) await btn.click();
      await sleep(2000);
    }

    // Click "LOAD MORE" until all brands are loaded
    console.log('Loading all brands...\n');

    let loadMoreClicks = 0;
    while (true) {
      const outcome = await clickLoadMore();
      if (outcome === 'missing') {
        console.log('No more "Load More" button - all brands loaded!');
        break;
      }
      if (outcome === 'hidden') {
        console.log('Load More button not visible - all brands loaded!');
        break;
      }

      loadMoreClicks++;
      await sleep(1500);

      const brandCount = await page.evaluate(() =>
        document.querySelectorAll('.brands-page__list a[href*="/brand/"]').length
      );
      console.log(` Click ${loadMoreClicks}: ${brandCount} brands loaded`);

      if (loadMoreClicks > 20) break; // Safety limit
    }

    // Get all brands (anchors lacking either text or href are skipped)
    const brands = await page.evaluate(() => {
      const results: { name: string; href: string }[] = [];
      document.querySelectorAll('.brands-page__list a[href*="/brand/"]').forEach((a: Element) => {
        const href = a.getAttribute('href') || '';
        const name = a.textContent?.trim() || '';
        if (name && href) {
          results.push({ name, href });
        }
      });
      return results;
    });

    console.log('\n' + '='.repeat(60));
    console.log(`TOTAL BRANDS: ${brands.length}`);
    console.log('='.repeat(60));

    // Visit each brand and count products
    console.log('\nCounting products per brand...\n');

    const results: { brand: string; products: number }[] = [];

    for (let i = 0; i < brands.length; i++) {
      const brand = brands[i];
      const label = (i + 1).toString().padStart(3);

      try {
        // Resolve against the site origin so relative and absolute hrefs both work.
        const brandUrl = new URL(brand.href, SITE_ORIGIN).href;

        await page.goto(brandUrl, { waitUntil: 'networkidle2', timeout: 30000 });
        await sleep(1000);

        // Click load more on brand page too
        for (let j = 0; j < 10; j++) {
          if ((await clickLoadMore()) !== 'clicked') break;
          await sleep(1000);
        }

        const productCount = await page.evaluate(() => {
          const seen = new Set<string>();
          document.querySelectorAll('a[href*="/product/"]').forEach((a: Element) => {
            const href = a.getAttribute('href');
            if (href) seen.add(href);
          });
          return seen.size;
        });

        results.push({ brand: brand.name, products: productCount });
        console.log(`${label}. ${brand.name}: ${productCount} products`);
      } catch (err: any) {
        // A failed brand is recorded with 0 products so the run can continue.
        const reason = err instanceof Error ? err.message : String(err);
        console.log(`${label}. ${brand.name}: ERROR - ${reason.slice(0, 80)}`);
        results.push({ brand: brand.name, products: 0 });
      }
    }

    // Summary
    const totalProducts = results.reduce((sum, r) => sum + r.products, 0);
    const brandsWithProducts = results.filter(r => r.products > 0).length;

    console.log('\n' + '='.repeat(60));
    console.log('SUMMARY');
    console.log('='.repeat(60));
    console.log(`Total brands: ${brands.length}`);
    console.log(`Brands with products: ${brandsWithProducts}`);
    console.log(`Total products: ${totalProducts}`);

    // Top brands by product count (sort a copy; `results` keeps visit order)
    console.log('\nTop 20 brands by product count:');
    [...results]
      .sort((a, b) => b.products - a.products)
      .slice(0, 20)
      .forEach((r, i) => console.log(` ${i+1}. ${r.brand}: ${r.products}`));
  } finally {
    // Always release the browser, including when a step above throws.
    await browser.close();
  }
}
|
||||||
|
|
||||||
|
// Script entry point: run main() and print any rejection via console.error.
main().catch(console.error);
|
||||||
108
backend/scripts/test-brands-products.ts
Normal file
108
backend/scripts/test-brands-products.ts
Normal file
@@ -0,0 +1,108 @@
|
|||||||
|
import puppeteer from 'puppeteer';
|
||||||
|
|
||||||
|
/**
 * Delay helper used between page interactions.
 *
 * @param ms - Delay in milliseconds, handed directly to setTimeout.
 * @returns A promise that settles with no value once the timer fires.
 */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
|
||||||
|
|
||||||
|
/**
 * Expands the bestdispensary brands list by clicking the "Load More" button
 * from inside the page (up to 15 times), then visits every brand page,
 * expands it the same way (up to 20 clicks) and counts distinct `/product/`
 * hrefs. Prints one line per brand followed by a totals line.
 *
 * Changes from the earlier revision:
 * - the browser is closed in a `finally` block, so errors no longer leak it;
 * - brand URLs are resolved with `new URL(href, origin)`, so absolute hrefs
 *   and hrefs without a leading slash produce a valid URL;
 * - per-brand failures log the error message instead of a bare "ERROR".
 */
async function main() {
  const SITE_ORIGIN = 'https://shop.bestdispensary.com';

  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // Clicks the button through the DOM; resolves false when no button exists.
    const clickLoadMoreInPage = (): Promise<boolean> =>
      page.evaluate(() => {
        const btn = document.querySelector('button.collection__load-more') as HTMLButtonElement;
        if (btn) { btn.click(); return true; }
        return false;
      });

    await page.goto(`${SITE_ORIGIN}/brands`, {
      waitUntil: 'networkidle2',
      timeout: 60000,
    });
    await sleep(3000);

    // Bypass age gate
    const ageGate = await page.$('[data-testid="age-gate-modal"]');
    if (ageGate) {
      const btn = await page.$('[data-testid="age-gate-submit-button"]');
      if (btn) await btn.click();
      await sleep(2000);
    }

    // Try clicking Load More multiple times with JS
    console.log('Loading all brands...');
    for (let i = 0; i < 15; i++) {
      if (!(await clickLoadMoreInPage())) break;
      await sleep(2000);
    }

    // Get all brands
    const brands = await page.evaluate(() => {
      const list: { name: string; href: string }[] = [];
      document.querySelectorAll('.brands-page__list a[href*="/brand/"]').forEach((a: Element) => {
        list.push({
          name: a.textContent?.trim() || '',
          href: a.getAttribute('href') || '',
        });
      });
      return list;
    });

    console.log(`Total brands found: ${brands.length}\n`);
    console.log('PRODUCTS PER BRAND');
    console.log('==================\n');

    const results: { brand: string; products: number }[] = [];

    for (let i = 0; i < brands.length; i++) {
      const brand = brands[i];
      const num = (i + 1).toString().padStart(2, ' ');

      try {
        // Resolve against the site origin so relative and absolute hrefs both work.
        const url = new URL(brand.href, SITE_ORIGIN).href;

        await page.goto(url, { waitUntil: 'networkidle2', timeout: 30000 });
        await sleep(1000);

        // Click load more on brand page
        for (let j = 0; j < 20; j++) {
          if (!(await clickLoadMoreInPage())) break;
          await sleep(1000);
        }

        const productCount = await page.evaluate(() => {
          const seen = new Set<string>();
          document.querySelectorAll('a[href*="/product/"]').forEach((a: Element) => {
            const href = a.getAttribute('href');
            if (href) seen.add(href);
          });
          return seen.size;
        });

        results.push({ brand: brand.name, products: productCount });
        console.log(`${num}. ${brand.name}: ${productCount}`);
      } catch (err) {
        // A failed brand is recorded with 0 products so the run can continue.
        const reason = err instanceof Error ? err.message : String(err);
        results.push({ brand: brand.name, products: 0 });
        console.log(`${num}. ${brand.name}: ERROR - ${reason.slice(0, 80)}`);
      }
    }

    // Summary
    const total = results.reduce((s, r) => s + r.products, 0);
    console.log('\n==================');
    console.log(`TOTAL: ${brands.length} brands, ${total} products`);
    console.log('==================');
  } finally {
    // Always release the browser, including when a step above throws.
    await browser.close();
  }
}
|
||||||
|
|
||||||
|
// Script entry point: run main() and print any rejection via console.error.
main().catch(console.error);
|
||||||
130
backend/scripts/test-brands-selector.ts
Normal file
130
backend/scripts/test-brands-selector.ts
Normal file
@@ -0,0 +1,130 @@
|
|||||||
|
import puppeteer from 'puppeteer';
|
||||||
|
|
||||||
|
/**
 * Hold execution of the awaiting caller for a set time.
 *
 * @param ms - Delay in milliseconds, handed directly to setTimeout.
 * @returns A promise that settles with no value once the timer fires.
 */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
|
||||||
|
|
||||||
|
/**
 * Inspects the DOM structure of the bestdispensary brands page to help pick
 * selectors: dumps every `<section>` under `<main>` (class, child count and
 * up to 10 sample children), lists the links inside the first
 * `main > section`, and reports grid-like containers holding more than five
 * `/brand/` links together with their distinct brand names.
 *
 * Changes from the earlier revision:
 * - the browser is closed in a `finally` block, so errors no longer leak it;
 * - class names are read with `getAttribute('class')`; `className` is not a
 *   string on SVG elements, so calling `.slice` on it threw a TypeError;
 * - brand-name de-duplication uses a Set instead of `Array.includes`.
 */
async function main() {
  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // Abort image, font and media requests; every other request continues.
    await page.setRequestInterception(true);
    page.on('request', (req) => {
      if (['image', 'font', 'media'].includes(req.resourceType())) {
        req.abort();
      } else {
        req.continue();
      }
    });

    await page.goto('https://shop.bestdispensary.com/brands', {
      waitUntil: 'networkidle2',
      timeout: 60000,
    });
    await sleep(3000);

    // Bypass age gate
    const ageGate = await page.$('[data-testid="age-gate-modal"]');
    if (ageGate) {
      const btn = await page.$('[data-testid="age-gate-submit-button"]');
      if (btn) await btn.click();
      await sleep(2000);
    }

    // Use the selector hint: /html/body/main/section
    console.log('Looking at main > section structure...\n');

    const sectionInfo = await page.evaluate(() => {
      const main = document.querySelector('main');
      if (!main) return { error: 'No main element' };

      const sections = main.querySelectorAll('section');
      const results: any[] = [];

      sections.forEach((section, i) => {
        const children = section.children;
        const childInfo: string[] = [];

        // Sample at most the first 10 children of each section.
        for (let j = 0; j < Math.min(children.length, 10); j++) {
          const child = children[j];
          // getAttribute yields a plain string (or null) for HTML and SVG alike.
          const childClass = (child.getAttribute('class') ?? '').slice(0, 30);
          childInfo.push(`${child.tagName}.${childClass}`);
        }

        results.push({
          index: i,
          class: (section.getAttribute('class') ?? '').slice(0, 50),
          childCount: children.length,
          sampleChildren: childInfo,
        });
      });

      return results;
    });

    console.log('Sections in main:');
    console.log(JSON.stringify(sectionInfo, null, 2));

    // Look for brand cards within the section
    console.log('\nLooking for brand cards in main > section...');

    const brandCards = await page.evaluate(() => {
      const section = document.querySelector('main > section');
      if (!section) return [];

      // Get all child elements that might be brand cards
      const cards: { tag: string; text: string; href: string }[] = [];

      section.querySelectorAll('a').forEach((a: Element) => {
        const href = a.getAttribute('href') || '';
        const text = a.textContent?.trim().slice(0, 50) || '';
        cards.push({ tag: 'a', text, href });
      });

      return cards;
    });

    console.log(`Found ${brandCards.length} links in section:`);
    brandCards.slice(0, 30).forEach(c => console.log(` ${c.text} -> ${c.href}`));

    // Get the grid of brand cards
    console.log('\nLooking for grid container...');

    const gridCards = await page.evaluate(() => {
      // Look for grid-like containers
      const grids = document.querySelectorAll('[class*="grid"], [class*="Grid"], main section > div');
      const results: any[] = [];

      grids.forEach((grid) => {
        const links = grid.querySelectorAll('a[href*="/brand/"]');
        // Only containers with more than five brand links are reported.
        if (links.length <= 5) return;

        // A Set keeps first-seen order while dropping repeated names.
        const names = new Set<string>();
        links.forEach((a: Element) => {
          const text = a.textContent?.trim().split('\n')[0] || '';
          if (text) names.add(text);
        });
        const brands = Array.from(names);

        results.push({
          class: (grid.getAttribute('class') ?? '').slice(0, 40),
          brandCount: brands.length,
          brands: brands.slice(0, 50),
        });
      });

      return results;
    });

    console.log('Grid containers with brands:');
    gridCards.forEach(g => {
      console.log(`\n[${g.brandCount} brands] class="${g.class}"`);
      g.brands.forEach((b: string, i: number) => console.log(` ${i+1}. ${b}`));
    });
  } finally {
    // Always release the browser, including when a step above throws.
    await browser.close();
  }
}
|
||||||
|
|
||||||
|
// Script entry point: run main() and print any rejection via console.error.
main().catch(console.error);
|
||||||
188
backend/scripts/test-iheartjane.ts
Normal file
188
backend/scripts/test-iheartjane.ts
Normal file
@@ -0,0 +1,188 @@
|
|||||||
|
/**
|
||||||
|
* One-off script to test iHeartJane scraping
|
||||||
|
 * Mimics remote worker: Puppeteer + stealth (no proxy is configured for local testing)
|
||||||
|
*
|
||||||
|
* Usage: npx ts-node scripts/test-iheartjane.ts
|
||||||
|
*/
|
||||||
|
|
||||||
|
import puppeteer from 'puppeteer-extra';
|
||||||
|
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
|
||||||
|
|
||||||
|
// Register the stealth plugin on the puppeteer-extra instance before any browser is launched.
puppeteer.use(StealthPlugin());
|
||||||
|
|
||||||
|
// Menu page that main() navigates to.
const TARGET_URL = 'https://theflowershopusa.com/mesa/menu/';
|
||||||
|
// Store id interpolated into the api.iheartjane.com/v1/stores/<id> request made by main().
const STORE_ID = 2788;
|
||||||
|
|
||||||
|
/**
 * One-off probe of an iHeartJane-backed menu page.
 *
 * Launches a headless browser, records every JSON response whose URL
 * contains "iheartjane.com" or "algolia", requests the store record for
 * STORE_ID from inside the page, tries to extract product hints from inline
 * scripts and the DOM, prints a summary grouped by response type, writes all
 * captured responses to /tmp/iheartjane-data.json and saves a screenshot.
 *
 * Changes from the earlier revision:
 * - page setup now runs inside the try/finally that closes the browser, so a
 *   failure before navigation no longer leaks the browser process;
 * - the extracted product hints are reported instead of being discarded;
 * - a `null` JSON payload no longer crashes the summary loop;
 * - a failure while taking the error screenshot is logged on its own.
 */
async function main() {
  console.log('[iHeartJane Test] Starting...');

  // No proxy for local testing
  const browser = await puppeteer.launch({
    headless: true,
    args: [
      '--no-sandbox',
      '--disable-setuid-sandbox',
      '--disable-dev-shm-usage',
      '--disable-blink-features=AutomationControlled',
    ],
  });

  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // Intercept network requests to capture API calls
    const apiResponses: any[] = [];

    await page.setRequestInterception(true);
    page.on('request', (req) => {
      // Block heavy resources
      const type = req.resourceType();
      if (['image', 'font', 'media', 'stylesheet'].includes(type)) {
        req.abort();
      } else {
        req.continue();
      }
    });

    page.on('response', async (response) => {
      const url = response.url();
      const contentType = response.headers()['content-type'] || '';

      // Capture any JSON response from iheartjane domains
      if ((url.includes('iheartjane.com') || url.includes('algolia')) && contentType.includes('json')) {
        try {
          const json = await response.json();
          // Label by the first matching URL substring; order matters.
          const type = url.includes('store') ? 'STORE' :
            url.includes('product') ? 'PRODUCT' :
            url.includes('algolia') ? 'ALGOLIA' : 'API';
          apiResponses.push({ type, url, data: json });
          console.log(`[${type}] ${url.substring(0, 120)}...`);
        } catch {
          // Not JSON
        }
      }
    });

    console.log(`[iHeartJane Test] Navigating to ${TARGET_URL}`);

    try {
      await page.goto(TARGET_URL, {
        waitUntil: 'networkidle2',
        timeout: 60000,
      });

      console.log('[iHeartJane Test] Menu page loaded, waiting for data...');

      // Wait a bit for all API calls to complete
      await new Promise(r => setTimeout(r, 3000));

      // Also try to get store info by visiting the store page
      console.log('[iHeartJane Test] Fetching store info...');
      const storeInfoUrl = `https://api.iheartjane.com/v1/stores/${STORE_ID}`;

      // Try to fetch store info via page.evaluate (uses browser context)
      const storeInfo = await page.evaluate(async (storeId) => {
        try {
          const resp = await fetch(`https://api.iheartjane.com/v1/stores/${storeId}`);
          if (resp.ok) return await resp.json();
          return { error: resp.status };
        } catch (e: any) {
          return { error: e.message };
        }
      }, STORE_ID);

      if (storeInfo && !storeInfo.error) {
        apiResponses.push({ type: 'STORE_DIRECT', url: storeInfoUrl, data: storeInfo });
        console.log('[STORE_DIRECT] Got store info via fetch');
      } else {
        console.log(`[STORE_DIRECT] Failed: ${JSON.stringify(storeInfo)}`);
      }

      console.log('[iHeartJane Test] Processing results...');

      // Wait for products to load; a timeout is logged and the run continues.
      await page.waitForSelector('[data-testid="product-card"], .product-card, [class*="ProductCard"]', {
        timeout: 30000,
      }).catch(() => console.log('[iHeartJane Test] No product cards found via selector'));

      // Try to extract product data from the page
      const products = await page.evaluate(() => {
        // Look for product data in various places
        const results: any[] = [];

        // Method 1: Look for __INITIAL_STATE__ or similar
        const scripts = Array.from(document.querySelectorAll('script'));
        for (const script of scripts) {
          const text = script.textContent || '';
          if (text.includes('products') && text.includes('price')) {
            // Keep only the first 500 characters of the matched object text.
            const match = text.match(/\{[\s\S]*"products"[\s\S]*\}/);
            if (match) {
              results.push({ source: 'script', data: match[0].substring(0, 500) });
            }
          }
        }

        // Method 2: Look for product elements in DOM (first five only)
        const productElements = document.querySelectorAll('[data-testid="product-card"], .product-card, [class*="product"]');
        for (const el of Array.from(productElements).slice(0, 5)) {
          const name = el.querySelector('[class*="name"], h3, h4')?.textContent;
          const price = el.querySelector('[class*="price"]')?.textContent;
          if (name) {
            results.push({ source: 'dom', name, price });
          }
        }

        return results;
      });

      console.log('\n[iHeartJane Test] === RESULTS ===');
      console.log(`Total API responses captured: ${apiResponses.length}`);
      console.log(`Product hints extracted from page: ${products.length}`);
      for (const hint of products) {
        console.log(` ${JSON.stringify(hint).substring(0, 200)}`);
      }

      // Group by type
      const byType: Record<string, any[]> = {};
      for (const r of apiResponses) {
        byType[r.type] = byType[r.type] || [];
        byType[r.type].push(r);
      }

      for (const [type, items] of Object.entries(byType)) {
        console.log(`\n--- ${type} (${items.length} responses) ---`);
        for (const item of items) {
          console.log(`URL: ${item.url}`);
          // A JSON body may legitimately be null; treat it as an empty object.
          const data = item.data ?? {};
          // Show structure
          if (data.hits) {
            console.log(` Products: ${data.hits.length} hits`);
            if (data.hits[0]) {
              console.log(` Fields: ${Object.keys(data.hits[0]).join(', ')}`);
            }
          } else if (data.store) {
            console.log(` Store: ${JSON.stringify(data.store, null, 2).substring(0, 1000)}`);
          } else {
            console.log(` Keys: ${Object.keys(data).join(', ')}`);
          }
        }
      }

      // Write full data to file
      const fs = await import('fs');
      fs.writeFileSync('/tmp/iheartjane-data.json', JSON.stringify(apiResponses, null, 2));
      console.log('\n[iHeartJane Test] Full data saved to /tmp/iheartjane-data.json');

      // Take screenshot
      await page.screenshot({ path: '/tmp/iheartjane-test.png', fullPage: false });
      console.log('[iHeartJane Test] Screenshot saved to /tmp/iheartjane-test.png');
    } catch (error: any) {
      console.error('[iHeartJane Test] Error:', error.message);
      // Best-effort diagnostic screenshot; report its own failure separately.
      try {
        await page.screenshot({ path: '/tmp/iheartjane-error.png' });
      } catch (shotError: any) {
        console.error('[iHeartJane Test] Could not save error screenshot:', shotError?.message);
      }
    }
  } finally {
    await browser.close();
  }

  console.log('[iHeartJane Test] Done');
}
|
||||||
|
|
||||||
|
// Start the script; a rejection from main() is printed via console.error.
main().catch(console.error);
|
||||||
224
backend/scripts/test-jane-api-explore.ts
Normal file
224
backend/scripts/test-jane-api-explore.ts
Normal file
@@ -0,0 +1,224 @@
|
|||||||
|
/**
|
||||||
|
* Explore Jane API to understand data structure
|
||||||
|
* Usage: npx ts-node scripts/test-jane-api-explore.ts
|
||||||
|
*/
|
||||||
|
|
||||||
|
import puppeteer from 'puppeteer-extra';
|
||||||
|
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
|
||||||
|
|
||||||
|
// Register the stealth plugin on the puppeteer-extra instance before main() launches a browser.
puppeteer.use(StealthPlugin());
|
||||||
|
|
||||||
|
/**
 * Explore the Jane (iheartjane.com) store APIs from inside a browser session.
 *
 * The script:
 *   1. loads the public stores page while recording JSON responses whose URL
 *      contains `/stores`, `/search` or `algolia`;
 *   2. looks for store data embedded in the page (`__NEXT_DATA__` or globals);
 *   3. probes several candidate REST endpoints and one Algolia index with
 *      `fetch` calls issued from the page context.
 *
 * All findings are printed to stdout; nothing is returned. The browser is
 * closed in a `finally` block so a failed navigation or probe does not leave
 * a Chromium process behind.
 */
async function main() {
  console.log('Exploring Jane API from browser context...\n');

  const browser = await puppeteer.launch({
    headless: 'new',
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  try {
    const page = await browser.newPage();

    // Intercept network requests to find store data API calls
    const capturedResponses: Array<{ url: string; data: any }> = [];

    await page.setRequestInterception(true);
    // Let every intercepted request through unchanged.
    page.on('request', (req) => req.continue());

    page.on('response', async (response) => {
      const url = response.url();
      const isStoreOrSearch =
        url.includes('/stores') || url.includes('/search') || url.includes('algolia');
      if (!url.includes('iheartjane.com') || !isStoreOrSearch) return;

      try {
        const text = await response.text();
        // Cheap pre-check so bodies that are clearly not JSON are skipped.
        if (text.startsWith('{') || text.startsWith('[')) {
          capturedResponses.push({ url, data: JSON.parse(text) });
          console.log(`Captured: ${url.substring(0, 100)}...`);
        }
      } catch {
        // Body unavailable or not valid JSON: capture is best-effort, so skip it.
      }
    });

    /**
     * GET `endpoint` with `fetch` from the page context.
     * Resolves to the parsed JSON body, or null when the request fails, the
     * status is not OK, or the body is not JSON (the probe is best-effort).
     */
    const fetchJsonInPage = (endpoint: string): Promise<any> =>
      page.evaluate(async (target: string) => {
        try {
          const res = await fetch(target);
          if (res.ok) return await res.json();
        } catch {}
        return null;
      }, endpoint);

    // Visit Jane to establish session
    console.log('Visiting Jane stores page to capture network requests...');
    await page.goto('https://www.iheartjane.com/stores', {
      waitUntil: 'networkidle2',
      timeout: 60000,
    });

    console.log(`\nCaptured ${capturedResponses.length} API responses`);

    for (const resp of capturedResponses) {
      console.log(`\n--- ${resp.url.substring(0, 80)} ---`);
      const keys = Object.keys(resp.data);
      console.log('Keys:', keys);

      // Check for stores array
      if (resp.data.stores && Array.isArray(resp.data.stores)) {
        console.log(`Stores count: ${resp.data.stores.length}`);
        const firstStore = resp.data.stores[0];
        if (firstStore) {
          console.log('First store keys:', Object.keys(firstStore));
          console.log('Sample:', JSON.stringify(firstStore, null, 2).substring(0, 500));
        }
      }

      // Check for hits (Algolia)
      if (resp.data.hits && Array.isArray(resp.data.hits)) {
        console.log(`Hits count: ${resp.data.hits.length}`);
        const firstHit = resp.data.hits[0];
        if (firstHit) {
          console.log('First hit keys:', Object.keys(firstHit));
        }
      }
    }

    // Look for __NEXT_DATA__ or similar embedded data
    console.log('\n--- Checking for embedded page data ---');
    const pageData = await page.evaluate(() => {
      // Check for Next.js data
      const nextData = (window as any).__NEXT_DATA__;
      if (nextData?.props?.pageProps?.stores) {
        return {
          source: '__NEXT_DATA__',
          storeCount: nextData.props.pageProps.stores.length,
          firstStore: nextData.props.pageProps.stores[0],
        };
      }

      // Check for any global store data
      const win = window as any;
      if (win.stores) return { source: 'window.stores', data: win.stores };
      if (win.__stores) return { source: 'window.__stores', data: win.__stores };

      return null;
    });

    if (pageData) {
      console.log('Found embedded data:', pageData.source);
      // storeCount/firstStore are only set for the __NEXT_DATA__ source.
      console.log('Store count:', pageData.storeCount);
      if (pageData.firstStore) {
        console.log('First store keys:', Object.keys(pageData.firstStore));
        console.log('Sample:', JSON.stringify({
          id: pageData.firstStore.id,
          name: pageData.firstStore.name,
          city: pageData.firstStore.city,
          state: pageData.firstStore.state,
        }, null, 2));
      }
    } else {
      console.log('No embedded page data found');
    }

    // Try alternative API endpoints from browser context
    console.log('\n--- Testing alternative API endpoints ---');

    // Try the map endpoint
    const mapData = await fetchJsonInPage('https://api.iheartjane.com/v1/stores/map?per_page=100');
    if (mapData) {
      console.log('\n/v1/stores/map response:');
      console.log('Keys:', Object.keys(mapData));
      if (mapData.stores?.[0]) {
        console.log('First store keys:', Object.keys(mapData.stores[0]));
      }
    }

    // Try index endpoint
    const indexData = await fetchJsonInPage('https://api.iheartjane.com/v1/stores/index?per_page=10');
    if (indexData) {
      console.log('\n/v1/stores/index response:');
      console.log('Keys:', Object.keys(indexData));
      if (indexData.stores?.[0]) {
        console.log('First store keys:', Object.keys(indexData.stores[0]));
      }
    }

    // Try with state parameter
    const stateData = await fetchJsonInPage('https://api.iheartjane.com/v1/stores?state=AZ&per_page=10');
    if (stateData) {
      console.log('\n/v1/stores?state=AZ response:');
      console.log('Keys:', Object.keys(stateData));
      console.log('Stores count:', stateData.stores?.length);
      if (stateData.stores?.[0]) {
        console.log('First store keys:', Object.keys(stateData.stores[0]));
        console.log('Sample:', JSON.stringify(stateData.stores[0], null, 2).substring(0, 300));
      }
    }

    // Try Algolia directly for stores
    console.log('\n--- Testing Algolia for stores ---');
    const algoliaStores = await page.evaluate(async () => {
      try {
        // Common Algolia search pattern
        // NOTE(review): application id and API key are hard-coded here;
        // confirm they are meant to be committed.
        const res = await fetch('https://search.iheartjane.com/1/indexes/stores-production/query', {
          method: 'POST',
          headers: {
            'Content-Type': 'application/json',
            'X-Algolia-Application-Id': 'HKXSXRD7RA',
            'X-Algolia-API-Key': 'YjZhYjQxZjU4ZTNjMTRhYzExZTk2YjU2MzliMGE4ZTE5YjJkMmZkZTI2ODllYTY2MThlMzQ3Y2QxOTFkMjI5Y3RhZ0ZpbHRlcnM9',
          },
          body: JSON.stringify({
            query: 'Arizona',
            hitsPerPage: 20,
          }),
        });
        if (res.ok) return await res.json();
      } catch {}
      return null;
    });

    if (algoliaStores) {
      console.log('Algolia stores-production response:');
      console.log('Keys:', Object.keys(algoliaStores));
      console.log('Hits count:', algoliaStores.hits?.length);
      if (algoliaStores.hits?.[0]) {
        console.log('First hit keys:', Object.keys(algoliaStores.hits[0]));
        console.log('Sample:', JSON.stringify(algoliaStores.hits[0], null, 2).substring(0, 500));
      }
    }

    // Check if there's a /v2 endpoint
    const v2Data = await fetchJsonInPage('https://api.iheartjane.com/v2/stores?per_page=10');
    if (v2Data) {
      console.log('\n/v2/stores response:');
      console.log('Keys:', Object.keys(v2Data));
      if (v2Data.stores?.[0]) {
        console.log('First store keys:', Object.keys(v2Data.stores[0]));
      }
    }
  } finally {
    // Always release the browser, including when a step above throws.
    await browser.close();
  }

  console.log('\nDone!');
}
|
||||||
|
|
||||||
|
// Start the script; a rejection from main() is printed via console.error.
main().catch(console.error);
|
||||||
126
backend/scripts/test-jane-client.ts
Normal file
126
backend/scripts/test-jane-client.ts
Normal file
@@ -0,0 +1,126 @@
|
|||||||
|
/**
|
||||||
|
* Test script for Jane platform client
|
||||||
|
* Tests the new Jane integration with The Flower Shop Mesa
|
||||||
|
*
|
||||||
|
* Usage: npx ts-node scripts/test-jane-client.ts
|
||||||
|
*/
|
||||||
|
|
||||||
|
import {
|
||||||
|
startSession,
|
||||||
|
endSession,
|
||||||
|
fetchProductsFromUrl,
|
||||||
|
resolveStoreFromUrl,
|
||||||
|
} from '../src/platforms/jane';
|
||||||
|
import { JaneNormalizer } from '../src/hydration/normalizers/jane';
|
||||||
|
|
||||||
|
// Menu page that main() passes to fetchProductsFromUrl.
const TEST_URL = 'https://theflowershopusa.com/mesa/menu/';
|
||||||
|
|
||||||
|
/**
 * Smoke-test the Jane platform client against TEST_URL.
 *
 * Test 1 fetches the menu through `fetchProductsFromUrl` and prints the
 * captured store plus a sample of up to five products. Test 2 runs only when
 * products were captured: it wraps the raw hits in a synthetic payload record,
 * feeds it to `JaneNormalizer` and prints the counts and the first normalized
 * product.
 *
 * Any thrown error is reported as "TEST FAILED" and the process exits with
 * code 1.
 */
async function main() {
  const rule = '='.repeat(60);

  console.log(rule);
  console.log('Jane Platform Client Test');
  console.log(rule);
  console.log(`Test URL: ${TEST_URL}`);
  console.log('');

  try {
    // Test 1: Fetch products from URL
    console.log('[Test 1] Fetching products from menu URL...');
    const fetched = await fetchProductsFromUrl(TEST_URL);
    const { store, products, responses } = fetched;

    console.log('');
    console.log('[Results]');
    console.log(` Store: ${store?.name || 'Not captured'}`);
    console.log(` Store ID: ${store?.id || 'N/A'}`);
    console.log(` Products captured: ${products.length}`);
    console.log(` API responses: ${responses.length}`);

    if (store) {
      console.log('');
      console.log('[Store Info]');
      const storeLines = [
        ` Address: ${store.address}, ${store.city}, ${store.state} ${store.zip}`,
        ` Phone: ${store.phone}`,
        ` Coordinates: ${store.lat}, ${store.long}`,
        ` Medical: ${store.medical}, Recreational: ${store.recreational}`,
        ` Rating: ${store.rating} (${store.reviews_count} reviews)`,
        ` Product count (store): ${store.product_count}`,
      ];
      for (const line of storeLines) {
        console.log(line);
      }
    }

    if (products.length > 0) {
      console.log('');
      console.log('[Sample Products (first 5)]');
      products.slice(0, 5).forEach((item) => {
        // Prefer the per-gram price, then the per-unit price.
        const shownPrice = item.price_gram || item.price_each || 'N/A';
        console.log(` - ${item.name} (${item.brand}) - $${shownPrice}`);
        console.log(` Kind: ${item.kind}, Category: ${item.category}, THC: ${item.percent_thc}%`);
      });

      // Test 2: Normalize products
      console.log('');
      console.log('[Test 2] Testing normalizer...');
      const janeNormalizer = new JaneNormalizer();

      // Synthetic payload record wrapping the raw hits captured above.
      const syntheticPayload = {
        id: 'test-payload',
        dispensary_id: 9999,
        crawl_run_id: null,
        platform: 'jane',
        payload_version: 1,
        raw_json: { hits: products.map((item) => item.raw) },
        product_count: products.length,
        pricing_type: null,
        crawl_mode: null,
        fetched_at: new Date(),
        processed: false,
        normalized_at: null,
        hydration_error: null,
        hydration_attempts: 0,
        created_at: new Date(),
      };

      const normalized = janeNormalizer.normalize(syntheticPayload);

      console.log(` Products normalized: ${normalized.products.length}`);
      console.log(` Brands extracted: ${normalized.brands.length}`);
      console.log(` Categories extracted: ${normalized.categories.length}`);
      console.log(` Errors: ${normalized.errors.length}`);

      if (normalized.products.length > 0) {
        console.log('');
        console.log('[Sample Normalized Product]');
        const first = normalized.products[0];
        console.log(` External ID: ${first.externalProductId}`);
        console.log(` Name: ${first.name}`);
        console.log(` Brand: ${first.brandName}`);
        console.log(` Category: ${first.category}`);
        console.log(` Type: ${first.type}`);
        console.log(` Strain: ${first.strainType}`);
        console.log(` THC: ${first.thcPercent}%`);
        console.log(` CBD: ${first.cbdPercent}%`);
        console.log(` Image: ${first.primaryImageUrl?.slice(0, 60)}...`);

        const firstPricing = normalized.pricing.get(first.externalProductId);
        if (firstPricing) {
          console.log(` Price (cents): ${firstPricing.priceRec}`);
          console.log(` On Special: ${firstPricing.isOnSpecial}`);
        }
      }
    }

    console.log('');
    console.log(rule);
    console.log('TEST PASSED');
    console.log(rule);
  } catch (err: any) {
    console.error('');
    console.error(rule);
    console.error('TEST FAILED');
    console.error(rule);
    console.error(`Error: ${err.message}`);
    console.error(err.stack);
    process.exit(1);
  }
}
|
||||||
|
|
||||||
|
// Start the script. main() handles its own errors; this catch only logs anything that escapes it.
main().catch(console.error);
|
||||||
50
backend/scripts/test-jane-discovery-az.ts
Normal file
50
backend/scripts/test-jane-discovery-az.ts
Normal file
@@ -0,0 +1,50 @@
|
|||||||
|
/**
|
||||||
|
* Smoke test: Discover Jane stores in Arizona
|
||||||
|
* Usage: npx ts-node scripts/test-jane-discovery-az.ts
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { discoverStoresByState } from '../src/platforms/jane';
|
||||||
|
|
||||||
|
/**
 * Smoke test: call `discoverStoresByState('AZ')` and print a summary.
 *
 * Prints the total number of stores, details for the first ten, and a
 * "... and N more" line when there are more than ten. On any thrown error the
 * failure banner, message and stack are written to stderr and the process
 * exits with code 1.
 */
async function main() {
  const rule = '='.repeat(60);
  const sampleSize = 10;

  console.log(rule);
  console.log('Jane Store Discovery - Arizona Smoke Test');
  console.log(rule);
  console.log('Using local IP (no proxy)\n');

  try {
    const found = await discoverStoresByState('AZ');
    const total = found.length;

    console.log(`\n${rule}`);
    console.log(`RESULTS: Found ${total} Jane stores in Arizona`);
    console.log(rule);

    if (total > 0) {
      console.log('\nSample stores:');
      found.slice(0, sampleSize).forEach((entry) => {
        const types = entry.storeTypes?.join(', ') || 'unknown';
        const products = entry.productCount || 'N/A';
        console.log(` - ${entry.name}`);
        console.log(` ID: ${entry.storeId} | ${entry.city}, AZ`);
        console.log(` Types: ${types}`);
        console.log(` Products: ${products}`);
        console.log('');
      });

      // Mention how many stores were left out of the sample.
      const remaining = total - sampleSize;
      if (remaining > 0) {
        console.log(` ... and ${remaining} more stores`);
      }
    }

    console.log(`\n${rule}`);
    console.log('SMOKE TEST PASSED');
    console.log(rule);
  } catch (err: any) {
    console.error(`\n${rule}`);
    console.error('SMOKE TEST FAILED');
    console.error(rule);
    console.error(`Error: ${err.message}`);
    console.error(err.stack);
    process.exit(1);
  }
}
|
||||||
|
|
||||||
|
// Start the script. main() catches its own errors and exits with code 1 on failure.
main();
|
||||||
55
backend/scripts/test-jane-med-rec-compare.ts
Normal file
55
backend/scripts/test-jane-med-rec-compare.ts
Normal file
@@ -0,0 +1,55 @@
|
|||||||
|
/**
|
||||||
|
* Compare MED vs REC product menus for same location
|
||||||
|
*/
|
||||||
|
import puppeteer from 'puppeteer-extra';
|
||||||
|
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
|
||||||
|
// Register the stealth plugin on the puppeteer-extra instance before main() launches a browser.
puppeteer.use(StealthPlugin());
|
||||||
|
|
||||||
|
/**
 * Compare the product ids returned for two Jane stores: 3379 (labelled REC
 * below) and 4540 (labelled MED).
 *
 * For each store the first 100 hits of the `menu-products-production` index
 * are fetched from the page context, then the script prints how many product
 * ids are REC-only, MED-only and shared, plus an overall verdict.
 *
 * The browser is closed in a `finally` block so a failed navigation or query
 * does not leave a Chromium process behind.
 */
async function main() {
  const browser = await puppeteer.launch({ headless: 'new', args: ['--no-sandbox'] });

  try {
    const page = await browser.newPage();

    await page.goto('https://www.iheartjane.com/stores', { waitUntil: 'domcontentloaded' });
    // Fixed pause after DOMContentLoaded before issuing the queries.
    await new Promise(r => setTimeout(r, 2000));

    /**
     * Fetch up to 100 product ids for one store from the page context.
     * Resolves to [] when the response carries no `hits` array.
     */
    const fetchProductIds = (storeId: number): Promise<number[]> =>
      page.evaluate(async (id: number) => {
        const res = await fetch('https://search.iheartjane.com/1/indexes/menu-products-production/query', {
          method: 'POST',
          headers: { 'Content-Type': 'application/json' },
          body: JSON.stringify({ query: '', hitsPerPage: 100, filters: `store_id=${id}` }),
        });
        const data = await res.json();
        return data.hits?.map((h: any) => h.product_id) || [];
      }, storeId);

    // Fetch REC products (store 3379)
    const recProducts = await fetchProductIds(3379);

    // Fetch MED products (store 4540)
    const medProducts = await fetchProductIds(4540);

    const recSet = new Set(recProducts);
    const medSet = new Set(medProducts);

    const recOnly = recProducts.filter(id => !medSet.has(id)).length;
    const medOnly = medProducts.filter(id => !recSet.has(id)).length;
    const shared = recProducts.filter(id => medSet.has(id)).length;

    // The menus are identical only when neither side has an id the other
    // lacks; checking the REC side alone would also match "REC is a subset
    // of MED".
    let verdict = 'PARTIALLY OVERLAPPING';
    if (shared === 0) {
      verdict = 'COMPLETELY DIFFERENT';
    } else if (recOnly === 0 && medOnly === 0) {
      verdict = 'IDENTICAL';
    }

    console.log('\nHana Phoenix - MED vs REC comparison (100 products each):');
    console.log(' REC products fetched:', recProducts.length);
    console.log(' MED products fetched:', medProducts.length);
    console.log(' REC-only:', recOnly);
    console.log(' MED-only:', medOnly);
    console.log(' Shared:', shared);
    console.log(' Menus are:', verdict);
  } finally {
    // Always release the browser, including when a step above throws.
    await browser.close();
  }
}
|
||||||
|
|
||||||
|
// Start the script; a rejection from main() is printed via console.error.
main().catch(console.error);
|
||||||
79
backend/scripts/test-jane-med-rec-diff.ts
Normal file
79
backend/scripts/test-jane-med-rec-diff.ts
Normal file
@@ -0,0 +1,79 @@
|
|||||||
|
/**
|
||||||
|
* Find ALL differing fields between MED and REC product payloads
|
||||||
|
*/
|
||||||
|
import puppeteer from 'puppeteer-extra';
|
||||||
|
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
|
||||||
|
// Register the stealth plugin on the puppeteer-extra instance before main() launches a browser.
puppeteer.use(StealthPlugin());
|
||||||
|
|
||||||
|
/**
 * Print every field that differs between the payloads of one product as
 * returned for store 3379 (labelled REC below) and store 4540 (labelled MED).
 *
 * The first hit for the REC store is taken as the reference product; the same
 * `product_id` is then looked up among the first 100 hits of the MED store.
 * The script prints all keys, the differing fields (values truncated to 100
 * characters) and the fields whose name suggests a limit or allowance.
 *
 * The browser is closed in a `finally` block so a failed navigation or query
 * does not leave a Chromium process behind.
 */
async function main() {
  const browser = await puppeteer.launch({ headless: 'new', args: ['--no-sandbox'] });

  try {
    const page = await browser.newPage();

    await page.goto('https://www.iheartjane.com/stores', { waitUntil: 'domcontentloaded' });
    // Fixed pause after DOMContentLoaded before issuing the queries.
    await new Promise(r => setTimeout(r, 2000));

    // Get full product payload from REC store
    const recProduct = await page.evaluate(async () => {
      const res = await fetch('https://search.iheartjane.com/1/indexes/menu-products-production/query', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ query: '', hitsPerPage: 1, filters: 'store_id=3379' }),
      });
      const data = await res.json();
      return data.hits?.[0];
    });

    // Without a reference product there is nothing to compare.
    if (!recProduct) {
      console.log('No product returned for REC store 3379; nothing to compare.');
      return;
    }

    const productId = recProduct.product_id;

    // Get same product from MED store
    const medProduct = await page.evaluate(async (pid: number) => {
      const res = await fetch('https://search.iheartjane.com/1/indexes/menu-products-production/query', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ query: '', hitsPerPage: 100, filters: 'store_id=4540' }),
      });
      const data = await res.json();
      return data.hits?.find((h: any) => h.product_id === pid);
    }, productId);

    console.log('Product:', recProduct.name, '(ID:', productId, ')\n');

    // Make it explicit when the diff below is against a missing payload.
    if (!medProduct) {
      console.log('NOTE: product not found in the first 100 hits for MED store 4540; every field below will show as differing.\n');
    }

    // Get all keys
    const allKeys = new Set([...Object.keys(recProduct), ...Object.keys(medProduct || {})]);
    const sortedKeys = [...allKeys].sort();

    console.log('=== ALL KEYS IN PAYLOAD ===');
    console.log(sortedKeys.join(', '));

    console.log('\n=== FIELDS THAT DIFFER ===');
    let diffCount = 0;
    for (const key of sortedKeys) {
      // Compare serialized values so nested objects and arrays are compared by content.
      const recVal = JSON.stringify(recProduct[key]);
      const medVal = JSON.stringify(medProduct?.[key]);
      if (recVal !== medVal) {
        diffCount++;
        console.log(`${key}:`);
        console.log(` REC: ${recVal?.substring(0, 100)}`);
        console.log(` MED: ${medVal?.substring(0, 100)}`);
      }
    }

    if (diffCount === 0) {
      console.log('(none - payloads are identical)');
    }

    // Check for limit/allowance related fields
    console.log('\n=== LIMIT-RELATED FIELDS ===');
    const limitFields = sortedKeys.filter(k =>
      k.includes('limit') || k.includes('max') || k.includes('allow') ||
      k.includes('quantity') || k.includes('cart') || k.includes('medical') ||
      k.includes('rec') || k.includes('weight')
    );
    for (const key of limitFields) {
      console.log(`${key}: REC=${JSON.stringify(recProduct[key])} | MED=${JSON.stringify(medProduct?.[key])}`);
    }
  } finally {
    // Always release the browser, including on the early return above.
    await browser.close();
  }
}
|
||||||
|
|
||||||
|
// Start the script; a rejection from main() is printed via console.error.
main().catch(console.error);
|
||||||
35
backend/scripts/test-jane-payload.ts
Normal file
35
backend/scripts/test-jane-payload.ts
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
/**
|
||||||
|
* Test script to capture and save full Jane payload
|
||||||
|
* Usage: npx ts-node scripts/test-jane-payload.ts
|
||||||
|
*/
|
||||||
|
|
||||||
|
import * as fs from 'fs';
|
||||||
|
import { fetchProductsFromUrl } from '../src/platforms/jane';
|
||||||
|
|
||||||
|
// Menu page that main() passes to fetchProductsFromUrl.
const TEST_URL = 'https://theflowershopusa.com/mesa/menu/';
|
||||||
|
// File that main() writes the captured payload to, as pretty-printed JSON.
const OUTPUT_FILE = '/tmp/jane-test-payload.json';
|
||||||
|
|
||||||
|
/**
 * Fetch the Jane menu at TEST_URL and save the captured data to OUTPUT_FILE.
 *
 * The saved JSON holds the raw product hits, the raw store record (or null),
 * a capture timestamp, the platform name, the store id and the product and
 * response counts. The output path, product count and file size in KB are
 * printed afterwards.
 */
async function main() {
  console.log('Fetching Jane payload...');

  const fetched = await fetchProductsFromUrl(TEST_URL);
  const { products, store, responses } = fetched;

  // Build payload structure matching what would be saved
  const rawHits = products.map((item) => item.raw);
  const payload = {
    hits: rawHits,
    store: store?.raw || null,
    capturedAt: new Date().toISOString(),
    platform: 'jane',
    storeId: store?.id,
    productCount: products.length,
    responseCount: responses.length,
  };

  // Save to file
  const serialized = JSON.stringify(payload, null, 2);
  fs.writeFileSync(OUTPUT_FILE, serialized);

  // Report the size of the file as written to disk.
  const sizeKb = Math.round(fs.statSync(OUTPUT_FILE).size / 1024);
  console.log(`\nPayload saved to: ${OUTPUT_FILE}`);
  console.log(`Products: ${products.length}`);
  console.log(`Size: ${sizeKb}KB`);
}
|
||||||
|
|
||||||
|
// Start the script; a rejection from main() is printed via console.error.
main().catch(console.error);
|
||||||
138
backend/scripts/test-treez-all-endpoints.ts
Normal file
138
backend/scripts/test-treez-all-endpoints.ts
Normal file
@@ -0,0 +1,138 @@
|
|||||||
|
import puppeteer from 'puppeteer';
|
||||||
|
|
||||||
|
/**
 * Pause for the given number of milliseconds.
 *
 * @param ms Delay passed to `setTimeout`.
 * @returns A promise that resolves (with no value) once the timer fires.
 */
async function sleep(ms: number): Promise<void> {
  const timer = new Promise<void>((done) => {
    setTimeout(done, ms);
  });
  return timer;
}
|
||||||
|
|
||||||
|
/**
 * Load a Treez-backed shop page, record which treez.io endpoints it calls,
 * and then query the product search API directly from the page context.
 *
 * Steps:
 *   1. record every non-.js/.css request to treez.io and summarize JSON
 *      responses from gapcommerceapi.com as they arrive;
 *   2. open the /shop page, dismiss the age gate if present and click
 *      "Load More" up to five times;
 *   3. print the unique treez.io endpoint URLs (query strings stripped);
 *   4. POST a search query for up to 1000 products and print the field names
 *      and one sample product.
 *
 * The browser is closed in a `finally` block so a failed navigation or click
 * does not leave a Chromium process behind.
 */
async function main() {
  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // Capture ALL requests to treez.io
    const treezRequests: any[] = [];

    page.on('request', (req) => {
      const url = req.url();
      if (url.includes('treez.io') && !url.includes('.js') && !url.includes('.css')) {
        treezRequests.push({
          url,
          method: req.method(),
        });
      }
    });

    // Also intercept and capture ES API responses
    page.on('response', async (res) => {
      const url = res.url();
      if (!url.includes('gapcommerceapi.com') || res.status() !== 200) return;

      try {
        const json = await res.json();
        const total = json.hits?.total?.value;
        const count = json.hits?.hits?.length;
        if (total || count) {
          console.log(`\nES Response: total=${total}, returned=${count}`);
          const src = json.hits?.hits?.[0]?._source;
          if (src) {
            console.log(`First product fields: ${Object.keys(src).slice(0, 20).join(', ')}`);
          }
        }
      } catch {
        // Body unavailable or not JSON: the summary is best-effort, so skip it.
      }
    });

    console.log('Loading /shop page...\n');

    await page.goto('https://shop.bestdispensary.com/shop', {
      waitUntil: 'networkidle2',
      timeout: 60000,
    });
    await sleep(3000);

    // Bypass age gate
    const ageGate = await page.$('[data-testid="age-gate-modal"]');
    if (ageGate) {
      const btn = await page.$('[data-testid="age-gate-submit-button"]');
      if (btn) await btn.click();
      await sleep(2000);
    }

    // Click load more several times
    console.log('\nClicking Load More...');
    for (let i = 0; i < 5; i++) {
      const btn = await page.$('button.collection__load-more');
      if (!btn) break;
      await btn.click();
      await sleep(2000);
    }

    console.log('\n=== TREEZ API ENDPOINTS CALLED ===\n');
    // Drop query strings so each endpoint is listed once.
    const uniqueUrls = [...new Set(treezRequests.map(r => r.url.split('?')[0]))];
    uniqueUrls.forEach(url => console.log(url));

    // Now intercept the ES response data by making a request from browser context
    console.log('\n=== FETCHING ALL PRODUCTS VIA BROWSER ===\n');

    const allProducts = await page.evaluate(async () => {
      // NOTE(review): API key is hard-coded here; confirm it is meant to be committed.
      const apiKey = 'V3jHL9dFzi3Gj4UISM4lr38Nm0GSxcps5OBz1PbS';
      const url = 'https://search-kyrok9udlk.gapcommerceapi.com/product/search';

      const query = {
        from: 0,
        size: 1000,
        query: {
          bool: {
            must: [
              { bool: { filter: { range: { customMinPrice: { gte: 0.01, lte: 500000 }}}}},
              { bool: { should: [{ match: { isAboveThreshold: true }}]}},
              { bool: { should: [{ match: { isHideFromMenu: false }}]}}
            ]
          }
        }
      };

      try {
        const response = await fetch(url, {
          method: 'POST',
          headers: {
            'Content-Type': 'application/json',
            'x-api-key': apiKey,
          },
          body: JSON.stringify(query),
        });

        const data = await response.json();
        return {
          total: data.hits?.total?.value,
          count: data.hits?.hits?.length,
          sample: data.hits?.hits?.[0]?._source,
          allProducts: data.hits?.hits?.map((h: any) => h._source),
        };
      } catch (err: any) {
        // Report the failure back to Node instead of rejecting the evaluate call.
        return { error: err.message };
      }
    });

    if (allProducts.error) {
      console.log('Error: ' + allProducts.error);
    } else {
      console.log('Total products: ' + allProducts.total);
      console.log('Returned: ' + allProducts.count);

      if (allProducts.sample) {
        console.log('\n=== PRODUCT FIELDS ===\n');
        console.log(Object.keys(allProducts.sample).sort().join('\n'));

        console.log('\n=== SAMPLE PRODUCT ===\n');
        console.log(JSON.stringify(allProducts.sample, null, 2));
      }
    }
  } finally {
    // Always release the browser, including when a step above throws.
    await browser.close();
  }
}
|
||||||
|
|
||||||
|
// Start the script. Report a rejected main() and mark the run as failed
// instead of leaving the promise without a rejection handler.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
|
||||||
203
backend/scripts/test-treez-all-products.ts
Normal file
203
backend/scripts/test-treez-all-products.ts
Normal file
@@ -0,0 +1,203 @@
|
|||||||
|
/**
|
||||||
|
* Extract ALL product elements and find unique products
|
||||||
|
*/
|
||||||
|
|
||||||
|
import puppeteer, { Page } from 'puppeteer';
|
||||||
|
|
||||||
|
// Subdomain used by main() to build the menu URL: https://<STORE_ID>.treez.io/onlinemenu/...
const STORE_ID = 'best';
|
||||||
|
|
||||||
|
/**
 * Wait for `ms` milliseconds.
 *
 * @param ms Delay passed to `setTimeout`.
 * @returns A promise that resolves (with no value) when the timer fires.
 */
async function sleep(ms: number): Promise<void> {
  return new Promise<void>((wake) => {
    setTimeout(wake, ms);
  });
}
|
||||||
|
|
||||||
|
/**
 * Dismiss the age-gate modal when the page shows one.
 *
 * Does nothing if no `[data-testid="age-gate-modal"]` element exists.
 * Otherwise clicks the submit button if it is found, then waits two seconds
 * whether or not a click happened.
 *
 * @param page Puppeteer page to inspect.
 */
async function bypassAgeGate(page: Page): Promise<void> {
  const modal = await page.$('[data-testid="age-gate-modal"]');
  if (!modal) {
    return;
  }

  const submit = await page.$('[data-testid="age-gate-submit-button"]');
  if (submit) {
    await submit.click();
  }

  await sleep(2000);
}
|
||||||
|
|
||||||
|
/**
 * Diagnostic script: load the store's Treez brands page and compare several
 * DOM-based strategies for enumerating products. All results go to the console.
 *
 *   [1] Count every element whose class contains `product_product__`, grouped by tag.
 *   [2] Try four selector strategies and report total / unique-name counts for each.
 *   [3] Extract de-duplicated products (name, href, price) from product links.
 *
 * The browser is closed in a `finally` block so that a navigation timeout or a
 * failing `evaluate` call does not leave a Chromium process behind.
 */
async function main() {
  console.log('='.repeat(60));
  console.log('Extracting ALL product elements');
  console.log('='.repeat(60));

  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // Skip image/font/media downloads; the script only reads DOM attributes and text.
    await page.setRequestInterception(true);
    page.on('request', (req) => {
      if (['image', 'font', 'media'].includes(req.resourceType())) {
        req.abort();
      } else {
        req.continue();
      }
    });

    const url = `https://${STORE_ID}.treez.io/onlinemenu/brands?customerType=ADULT`;
    await page.goto(url, { waitUntil: 'networkidle2', timeout: 60000 });
    await sleep(3000);
    await bypassAgeGate(page);
    await sleep(2000);

    // Get ALL elements with product_product__ class
    console.log('\n[1] Counting all product_product__ elements...');

    const elementAnalysis = await page.evaluate(() => {
      const all = document.querySelectorAll('[class*="product_product__"]');
      const byTag: Record<string, number> = {};
      const anchorHrefs: string[] = [];
      const imgAlts: string[] = [];

      all.forEach(el => {
        const tag = el.tagName;
        byTag[tag] = (byTag[tag] || 0) + 1;

        if (tag === 'A') {
          const href = el.getAttribute('href');
          if (href && href.includes('/product/')) {
            anchorHrefs.push(href);
          }
        }

        if (tag === 'IMG') {
          const alt = el.getAttribute('alt');
          if (alt) imgAlts.push(alt);
        }
      });

      // Only the first 20 samples are returned; the unique counts cover the full lists.
      return {
        total: all.length,
        byTag,
        anchorHrefs: anchorHrefs.slice(0, 20),
        uniqueAnchors: new Set(anchorHrefs).size,
        imgAlts: imgAlts.slice(0, 20),
        uniqueImgAlts: new Set(imgAlts).size,
      };
    });

    console.log(`Total elements: ${elementAnalysis.total}`);
    console.log(`By tag:`, elementAnalysis.byTag);
    console.log(`Unique anchor hrefs: ${elementAnalysis.uniqueAnchors}`);
    console.log(`Unique image alts: ${elementAnalysis.uniqueImgAlts}`);
    console.log(`\nSample anchor hrefs:`, elementAnalysis.anchorHrefs.slice(0, 5));
    console.log(`Sample image alts:`, elementAnalysis.imgAlts.slice(0, 5));

    // Try to extract using different approaches
    console.log('\n[2] Testing extraction approaches...');

    const approaches = await page.evaluate(() => {
      const results: Record<string, { count: number; unique: number; sample: string[] }> = {};

      // Approach 1: Anchor elements with product links
      const anchors = document.querySelectorAll('a[href*="/product/"]');
      const anchorNames = new Set<string>();
      anchors.forEach(a => {
        const img = a.querySelector('img');
        // Prefer the image alt text; otherwise fall back to the first line of the link text.
        const name = img?.getAttribute('alt') || a.textContent?.trim().split('\n')[0] || '';
        if (name) anchorNames.add(name);
      });
      results['a[href*="/product/"]'] = {
        count: anchors.length,
        unique: anchorNames.size,
        sample: Array.from(anchorNames).slice(0, 5),
      };

      // Approach 2: Images with alt text inside product areas
      const productImgs = document.querySelectorAll('[class*="product_product__"] img[alt]');
      const imgNames = new Set<string>();
      productImgs.forEach(img => {
        const alt = img.getAttribute('alt');
        if (alt && alt.length > 2) imgNames.add(alt);
      });
      results['[class*="product_product__"] img[alt]'] = {
        count: productImgs.length,
        unique: imgNames.size,
        sample: Array.from(imgNames).slice(0, 5),
      };

      // Approach 3: H5 elements (product names)
      const h5s = document.querySelectorAll('h5.product_product__name__JcEk0, h5[class*="product__name"]');
      const h5Names = new Set<string>();
      h5s.forEach(h5 => {
        const text = h5.textContent?.trim();
        if (text) h5Names.add(text);
      });
      results['h5[class*="product__name"]'] = {
        count: h5s.length,
        unique: h5Names.size,
        sample: Array.from(h5Names).slice(0, 5),
      };

      // Approach 4: Link class with product_product__
      const links = document.querySelectorAll('a.product_product__ERWtJ, a[class*="product_product__"][class*="link"]');
      const linkNames = new Set<string>();
      links.forEach(link => {
        const h5 = link.querySelector('h5');
        const img = link.querySelector('img');
        const name = h5?.textContent?.trim() || img?.getAttribute('alt') || '';
        if (name) linkNames.add(name);
      });
      results['a.product_product__ERWtJ'] = {
        count: links.length,
        unique: linkNames.size,
        sample: Array.from(linkNames).slice(0, 5),
      };

      return results;
    });

    Object.entries(approaches).forEach(([sel, data]) => {
      console.log(`\n${sel}:`);
      console.log(` Count: ${data.count}, Unique: ${data.unique}`);
      console.log(` Sample: ${data.sample.join(', ')}`);
    });

    // The best approach: use images with alt as the source of truth
    console.log('\n[3] Full product extraction using img[alt] approach...');

    const products = await page.evaluate(() => {
      const seen = new Set<string>();
      const products: { name: string; href: string; price: string }[] = [];

      // Get all product links
      document.querySelectorAll('a[href*="/product/"]').forEach(a => {
        const img = a.querySelector('img');
        const name = img?.getAttribute('alt') || '';

        // De-duplicate by name: links sharing an alt text collapse into the first one seen.
        if (!name || seen.has(name)) return;
        seen.add(name);

        const href = a.getAttribute('href') || '';

        // Get price from within the link or parent
        let price = '';
        const priceEl = a.querySelector('[class*="price"]');
        if (priceEl) {
          // Captures the first "$<digits>[.<2 digits>]" amount, without the dollar sign.
          const priceMatch = priceEl.textContent?.match(/\$(\d+(?:\.\d{2})?)/);
          price = priceMatch ? priceMatch[1] : '';
        }

        products.push({ name, href, price });
      });

      return products;
    });

    console.log(`Extracted ${products.length} unique products`);
    console.log('\nSample products:');
    products.slice(0, 10).forEach(p => {
      console.log(` - ${p.name} | ${p.price ? '$' + p.price : 'N/A'} | ${p.href.slice(0, 40)}...`);
    });
  } finally {
    // Always release the browser, including when a step above throws.
    await browser.close();
  }
}

main().catch(console.error);
|
||||||
52
backend/scripts/test-treez-api.ts
Normal file
52
backend/scripts/test-treez-api.ts
Normal file
@@ -0,0 +1,52 @@
|
|||||||
|
import axios from 'axios';
|
||||||
|
|
||||||
|
/**
 * Query the product-search endpoint without credentials and print what comes back:
 * the reported total, the number of hits returned, and the field names plus a
 * full JSON dump of the first hit's `_source`.
 *
 * Request failures are caught and logged (message, and HTTP status/body when the
 * server responded); the function itself does not reject.
 */
async function main() {
  const url = 'https://search-kyrok9udlk.gapcommerceapi.com/product/search';

  // Up to 500 products that have a price in range, are above threshold and are not hidden.
  const query = {
    from: 0,
    size: 500,
    query: {
      bool: {
        must: [
          { bool: { filter: { range: { customMinPrice: { gte: 0.01, lte: 500000 }}}}},
          { bool: { should: [{ match: { isAboveThreshold: true }}]}},
          { bool: { should: [{ match: { isHideFromMenu: false }}]}}
        ]
      }
    }
  };

  console.log('Querying Treez Elasticsearch API...\n');

  try {
    const response = await axios.post(url, query, {
      headers: { 'Content-Type': 'application/json' },
      // Bound the wait so an unresponsive endpoint cannot hang the script.
      timeout: 30000,
    });

    const data = response.data;
    // `hits.total` is read either as `{ value }` or as a plain number. `??` (not `||`)
    // keeps a legitimate count of 0 instead of falling through to the wrapper object.
    const total = data.hits?.total?.value ?? data.hits?.total;
    const products = data.hits?.hits || [];

    console.log('Total products: ' + total);
    console.log('Products returned: ' + products.length + '\n');

    if (products.length > 0) {
      const first = products[0]._source;
      console.log('=== PRODUCT FIELDS AVAILABLE ===\n');
      console.log(Object.keys(first).sort().join('\n'));

      console.log('\n=== SAMPLE PRODUCT ===\n');
      console.log(JSON.stringify(first, null, 2));
    }

  } catch (err: any) {
    console.log('Error: ' + err.message);
    // `err.response` is only present when the server actually answered.
    if (err.response) {
      console.log('Status: ' + err.response.status);
      console.log('Data: ' + JSON.stringify(err.response.data));
    }
  }
}

main();
|
||||||
97
backend/scripts/test-treez-auth-api.ts
Normal file
97
backend/scripts/test-treez-auth-api.ts
Normal file
@@ -0,0 +1,97 @@
|
|||||||
|
import axios from 'axios';
|
||||||
|
|
||||||
|
/**
 * Exercise two authenticated endpoints and print a summary of each response:
 *
 *   1. The product-search endpoint, sent with an `x-api-key` header.
 *   2. The Treez headless discounts endpoint, sent with `client_id` / `client_secret` headers.
 *
 * Each request has its own try/catch, so a failure of the first does not prevent
 * the second from running. Errors are logged, never rethrown.
 *
 * Credentials can be supplied through the environment variables
 * `TREEZ_ES_API_KEY`, `TREEZ_CLIENT_ID` and `TREEZ_CLIENT_SECRET`; when unset the
 * values previously embedded in this script are used.
 */
async function main() {
  // FIXME(security): credentials are committed in source as fallbacks. If they are
  // not meant to be public they should be rotated and the fallbacks removed.
  const apiKey = process.env.TREEZ_ES_API_KEY || 'V3jHL9dFzi3Gj4UISM4lr38Nm0GSxcps5OBz1PbS';
  const clientId = process.env.TREEZ_CLIENT_ID || '29dce682258145c6b1cf71027282d083';
  const clientSecret = process.env.TREEZ_CLIENT_SECRET || 'A57bB49AfD7F4233B1750a0B501B4E16';

  // Test Elasticsearch API with API key
  console.log('=== ELASTICSEARCH API ===\n');

  const esUrl = 'https://search-kyrok9udlk.gapcommerceapi.com/product/search';

  const query = {
    from: 0,
    size: 1000,
    query: {
      bool: {
        must: [
          { bool: { filter: { range: { customMinPrice: { gte: 0.01, lte: 500000 }}}}},
          { bool: { should: [{ match: { isAboveThreshold: true }}]}},
          { bool: { should: [{ match: { isHideFromMenu: false }}]}}
        ]
      }
    }
  };

  try {
    const response = await axios.post(esUrl, query, {
      headers: {
        'Content-Type': 'application/json',
        'x-api-key': apiKey,
        'Origin': 'https://shop.bestdispensary.com',
        'Referer': 'https://shop.bestdispensary.com/',
      },
      timeout: 30000,
    });

    const data = response.data;
    // `??` (not `||`) so that a real total of 0 is reported as 0 rather than
    // falling through to the `{ value, ... }` wrapper object.
    const total = data.hits?.total?.value ?? data.hits?.total;
    const products = data.hits?.hits || [];

    console.log('Total products: ' + total);
    console.log('Products returned: ' + products.length);

    if (products.length > 0) {
      const first = products[0]._source;
      console.log('\n=== PRODUCT FIELDS ===\n');
      console.log(Object.keys(first).sort().join('\n'));

      console.log('\n=== SAMPLE PRODUCT ===\n');
      console.log(JSON.stringify(first, null, 2));
    }

  } catch (err: any) {
    console.log('Elasticsearch Error: ' + err.message);
    if (err.response) {
      console.log('Status: ' + err.response.status);
    }
  }

  // Test Treez Headless API
  console.log('\n\n=== TREEZ HEADLESS API ===\n');

  const treezUrl = 'https://headless.treez.io/v2.0/dispensary/best/ecommerce/discounts?excludeInactive=true&hideUnset=true&includeProdInfo=true';

  try {
    const response = await axios.get(treezUrl, {
      headers: {
        'client_id': clientId,
        'client_secret': clientSecret,
        'cache-control': 'max-age=0, no-cache, must-revalidate, proxy-revalidate',
        'Origin': 'https://shop.bestdispensary.com',
        'Referer': 'https://shop.bestdispensary.com/',
      },
      timeout: 30000,
    });

    const data = response.data;
    console.log('Response type: ' + typeof data);

    if (Array.isArray(data)) {
      console.log('Array length: ' + data.length);
      if (data.length > 0) {
        console.log('First item: ' + JSON.stringify(data[0], null, 2).slice(0, 1000));
      }
    } else if (data !== null && data !== undefined) {
      console.log('Keys: ' + Object.keys(data).join(', '));
      console.log('Data: ' + JSON.stringify(data, null, 2).slice(0, 2000));
    } else {
      // An empty body would make Object.keys() throw; report it explicitly instead.
      console.log('Data: ' + String(data));
    }

  } catch (err: any) {
    console.log('Treez Error: ' + err.message);
    if (err.response) {
      console.log('Status: ' + err.response.status);
      console.log('Data: ' + JSON.stringify(err.response.data).slice(0, 500));
    }
  }
}

main();
|
||||||
243
backend/scripts/test-treez-brand-products.ts
Normal file
243
backend/scripts/test-treez-brand-products.ts
Normal file
@@ -0,0 +1,243 @@
|
|||||||
|
/**
|
||||||
|
* Visit each brand page and extract products
|
||||||
|
*/
|
||||||
|
|
||||||
|
import puppeteer, { Page } from 'puppeteer';
|
||||||
|
|
||||||
|
const STORE_ID = 'best';
|
||||||
|
|
||||||
|
/**
 * Resolve after a delay.
 *
 * @param ms - How long to wait, in milliseconds (handed straight to `setTimeout`).
 * @returns Promise that settles with no value when the timer elapses.
 */
async function sleep(ms: number): Promise<void> {
  const timer = new Promise<void>((wake) => {
    setTimeout(wake, ms);
  });
  return timer;
}
|
||||||
|
|
||||||
|
/**
 * Click through the age gate when one is present on the page.
 *
 * No-op if the modal element is not found. When it is found, the submit button
 * is clicked if it exists, followed by a fixed 2-second wait.
 *
 * @param page - Puppeteer page that may be showing the age gate.
 */
async function bypassAgeGate(page: Page): Promise<void> {
  const MODAL_SELECTOR = '[data-testid="age-gate-modal"]';
  const SUBMIT_SELECTOR = '[data-testid="age-gate-submit-button"]';

  const gate = await page.$(MODAL_SELECTOR);
  if (!gate) return;

  const confirm = await page.$(SUBMIT_SELECTOR);
  if (confirm) {
    await confirm.click();
  }
  await sleep(2000);
}
|
||||||
|
|
||||||
|
/**
 * Repeatedly scroll to the bottom of the page until its height stops changing.
 *
 * Each pass reads `document.body.scrollHeight`, scrolls to that position, and
 * waits one second. The loop ends after at most 30 passes, or earlier once the
 * height has matched the previous reading on three consecutive passes.
 *
 * @param page - Puppeteer page to scroll.
 */
async function scrollToLoadAll(page: Page): Promise<void> {
  let lastHeight = 0;
  let stableReads = 0;
  let pass = 0;

  while (pass < 30) {
    const height = await page.evaluate(() => document.body.scrollHeight);

    // Count consecutive unchanged readings; any change resets the streak.
    stableReads = height === lastHeight ? stableReads + 1 : 0;
    if (stableReads >= 3) {
      break;
    }

    await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
    await sleep(1000);

    lastHeight = height;
    pass += 1;
  }
}
|
||||||
|
|
||||||
|
/**
 * Collect the products linked from the current page.
 *
 * Runs inside the page: every `a[href*="/product/"]` is inspected, its name is
 * taken from the contained image's `alt` (falling back to the trimmed `<h5>`
 * text), and links with no name or a name already seen are skipped. The price is
 * the first `$<digits>[.<2 digits>]` amount found in a descendant whose class
 * contains "price", without the dollar sign, or `''` when there is none.
 *
 * @param page - Puppeteer page to read from.
 * @returns One `{ name, price, href }` record per distinct name, in document order.
 */
async function extractProducts(page: Page): Promise<{ name: string; price: string; href: string }[]> {
  return page.evaluate(() => {
    // Map keeps insertion order, so values() yields first-seen-per-name in DOM order.
    const byName = new Map<string, { name: string; price: string; href: string }>();

    for (const anchor of Array.from(document.querySelectorAll('a[href*="/product/"]'))) {
      const altText = anchor.querySelector('img')?.getAttribute('alt');
      const name = altText || anchor.querySelector('h5')?.textContent?.trim() || '';
      if (!name || byName.has(name)) {
        continue;
      }

      const priceText = anchor.querySelector('[class*="price"]')?.textContent;
      const dollars = priceText?.match(/\$(\d+(?:\.\d{2})?)/);

      byName.set(name, {
        name,
        price: dollars ? dollars[1] : '',
        href: anchor.getAttribute('href') || '',
      });
    }

    return Array.from(byName.values());
  });
}
|
||||||
|
|
||||||
|
/**
 * Diagnostic script: explore how brands are exposed on the store's Treez menu.
 * All findings are printed to the console.
 *
 *   [1] Derive candidate brand slugs from product URLs on the /brands page.
 *   [2] Look for direct links to `/brand/` pages.
 *   [3] List product sections and their headings.
 *   [4] Try several URL patterns for the first section's brand until one yields products.
 *   [5] Reload /brands and check whether section headings are wrapped in links.
 *
 * The browser is closed in a `finally` block so a failed navigation or
 * `evaluate` call does not leave a Chromium process running.
 */
async function main() {
  console.log('='.repeat(60));
  console.log('Extracting Products from All Brands');
  console.log('='.repeat(60));

  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // Skip image/font/media downloads; only DOM text and attributes are read.
    await page.setRequestInterception(true);
    page.on('request', (req) => {
      if (['image', 'font', 'media'].includes(req.resourceType())) {
        req.abort();
      } else {
        req.continue();
      }
    });

    // Go to brands page and get all brand links
    const brandsUrl = `https://${STORE_ID}.treez.io/onlinemenu/brands?customerType=ADULT`;
    console.log(`\n[1] Getting brand list from ${brandsUrl}`);

    await page.goto(brandsUrl, { waitUntil: 'networkidle2', timeout: 60000 });
    await sleep(3000);
    await bypassAgeGate(page);
    await sleep(2000);

    // The 142 items on /brands ARE brands (shown as product cards with brand info)
    // Get the brand names from the product hrefs (they contain brand name in URL)
    const brandInfo = await page.evaluate(() => {
      const brands: { name: string; slug: string }[] = [];
      const seen = new Set<string>();

      // Extract brand info from product URLs
      // URL pattern: /product/{brand}-{product}-{details}
      document.querySelectorAll('a[href*="/product/"]').forEach(a => {
        const href = a.getAttribute('href') || '';
        // Try to extract brand from URL - first segment before product name.
        // The pattern captures one or two hyphen-separated tokens, so this is a heuristic.
        const match = href.match(/\/product\/([^-]+(?:-[^-]+)?)-/);
        if (match) {
          const slug = match[1];
          if (!seen.has(slug)) {
            seen.add(slug);
            // Also look for brand text in the card
            const brandEl = a.querySelector('[class*="brand"], [class*="Brand"]');
            const name = brandEl?.textContent?.trim() || slug;
            brands.push({ name, slug });
          }
        }
      });

      return brands;
    });

    console.log(`Found ${brandInfo.length} potential brands from product URLs`);
    console.log('Sample:', brandInfo.slice(0, 5));

    // Actually, let's look for brand page links directly
    console.log('\n[2] Looking for brand page links...');

    const brandLinks = await page.evaluate(() => {
      const links: { name: string; href: string }[] = [];
      const seenHrefs = new Set<string>();

      // Look for links to /brand/ pages, keeping the first occurrence of each href
      document.querySelectorAll('a[href*="/brand/"]').forEach(a => {
        const href = a.getAttribute('href') || '';
        if (!href || seenHrefs.has(href)) return;
        seenHrefs.add(href);
        links.push({ name: a.textContent?.trim() || '', href });
      });

      return links;
    });

    console.log(`Found ${brandLinks.length} brand page links`);
    if (brandLinks.length > 0) {
      console.log('Sample:', brandLinks.slice(0, 10));
    }

    // If no brand links, try to find them in section headers
    console.log('\n[3] Looking for brand sections...');

    const brandSections = await page.evaluate(() => {
      const sections: { brandName: string; sampleProduct: string }[] = [];

      document.querySelectorAll('[class*="products_product__section"]').forEach(section => {
        const header = section.querySelector('h2, h3, [class*="heading"]');
        const brandName = header?.textContent?.trim() || '';
        const firstProduct = section.querySelector('a[href*="/product/"]');
        const productName = firstProduct?.querySelector('h5')?.textContent?.trim() ||
          firstProduct?.querySelector('img')?.getAttribute('alt') || '';

        // Sections without a heading are not reported.
        if (brandName) {
          sections.push({ brandName, sampleProduct: productName });
        }
      });

      return sections;
    });

    console.log(`Found ${brandSections.length} brand sections`);
    brandSections.slice(0, 10).forEach(s => {
      console.log(` - Brand: "${s.brandName}" | Sample: "${s.sampleProduct}"`);
    });

    // Try visiting a brand page directly using the section name
    if (brandSections.length > 0) {
      console.log('\n[4] Testing brand page URLs...');

      // Try different URL patterns for first brand
      const testBrand = brandSections[0].brandName;
      const testSlug = testBrand.toLowerCase().replace(/[^a-z0-9]+/g, '-');

      const urlPatterns = [
        `/onlinemenu/brand/${encodeURIComponent(testBrand)}`,
        `/onlinemenu/brand/${testSlug}`,
        `/brand/${encodeURIComponent(testBrand)}`,
        `/brand/${testSlug}`,
      ];

      for (const path of urlPatterns) {
        const testUrl = `https://${STORE_ID}.treez.io${path}?customerType=ADULT`;
        try {
          console.log(` Trying: ${testUrl}`);
          await page.goto(testUrl, { waitUntil: 'networkidle2', timeout: 15000 });
          await sleep(2000);

          const products = await extractProducts(page);
          console.log(` Products found: ${products.length}`);

          if (products.length > 0) {
            console.log(` ✓ Working URL pattern: ${path}`);
            break;
          }
        } catch (e: any) {
          // A failing pattern (e.g. navigation timeout) is logged and the next one is tried.
          console.log(` Error: ${e.message.slice(0, 50)}`);
        }
      }
    }

    // Check if clicking on a brand section leads to a brand page
    console.log('\n[5] Checking if brand sections have clickable headers...');

    await page.goto(brandsUrl, { waitUntil: 'networkidle2', timeout: 60000 });
    await sleep(3000);

    const clickableHeaders = await page.evaluate(() => {
      const results: { text: string; tag: string; href: string; clickable: boolean }[] = [];

      document.querySelectorAll('[class*="products_product__section"] h2, [class*="products_product__section"] h3').forEach(header => {
        // A header counts as clickable if it sits inside an <a> or contains one.
        const link = header.closest('a') || header.querySelector('a');
        const text = header.textContent?.trim() || '';
        const href = link?.getAttribute('href') || '';

        results.push({
          text,
          tag: header.tagName,
          href,
          clickable: !!link,
        });
      });

      return results;
    });

    console.log('Section headers:');
    clickableHeaders.slice(0, 10).forEach(h => {
      console.log(` [${h.tag}] "${h.text}" - ${h.clickable ? `Link: ${h.href}` : 'Not clickable'}`);
    });
  } finally {
    // Always release the browser, including when a step above throws.
    await browser.close();
  }
}

main().catch(console.error);
|
||||||
183
backend/scripts/test-treez-brands-detailed.ts
Normal file
183
backend/scripts/test-treez-brands-detailed.ts
Normal file
@@ -0,0 +1,183 @@
|
|||||||
|
/**
|
||||||
|
* Detailed brand section analysis
|
||||||
|
*/
|
||||||
|
|
||||||
|
import puppeteer, { Page } from 'puppeteer';
|
||||||
|
|
||||||
|
const STORE_ID = 'best';
|
||||||
|
|
||||||
|
/**
 * Delay helper for `await sleep(n)` style pauses.
 *
 * @param ms - Number of milliseconds to wait before resolving.
 * @returns A promise fulfilled with no value after the timeout.
 */
async function sleep(ms: number): Promise<void> {
  return new Promise<void>(function startTimer(finish) {
    setTimeout(finish, ms);
  });
}
|
||||||
|
|
||||||
|
/**
 * Detect and dismiss the age-verification modal.
 *
 * Returns right away when the modal is not on the page. Otherwise logs that the
 * gate was detected, clicks the submit button when it can be found, and waits
 * 2 seconds.
 *
 * @param page - Puppeteer page to check.
 */
async function bypassAgeGate(page: Page): Promise<void> {
  const gateModal = await page.$('[data-testid="age-gate-modal"]');
  if (gateModal === null) {
    return;
  }

  console.log(' Age gate detected, bypassing...');

  const submit = await page.$('[data-testid="age-gate-submit-button"]');
  if (submit !== null) {
    await submit.click();
  }

  await sleep(2000);
}
|
||||||
|
|
||||||
|
/**
 * Diagnostic script: analyse how products are grouped into sections on the
 * store's Treez /brands page. All findings are printed to the console.
 *
 *   [1] Scroll to the bottom repeatedly (max 30 times) until the page height is
 *       unchanged on three consecutive checks, logging height and link count.
 *   [2] List every h2/h3 heading on the page.
 *   [3] Count product links inside each product section.
 *   [4] Guess a brand for each distinct product and tally products per brand.
 *
 * The browser is closed in a `finally` block so a failed navigation or
 * `evaluate` call does not leave a Chromium process running.
 */
async function main() {
  console.log('='.repeat(60));
  console.log('Detailed Brand Section Analysis');
  console.log('='.repeat(60));

  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // Skip image/font/media downloads; only DOM text and attributes are read.
    await page.setRequestInterception(true);
    page.on('request', (req) => {
      if (['image', 'font', 'media'].includes(req.resourceType())) {
        req.abort();
      } else {
        req.continue();
      }
    });

    const url = `https://${STORE_ID}.treez.io/onlinemenu/brands?customerType=ADULT`;
    console.log(`\nNavigating to ${url}`);

    await page.goto(url, { waitUntil: 'networkidle2', timeout: 60000 });
    await sleep(3000);
    await bypassAgeGate(page);
    await sleep(2000);

    // Scroll multiple times to load all content
    console.log('\n[1] Scrolling to load all content...');
    let previousHeight = 0;
    let scrollCount = 0; // consecutive scrolls after which the height did not change

    for (let i = 0; i < 30; i++) {
      await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
      await sleep(1500);

      const currentHeight = await page.evaluate(() => document.body.scrollHeight);
      const productCount = await page.evaluate(() =>
        document.querySelectorAll('a[href*="/product/"]').length
      );

      console.log(` Scroll ${i + 1}: height=${currentHeight}, products=${productCount}`);

      if (currentHeight === previousHeight) {
        scrollCount++;
        if (scrollCount >= 3) break;
      } else {
        scrollCount = 0;
      }
      previousHeight = currentHeight;
    }

    // Look at ALL h2/h3 headers on page
    console.log('\n[2] Finding ALL h2/h3 headers on page...');

    const headers = await page.evaluate(() => {
      const results: { tag: string; text: string; parentClass: string }[] = [];

      document.querySelectorAll('h2, h3').forEach((el: Element) => {
        results.push({
          tag: el.tagName,
          text: el.textContent?.trim().slice(0, 80) || '',
          parentClass: el.parentElement?.className?.slice(0, 50) || '',
        });
      });

      return results;
    });

    console.log(`Found ${headers.length} headers:`);
    headers.forEach((h: { tag: string; text: string }) =>
      console.log(` [${h.tag}] "${h.text}"`)
    );

    // Get products grouped by their section heading
    console.log('\n[3] Getting products per section...');

    const sectionProducts = await page.evaluate(() => {
      const results: { heading: string; products: number }[] = [];

      // Find all sections that contain products
      document.querySelectorAll('[class*="products_product__section"]').forEach((section: Element) => {
        const heading = section.querySelector('h2, h3');
        const headingText = heading?.textContent?.trim() || 'Unknown';
        const products = section.querySelectorAll('a[href*="/product/"]');

        results.push({
          heading: headingText,
          products: products.length,
        });
      });

      return results;
    });

    console.log(`Found ${sectionProducts.length} brand sections:`);
    let totalProducts = 0;
    sectionProducts.forEach((s: { heading: string; products: number }) => {
      console.log(` ${s.heading}: ${s.products} products`);
      totalProducts += s.products;
    });
    console.log(`\nTotal products across all sections: ${totalProducts}`);

    // Also extract brand from each product's URL/card
    console.log('\n[4] Extracting brand from product URLs/cards...');

    const brandCounts = await page.evaluate(() => {
      const byBrand: Record<string, number> = {};
      const seen = new Set<string>();

      document.querySelectorAll('a[href*="/product/"]').forEach((a: Element) => {
        const img = a.querySelector('img');
        const name = img?.getAttribute('alt') || '';

        // Count each product name once; links without an image alt are ignored.
        if (!name || seen.has(name)) return;
        seen.add(name);

        // Try to find brand from the card: the first short span/p text that is
        // neither the product name nor a price is taken as the brand.
        let brand = '';
        for (const span of Array.from(a.querySelectorAll('span, p'))) {
          const text = span.textContent?.trim() || '';
          if (text && text.length < 50 && text !== name && !text.includes('$')) {
            brand = text;
            break;
          }
        }

        // Fallback: get brand from parent section heading
        if (!brand) {
          const section = a.closest('[class*="products_product__section"]');
          const heading = section?.querySelector('h2, h3');
          brand = heading?.textContent?.trim() || 'Unknown';
        }

        byBrand[brand] = (byBrand[brand] || 0) + 1;
      });

      return byBrand;
    });

    console.log('Products by brand:');
    Object.entries(brandCounts)
      .sort((a, b) => (b[1] as number) - (a[1] as number))
      .forEach(([brand, count]) => {
        console.log(` ${brand}: ${count}`);
      });

    const uniqueTotal = Object.values(brandCounts).reduce((sum: number, c) => sum + (c as number), 0);
    console.log(`\nTotal unique products: ${uniqueTotal}`);
  } finally {
    // Always release the browser, including when a step above throws.
    await browser.close();
  }
}

main().catch(console.error);
|
||||||
257
backend/scripts/test-treez-brands.ts
Normal file
257
backend/scripts/test-treez-brands.ts
Normal file
@@ -0,0 +1,257 @@
|
|||||||
|
/**
|
||||||
|
* Test Treez brand-based product extraction
|
||||||
|
* 1. Load /brands page
|
||||||
|
* 2. Click "load more brands" to get all brands
|
||||||
|
* 3. Extract brand URLs
|
||||||
|
* 4. Visit each brand and extract products
|
||||||
|
*/
|
||||||
|
|
||||||
|
import puppeteer, { Page } from 'puppeteer';
|
||||||
|
|
||||||
|
const STORE_ID = 'best';
|
||||||
|
|
||||||
|
/**
 * Wait for the given number of milliseconds.
 *
 * @param ms - Timeout duration forwarded to `setTimeout`.
 * @returns Promise that resolves with no value when the timeout fires.
 */
async function sleep(ms: number): Promise<void> {
  const elapsed = new Promise<void>((release) => setTimeout(release, ms));
  await elapsed;
}
|
||||||
|
|
||||||
|
/**
 * Get past the age gate if the page is showing it.
 *
 * When the modal element exists: log the detection, click the submit button if
 * one is found, then wait 2 seconds. When it does not exist, do nothing.
 *
 * @param page - Puppeteer page to operate on.
 */
async function bypassAgeGate(page: Page): Promise<void> {
  const gatePresent = (await page.$('[data-testid="age-gate-modal"]')) !== null;
  if (!gatePresent) {
    return;
  }

  console.log('[AgeGate] Detected, bypassing...');

  const confirmButton = await page.$('[data-testid="age-gate-submit-button"]');
  if (confirmButton) {
    await confirmButton.click();
  }
  await sleep(2000);
}
|
||||||
|
|
||||||
|
/**
 * Inspect the page for controls that might reveal more brands and log what is found.
 *
 * Despite the name, this function does not click or change anything; it only
 * reports three things to the console:
 *   - every `<select>` with the text of its options,
 *   - buttons/links whose text mentions "load more", "show all" or "view all",
 *   - up to 10 elements whose class name contains "brand" or "select".
 *
 * @param page - Puppeteer page to inspect.
 */
async function loadAllBrands(page: Page): Promise<void> {
  console.log('[Brands] Looking for "load more" option...');

  // Look for select/dropdown with "load more" or "all brands" option
  const selectInfo = await page.evaluate(() =>
    Array.from(document.querySelectorAll('select'), (dropdown, index) => ({
      selector: `select:nth-of-type(${index + 1})`,
      options: Array.from(dropdown.options, (opt) => opt.text),
    }))
  );

  console.log('[Brands] Found selects:', JSON.stringify(selectInfo, null, 2));

  // Look for any button or link with "load more" or "show all"
  const loadMoreButtons = await page.evaluate(() => {
    const phrases = ['load more', 'show all', 'view all'];

    return Array.from(document.querySelectorAll('button, a, [role="button"]'))
      .filter((candidate) => {
        const lowered = candidate.textContent?.toLowerCase() || '';
        return phrases.some((phrase) => lowered.includes(phrase));
      })
      .map((candidate) => ({
        text: candidate.textContent?.trim() || '',
        tag: candidate.tagName,
      }));
  });

  console.log('[Brands] Found load more buttons:', loadMoreButtons);

  // Try to find and interact with the brands dropdown
  // First, let's see all interactive elements with "brand" in them
  const brandElements = await page.evaluate(() => {
    const hits: { tag: string; class: string; text: string }[] = [];

    for (const node of Array.from(document.querySelectorAll('*'))) {
      // className may not be a plain string on every element, hence the defensive toString.
      const classText = node.className?.toString?.() || '';
      const lowered = classText.toLowerCase();
      if (!lowered.includes('brand') && !lowered.includes('select')) {
        continue;
      }

      hits.push({
        tag: node.tagName,
        class: classText.slice(0, 100),
        text: node.textContent?.trim().slice(0, 50) || '',
      });
    }

    // Cap what is sent back from the page at 20 entries.
    return hits.slice(0, 20);
  });

  console.log('[Brands] Brand-related elements:', JSON.stringify(brandElements.slice(0, 10), null, 2));
}
|
||||||
|
|
||||||
|
/**
 * Collect brand links from the current page.
 *
 * Anchors are gathered from several selectors (href containing "/brand/" or
 * "/brands/", or anchors nested under an element whose class contains
 * "brand"/"Brand"). An anchor is kept only when it has both an href and
 * non-empty text; the first occurrence of each href wins.
 *
 * @param page - Puppeteer page showing the brands listing.
 * @returns Brand name / raw href pairs in discovery order.
 */
async function extractBrandLinks(page: Page): Promise<{ name: string; url: string }[]> {
  return page.evaluate(() => {
    const anchorSelectors = [
      'a[href*="/brand/"]',
      'a[href*="/brands/"]',
      '[class*="brand"] a',
      '[class*="Brand"] a',
    ];

    const seenUrls = new Set<string>();
    const collected: { name: string; url: string }[] = [];

    for (const selector of anchorSelectors) {
      for (const anchor of Array.from(document.querySelectorAll(selector))) {
        const url = anchor.getAttribute('href');
        const name = anchor.textContent?.trim() || '';

        if (!url || !name || seenUrls.has(url)) {
          continue;
        }

        seenUrls.add(url);
        collected.push({ name, url });
      }
    }

    return collected;
  });
}
|
||||||
|
|
||||||
|
/**
 * Scroll a brand page until it stops growing, then scrape its product cards.
 *
 * Scrolling stops after 20 rounds, or as soon as the document height has
 * been unchanged for three consecutive measurements. Each product card
 * (class containing "product_product__") yields a name, the first "$"
 * amount found in its price element (or null), and an id taken from its
 * "/product/<id>" link. When no such link exists, an id is synthesised from
 * the product name. Cards without a name, or repeating an already-seen name,
 * are skipped.
 *
 * @param page - Puppeteer page already navigated to a brand.
 * @returns Objects of the shape { productId, name, price }.
 */
async function extractProductsFromBrandPage(page: Page): Promise<any[]> {
  // Phase 1: trigger lazy loading by repeatedly jumping to the bottom.
  let lastHeight = 0;
  let unchangedRounds = 0;

  for (let round = 0; round < 20; round += 1) {
    const height = await page.evaluate(() => document.body.scrollHeight);

    unchangedRounds = height === lastHeight ? unchangedRounds + 1 : 0;
    if (unchangedRounds >= 3) {
      break;
    }

    await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
    await sleep(1000);

    lastHeight = height;
  }

  // Phase 2: read the product cards out of the DOM.
  return page.evaluate(() => {
    const collected: any[] = [];
    const namesSeen = new Set<string>();

    for (const card of Array.from(document.querySelectorAll('[class*="product_product__"]'))) {
      const name = card.querySelector('[class*="product__name"], [class*="name__"]')?.textContent?.trim() || '';
      if (!name || namesSeen.has(name)) {
        continue;
      }
      namesSeen.add(name);

      const priceText = card.querySelector('[class*="price"]')?.textContent || '';
      const priceMatch = priceText.match(/\$(\d+(?:\.\d{2})?)/);
      const price = priceMatch ? parseFloat(priceMatch[1]) : null;

      const productLink = card.querySelector('a[href*="/product/"]');
      const idMatch = productLink ? (productLink.getAttribute('href') || '').match(/\/product\/([^\/?]+)/) : null;
      const productId = idMatch ? idMatch[1] : '';

      collected.push({
        // Fall back to a name-derived id when the card has no product link.
        productId: productId || `treez_${name.replace(/\s+/g, '_').toLowerCase().slice(0, 30)}`,
        name,
        price,
      });
    }

    return collected;
  });
}
|
||||||
|
|
||||||
|
/**
 * Entry point: explore the Treez brands page and sample product extraction.
 *
 * Flow:
 *   1. Open the brands page for STORE_ID, dismiss the age gate, save a screenshot.
 *   2. Log which brand-selection controls exist (loadAllBrands only inspects).
 *   3. Collect brand links.
 *   4. Visit up to SAMPLE_LIMIT brands and count their products.
 *   5. Extrapolate a total from the brands that were actually sampled.
 *
 * Errors are logged rather than rethrown, and the browser is always closed.
 */
async function main() {
  // Maximum number of brands visited when sampling product counts.
  const SAMPLE_LIMIT = 3;

  console.log('='.repeat(60));
  console.log('Testing Treez Brand-Based Extraction');
  console.log('='.repeat(60));

  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  // Everything after launch lives inside try/finally so a failure during
  // page setup cannot leave the browser process running.
  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // Skip images, fonts and media: they are not needed for DOM scraping.
    await page.setRequestInterception(true);
    page.on('request', (req) => {
      if (['image', 'font', 'media'].includes(req.resourceType())) {
        req.abort();
      } else {
        req.continue();
      }
    });

    // [1] Navigate to the brands page.
    const brandsUrl = `https://${STORE_ID}.treez.io/onlinemenu/brands?customerType=ADULT`;
    console.log(`\n[1] Navigating to ${brandsUrl}`);
    await page.goto(brandsUrl, { waitUntil: 'networkidle2', timeout: 60000 });
    await sleep(2000);
    await bypassAgeGate(page);
    await sleep(1000);

    // Screenshot to see what we're working with.
    await page.screenshot({ path: '/tmp/treez-brands-page.png', fullPage: false });
    console.log('[1] Screenshot saved to /tmp/treez-brands-page.png');

    // [2] Log the available brand-selection controls.
    console.log('\n[2] Exploring brand selection options...');
    await loadAllBrands(page);

    // [3] Extract brand links.
    console.log('\n[3] Extracting brand links...');
    const brandLinks = await extractBrandLinks(page);
    console.log(`Found ${brandLinks.length} brand links:`);
    brandLinks.slice(0, 10).forEach(b => console.log(` - ${b.name}: ${b.url}`));

    // [4] Visit a small sample of brands to test product extraction.
    if (brandLinks.length > 0) {
      const sampledBrands = brandLinks.slice(0, SAMPLE_LIMIT);
      console.log(`\n[4] Testing product extraction from first ${sampledBrands.length} brands...`);

      let totalProducts = 0;

      for (const brand of sampledBrands) {
        // Brand hrefs may be relative; resolve them against the store host.
        const brandUrl = brand.url.startsWith('http')
          ? brand.url
          : `https://${STORE_ID}.treez.io${brand.url}`;

        console.log(`\n Visiting brand: ${brand.name}`);
        console.log(` URL: ${brandUrl}`);

        await page.goto(brandUrl, { waitUntil: 'networkidle2', timeout: 30000 });
        await sleep(2000);

        const products = await extractProductsFromBrandPage(page);
        console.log(` Products found: ${products.length}`);

        totalProducts += products.length;
      }

      // [5] Extrapolate using the number of brands actually sampled, which
      // can be smaller than SAMPLE_LIMIT when the store lists few brands.
      const perBrandAverage = totalProducts / sampledBrands.length;
      console.log(`\n[5] Summary from ${sampledBrands.length} brands: ${totalProducts} products`);
      console.log(`Estimated total (${brandLinks.length} brands): ~${Math.round(perBrandAverage * brandLinks.length)} products`);
    }
  } catch (error: any) {
    console.error('Error:', error?.message ?? error);
  } finally {
    await browser.close();
  }
}
|
||||||
|
|
||||||
|
main().catch(console.error);
|
||||||
113
backend/scripts/test-treez-capture-auth.ts
Normal file
113
backend/scripts/test-treez-capture-auth.ts
Normal file
@@ -0,0 +1,113 @@
|
|||||||
|
import puppeteer from 'puppeteer';
|
||||||
|
|
||||||
|
/**
 * Wait for the given number of milliseconds.
 *
 * @param ms - Delay in milliseconds.
 * @returns A promise that settles (with no value) after the delay.
 */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((wake) => {
    setTimeout(wake, ms);
  });
}
|
||||||
|
|
||||||
|
/**
 * Load the shop page and print what its API traffic reveals about auth.
 *
 * Every request whose URL contains "treez.io" or "gapcommerce" is recorded.
 * After the page has loaded (and the age gate, if any, was dismissed) the
 * script prints, per request, the headers whose names look auth-related and,
 * for headless.treez.io requests, every header (values cut to 80 chars).
 * It then inspects window globals for Next.js data and config-like objects.
 *
 * The browser is closed even when a step fails.
 */
async function main() {
  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // Capture request headers for API calls.
    const apiRequests: any[] = [];

    page.on('request', (req) => {
      const url = req.url();
      if (url.includes('treez.io') || url.includes('gapcommerce')) {
        apiRequests.push({
          url,
          method: req.method(),
          headers: req.headers(),
          postData: req.postData(),
        });
      }
    });

    console.log('Loading page to capture API auth headers...\n');

    await page.goto('https://shop.bestdispensary.com/shop', {
      waitUntil: 'networkidle2',
      timeout: 60000,
    });
    await sleep(3000);

    // Bypass age gate.
    const ageGate = await page.$('[data-testid="age-gate-modal"]');
    if (ageGate) {
      const btn = await page.$('[data-testid="age-gate-submit-button"]');
      if (btn) await btn.click();
      await sleep(2000);
    }

    console.log('=== API REQUESTS WITH HEADERS ===\n');

    // Header names containing any of these fragments are highlighted.
    // "authorization" and "x-api-key" are already matched by "auth" and
    // "api"/"key", so they need no separate exact-match checks.
    const interestingFragments = ['auth', 'token', 'key', 'api'];

    apiRequests.forEach((req, i) => {
      console.log(`${i + 1}. ${req.method} ${req.url.slice(0, 100)}`);
      console.log(' Headers:');

      for (const [k, v] of Object.entries(req.headers)) {
        const lowered = k.toLowerCase();
        if (interestingFragments.some((fragment) => lowered.includes(fragment))) {
          console.log(` >>> ${k}: ${v}`);
        }
      }

      // Show all headers for headless.treez.io requests.
      if (req.url.includes('headless.treez.io')) {
        console.log(' ALL HEADERS:');
        for (const [k, v] of Object.entries(req.headers)) {
          console.log(` ${k}: ${String(v).slice(0, 80)}`);
        }
      }

      console.log('');
    });

    // Also check for API keys in page scripts.
    console.log('=== CHECKING FOR API KEYS IN PAGE ===\n');

    const pageData = await page.evaluate(() => {
      const data: any = {};

      // Check window object for API keys.
      const win = window as any;
      if (win.__NEXT_DATA__) {
        data.nextData = win.__NEXT_DATA__;
      }

      // Check for any global config.
      if (win.config || win.CONFIG) {
        data.config = win.config || win.CONFIG;
      }

      // Look for treez-related globals.
      Object.keys(win).forEach(key => {
        const lowered = key.toLowerCase();
        if (lowered.includes('treez') || lowered.includes('api') || lowered.includes('config')) {
          try {
            data[key] = JSON.stringify(win[key]).slice(0, 500);
          } catch {
            // Best effort: globals that cannot be serialised (cyclic objects,
            // functions, ...) are simply left out of the report.
          }
        }
      });

      return data;
    });

    if (pageData.nextData?.props?.pageProps) {
      console.log(`Next.js pageProps keys: ${Object.keys(pageData.nextData.props.pageProps).join(', ')}`);
    }

    if (pageData.nextData?.runtimeConfig) {
      console.log(`Runtime config: ${JSON.stringify(pageData.nextData.runtimeConfig).slice(0, 500)}`);
    }
  } finally {
    // Always release the browser, even when navigation or evaluation throws.
    await browser.close();
  }
}

// Report failures instead of leaving an unhandled promise rejection.
main().catch(console.error);
|
||||||
100
backend/scripts/test-treez-capture-response.ts
Normal file
100
backend/scripts/test-treez-capture-response.ts
Normal file
@@ -0,0 +1,100 @@
|
|||||||
|
import puppeteer from 'puppeteer';
|
||||||
|
import fs from 'fs';
|
||||||
|
|
||||||
|
/**
 * Suspend the caller for a number of milliseconds.
 *
 * @param ms - Delay in milliseconds.
 * @returns A promise that settles (with no value) when the delay has elapsed.
 */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((finish) => {
    setTimeout(finish, ms);
  });
}
|
||||||
|
|
||||||
|
/**
 * Capture product data from the shop page's product-search API responses.
 *
 * A response listener collects the `_source` of every hit returned by
 * "gapcommerceapi.com/product/search" calls (HTTP 200 only). The script
 * loads the shop page, dismisses the age gate, clicks the "Load More"
 * button up to 50 times, then de-duplicates the captured products, prints
 * the field names and one sample, and writes the unique products to
 * /tmp/treez-products.json.
 *
 * The browser is closed even when a step fails.
 */
async function main() {
  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // Capture ES API responses.
    let allProductData: any[] = [];

    page.on('response', async (res) => {
      const url = res.url();
      if (!url.includes('gapcommerceapi.com/product/search') || res.status() !== 200) {
        return;
      }

      try {
        const json = await res.json();
        const products = json.hits?.hits?.map((h: any) => h._source) || [];
        allProductData = allProductData.concat(products);
        console.log(`Captured ${products.length} products (total: ${allProductData.length})`);
      } catch (err: any) {
        // Keep going, but make unreadable bodies visible instead of
        // silently dropping them.
        console.log(`Parse error: ${err?.message ?? err}`);
      }
    });

    console.log('Loading /shop page to capture product data...\n');

    await page.goto('https://shop.bestdispensary.com/shop', {
      waitUntil: 'networkidle2',
      timeout: 60000,
    });
    await sleep(3000);

    // Bypass age gate.
    const ageGate = await page.$('[data-testid="age-gate-modal"]');
    if (ageGate) {
      const btn = await page.$('[data-testid="age-gate-submit-button"]');
      if (btn) await btn.click();
      await sleep(2000);
    }

    // Click load more many times to get all products.
    console.log('\nClicking Load More to capture all products...');
    for (let i = 0; i < 50; i++) {
      const btn = await page.$('button.collection__load-more');
      if (!btn) {
        console.log('No more Load More button');
        break;
      }

      // A zero-sized box means the button exists in the DOM but is not rendered.
      const isVisible = await page.evaluate((b) => {
        const rect = b.getBoundingClientRect();
        return rect.width > 0 && rect.height > 0;
      }, btn);

      if (!isVisible) {
        console.log('Load More not visible');
        break;
      }

      await btn.click();
      await sleep(1500);
      console.log(`Click ${i + 1}: ${allProductData.length} total products`);
    }

    console.log('\n=== RESULTS ===\n');
    console.log(`Total products captured: ${allProductData.length}`);

    if (allProductData.length > 0) {
      // Dedupe by the first identifier that is present on the product.
      const seen = new Set();
      const unique = allProductData.filter(p => {
        const id = p.id || p.productId || p.name;
        if (seen.has(id)) return false;
        seen.add(id);
        return true;
      });

      console.log(`Unique products: ${unique.length}`);
      console.log('\n=== PRODUCT FIELDS ===\n');
      console.log(Object.keys(unique[0]).sort().join('\n'));

      console.log('\n=== SAMPLE PRODUCT ===\n');
      console.log(JSON.stringify(unique[0], null, 2));

      // Save to file.
      fs.writeFileSync('/tmp/treez-products.json', JSON.stringify(unique, null, 2));
      console.log('\nSaved to /tmp/treez-products.json');
    }
  } finally {
    // Always release the browser, even when navigation or clicking throws.
    await browser.close();
  }
}

// Report failures instead of leaving an unhandled promise rejection.
main().catch(console.error);
|
||||||
88
backend/scripts/test-treez-capture-text.ts
Normal file
88
backend/scripts/test-treez-capture-text.ts
Normal file
@@ -0,0 +1,88 @@
|
|||||||
|
import puppeteer from 'puppeteer';
|
||||||
|
import fs from 'fs';
|
||||||
|
|
||||||
|
/**
 * Delay helper used between page interactions.
 *
 * @param ms - Delay in milliseconds.
 * @returns A promise that settles (with no value) once the delay is over.
 */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((release) => {
    setTimeout(release, ms);
  });
}
|
||||||
|
|
||||||
|
/**
 * Capture product-search API responses by reading them as text.
 *
 * Every "gapcommerceapi.com/product/search" response has its status
 * logged; for HTTP 200 the body is read as text, parsed as JSON, and the
 * `_source` of each hit is collected. The script loads the shop page,
 * dismisses the age gate, then scrolls to the bottom 20 times (clicking
 * "load more" whenever that succeeds) to trigger further searches.
 * Captured products are written, without de-duplication, to
 * /tmp/treez-products.json.
 *
 * The browser is closed even when a step fails.
 */
async function main() {
  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // Capture ES API responses as text.
    let allProducts: any[] = [];

    page.on('response', async (res) => {
      const url = res.url();
      if (!url.includes('gapcommerceapi.com/product/search')) {
        return;
      }

      console.log(`ES Response: status=${res.status()}`);
      if (res.status() !== 200) {
        return;
      }

      try {
        const text = await res.text();
        console.log(`Response length: ${text.length}`);
        const json = JSON.parse(text);
        const products = json.hits?.hits?.map((h: any) => h._source) || [];
        allProducts = allProducts.concat(products);
        console.log(`Got ${products.length} products (total: ${allProducts.length})`);
      } catch (err: any) {
        console.log(`Parse error: ${err?.message ?? err}`);
      }
    });

    console.log('Loading page...\n');

    await page.goto('https://shop.bestdispensary.com/shop', {
      waitUntil: 'networkidle2',
      timeout: 60000,
    });
    await sleep(5000);

    // Bypass age gate.
    const ageGate = await page.$('[data-testid="age-gate-modal"]');
    if (ageGate) {
      console.log('Bypassing age gate...');
      const btn = await page.$('[data-testid="age-gate-submit-button"]');
      if (btn) await btn.click();
      await sleep(3000);
    }

    // Wait for initial products to load.
    await sleep(3000);

    console.log(`\nInitial products captured: ${allProducts.length}`);

    // Try scrolling to trigger more loads.
    console.log('\nScrolling...');
    for (let i = 0; i < 20; i++) {
      await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
      await sleep(1500);

      // Also click load more if present.
      try {
        await page.click('button.collection__load-more');
        console.log('Clicked load more');
      } catch {
        // Deliberate best effort: page.click rejects when the button is
        // missing or not clickable, which just means there is nothing to
        // click on this round.
      }
    }

    console.log('\n=== FINAL RESULTS ===\n');
    console.log(`Total products: ${allProducts.length}`);

    if (allProducts.length > 0) {
      console.log(`\nFields: ${Object.keys(allProducts[0]).sort().join(', ')}`);
      console.log(`\nSample:\n${JSON.stringify(allProducts[0], null, 2)}`);

      fs.writeFileSync('/tmp/treez-products.json', JSON.stringify(allProducts, null, 2));
      console.log('\nSaved to /tmp/treez-products.json');
    }
  } finally {
    // Always release the browser, even when navigation or scrolling throws.
    await browser.close();
  }
}

// Report failures instead of leaving an unhandled promise rejection.
main().catch(console.error);
|
||||||
192
backend/scripts/test-treez-categories.ts
Normal file
192
backend/scripts/test-treez-categories.ts
Normal file
@@ -0,0 +1,192 @@
|
|||||||
|
/**
|
||||||
|
* Navigate to each category page and count products
|
||||||
|
*/
|
||||||
|
|
||||||
|
import puppeteer, { Page } from 'puppeteer';
|
||||||
|
|
||||||
|
// Treez store identifier; interpolated as the subdomain of the treez.io URLs this script builds.
const STORE_ID = 'best';
|
||||||
|
|
||||||
|
/**
 * Wait for a fixed delay before continuing.
 *
 * @param ms - Delay in milliseconds.
 * @returns A promise that settles (with no value) after the delay.
 */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((elapsed) => {
    setTimeout(elapsed, ms);
  });
}
|
||||||
|
|
||||||
|
/**
 * Dismiss the age-verification modal when the current page shows one.
 *
 * Does nothing if the modal element is absent. When it is present, the
 * submit button (if found) is clicked and the function then waits two
 * seconds before returning.
 *
 * @param page - Puppeteer page to inspect.
 */
async function bypassAgeGate(page: Page): Promise<void> {
  const modal = await page.$('[data-testid="age-gate-modal"]');
  if (!modal) {
    return;
  }

  const submitButton = await page.$('[data-testid="age-gate-submit-button"]');
  if (submitButton) {
    await submitButton.click();
  }

  // Fixed pause after handling the modal, whether or not a button was clicked.
  await sleep(2000);
}
|
||||||
|
|
||||||
|
/**
 * Keep scrolling to the bottom of the page until it stops growing.
 *
 * Stops after 50 rounds, or as soon as the document height has been
 * unchanged for three consecutive measurements. Each round scrolls to the
 * bottom and then waits 1.5 seconds.
 *
 * @param page - Puppeteer page to scroll.
 */
async function scrollToLoadAll(page: Page): Promise<void> {
  let lastHeight = 0;
  let unchangedRounds = 0;

  for (let round = 0; round < 50; round += 1) {
    const height = await page.evaluate(() => document.body.scrollHeight);

    unchangedRounds = height === lastHeight ? unchangedRounds + 1 : 0;
    if (unchangedRounds >= 3) {
      break;
    }

    await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
    await sleep(1500);

    lastHeight = height;
  }
}
|
||||||
|
|
||||||
|
/**
 * Count the distinct products linked from the current page.
 *
 * Every anchor whose href contains "/product/" contributes one name: the
 * alt text of its image, or failing that the trimmed text of its <h5>.
 * Anchors yielding no name are ignored; repeated names count once.
 *
 * @param page - Puppeteer page to inspect.
 * @returns Number of distinct product names found.
 */
async function countProducts(page: Page): Promise<number> {
  return page.evaluate(() => {
    const distinctNames = new Set<string>();

    for (const anchor of Array.from(document.querySelectorAll('a[href*="/product/"]'))) {
      const altText = anchor.querySelector('img')?.getAttribute('alt');
      const label = altText || anchor.querySelector('h5')?.textContent?.trim() || '';
      if (label) {
        distinctNames.add(label);
      }
    }

    return distinctNames.size;
  });
}
|
||||||
|
|
||||||
|
/**
 * Entry point: probe Treez category pages and count their products.
 *
 * For each known category, several URL patterns are tried until one shows
 * products; that page is scrolled to the end and its products counted. The
 * main shop page is measured the same way, and the navigation links of the
 * menu home page are listed. A per-category summary is printed at the end.
 *
 * The shop-page count is reported separately from the category total
 * because the total is labelled "across categories". The browser is always
 * closed, even when a step throws.
 */
async function main() {
  console.log('='.repeat(60));
  console.log('Testing Treez Category Pages');
  console.log('='.repeat(60));

  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // Skip images, fonts and media: they are not needed for counting.
    await page.setRequestInterception(true);
    page.on('request', (req) => {
      if (['image', 'font', 'media'].includes(req.resourceType())) {
        req.abort();
      } else {
        req.continue();
      }
    });

    // Check for the age gate after every navigation rather than assuming it
    // was handled on the first page: the first URL tried may never show it.
    const dismissAgeGateIfShown = async (): Promise<void> => {
      const gate = await page.$('[data-testid="age-gate-modal"]');
      if (!gate) return;
      await bypassAgeGate(page);
      await sleep(1000);
    };

    // Categories from the nav menu.
    const categories = [
      'cartridges',
      'flower',
      'pre-rolls',
      'edibles',
      'extracts',
      'tinctures',
      'capsules',
      'topicals',
      'accessories',
      'drink',
    ];

    const results: { category: string; products: number }[] = [];

    for (const category of categories) {
      // Try different URL patterns.
      const urls = [
        `https://${STORE_ID}.treez.io/onlinemenu/${category}?customerType=ADULT`,
        `https://${STORE_ID}.treez.io/onlinemenu/category/${category}?customerType=ADULT`,
        `https://${STORE_ID}.treez.io/${category}?customerType=ADULT`,
      ];

      for (const url of urls) {
        try {
          console.log(`\nTrying: ${url}`);
          await page.goto(url, { waitUntil: 'networkidle2', timeout: 30000 });
          await sleep(2000);
          await dismissAgeGateIfShown();

          const initialCount = await countProducts(page);
          if (initialCount === 0) {
            console.log(` No products found`);
            continue;
          }

          console.log(` Initial: ${initialCount} products`);

          await scrollToLoadAll(page);
          const finalCount = await countProducts(page);
          console.log(` After scroll: ${finalCount} products`);

          results.push({ category, products: finalCount });
          break; // Found working URL, move to next category.
        } catch (error: any) {
          console.log(` Error: ${error?.message ?? error}`);
        }
      }
    }

    // Also try the main shop page. Its count is kept out of `results` so it
    // does not inflate the category total.
    let shopProducts: number | null = null;
    console.log('\nTrying main shop page...');
    try {
      const shopUrl = `https://${STORE_ID}.treez.io/onlinemenu/shop?customerType=ADULT`;
      await page.goto(shopUrl, { waitUntil: 'networkidle2', timeout: 30000 });
      await sleep(2000);
      await dismissAgeGateIfShown();

      const initialCount = await countProducts(page);
      console.log(`Shop page initial: ${initialCount} products`);

      if (initialCount > 0) {
        await scrollToLoadAll(page);
        shopProducts = await countProducts(page);
        console.log(`Shop page after scroll: ${shopProducts} products`);
      }
    } catch (error: any) {
      console.log(`Shop page error: ${error?.message ?? error}`);
    }

    // Try to find category links from the nav. A failure here is logged so
    // the summary below is still printed.
    console.log('\n[Alternative] Trying to find nav category links...');
    try {
      const homeUrl = `https://${STORE_ID}.treez.io/onlinemenu/?customerType=ADULT`;
      await page.goto(homeUrl, { waitUntil: 'networkidle2', timeout: 30000 });
      await sleep(3000);
      await dismissAgeGateIfShown();

      const navLinks = await page.evaluate(() => {
        const links: { text: string; href: string }[] = [];
        const seenHrefs = new Set<string>();

        for (const a of Array.from(document.querySelectorAll('nav a, [class*="nav"] a'))) {
          const text = a.textContent?.trim() || '';
          const href = a.getAttribute('href') || '';
          if (href && text && !seenHrefs.has(href)) {
            seenHrefs.add(href);
            links.push({ text, href });
          }
        }

        return links;
      });

      console.log('Nav links found:');
      navLinks.forEach(l => console.log(` - "${l.text}" → ${l.href}`));
    } catch (error: any) {
      console.log(`Nav links error: ${error?.message ?? error}`);
    }

    // Summary.
    console.log('\n' + '='.repeat(60));
    console.log('Summary');
    console.log('='.repeat(60));

    let total = 0;
    results.forEach(r => {
      console.log(`${r.category}: ${r.products} products`);
      total += r.products;
    });

    if (shopProducts !== null) {
      console.log(`shop: ${shopProducts} products`);
    }

    console.log(`\nTotal across categories: ${total} products`);
  } finally {
    await browser.close();
  }
}
|
||||||
|
|
||||||
|
main().catch(console.error);
|
||||||
178
backend/scripts/test-treez-client.ts
Normal file
178
backend/scripts/test-treez-client.ts
Normal file
@@ -0,0 +1,178 @@
|
|||||||
|
/**
|
||||||
|
* ============================================================
|
||||||
|
* TREEZ CLIENT TEST SCRIPT
|
||||||
|
* ============================================================
|
||||||
|
*
|
||||||
|
* Tests the Treez CDP interception client using Best Dispensary.
|
||||||
|
*
|
||||||
|
* This verifies:
|
||||||
|
* - Stealth plugin bypasses headless detection
|
||||||
|
* - CDP intercepts Elasticsearch API responses
|
||||||
|
* - Products are captured and normalized correctly
|
||||||
|
* - Inventory data is available
|
||||||
|
*
|
||||||
|
* Usage: npx ts-node scripts/test-treez-client.ts
|
||||||
|
*
|
||||||
|
* ============================================================
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { fetchProductsFromUrl } from '../src/platforms/treez';
|
||||||
|
|
||||||
|
// Storefront URL that this test script passes to fetchProductsFromUrl().
const TEST_URL = 'https://shop.bestdispensary.com/shop';
|
||||||
|
|
||||||
|
async function main() {
|
||||||
|
console.log('='.repeat(60));
|
||||||
|
console.log('TREEZ CLIENT TEST - CDP INTERCEPTION');
|
||||||
|
console.log('='.repeat(60));
|
||||||
|
console.log(`URL: ${TEST_URL}`);
|
||||||
|
console.log('Method: Puppeteer + Stealth + CDP response capture');
|
||||||
|
console.log('');
|
||||||
|
|
||||||
|
try {
|
||||||
|
console.log('[Starting] Launching browser with Stealth plugin...\n');
|
||||||
|
|
||||||
|
const result = await fetchProductsFromUrl(TEST_URL);
|
||||||
|
|
||||||
|
console.log('\n' + '='.repeat(60));
|
||||||
|
console.log('RESULTS');
|
||||||
|
console.log('='.repeat(60));
|
||||||
|
console.log(`Total products: ${result.totalCaptured}`);
|
||||||
|
console.log(`Store ID: ${result.storeId || 'N/A (custom domain)'}`);
|
||||||
|
console.log(`Source URL: ${result.sourceUrl}`);
|
||||||
|
console.log(`Fetched at: ${result.fetchedAt.toISOString()}`);
|
||||||
|
|
||||||
|
if (result.products.length === 0) {
|
||||||
|
console.log('\n[WARNING] No products captured!');
|
||||||
|
console.log('This could mean:');
|
||||||
|
console.log(' - Stealth plugin is not bypassing detection');
|
||||||
|
console.log(' - CDP is not intercepting the correct URLs');
|
||||||
|
console.log(' - Page structure has changed');
|
||||||
|
process.exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Show sample raw product
|
||||||
|
console.log('\n' + '='.repeat(60));
|
||||||
|
console.log('SAMPLE RAW PRODUCT (from Elasticsearch)');
|
||||||
|
console.log('='.repeat(60));
|
||||||
|
const raw = result.products[0];
|
||||||
|
console.log(JSON.stringify({
|
||||||
|
id: raw.id,
|
||||||
|
name: raw.name,
|
||||||
|
menuTitle: raw.menuTitle,
|
||||||
|
brand: raw.brand,
|
||||||
|
category: raw.category,
|
||||||
|
subtype: raw.subtype,
|
||||||
|
status: raw.status,
|
||||||
|
availableUnits: raw.availableUnits,
|
||||||
|
customMinPrice: raw.customMinPrice,
|
||||||
|
customMaxPrice: raw.customMaxPrice,
|
||||||
|
isActive: raw.isActive,
|
||||||
|
isAboveThreshold: raw.isAboveThreshold,
|
||||||
|
}, null, 2));
|
||||||
|
|
||||||
|
// Show sample normalized product
|
||||||
|
console.log('\n' + '='.repeat(60));
|
||||||
|
console.log('SAMPLE NORMALIZED PRODUCT');
|
||||||
|
console.log('='.repeat(60));
|
||||||
|
const normalized = result.normalized[0];
|
||||||
|
console.log(JSON.stringify({
|
||||||
|
id: normalized.id,
|
||||||
|
name: normalized.name,
|
||||||
|
brand: normalized.brand,
|
||||||
|
category: normalized.category,
|
||||||
|
subtype: normalized.subtype,
|
||||||
|
price: normalized.price,
|
||||||
|
priceMin: normalized.priceMin,
|
||||||
|
priceMax: normalized.priceMax,
|
||||||
|
discountedPrice: normalized.discountedPrice,
|
||||||
|
discountPercent: normalized.discountPercent,
|
||||||
|
availableUnits: normalized.availableUnits,
|
||||||
|
inStock: normalized.inStock,
|
||||||
|
thcPercent: normalized.thcPercent,
|
||||||
|
cbdPercent: normalized.cbdPercent,
|
||||||
|
strainType: normalized.strainType,
|
||||||
|
effects: normalized.effects,
|
||||||
|
flavors: normalized.flavors,
|
||||||
|
imageUrl: normalized.imageUrl,
|
||||||
|
images: normalized.images?.slice(0, 2),
|
||||||
|
}, null, 2));
|
||||||
|
|
||||||
|
// Brand breakdown
|
||||||
|
console.log('\n' + '='.repeat(60));
|
||||||
|
console.log('BRANDS (top 15)');
|
||||||
|
console.log('='.repeat(60));
|
||||||
|
const brandCounts = new Map<string, number>();
|
||||||
|
for (const p of result.normalized) {
|
||||||
|
const brand = p.brand || 'Unknown';
|
||||||
|
brandCounts.set(brand, (brandCounts.get(brand) || 0) + 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
const sorted = [...brandCounts.entries()].sort((a, b) => b[1] - a[1]);
|
||||||
|
console.log(`Total unique brands: ${sorted.length}\n`);
|
||||||
|
sorted.slice(0, 15).forEach(([brand, count]) => {
|
||||||
|
console.log(` ${brand}: ${count} products`);
|
||||||
|
});
|
||||||
|
|
||||||
|
// Category breakdown
|
||||||
|
console.log('\n' + '='.repeat(60));
|
||||||
|
console.log('CATEGORIES');
|
||||||
|
console.log('='.repeat(60));
|
||||||
|
const categoryCounts = new Map<string, number>();
|
||||||
|
for (const p of result.normalized) {
|
||||||
|
const cat = p.category || 'Unknown';
|
||||||
|
categoryCounts.set(cat, (categoryCounts.get(cat) || 0) + 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
const catSorted = [...categoryCounts.entries()].sort((a, b) => b[1] - a[1]);
|
||||||
|
catSorted.forEach(([cat, count]) => {
|
||||||
|
console.log(` ${cat}: ${count} products`);
|
||||||
|
});
|
||||||
|
|
||||||
|
// Inventory stats
|
||||||
|
console.log('\n' + '='.repeat(60));
|
||||||
|
console.log('INVENTORY STATS');
|
||||||
|
console.log('='.repeat(60));
|
||||||
|
const inStock = result.normalized.filter(p => p.inStock).length;
|
||||||
|
const outOfStock = result.normalized.filter(p => !p.inStock).length;
|
||||||
|
const hasInventoryData = result.normalized.filter(p => p.availableUnits > 0).length;
|
||||||
|
|
||||||
|
console.log(`In stock: ${inStock}`);
|
||||||
|
console.log(`Out of stock: ${outOfStock}`);
|
||||||
|
console.log(`With inventory levels: ${hasInventoryData}`);
|
||||||
|
|
||||||
|
// Show inventory examples
|
||||||
|
if (hasInventoryData > 0) {
|
||||||
|
console.log('\nSample inventory levels:');
|
||||||
|
result.normalized
|
||||||
|
.filter(p => p.availableUnits > 0)
|
||||||
|
.slice(0, 5)
|
||||||
|
.forEach(p => {
|
||||||
|
console.log(` ${p.name}: ${p.availableUnits} units`);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check for THC/CBD data
|
||||||
|
const hasThc = result.normalized.filter(p => p.thcPercent !== null).length;
|
||||||
|
const hasCbd = result.normalized.filter(p => p.cbdPercent !== null).length;
|
||||||
|
console.log(`\nWith THC data: ${hasThc} (${Math.round(hasThc / result.totalCaptured * 100)}%)`);
|
||||||
|
console.log(`With CBD data: ${hasCbd} (${Math.round(hasCbd / result.totalCaptured * 100)}%)`);
|
||||||
|
|
||||||
|
// Check for images
|
||||||
|
const hasImages = result.normalized.filter(p => p.imageUrl).length;
|
||||||
|
console.log(`With images: ${hasImages} (${Math.round(hasImages / result.totalCaptured * 100)}%)`);
|
||||||
|
|
||||||
|
console.log('\n' + '='.repeat(60));
|
||||||
|
console.log('TEST PASSED');
|
||||||
|
console.log('='.repeat(60));
|
||||||
|
|
||||||
|
} catch (error: any) {
|
||||||
|
console.error('\n' + '='.repeat(60));
|
||||||
|
console.error('TEST FAILED');
|
||||||
|
console.error('='.repeat(60));
|
||||||
|
console.error(`Error: ${error.message}`);
|
||||||
|
console.error(error.stack);
|
||||||
|
process.exit(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
main().catch(console.error);
|
||||||
160
backend/scripts/test-treez-containers.ts
Normal file
160
backend/scripts/test-treez-containers.ts
Normal file
@@ -0,0 +1,160 @@
|
|||||||
|
/**
|
||||||
|
* Find the correct product card container selector
|
||||||
|
*/
|
||||||
|
|
||||||
|
import puppeteer, { Page } from 'puppeteer';
|
||||||
|
|
||||||
|
const STORE_ID = 'best';
|
||||||
|
|
||||||
|
/**
 * Pause the calling async flow for a fixed amount of time.
 *
 * @param ms - Delay in milliseconds, handed straight to setTimeout.
 * @returns A promise that resolves (with no value) once the timer fires.
 */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
|
||||||
|
|
||||||
|
/**
 * Dismiss the age-verification modal when one is present on the page.
 *
 * Looks up the modal by its data-testid; when it exists, clicks the submit
 * button (if that can be found) and then pauses for two seconds. Does nothing
 * when the modal is absent.
 *
 * @param page - Puppeteer page to inspect.
 */
async function bypassAgeGate(page: Page): Promise<void> {
  const modal = await page.$('[data-testid="age-gate-modal"]');
  if (!modal) {
    return;
  }

  const confirmButton = await page.$('[data-testid="age-gate-submit-button"]');
  if (confirmButton) {
    await confirmButton.click();
  }

  // The pause happens whenever the modal was seen, even if no button was found.
  await sleep(2000);
}
|
||||||
|
|
||||||
|
/**
 * Probe the Treez brands page to work out which element wraps a product card.
 *
 * Runs three in-page analyses and prints their results:
 *   1. For every product-name H5, walk up at most 10 ancestors and tally the
 *      first ancestor whose class contains "ProductCard".
 *   2. For a fixed list of candidate selectors, count the matches and how many
 *      of them contain a name / a price element.
 *   3. List every distinct (by class string) element that contains both a
 *      product name and a price element.
 *
 * The browser is always closed, including when navigation or an evaluation
 * throws; the error itself still propagates to the caller.
 */
async function main() {
  console.log('Finding Treez product card containers...\n');

  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  // try/finally so a failed goto/evaluate does not leave the browser running.
  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // Drop heavy resources that are irrelevant for DOM inspection.
    await page.setRequestInterception(true);
    page.on('request', (req) => {
      if (['image', 'font', 'media'].includes(req.resourceType())) {
        req.abort();
      } else {
        req.continue();
      }
    });

    const url = `https://${STORE_ID}.treez.io/onlinemenu/brands?customerType=ADULT`;
    await page.goto(url, { waitUntil: 'networkidle2', timeout: 60000 });
    await sleep(3000);
    await bypassAgeGate(page);
    await sleep(2000);

    // Find product card containers by walking up from each product-name element
    const analysis = await page.evaluate(() => {
      // Strategy: find all H5 elements (which contain names), then get their parent containers
      const nameElements = document.querySelectorAll('h5.product_product__name__JcEk0');
      const containers: Map<string, { count: number; sample: string }> = new Map();

      nameElements.forEach(nameEl => {
        // Walk up to find the product card container
        let current = nameEl.parentElement;
        let depth = 0;

        while (current && depth < 10) {
          const className = current.className?.toString?.() || '';

          // Look for ProductCard in the class name
          if (className.includes('ProductCard')) {
            const key = className.slice(0, 100);
            const existing = containers.get(key) || { count: 0, sample: '' };
            existing.count++;
            if (!existing.sample) {
              existing.sample = current.outerHTML.slice(0, 300);
            }
            containers.set(key, existing);
            break;
          }

          current = current.parentElement;
          depth++;
        }
      });

      return Array.from(containers.entries()).map(([cls, data]) => ({
        class: cls,
        count: data.count,
        sample: data.sample,
      }));
    });

    console.log('Product card containers found:');
    analysis.forEach(({ class: cls, count, sample }) => {
      console.log(`\n[${count}x] ${cls}`);
      console.log(`Sample: ${sample.slice(0, 200)}...`);
    });

    // Now test various container selectors
    console.log('\n\n--- Testing container selectors ---');

    const selectorTests = await page.evaluate(() => {
      const tests: Record<string, { total: number; withName: number; withPrice: number }> = {};

      const selectors = [
        '[class*="ProductCardWithBtn"]',
        '[class*="ProductCard_product"]',
        '[class*="ProductCard__"]',
        'article[class*="product"]',
        'div[class*="ProductCard"]',
        'a[class*="ProductCard"]',
        '[class*="product_product__"][class*="link"]',
        'article',
      ];

      selectors.forEach(sel => {
        const elements = document.querySelectorAll(sel);
        let withName = 0;
        let withPrice = 0;

        elements.forEach(el => {
          if (el.querySelector('h5, [class*="product__name"]')) withName++;
          if (el.querySelector('[class*="price"]')) withPrice++;
        });

        tests[sel] = { total: elements.length, withName, withPrice };
      });

      return tests;
    });

    Object.entries(selectorTests).forEach(([sel, { total, withName, withPrice }]) => {
      console.log(`${sel}: ${total} total, ${withName} with name, ${withPrice} with price`);
    });

    // Get the actual product card class pattern
    console.log('\n\n--- Finding exact product card class ---');

    const exactClasses = await page.evaluate(() => {
      // Find elements that have both h5 name AND price child
      const allElements = document.querySelectorAll('*');
      const matches: { tag: string; class: string }[] = [];
      // Dedupe on the FULL class string. The reported value is truncated to
      // 150 chars, so comparing against it would never match long class lists.
      const seenClasses = new Set<string>();

      allElements.forEach(el => {
        const hasName = el.querySelector('h5.product_product__name__JcEk0');
        const hasPrice = el.querySelector('[class*="price__ins"], [class*="price__"]');

        if (hasName && hasPrice) {
          const className = el.className?.toString?.() || '';
          if (className && !seenClasses.has(className)) {
            seenClasses.add(className);
            matches.push({ tag: el.tagName, class: className.slice(0, 150) });
          }
        }
      });

      return matches;
    });

    console.log('Elements containing both name and price:');
    exactClasses.forEach(({ tag, class: cls }) => {
      console.log(` [${tag}] ${cls}`);
    });
  } finally {
    await browser.close();
  }
}
|
||||||
|
|
||||||
|
main().catch(console.error);
|
||||||
559
backend/scripts/test-treez-discovery.ts
Normal file
559
backend/scripts/test-treez-discovery.ts
Normal file
@@ -0,0 +1,559 @@
|
|||||||
|
/**
|
||||||
|
* Treez Platform Smoke Test
|
||||||
|
*
|
||||||
|
* Discovers DOM structure and extracts products from Treez menu pages.
|
||||||
|
* Used to determine actual CSS selectors for the platform client.
|
||||||
|
*
|
||||||
|
* Usage: npx ts-node scripts/test-treez-discovery.ts
|
||||||
|
*/
|
||||||
|
|
||||||
|
import puppeteer, { Page } from 'puppeteer';
|
||||||
|
import puppeteerExtra from 'puppeteer-extra';
|
||||||
|
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
|
||||||
|
|
||||||
|
// Register stealth plugin (even though Treez doesn't use Cloudflare, good practice)
|
||||||
|
puppeteerExtra.use(StealthPlugin());
|
||||||
|
|
||||||
|
const TEST_URL = 'https://best.treez.io/onlinemenu/?customerType=ADULT';
|
||||||
|
const STORE_ID = 'best';
|
||||||
|
|
||||||
|
/**
 * Shape of one product record as scraped from a Treez menu page,
 * before any normalization.
 */
interface TreezProductRaw {
  // Identity
  productId: string;
  name: string;
  brand: string;

  // Classification
  category: string;
  subcategory: string;

  // Potency figures; null when no value was found
  thcPercent: number | null;
  cbdPercent: number | null;

  // Pricing; price is null when no price was found
  price: number | null;
  priceUnit: string;

  // Presentation / availability
  imageUrl: string | null;
  inStock: boolean;
  weight: string | null;
}
|
||||||
|
|
||||||
|
/**
 * Promise-based delay helper.
 *
 * @param ms - How long to wait, in milliseconds.
 * @returns A promise that settles (with no value) after the delay.
 */
async function sleep(ms: number): Promise<void> {
  const timer = new Promise<void>((wake) => {
    setTimeout(wake, ms);
  });
  return timer;
}
|
||||||
|
|
||||||
|
/**
 * Repeatedly scroll to the bottom of the page so lazily loaded products render.
 *
 * Each round reads document.body.scrollHeight, scrolls to it, waits 1.5s and
 * logs a rough element count. Scrolling stops after `maxScrolls` rounds, or as
 * soon as the height has been unchanged for three consecutive readings.
 *
 * @param page - Puppeteer page showing the menu.
 * @param maxScrolls - Upper bound on scroll rounds (default 30).
 * @returns The number of scroll rounds actually performed.
 */
async function scrollToLoadAll(page: Page, maxScrolls = 30): Promise<number> {
  let lastHeight = 0;
  let roundsDone = 0;
  let stalledReadings = 0;

  console.log('[Scroll] Starting infinite scroll...');

  while (roundsDone < maxScrolls) {
    const height = await page.evaluate(() => document.body.scrollHeight);

    // Track how many readings in a row produced the same height.
    stalledReadings = height === lastHeight ? stalledReadings + 1 : 0;
    if (stalledReadings >= 3) {
      console.log('[Scroll] No new content after 3 attempts, stopping');
      break;
    }

    await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
    await sleep(1500); // give newly requested products time to render

    lastHeight = height;
    roundsDone += 1;

    // Rough progress indicator: size of the first candidate selector that
    // matches more than 10 elements, or 0 when none does.
    const approxProducts = await page.evaluate(() => {
      const candidates = [
        '[class*="product"]',
        '[class*="Product"]',
        '[data-product]',
        '.menu-item',
        '[class*="card"]',
        '[class*="Card"]',
      ];

      const firstLarge = candidates
        .map((sel) => document.querySelectorAll(sel).length)
        .find((matchCount) => matchCount > 10);
      return firstLarge ?? 0;
    });

    console.log(`[Scroll] Scroll ${roundsDone}: height=${height}, products~${approxProducts}`);
  }

  return roundsDone;
}
|
||||||
|
|
||||||
|
/**
 * Collect the distinct CSS class tokens on the page that contain `needle`
 * (case-insensitive), capped at the first 20 found.
 *
 * @param page - Puppeteer page to inspect.
 * @param needle - Lower-case fragment to look for inside class tokens.
 */
async function collectClassesContaining(page: Page, needle: string): Promise<string[]> {
  return page.evaluate((fragment: string) => {
    const classes = new Set<string>();
    document.querySelectorAll('*').forEach((el) => {
      const className = el.className;
      // className is not a string on every element type (e.g. SVG), hence the guard.
      if (typeof className === 'string' && className.toLowerCase().includes(fragment)) {
        className.split(' ').forEach((c) => {
          if (c.toLowerCase().includes(fragment)) {
            classes.add(c);
          }
        });
      }
    });
    return Array.from(classes).slice(0, 20);
  }, needle);
}

/**
 * Analyze DOM structure to find product selectors.
 *
 * Prints, in order: class tokens containing "product", class tokens containing
 * "card", data-* attribute names (excluding ones containing "reactid"), and a
 * sample of the first element matching a known product-container selector. When
 * none of the known selectors match, falls back to the most frequently repeated
 * `div` class (more than 5 occurrences, more than 2 children).
 *
 * @param page - Puppeteer page to inspect.
 */
async function analyzeDOM(page: Page): Promise<void> {
  console.log('\n' + '='.repeat(60));
  console.log('DOM STRUCTURE ANALYSIS');
  console.log('='.repeat(60));

  // Find elements with "product" in class name
  const productClasses = await collectClassesContaining(page, 'product');
  console.log('\n[Classes containing "product"]:');
  productClasses.forEach((c: string) => console.log(` .${c}`));

  // Find elements with "card" in class name
  const cardClasses = await collectClassesContaining(page, 'card');
  console.log('\n[Classes containing "card"]:');
  cardClasses.forEach((c: string) => console.log(` .${c}`));

  // Find data attributes
  const dataAttrs = await page.evaluate(() => {
    const attrs = new Set<string>();
    document.querySelectorAll('*').forEach((el) => {
      Array.from(el.attributes).forEach((attr) => {
        if (attr.name.startsWith('data-') && !attr.name.includes('reactid')) {
          attrs.add(attr.name);
        }
      });
    });
    return Array.from(attrs).slice(0, 30);
  });

  console.log('\n[Data attributes found]:');
  dataAttrs.forEach((attr: string) => console.log(` ${attr}`));

  // Get sample HTML of potential product container
  const sampleHTML = await page.evaluate(() => {
    type Sample = { selector: string; html: string; childCount: number; count?: number };

    const describe = (selector: string, el: Element, count?: number): Sample => {
      const sample: Sample = {
        selector,
        html: el.outerHTML.slice(0, 2000),
        childCount: el.children.length,
      };
      if (count !== undefined) {
        sample.count = count;
      }
      return sample;
    };

    // Try to find a product container
    const selectors = [
      '[class*="ProductCard"]',
      '[class*="product-card"]',
      '[class*="menuItem"]',
      '[class*="menu-item"]',
      '[data-testid*="product"]',
    ];

    for (const sel of selectors) {
      const el = document.querySelector(sel);
      if (el) {
        return describe(sel, el);
      }
    }

    // Fallback: find repeating structures
    const classCounts = new Map<string, number>();
    document.querySelectorAll('div[class]').forEach((el) => {
      if (el.children.length > 2 && el.className) {
        classCounts.set(el.className, (classCounts.get(el.className) || 0) + 1);
      }
    });

    // Find class that appears many times (likely product cards)
    let bestClass = '';
    let bestCount = 0;
    classCounts.forEach((count, className) => {
      if (count > bestCount && count > 5) {
        bestCount = count;
        bestClass = className;
      }
    });

    // Tokenize on any whitespace and drop empties so a leading space in the
    // class attribute cannot yield the invalid selector ".". Escape the token
    // because class names may contain characters that are special in CSS.
    const firstToken = bestClass.split(/\s+/).filter(Boolean)[0];
    if (firstToken) {
      const selector = `.${CSS.escape(firstToken)}`;
      const el = document.querySelector(selector);
      if (el) {
        return describe(selector, el, bestCount);
      }
    }

    return null;
  });

  if (sampleHTML) {
    console.log('\n[Sample Product Container]:');
    console.log(` Selector: ${sampleHTML.selector}`);
    console.log(` Children: ${sampleHTML.childCount}`);
    if (sampleHTML.count) {
      console.log(` Occurrences: ${sampleHTML.count}`);
    }
    console.log('\n[Sample HTML (first 1000 chars)]:');
    console.log(sampleHTML.html.slice(0, 1000));
  }
}
|
||||||
|
|
||||||
|
/**
 * Extract products using discovered selectors.
 * Based on DOM analysis of the Treez/GapCommerce React app.
 *
 * The scraping runs inside the page via page.evaluate. Anything the in-page
 * code wants to report (which selector matched, per-product extraction errors)
 * is returned alongside the products and printed here, because console output
 * produced inside page.evaluate goes to the browser console, not to Node.
 *
 * Products are de-duplicated by name; elements without a name are skipped.
 *
 * @param page - Puppeteer page with the menu already loaded.
 * @returns The raw product records found on the page (possibly empty).
 */
async function extractProducts(page: Page): Promise<TreezProductRaw[]> {
  console.log('\n' + '='.repeat(60));
  console.log('PRODUCT EXTRACTION');
  console.log('='.repeat(60));

  const { products, matchedSelector, candidateCount, errors } = await page.evaluate(() => {
    const results: any[] = [];
    const errors: string[] = [];

    // Treez uses classes like: product_product__ERWtJ
    // Find all product cards using the discovered class patterns
    const productSelectors = [
      '[class*="product_product__"]', // Main product container
      '[class*="ProductCard"]', // Alternative pattern
    ];

    let productElements: Element[] = [];
    let matchedSelector = '';

    for (const selector of productSelectors) {
      const elements = document.querySelectorAll(selector);
      // Keep elements that contain a name OR a price element; entries that
      // turn out to have no name are dropped in the extraction loop below.
      const filtered = Array.from(elements).filter(el => {
        const hasName = el.querySelector('[class*="product__name"]') || el.querySelector('[class*="name__"]');
        const hasPrice = el.querySelector('[class*="price"]');
        return hasName || hasPrice;
      });

      if (filtered.length > 0) {
        productElements = filtered;
        matchedSelector = selector;
        break;
      }
    }

    // Dedupe - some cards may be captured multiple times
    const seen = new Set<string>();

    // Extract data from each product element
    for (const el of productElements) {
      try {
        // Get product name - look for name class
        const nameEl = el.querySelector('[class*="product__name"], [class*="name__"]');
        const name = nameEl?.textContent?.trim() || '';

        if (!name || seen.has(name)) continue;
        seen.add(name);

        // Get product ID from link. querySelector only searches descendants,
        // so also accept the case where the matched element is the link itself.
        const linkSelector = 'a[href*="/product/"]';
        const linkEl = el.matches(linkSelector) ? el : el.querySelector(linkSelector);
        let productId = '';
        if (linkEl) {
          const href = linkEl.getAttribute('href') || '';
          const match = href.match(/\/product\/([^\/\?]+)/);
          productId = match ? match[1] : '';
        }
        if (!productId) {
          productId = `treez_${name.replace(/\s+/g, '_').toLowerCase().slice(0, 30)}`;
        }

        // Get brand from the info section
        const brandEl = el.querySelector('[class*="brand"], [class*="Brand"]');
        const brand = brandEl?.textContent?.trim() || '';

        // Get price. A selector list returns the first match in document
        // order, not the first selector that matches, so the preferred
        // "price__ins" element is queried on its own before falling back.
        const priceEl = el.querySelector('[class*="price__ins"]') ?? el.querySelector('[class*="price"]');
        const priceText = priceEl?.textContent || '';
        // Allow thousands separators ("$1,200.00") and strip them before parsing.
        const priceMatch = priceText.match(/\$(\d[\d,]*(?:\.\d{2})?)/);
        const price = priceMatch ? parseFloat(priceMatch[1].replace(/,/g, '')) : null;

        // Get image URL
        const imgEl = el.querySelector('img');
        let imageUrl = imgEl?.getAttribute('src') || null;
        // Handle Next.js image optimization URLs
        if (imageUrl && imageUrl.includes('/_next/image')) {
          const urlMatch = imageUrl.match(/url=([^&]+)/);
          if (urlMatch) {
            imageUrl = decodeURIComponent(urlMatch[1]);
          }
        }

        // Get text content for THC/CBD extraction
        const text = el.textContent || '';

        // Get THC/CBD - look for patterns like "THC 25.5%" or "25.5% THC"
        const thcMatch = text.match(/(?:THC[:\s]*)?(\d+(?:\.\d+)?)\s*%?\s*THC/i) ||
          text.match(/THC[:\s]*(\d+(?:\.\d+)?)\s*%?/i);
        const cbdMatch = text.match(/(?:CBD[:\s]*)?(\d+(?:\.\d+)?)\s*%?\s*CBD/i) ||
          text.match(/CBD[:\s]*(\d+(?:\.\d+)?)\s*%?/i);
        const thcPercent = thcMatch ? parseFloat(thcMatch[1]) : null;
        const cbdPercent = cbdMatch ? parseFloat(cbdMatch[1]) : null;

        // Get weight from name or text (e.g., "3.5G", "1G")
        const weightMatch = name.match(/(\d+(?:\.\d+)?)\s*(G|g|MG|mg|OZ|oz)/i) ||
          text.match(/(\d+(?:\.\d+)?)\s*(G|g|MG|mg|OZ|oz)/i);
        const weight = weightMatch ? `${weightMatch[1]}${weightMatch[2].toLowerCase()}` : null;

        // Price unit from weight
        const priceUnit = weight || '';

        // Get strain type
        const strainTypes = ['indica', 'sativa', 'hybrid'];
        let subcategory = '';
        const textLower = text.toLowerCase();
        for (const strain of strainTypes) {
          if (textLower.includes(strain)) {
            subcategory = strain;
            break;
          }
        }

        // Determine category from various signals
        let category = '';
        const categoryPatterns = [
          { pattern: /flower|bud/i, category: 'flower' },
          { pattern: /vape|cart|pen/i, category: 'vape' },
          { pattern: /edible|gummy|chocolate/i, category: 'edible' },
          { pattern: /concentrate|dab|wax|shatter/i, category: 'concentrate' },
          { pattern: /pre.?roll|joint/i, category: 'pre-roll' },
          { pattern: /topical|balm|cream/i, category: 'topical' },
          { pattern: /tincture/i, category: 'tincture' },
        ];
        for (const { pattern, category: cat } of categoryPatterns) {
          if (pattern.test(text)) {
            category = cat;
            break;
          }
        }

        // Check stock status
        const inStock = !textLower.includes('out of stock') && !textLower.includes('sold out');

        results.push({
          productId,
          name,
          brand,
          category,
          subcategory,
          thcPercent,
          cbdPercent,
          price,
          priceUnit,
          imageUrl,
          inStock,
          weight,
        });
      } catch (err) {
        // Collected and reported from Node after evaluate returns.
        errors.push(String(err));
      }
    }

    return {
      products: results,
      matchedSelector,
      candidateCount: productElements.length,
      errors,
    };
  });

  if (matchedSelector) {
    console.log(`Found ${candidateCount} products with selector: ${matchedSelector}`);
  }
  for (const message of errors) {
    console.log('Error extracting product:', message);
  }

  return products;
}
|
||||||
|
|
||||||
|
/**
 * Click through the age-verification gate when the page shows one.
 *
 * Performs a single lookup for the gate (no waiting). When the gate and its
 * submit button are both found, clicks the button, pauses two seconds, then
 * waits up to 10s for the modal to leave the DOM. A timeout on that wait is
 * logged and otherwise ignored.
 *
 * @param page - Puppeteer page to inspect.
 * @returns true when the submit button was clicked; false when there was no
 *          gate, no button, or an error occurred (errors are logged, not thrown).
 */
async function bypassAgeGate(page: Page): Promise<boolean> {
  console.log('[Age Gate] Checking for age gate...');

  try {
    const gate = await page.$('[data-testid="age-gate-modal"], [class*="AgeGate"]');
    if (!gate) {
      console.log('[Age Gate] No age gate detected');
      return false;
    }

    console.log('[Age Gate] Age gate detected, clicking confirm button...');

    const confirmButton = await page.$('[data-testid="age-gate-submit-button"], button[type="submit"]');
    if (!confirmButton) {
      console.log('[Age Gate] No submit button found');
      return false;
    }

    await confirmButton.click();
    console.log('[Age Gate] Clicked confirm button');

    // Give the modal a moment to start closing before polling for its removal.
    await sleep(2000);

    try {
      await page.waitForFunction(
        () => !document.querySelector('[data-testid="age-gate-modal"]'),
        { timeout: 10000 }
      );
    } catch {
      // Best effort: carry on even if the modal is still in the DOM.
      console.log('[Age Gate] Gate may still be visible, continuing anyway');
    }

    console.log('[Age Gate] Age gate bypassed');
    return true;
  } catch (err: any) {
    console.log(`[Age Gate] Error: ${err.message}`);
    return false;
  }
}
|
||||||
|
|
||||||
|
/**
 * Entry point of the Treez smoke test.
 *
 * Launches a stealth-enabled headless browser, opens TEST_URL, passes the age
 * gate, saves a screenshot, runs the DOM analysis, scrolls to load products,
 * extracts them and prints data-quality statistics plus the first 10 products.
 * Results go to /tmp/treez-products.json; when nothing was extracted the page
 * HTML is dumped to /tmp/treez-page.html instead.
 *
 * On failure the error is printed and the process exit code is set to 1. The
 * exit code is set via process.exitCode rather than process.exit() so that the
 * `finally` block still runs and the browser is closed before the process ends.
 */
async function main() {
  console.log('='.repeat(60));
  console.log('TREEZ PLATFORM SMOKE TEST');
  console.log('='.repeat(60));
  console.log(`Store ID: ${STORE_ID}`);
  console.log(`URL: ${TEST_URL}`);
  console.log('');

  const browser = await puppeteerExtra.launch({
    headless: true,
    args: [
      '--no-sandbox',
      '--disable-setuid-sandbox',
      '--disable-dev-shm-usage',
      '--disable-blink-features=AutomationControlled',
    ],
  });

  try {
    const page = await browser.newPage();

    // Set viewport
    await page.setViewport({ width: 1920, height: 1080 });

    // Set user agent
    await page.setUserAgent(
      'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36'
    );

    console.log('[Navigation] Going to Treez menu page...');
    await page.goto(TEST_URL, {
      waitUntil: 'networkidle2',
      timeout: 60000,
    });

    console.log('[Navigation] Page loaded, waiting for React app...');
    await sleep(2000);

    // Bypass age gate
    await bypassAgeGate(page);

    // Wait for menu content to load
    console.log('[Navigation] Waiting for menu content...');
    await sleep(3000);

    // Check if page loaded correctly
    const pageTitle = await page.title();
    console.log(`[Navigation] Page title: ${pageTitle}`);

    // Take a screenshot for debugging
    await page.screenshot({ path: '/tmp/treez-smoke-test.png', fullPage: false });
    console.log('[Debug] Screenshot saved to /tmp/treez-smoke-test.png');

    // Analyze DOM structure
    await analyzeDOM(page);

    // Scroll to load all products
    await scrollToLoadAll(page);

    // Extract products
    const products = await extractProducts(page);

    console.log('\n' + '='.repeat(60));
    console.log('RESULTS');
    console.log('='.repeat(60));
    console.log(`Total products extracted: ${products.length}`);

    // Both branches below write a debug file.
    const fs = await import('fs');

    if (products.length > 0) {
      // Show statistics
      const withPrice = products.filter((p) => p.price !== null).length;
      const withThc = products.filter((p) => p.thcPercent !== null).length;
      const withBrand = products.filter((p) => p.brand).length;
      const withImage = products.filter((p) => p.imageUrl).length;

      console.log(`\n[Data Quality]`);
      console.log(` With price: ${withPrice}/${products.length} (${Math.round((withPrice / products.length) * 100)}%)`);
      console.log(` With THC%: ${withThc}/${products.length} (${Math.round((withThc / products.length) * 100)}%)`);
      console.log(` With brand: ${withBrand}/${products.length} (${Math.round((withBrand / products.length) * 100)}%)`);
      console.log(` With image: ${withImage}/${products.length} (${Math.round((withImage / products.length) * 100)}%)`);

      // Show sample products
      console.log('\n[Sample Products (first 10)]:');
      for (const p of products.slice(0, 10)) {
        console.log(`\n ${p.name}`);
        console.log(` ID: ${p.productId}`);
        console.log(` Brand: ${p.brand || 'N/A'}`);
        console.log(` Category: ${p.category || 'N/A'} / ${p.subcategory || 'N/A'}`);
        console.log(` THC: ${p.thcPercent !== null ? p.thcPercent + '%' : 'N/A'}`);
        console.log(` CBD: ${p.cbdPercent !== null ? p.cbdPercent + '%' : 'N/A'}`);
        console.log(` Price: ${p.price !== null ? '$' + p.price : 'N/A'} ${p.priceUnit}`);
        console.log(` Weight: ${p.weight || 'N/A'}`);
        console.log(` Image: ${p.imageUrl?.slice(0, 60) || 'N/A'}...`);
        console.log(` In Stock: ${p.inStock}`);
      }

      // Save full results to file
      fs.writeFileSync('/tmp/treez-products.json', JSON.stringify(products, null, 2));
      console.log('\n[Debug] Full product list saved to /tmp/treez-products.json');
    } else {
      console.log('\n[WARNING] No products extracted!');
      console.log('Check /tmp/treez-smoke-test.png for page state');

      // Dump page HTML for debugging
      const html = await page.content();
      fs.writeFileSync('/tmp/treez-page.html', html);
      console.log('[Debug] Page HTML saved to /tmp/treez-page.html');
    }

    console.log('\n' + '='.repeat(60));
    console.log(products.length > 0 ? 'SMOKE TEST PASSED' : 'SMOKE TEST NEEDS ADJUSTMENT');
    console.log('='.repeat(60));
  } catch (error: any) {
    console.error('\n' + '='.repeat(60));
    console.error('SMOKE TEST FAILED');
    console.error('='.repeat(60));
    console.error(`Error: ${error.message}`);
    console.error(error.stack);
    // Not process.exit(1): that would end the process immediately and skip
    // the finally block, leaving the browser process running.
    process.exitCode = 1;
  } finally {
    await browser.close();
  }
}
|
||||||
|
|
||||||
|
main().catch(console.error);
|
||||||
191
backend/scripts/test-treez-find-brands.ts
Normal file
191
backend/scripts/test-treez-find-brands.ts
Normal file
@@ -0,0 +1,191 @@
|
|||||||
|
/**
|
||||||
|
* Find actual brand elements on /brands page
|
||||||
|
*/
|
||||||
|
|
||||||
|
import puppeteer, { Page } from 'puppeteer';
|
||||||
|
|
||||||
|
const STORE_ID = 'best';
|
||||||
|
|
||||||
|
/**
 * Wait for the given number of milliseconds.
 *
 * @param ms - Delay in milliseconds.
 * @returns A promise resolved (with no value) by a setTimeout callback.
 */
async function sleep(ms: number): Promise<void> {
  return new Promise<void>((finish) => {
    setTimeout(finish, ms);
  });
}
|
||||||
|
|
||||||
|
/**
 * Get past the age-verification modal if the page is showing one.
 *
 * When the modal is found, logs a notice, clicks the submit button if it
 * exists, and pauses two seconds. Returns immediately when there is no modal.
 *
 * @param page - Puppeteer page to inspect.
 */
async function bypassAgeGate(page: Page): Promise<void> {
  const gateModal = await page.$('[data-testid="age-gate-modal"]');
  if (gateModal === null) {
    return;
  }

  console.log(' Age gate detected, bypassing...');

  const submitButton = await page.$('[data-testid="age-gate-submit-button"]');
  if (submitButton) {
    await submitButton.click();
  }

  // The pause is unconditional once the modal has been detected.
  await sleep(2000);
}
|
||||||
|
|
||||||
|
/**
 * Collect the unique `href` attribute values of every anchor whose href
 * contains "/brand/", in order of first appearance in the document.
 *
 * @param page - Puppeteer page to query.
 * @returns De-duplicated list of raw href attribute values.
 */
async function collectBrandHrefs(page: Page): Promise<string[]> {
  return page.evaluate(() => {
    // A Set keeps insertion order and avoids the O(n^2) includes() scan.
    const hrefs = new Set<string>();
    document.querySelectorAll('a[href*="/brand/"]').forEach((a: Element) => {
      const href = a.getAttribute('href');
      if (href) hrefs.add(href);
    });
    return [...hrefs];
  });
}

/**
 * Exploratory probe of the store's /brands page.
 *
 * Opens the page in headless Chromium (images, fonts and media are blocked),
 * dismisses the age gate, then prints:
 *   [1] anchors whose href or class mentions "brand",
 *   [2] navigation links,
 *   [3] hit counts for a list of candidate brand-card selectors,
 *   [4] unique "/brand/" hrefs,
 *   [5] a viewport screenshot saved under /tmp,
 *   [6] brand/product link counts across ten scroll-to-bottom passes,
 *   [7] the unique "/brand/" hrefs after scrolling.
 *
 * The browser is always closed, including when a step throws.
 */
async function main() {
  console.log('='.repeat(60));
  console.log('Finding Brand Elements on /brands page');
  console.log('='.repeat(60));

  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // Skip heavy resources; only the DOM structure matters for this probe.
    await page.setRequestInterception(true);
    page.on('request', (req) => {
      if (['image', 'font', 'media'].includes(req.resourceType())) {
        req.abort();
      } else {
        req.continue();
      }
    });

    const url = `https://${STORE_ID}.treez.io/onlinemenu/brands?customerType=ADULT`;
    console.log(`\nNavigating to ${url}`);

    await page.goto(url, { waitUntil: 'networkidle2', timeout: 60000 });
    await sleep(3000);
    await bypassAgeGate(page);
    await sleep(2000);

    // Check current URL
    const currentUrl = page.url();
    console.log(`\nCurrent URL: ${currentUrl}`);

    // Look for ANY links on the page that might be brand links
    console.log('\n[1] Looking for all anchor links with "brand" in href or class...');

    const brandLinks = await page.evaluate(() => {
      const links: { href: string; text: string }[] = [];

      document.querySelectorAll('a').forEach((a: Element) => {
        const href = a.getAttribute('href') || '';
        const text = a.textContent?.trim().slice(0, 50) || '';
        // Read the attribute instead of `className`: on SVG <a> elements
        // `className` is an SVGAnimatedString with no `.includes`, which would throw.
        const className = a.getAttribute('class') || '';

        if (href.includes('brand') || href.includes('Brand') ||
            className.includes('brand') || className.includes('Brand')) {
          links.push({ href, text });
        }
      });

      return links;
    });

    console.log(`Found ${brandLinks.length} brand-related links:`);
    brandLinks.slice(0, 30).forEach(l => console.log(` "${l.text}" → ${l.href}`));

    // Look for the navigation/dropdown
    console.log('\n[2] Looking at navigation structure...');

    const navItems = await page.evaluate(() => {
      const items: string[] = [];

      document.querySelectorAll('nav a, [class*="nav"] a, header a').forEach((a: Element) => {
        const text = a.textContent?.trim();
        const href = a.getAttribute('href') || '';
        if (text && text.length < 30) {
          items.push(`${text} (${href})`);
        }
      });

      return [...new Set(items)];
    });

    console.log('Navigation items:');
    navItems.forEach(item => console.log(` - ${item}`));

    // Look for grid containers that might hold brand cards
    console.log('\n[3] Looking for brand card containers...');

    const containers = await page.evaluate(() => {
      const results: { selector: string; count: number; sample: string }[] = [];

      // Try various selectors for brand cards
      const selectors = [
        '[class*="brand_brand"]',
        '[class*="brands_brand"]',
        '[class*="brand-card"]',
        '[class*="brandCard"]',
        '[class*="BrandCard"]',
        'a[href*="/brand/"]',
        '[data-testid*="brand"]',
      ];

      selectors.forEach(sel => {
        const els = document.querySelectorAll(sel);
        if (els.length > 0) {
          const first = els[0];
          results.push({
            selector: sel,
            count: els.length,
            sample: first.textContent?.trim().slice(0, 50) || '',
          });
        }
      });

      return results;
    });

    console.log('Brand containers found:');
    containers.forEach(c => console.log(` ${c.selector}: ${c.count} elements, sample: "${c.sample}"`));

    // Get ALL unique hrefs that contain /brand/
    console.log('\n[4] All links containing "/brand/" in href...');

    const brandHrefs = await collectBrandHrefs(page);

    console.log(`Found ${brandHrefs.length} unique brand hrefs:`);
    brandHrefs.forEach(href => console.log(` ${href}`));

    // Take screenshot
    await page.screenshot({ path: '/tmp/treez-brands-page.png', fullPage: false });
    console.log('\n[5] Screenshot saved to /tmp/treez-brands-page.png');

    // Scroll and see if more brands load
    console.log('\n[6] Scrolling to load more brands...');

    for (let i = 0; i < 10; i++) {
      await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
      await sleep(1500);

      // These are raw anchor counts (not de-duplicated), to show growth per pass.
      const brandCount = await page.evaluate(() =>
        document.querySelectorAll('a[href*="/brand/"]').length
      );
      const productCount = await page.evaluate(() =>
        document.querySelectorAll('a[href*="/product/"]').length
      );

      console.log(` Scroll ${i + 1}: brand links=${brandCount}, product links=${productCount}`);
    }

    // Final brand href list
    const finalBrandHrefs = await collectBrandHrefs(page);

    console.log(`\n[7] Final brand href list (${finalBrandHrefs.length} brands):`);
    finalBrandHrefs.forEach(href => console.log(` ${href}`));
  } finally {
    // Always release the Chromium process, even if a step above threw.
    await browser.close();
  }
}
|
||||||
|
|
||||||
|
main().catch(console.error);
|
||||||
221
backend/scripts/test-treez-full-crawl.ts
Normal file
221
backend/scripts/test-treez-full-crawl.ts
Normal file
@@ -0,0 +1,221 @@
|
|||||||
|
/**
|
||||||
|
* Full crawl: Visit each brand page and aggregate all products
|
||||||
|
*/
|
||||||
|
|
||||||
|
import puppeteer, { Page } from 'puppeteer';
|
||||||
|
|
||||||
|
const STORE_ID = 'best';
|
||||||
|
|
||||||
|
/**
 * Wait for the given number of milliseconds.
 *
 * @param ms - How long to wait, in milliseconds.
 * @returns A promise fulfilled with no value when the timer expires.
 */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((wake) => {
    setTimeout(wake, ms);
  });
}
|
||||||
|
|
||||||
|
/**
 * Click through the age-verification modal when one is present.
 *
 * Does nothing if the modal element is not found. When it is found, the
 * submit button is clicked (if it exists) and the function waits two seconds.
 *
 * @param page - Puppeteer page to inspect.
 */
async function bypassAgeGate(page: Page): Promise<void> {
  const modal = await page.$('[data-testid="age-gate-modal"]');
  if (!modal) {
    return;
  }

  const submitButton = await page.$('[data-testid="age-gate-submit-button"]');
  if (submitButton) {
    await submitButton.click();
  }

  await sleep(2000);
}
|
||||||
|
|
||||||
|
/**
 * Repeatedly scroll to the bottom of the page so lazily loaded content appears.
 *
 * Each pass reads `document.body.scrollHeight` before scrolling. The loop ends
 * after 30 passes, or as soon as the height has matched the previous pass's
 * height three times in a row.
 *
 * @param page - Puppeteer page to scroll.
 */
async function scrollToLoadAll(page: Page): Promise<void> {
  const maxPasses = 30;
  const stablePassesToStop = 3;

  let lastHeight = 0;
  let stablePasses = 0;
  let pass = 0;

  while (pass < maxPasses) {
    const height = await page.evaluate(() => document.body.scrollHeight);

    // Count consecutive passes with an unchanged height; any change resets the streak.
    stablePasses = height === lastHeight ? stablePasses + 1 : 0;
    if (stablePasses >= stablePassesToStop) {
      return;
    }

    await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
    await sleep(1000);

    lastHeight = height;
    pass += 1;
  }
}
|
||||||
|
|
||||||
|
/**
 * Scrape product cards from the current page.
 *
 * Every anchor whose href contains "/product/" is considered. For each one:
 *  - `name` is the nested image's `alt`, falling back to the trimmed text of a nested `h5`;
 *    anchors with no name are skipped;
 *  - `brand` is the part of the path segment after "/product/" that precedes the first hyphen;
 *  - `price` is the first "$<digits>[.<2 digits>]" amount found in a nested element whose
 *    class contains "price" (without the dollar sign), or '' when none is found;
 *  - only the first named anchor per href is kept.
 *
 * @param page - Puppeteer page to evaluate in.
 * @returns Products in document order.
 */
async function extractProducts(page: Page): Promise<{ name: string; brand: string; price: string; href: string }[]> {
  return page.evaluate(() => {
    // Map preserves insertion order, so values() yields products in document order.
    const byHref = new Map<string, { name: string; brand: string; price: string; href: string }>();

    for (const anchor of Array.from(document.querySelectorAll('a[href*="/product/"]'))) {
      const href = anchor.getAttribute('href') || '';
      const name =
        anchor.querySelector('img')?.getAttribute('alt') ||
        anchor.querySelector('h5')?.textContent?.trim() ||
        '';

      // Unnamed anchors are skipped without reserving their href.
      if (!name || byHref.has(href)) {
        continue;
      }

      // href pattern: /product/{brand}-{product}
      const slugMatch = href.match(/\/product\/([^\/]+)/);
      const slug = slugMatch ? slugMatch[1] : '';
      const brand = slug.split('-')[0] || '';

      const priceText = anchor.querySelector('[class*="price"]')?.textContent;
      const amount = priceText?.match(/\$(\d+(?:\.\d{2})?)/);
      const price = amount ? amount[1] : '';

      byHref.set(href, { name, brand, price, href });
    }

    return Array.from(byHref.values());
  });
}
|
||||||
|
|
||||||
|
/**
 * Full crawl: discover brand pages from the /brands page, visit each one and
 * aggregate the products found, then print a summary.
 *
 * Steps:
 *   1. Load the brands page and collect every unique "/brand/" href.
 *   2. Derive additional brand slugs from the leading part of "/product/" hrefs.
 *   3. Merge both into a set of absolute brand URLs.
 *   4. Visit each brand URL, scroll to load everything, and collect products keyed by href.
 *   5. Print totals, the 20 most frequent brands and 10 sample products.
 *
 * A failure on one brand page is logged and the crawl continues. The browser
 * is always closed, including when an unexpected error escapes.
 */
async function main() {
  console.log('='.repeat(60));
  console.log('Full Treez Crawl - All Brands');
  console.log('='.repeat(60));

  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // Skip heavy resources; only markup is needed.
    await page.setRequestInterception(true);
    page.on('request', (req) => {
      if (['image', 'font', 'media'].includes(req.resourceType())) {
        req.abort();
      } else {
        req.continue();
      }
    });

    // Step 1: Go to brands page and extract all brand links
    const brandsUrl = `https://${STORE_ID}.treez.io/onlinemenu/brands?customerType=ADULT`;
    console.log(`\n[1] Getting brand list...`);

    await page.goto(brandsUrl, { waitUntil: 'networkidle2', timeout: 60000 });
    await sleep(3000);
    await bypassAgeGate(page);
    await sleep(2000);

    // Get all brand links from the page
    const brandLinks = await page.evaluate(() => {
      const seen = new Set<string>();

      // Get all /brand/ links
      document.querySelectorAll('a[href*="/brand/"]').forEach(a => {
        const href = a.getAttribute('href') || '';
        if (href) seen.add(href);
      });

      return Array.from(seen);
    });

    console.log(`Found ${brandLinks.length} brand links: ${brandLinks.join(', ')}`);

    // Step 2: Also extract unique brands from product URLs
    const productBrands = await page.evaluate(() => {
      const brands = new Set<string>();

      document.querySelectorAll('a[href*="/product/"]').forEach(a => {
        const href = a.getAttribute('href') || '';
        // Pattern: /product/{brand}-{product}-...
        // Takes one or two leading hyphen-separated tokens as the brand guess.
        const match = href.match(/\/product\/([a-z0-9]+(?:-[a-z0-9]+)?)-/i);
        if (match) {
          brands.add(match[1].toLowerCase());
        }
      });

      return Array.from(brands);
    });

    console.log(`Found ${productBrands.length} brands from product URLs`);

    // Step 3: Build full brand URL list
    const allBrandUrls = new Set<string>();

    // Add direct brand links. Resolving against the brands page handles
    // root-relative, document-relative and already-absolute hrefs alike.
    brandLinks.forEach(link => {
      try {
        allBrandUrls.add(new URL(link, brandsUrl).href);
      } catch {
        console.log(` Skipping unparseable brand link: ${link}`);
      }
    });

    // Add brand URLs from product slugs
    productBrands.forEach(brand => {
      allBrandUrls.add(`https://${STORE_ID}.treez.io/brand/${encodeURIComponent(brand)}`);
    });

    console.log(`Total brand URLs to visit: ${allBrandUrls.size}`);

    // Step 4: Visit each brand page and collect products
    const allProducts = new Map<string, { name: string; brand: string; price: string; href: string }>();
    let visitedBrands = 0;

    for (const brandUrl of allBrandUrls) {
      try {
        // Add the customer type through searchParams so an existing query string stays valid.
        const target = new URL(brandUrl);
        if (!target.searchParams.has('customerType')) {
          target.searchParams.set('customerType', 'ADULT');
        }
        const fullUrl = target.href;
        console.log(`\n[${++visitedBrands}/${allBrandUrls.size}] Visiting: ${fullUrl}`);

        await page.goto(fullUrl, { waitUntil: 'networkidle2', timeout: 30000 });
        await sleep(1500);

        // Scroll to load all
        await scrollToLoadAll(page);

        const products = await extractProducts(page);
        console.log(` Found ${products.length} products`);

        // First occurrence of an href wins across brand pages.
        products.forEach(p => {
          if (!allProducts.has(p.href)) {
            allProducts.set(p.href, p);
          }
        });

        console.log(` Total unique so far: ${allProducts.size}`);
      } catch (error: any) {
        // Thrown values are not guaranteed to be Error instances.
        const message = error instanceof Error ? error.message : String(error);
        console.log(` Error: ${message.slice(0, 50)}`);
      }

      // Small delay between requests
      await sleep(500);
    }

    // Summary
    console.log('\n' + '='.repeat(60));
    console.log('SUMMARY');
    console.log('='.repeat(60));
    console.log(`Brands visited: ${visitedBrands}`);
    console.log(`Total unique products: ${allProducts.size}`);

    // Count by brand
    const brandCounts: Record<string, number> = {};
    allProducts.forEach(p => {
      brandCounts[p.brand] = (brandCounts[p.brand] || 0) + 1;
    });

    console.log('\nProducts by brand:');
    Object.entries(brandCounts)
      .sort((a, b) => b[1] - a[1])
      .slice(0, 20)
      .forEach(([brand, count]) => {
        console.log(` ${brand}: ${count}`);
      });

    // Sample products
    console.log('\nSample products:');
    Array.from(allProducts.values()).slice(0, 10).forEach(p => {
      console.log(` - ${p.name} | ${p.brand} | $${p.price || 'N/A'}`);
    });
  } finally {
    // Always release the Chromium process, even if a step above threw.
    await browser.close();
  }
}
|
||||||
|
|
||||||
|
main().catch(console.error);
|
||||||
57
backend/scripts/test-treez-headless-api.ts
Normal file
57
backend/scripts/test-treez-headless-api.ts
Normal file
@@ -0,0 +1,57 @@
|
|||||||
|
import axios from 'axios';
|
||||||
|
|
||||||
|
/**
 * Probe a list of candidate Treez headless API endpoints for one store.
 *
 * Each endpoint is requested with a 10 second timeout. For a successful
 * response the status and a short description of the body are printed:
 * array length plus first item, object keys plus a truncated dump, or the
 * truncated raw value for anything else. For a failure the HTTP status (or
 * the error message when no response was received) and a truncated error
 * body are printed. A failure on one endpoint never stops the loop.
 */
async function main() {
  const storeId = 'best';
  const baseUrl = `https://headless.treez.io/v2.0/dispensary/${storeId}`;

  // Try various endpoints
  const endpoints = [
    '/ecommerce/discounts?excludeInactive=true&hideUnset=true&includeProdInfo=true',
    '/ecommerce/products',
    '/products',
    '/menu',
    '/inventory',
    '/catalog',
  ];

  console.log('Testing Treez Headless API endpoints...\n');

  for (const endpoint of endpoints) {
    const url = baseUrl + endpoint;
    console.log(`GET ${url}`);

    try {
      const response = await axios.get(url, {
        headers: {
          'Content-Type': 'application/json',
          'Accept': 'application/json',
        },
        timeout: 10000,
      });

      console.log(` Status: ${response.status}`);
      const data = response.data;

      if (Array.isArray(data)) {
        console.log(` Array length: ${data.length}`);
        if (data.length > 0) {
          // Items may be primitives or null; only list keys for real objects.
          if (typeof data[0] === 'object' && data[0] !== null) {
            console.log(` First item keys: ${Object.keys(data[0]).join(', ')}`);
          }
          console.log(` Sample: ${String(JSON.stringify(data[0])).slice(0, 300)}`);
        }
      } else if (typeof data === 'object' && data !== null) {
        console.log(` Keys: ${Object.keys(data).join(', ')}`);
        console.log(` Sample: ${JSON.stringify(data).slice(0, 500)}`);
      } else {
        // null, strings (e.g. an HTML page) and other primitives: `typeof null`
        // is 'object', so without this branch a null body would hit Object.keys
        // and be misreported as a request error.
        console.log(` Body (${data === null ? 'null' : typeof data}): ${String(data).slice(0, 300)}`);
      }
      console.log('');
    } catch (err: any) {
      console.log(` Error: ${err?.response?.status || err?.message || String(err)}`);
      if (err?.response?.data) {
        console.log(` Data: ${String(JSON.stringify(err.response.data)).slice(0, 200)}`);
      }
      console.log('');
    }
  }
}
|
||||||
|
|
||||||
|
main();
|
||||||
166
backend/scripts/test-treez-inventory.ts
Normal file
166
backend/scripts/test-treez-inventory.ts
Normal file
@@ -0,0 +1,166 @@
|
|||||||
|
import puppeteer from 'puppeteer';
|
||||||
|
|
||||||
|
/**
 * Suspend the caller for `ms` milliseconds.
 *
 * @param ms - Timer duration in milliseconds.
 * @returns A promise that fulfils with no value after the delay.
 */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((finish) => {
    setTimeout(finish, ms);
  });
}
|
||||||
|
|
||||||
|
/**
 * Inspect one product detail page for any sign of stock/inventory data.
 *
 * Opens a fixed brand listing page, dismisses the age gate, follows the first
 * "/product/" link found, and then reports:
 *   - elements matching stock/inventory/quantity/availability selectors,
 *   - the state of the first button whose class mentions "add" or "cart",
 *   - whether the visible page text mentions those keywords,
 *   - `data-*` attributes on product/sku/variant elements,
 *   - parsed JSON-LD blocks,
 *   - the top-level keys (and product payload, if present) of `__NEXT_DATA__`.
 *
 * The browser is always closed, including on early return or error.
 */
async function main() {
  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // Start from a brand listing page; a product link is picked from it below.
    await page.goto('https://shop.bestdispensary.com/brand/dime', {
      waitUntil: 'networkidle2',
      timeout: 60000
    });
    await sleep(3000);

    // Bypass age gate
    const ageGate = await page.$('[data-testid="age-gate-modal"]');
    if (ageGate) {
      const btn = await page.$('[data-testid="age-gate-submit-button"]');
      if (btn) await btn.click();
      await sleep(2000);
    }

    // Get first product URL
    const productUrl = await page.evaluate(() => {
      const href = document.querySelector('a[href*="/product/"]')?.getAttribute('href');
      if (!href) return null;
      // Resolve instead of concatenating, so an already-absolute href is not
      // turned into "https://shop...https://...".
      try {
        return new URL(href, 'https://shop.bestdispensary.com').href;
      } catch {
        return null;
      }
    });

    if (!productUrl) {
      console.log('No product found');
      return;
    }

    console.log(`Checking product: ${productUrl}\n`);
    await page.goto(productUrl, { waitUntil: 'networkidle2', timeout: 30000 });
    await sleep(2000);

    // Look for inventory/stock info
    const inventoryData = await page.evaluate(() => {
      const data: any = {};

      // Check for stock/inventory elements
      const stockSelectors = [
        '[class*="stock"]',
        '[class*="Stock"]',
        '[class*="inventory"]',
        '[class*="Inventory"]',
        '[class*="quantity"]',
        '[class*="Quantity"]',
        '[class*="available"]',
        '[class*="Available"]',
        '[class*="in-stock"]',
        '[class*="out-of-stock"]',
        '[data-stock]',
        '[data-quantity]',
        '[data-inventory]',
      ];

      data.stockElements = [];
      stockSelectors.forEach(sel => {
        document.querySelectorAll(sel).forEach(el => {
          data.stockElements.push({
            selector: sel,
            text: el.textContent?.trim().slice(0, 100),
            dataAttrs: Object.keys((el as HTMLElement).dataset || {}),
          });
        });
      });

      // Check for "Add to cart" button state (disabled = out of stock)
      const addToCartBtn = document.querySelector('button[class*="add"], button[class*="cart"]');
      data.addToCartBtn = {
        found: !!addToCartBtn,
        disabled: (addToCartBtn as HTMLButtonElement)?.disabled,
        text: addToCartBtn?.textContent?.trim(),
      };

      // Check visible page text for inventory keywords
      const bodyText = document.body.innerText;
      data.hasStockText = bodyText.includes('stock') || bodyText.includes('Stock');
      data.hasInventoryText = bodyText.includes('inventory') || bodyText.includes('Inventory');
      data.hasQuantityText = bodyText.includes('quantity') || bodyText.includes('Quantity');
      data.hasAvailableText = bodyText.includes('available') || bodyText.includes('Available');

      // Get data attributes from product/sku/variant elements
      data.allDataAttrs = [];
      document.querySelectorAll('[data-product-id], [data-sku], [data-variant]').forEach(el => {
        const attrs: any = {};
        Object.entries((el as HTMLElement).dataset).forEach(([k, v]) => {
          attrs[k] = v;
        });
        if (Object.keys(attrs).length > 0) {
          data.allDataAttrs.push(attrs);
        }
      });

      // Check for JSON-LD or schema data
      const scripts = document.querySelectorAll('script[type="application/ld+json"]');
      data.jsonLd = [];
      scripts.forEach(s => {
        try {
          const json = JSON.parse(s.textContent || '');
          data.jsonLd.push(json);
        } catch {
          // Best effort: a malformed JSON-LD block is simply left out of the report.
        }
      });

      // Check Next.js data
      const nextData = document.getElementById('__NEXT_DATA__');
      if (nextData) {
        try {
          const json = JSON.parse(nextData.textContent || '');
          data.hasNextData = true;
          data.nextDataKeys = Object.keys(json);
          // Look for product data in props
          if (json.props?.pageProps?.product) {
            data.productFromNext = json.props.pageProps.product;
          }
          if (json.props?.pageProps) {
            data.pagePropsKeys = Object.keys(json.props.pageProps);
          }
        } catch {
          // Best effort: unparseable __NEXT_DATA__ is reported as absent.
        }
      }

      return data;
    });

    console.log('Inventory Analysis:\n');
    console.log(`Stock elements found: ${inventoryData.stockElements.length}`);
    inventoryData.stockElements.forEach((s: any) => {
      console.log(` - ${s.selector}: "${s.text}"`);
    });

    console.log(`\nAdd to Cart button: ${JSON.stringify(inventoryData.addToCartBtn)}`);

    console.log('\nText checks:');
    console.log(` Has "stock": ${inventoryData.hasStockText}`);
    console.log(` Has "inventory": ${inventoryData.hasInventoryText}`);
    console.log(` Has "quantity": ${inventoryData.hasQuantityText}`);
    console.log(` Has "available": ${inventoryData.hasAvailableText}`);

    console.log(`\nData attributes: ${JSON.stringify(inventoryData.allDataAttrs)}`);
    console.log(`\nJSON-LD: ${JSON.stringify(inventoryData.jsonLd, null, 2)}`);

    if (inventoryData.hasNextData) {
      console.log('\nNext.js data found!');
      console.log(` Keys: ${inventoryData.nextDataKeys}`);
      console.log(` Page props keys: ${inventoryData.pagePropsKeys}`);
      if (inventoryData.productFromNext) {
        console.log('\n Product data from Next.js:');
        console.log(JSON.stringify(inventoryData.productFromNext, null, 2));
      }
    }
  } finally {
    // Always release the Chromium process, including on the early return above.
    await browser.close();
  }
}
|
||||||
|
|
||||||
|
main().catch(console.error);
|
||||||
211
backend/scripts/test-treez-load-all.ts
Normal file
211
backend/scripts/test-treez-load-all.ts
Normal file
@@ -0,0 +1,211 @@
|
|||||||
|
/**
|
||||||
|
* Find and interact with "load more brands" selector
|
||||||
|
*/
|
||||||
|
|
||||||
|
import puppeteer, { Page } from 'puppeteer';
|
||||||
|
|
||||||
|
const STORE_ID = 'best';
|
||||||
|
|
||||||
|
/**
 * Delay helper for async code.
 *
 * @param ms - Number of milliseconds to wait.
 * @returns A promise fulfilled with no value once the delay has elapsed.
 */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((release) => {
    setTimeout(release, ms);
  });
}
|
||||||
|
|
||||||
|
/**
 * Get past the age-verification modal when the page shows one.
 *
 * Returns immediately when no modal is found. Otherwise logs a notice, clicks
 * the submit button if it exists, and waits two seconds.
 *
 * @param page - Puppeteer page to inspect.
 */
async function bypassAgeGate(page: Page): Promise<void> {
  const modal = await page.$('[data-testid="age-gate-modal"]');
  if (!modal) {
    return;
  }

  console.log(' Age gate detected, bypassing...');

  const submitButton = await page.$('[data-testid="age-gate-submit-button"]');
  if (submitButton) {
    await submitButton.click();
  }

  // The wait applies whenever the modal was present, button or not.
  await sleep(2000);
}
|
||||||
|
|
||||||
|
/**
 * Summarise the brand sections currently in the DOM.
 *
 * A section is any element whose class contains "products_product__section".
 * Its title is the text of the first nested h2/h3/"heading"-classed element
 * ('Unknown' when missing) and its product count is the number of nested
 * anchors whose class contains "product_product__".
 *
 * @param page - Puppeteer page to query.
 * @returns One entry per section, in document order.
 */
async function collectBrandSections(page: Page): Promise<{ title: string; productCount: number }[]> {
  return page.evaluate(() => {
    const sections: { title: string; productCount: number }[] = [];

    document.querySelectorAll('[class*="products_product__section"]').forEach(section => {
      const header = section.querySelector('h2, h3, [class*="heading"]');
      const title = header?.textContent?.trim() || 'Unknown';
      const products = section.querySelectorAll('a[class*="product_product__"]');
      sections.push({ title, productCount: products.length });
    });

    return sections;
  });
}

/**
 * Exploratory probe for a "load more brands" control on the /brands page.
 *
 * Opens the page in headless Chromium (images, fonts and media are blocked),
 * dismisses the age gate, then prints:
 *   [1] native <select> elements and their first options,
 *   [2] dropdown/filter-like elements whose text mentions brand/more/all,
 *   [3] clickable elements whose text mentions brand/more/load/view all/show all,
 *   [4] the brand sections currently rendered,
 *   [5] a full-page screenshot saved under /tmp,
 *   [6] page height and section count across up to 20 scroll passes,
 *   [7] the brand sections and total product count after scrolling.
 *
 * The browser is always closed, including when a step throws.
 */
async function main() {
  console.log('='.repeat(60));
  console.log('Finding "Load More Brands" control');
  console.log('='.repeat(60));

  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // Don't block stylesheets - might affect layout
    await page.setRequestInterception(true);
    page.on('request', (req) => {
      if (['image', 'font', 'media'].includes(req.resourceType())) {
        req.abort();
      } else {
        req.continue();
      }
    });

    const url = `https://${STORE_ID}.treez.io/onlinemenu/brands?customerType=ADULT`;
    console.log(`\nNavigating to ${url}`);

    await page.goto(url, { waitUntil: 'networkidle2', timeout: 60000 });
    await sleep(3000);
    await bypassAgeGate(page);
    await sleep(2000);

    // Find all selects and dropdowns
    console.log('\n[1] Looking for select elements...');

    const selectInfo = await page.evaluate(() => {
      const results: any[] = [];

      // Native select elements
      document.querySelectorAll('select').forEach((sel, i) => {
        const options = Array.from(sel.options).map(o => ({ value: o.value, text: o.text }));
        results.push({
          type: 'select',
          id: sel.id || `select-${i}`,
          class: sel.className,
          options: options.slice(0, 10),
          totalOptions: sel.options.length,
        });
      });

      return results;
    });

    console.log('Native selects found:', JSON.stringify(selectInfo, null, 2));

    // Look for custom dropdown buttons
    console.log('\n[2] Looking for dropdown/button elements...');

    const dropdownInfo = await page.evaluate(() => {
      const results: any[] = [];

      // Look for common dropdown patterns
      const selectors = [
        '[class*="dropdown"]',
        '[class*="Dropdown"]',
        '[class*="select"]',
        '[class*="Select"]',
        '[class*="picker"]',
        '[class*="Picker"]',
        '[role="listbox"]',
        '[role="combobox"]',
        'button[aria-haspopup]',
        '[class*="brand"] button',
        '[class*="Brand"] button',
        '[class*="filter"]',
        '[class*="Filter"]',
      ];

      selectors.forEach(sel => {
        document.querySelectorAll(sel).forEach(el => {
          const text = el.textContent?.trim().slice(0, 100) || '';
          // toString?.() copes with SVG elements, whose className is not a string.
          const className = el.className?.toString?.().slice(0, 100) || '';
          const lowered = text.toLowerCase();
          if (lowered.includes('brand') || lowered.includes('more') || lowered.includes('all')) {
            results.push({
              selector: sel,
              tag: el.tagName,
              class: className,
              text: text.slice(0, 50),
            });
          }
        });
      });

      return results;
    });

    console.log('Dropdown-like elements:', JSON.stringify(dropdownInfo.slice(0, 10), null, 2));

    // Look for any element containing "brand" text
    console.log('\n[3] Looking for elements with "brand" or "more" text...');

    const brandTextElements = await page.evaluate(() => {
      const results: any[] = [];
      const textContent = ['brand', 'more', 'load', 'view all', 'show all'];

      document.querySelectorAll('button, a, [role="button"], select, [class*="select"]').forEach(el => {
        const text = el.textContent?.toLowerCase() || '';
        if (textContent.some(t => text.includes(t))) {
          results.push({
            tag: el.tagName,
            class: el.className?.toString?.().slice(0, 80) || '',
            text: el.textContent?.trim().slice(0, 100) || '',
            href: el.getAttribute('href') || '',
          });
        }
      });

      return results;
    });

    console.log('Elements with brand/more text:', JSON.stringify(brandTextElements.slice(0, 15), null, 2));

    // Count current brand sections
    console.log('\n[4] Counting brand sections...');

    const brandSections = await collectBrandSections(page);

    console.log(`Found ${brandSections.length} brand sections:`);
    brandSections.slice(0, 20).forEach(s => console.log(` - ${s.title}: ${s.productCount} products`));

    // Take a screenshot
    await page.screenshot({ path: '/tmp/treez-brands-full.png', fullPage: true });
    console.log('\n[5] Full page screenshot saved to /tmp/treez-brands-full.png');

    // Try scrolling to bottom to trigger any lazy loading
    console.log('\n[6] Scrolling to load more content...');

    let previousHeight = 0;
    for (let i = 0; i < 20; i++) {
      await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
      await sleep(1500);

      const currentHeight = await page.evaluate(() => document.body.scrollHeight);
      const sectionCount = await page.evaluate(() =>
        document.querySelectorAll('[class*="products_product__section"]').length
      );

      console.log(` Scroll ${i + 1}: height=${currentHeight}, sections=${sectionCount}`);

      // An unchanged height after a scroll is taken to mean nothing more was loaded.
      if (currentHeight === previousHeight) {
        console.log(' No new content, stopping');
        break;
      }
      previousHeight = currentHeight;
    }

    // Final count
    const finalSections = await collectBrandSections(page);

    console.log(`\n[7] After scrolling: ${finalSections.length} brand sections`);
    finalSections.forEach(s => console.log(` - ${s.title}: ${s.productCount} products`));

    const totalProducts = finalSections.reduce((sum, s) => sum + s.productCount, 0);
    console.log(`\nTotal products across all sections: ${totalProducts}`);
  } finally {
    // Always release the Chromium process, even if a step above threw.
    await browser.close();
  }
}
|
||||||
|
|
||||||
|
main().catch(console.error);
|
||||||
104
backend/scripts/test-treez-network.ts
Normal file
104
backend/scripts/test-treez-network.ts
Normal file
@@ -0,0 +1,104 @@
|
|||||||
|
import puppeteer from 'puppeteer';
|
||||||
|
|
||||||
|
/**
 * Pause the calling async flow for a fixed amount of time.
 *
 * @param ms - Delay in milliseconds before the returned promise settles.
 * @returns A promise that resolves (with no value) once the timer fires.
 */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
|
||||||
|
|
||||||
|
/**
 * Entry point for the network-capture debug script.
 *
 * Launches headless Chrome, opens https://shop.bestdispensary.com/brands,
 * dismisses the age gate if one is present, clicks the "load more" button up
 * to three times, then visits one product page. While doing so it records
 * every request and JSON response whose URL contains one of a few API-looking
 * keywords, and finally prints a summary of both lists to stdout.
 *
 * @returns Resolves once the summary has been printed and the browser closed.
 * @throws Propagates Puppeteer errors (e.g. navigation timeouts); the browser
 *         is still closed in that case.
 */
async function main() {
  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  // try/finally guarantees the Chrome process is shut down even when a
  // navigation or click throws; otherwise it would be left running.
  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // Capture all network requests
    const requests: any[] = [];
    const responses: any[] = [];

    // URL substrings that mark traffic as worth recording.
    const responseKeywords = ['api', 'graphql', 'product', 'menu', 'inventory'];
    // NOTE(review): requests additionally match on 'treez' while responses do
    // not; confirm whether that asymmetry is intended.
    const requestKeywords = [...responseKeywords, 'treez'];
    const matchesAny = (url: string, keywords: string[]): boolean =>
      keywords.some((keyword) => url.includes(keyword));

    page.on('request', (req) => {
      const url = req.url();
      if (!matchesAny(url, requestKeywords)) return;
      requests.push({
        url: url.slice(0, 150),
        method: req.method(),
        headers: req.headers(),
        postData: req.postData()?.slice(0, 500),
      });
    });

    page.on('response', async (res) => {
      const url = res.url();
      if (!matchesAny(url, responseKeywords)) return;
      try {
        const contentType = res.headers()['content-type'] || '';
        if (!contentType.includes('json')) return;
        const body = await res.text();
        responses.push({
          url: url.slice(0, 150),
          status: res.status(),
          bodyPreview: body.slice(0, 1000),
        });
      } catch (err) {
        // Reading a body can fail (presumably for responses without a
        // retrievable body); report it instead of hiding it, but keep going
        // because the capture is best-effort.
        const reason = err instanceof Error ? err.message : String(err);
        console.warn(' Could not read response body for ' + url.slice(0, 150) + ': ' + reason);
      }
    });

    console.log('Loading page and capturing network requests...\n');

    await page.goto('https://shop.bestdispensary.com/brands', {
      waitUntil: 'networkidle2',
      timeout: 60000,
    });
    await sleep(3000);

    // Bypass age gate
    const ageGate = await page.$('[data-testid="age-gate-modal"]');
    if (ageGate) {
      const submit = await page.$('[data-testid="age-gate-submit-button"]');
      if (submit) await submit.click();
      await sleep(2000);
    }

    // Click load more to trigger more API calls
    for (let attempt = 0; attempt < 3; attempt++) {
      const loadMore = await page.$('button.collection__load-more');
      if (loadMore) {
        await loadMore.click();
        await sleep(2000);
      }
    }

    // Also visit a product page
    console.log('\nVisiting a product page...\n');
    await page.goto('https://shop.bestdispensary.com/product/dime-sour-grapes-2g-disposable-cartridge-2-grams', {
      waitUntil: 'networkidle2',
      timeout: 30000,
    });
    await sleep(2000);

    console.log('=== API REQUESTS FOUND ===\n');
    requests.forEach((r, i) => {
      console.log((i + 1) + '. ' + r.method + ' ' + r.url);
      if (r.postData) {
        console.log(' POST data: ' + r.postData);
      }
    });

    console.log('\n=== JSON RESPONSES ===\n');
    responses.forEach((r, i) => {
      console.log((i + 1) + '. ' + r.url);
      console.log(' Status: ' + r.status);
      console.log(' Body: ' + r.bodyPreview.slice(0, 300) + '...\n');
    });
  } finally {
    await browser.close();
  }
}
|
||||||
|
|
||||||
|
// Run the script; log any failure and exit non-zero so a shell or CI job can
// tell that it did not complete.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
|
||||||
110
backend/scripts/test-treez-page-fetch.ts
Normal file
110
backend/scripts/test-treez-page-fetch.ts
Normal file
@@ -0,0 +1,110 @@
|
|||||||
|
import puppeteer from 'puppeteer';
|
||||||
|
import fs from 'fs';
|
||||||
|
|
||||||
|
/**
 * Wait for the given number of milliseconds.
 *
 * @param ms - How long to wait, in milliseconds.
 * @returns A promise that resolves with no value after the delay.
 */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((wake) => {
    setTimeout(wake, ms);
  });
}
|
||||||
|
|
||||||
|
/**
 * Entry point for the page-context fetch debug script.
 *
 * Opens https://shop.bestdispensary.com/shop in headless Chrome, dismisses
 * the age gate if present, then issues a product-search POST from inside the
 * page and prints the result. When products come back, the field names and
 * the first product are printed and the full list is written to
 * /tmp/treez-all-products.json.
 *
 * The API key can be overridden with the TREEZ_API_KEY environment variable.
 *
 * @returns Resolves once the output has been written and the browser closed.
 * @throws Propagates Puppeteer / filesystem errors; the browser is still
 *         closed in that case.
 */
async function main() {
  const searchUrl = 'https://search-kyrok9udlk.gapcommerceapi.com/product/search';
  // NOTE(security): an API key is committed in source here. Prefer supplying
  // it via TREEZ_API_KEY; the literal is kept only as a fallback so the script
  // keeps working unconfigured. Confirm this key is a public client-side key
  // before leaving it in the repository.
  const apiKey = process.env.TREEZ_API_KEY || 'V3jHL9dFzi3Gj4UISM4lr38Nm0GSxcps5OBz1PbS';

  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  // try/finally guarantees the Chrome process is shut down even when a
  // navigation, evaluation or file write throws.
  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    console.log('Loading page first to establish session...\n');

    await page.goto('https://shop.bestdispensary.com/shop', {
      waitUntil: 'networkidle2',
      timeout: 60000,
    });
    await sleep(3000);

    // Bypass age gate
    const ageGate = await page.$('[data-testid="age-gate-modal"]');
    if (ageGate) {
      console.log('Bypassing age gate...');
      const submit = await page.$('[data-testid="age-gate-submit-button"]');
      if (submit) await submit.click();
      await sleep(3000);
    }

    // Wait for page to fully load
    await sleep(2000);

    console.log('\nMaking fetch request from page context...\n');

    // The callback runs inside the browser page, so Node values (URL, key)
    // must be passed in as serializable arguments rather than captured.
    const result = await page.evaluate(async (url: string, key: string) => {
      const query = {
        from: 0,
        size: 1000,
        query: {
          bool: {
            must: [
              { bool: { filter: { range: { customMinPrice: { gte: 0.01, lte: 500000 } } } } },
              { bool: { should: [{ match: { isAboveThreshold: true } }] } },
              { bool: { should: [{ match: { isHideFromMenu: false } }] } },
            ],
          },
        },
      };

      try {
        const response = await fetch(url, {
          method: 'POST',
          headers: {
            'Content-Type': 'application/json',
            'x-api-key': key,
          },
          body: JSON.stringify(query),
          credentials: 'include',
        });

        if (!response.ok) {
          return { error: 'HTTP ' + response.status, statusText: response.statusText };
        }

        const data = await response.json();
        return {
          total: data.hits?.total?.value,
          count: data.hits?.hits?.length,
          firstProduct: data.hits?.hits?.[0]?._source,
          products: data.hits?.hits?.map((h: any) => h._source),
        };
      } catch (err: any) {
        // Errors cannot cross the page boundary as objects; return the message.
        return { error: err.message };
      }
    }, searchUrl, apiKey);

    if (result.error) {
      console.log('Error: ' + result.error);
      if (result.statusText) console.log('Status: ' + result.statusText);
    } else {
      console.log('Total products in ES: ' + result.total);
      console.log('Products returned: ' + result.count);

      // The query asks for at most 1000 hits and does not paginate, so make a
      // truncated result visible instead of silently saving a partial list.
      if (
        typeof result.total === 'number' &&
        typeof result.count === 'number' &&
        result.total > result.count
      ) {
        console.log('Warning: only ' + result.count + ' of ' + result.total + ' products were returned');
      }

      if (result.firstProduct) {
        console.log('\n=== PRODUCT FIELDS ===\n');
        console.log(Object.keys(result.firstProduct).sort().join('\n'));

        console.log('\n=== SAMPLE PRODUCT ===\n');
        console.log(JSON.stringify(result.firstProduct, null, 2));

        // Save all products
        if (result.products) {
          fs.writeFileSync('/tmp/treez-all-products.json', JSON.stringify(result.products, null, 2));
          console.log('\nSaved ' + result.products.length + ' products to /tmp/treez-all-products.json');
        }
      }
    }
  } finally {
    await browser.close();
  }
}
|
||||||
|
|
||||||
|
// Run the script; without a rejection handler the promise would be left
// floating. Log any failure and exit non-zero so callers can detect it.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
|
||||||
171
backend/scripts/test-treez-page-state.ts
Normal file
171
backend/scripts/test-treez-page-state.ts
Normal file
@@ -0,0 +1,171 @@
|
|||||||
|
import puppeteer from 'puppeteer';
|
||||||
|
import fs from 'fs';
|
||||||
|
|
||||||
|
/**
 * Delay helper used between page interactions.
 *
 * @param ms - Number of milliseconds to wait.
 * @returns A promise that resolves with no value when the delay has elapsed.
 */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((finish) => {
    setTimeout(finish, ms);
  });
}
|
||||||
|
|
||||||
|
/**
 * Entry point for the page-state inspection debug script.
 *
 * Opens https://shop.bestdispensary.com/shop in headless Chrome, dismisses
 * the age gate if present, and prints three views of the page's data:
 *   1. the parsed contents of the `__NEXT_DATA__` script tag, if any;
 *   2. window globals whose names look data-related;
 *   3. product details scraped heuristically from product links in the DOM,
 *      followed by counts of how many scraped products have each field.
 *
 * @returns Resolves once everything has been printed and the browser closed.
 * @throws Propagates Puppeteer errors; the browser is still closed in that
 *         case.
 */
async function main() {
  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  // try/finally guarantees the Chrome process is shut down even when a
  // navigation or page evaluation throws.
  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    console.log('Loading page...\n');

    await page.goto('https://shop.bestdispensary.com/shop', {
      waitUntil: 'networkidle2',
      timeout: 60000,
    });
    await sleep(3000);

    // Bypass age gate
    const ageGate = await page.$('[data-testid="age-gate-modal"]');
    if (ageGate) {
      console.log('Bypassing age gate...');
      const submit = await page.$('[data-testid="age-gate-submit-button"]');
      if (submit) await submit.click();
      await sleep(3000);
    }

    // Extract __NEXT_DATA__
    console.log('\n=== NEXT.JS DATA ===\n');

    const nextData = await page.evaluate(() => {
      const script = document.getElementById('__NEXT_DATA__');
      if (script) {
        try {
          return JSON.parse(script.textContent || '');
        } catch {
          // Missing or malformed JSON is reported as "no data".
          return null;
        }
      }
      return null;
    });

    if (nextData) {
      console.log('Top keys: ' + Object.keys(nextData).join(', '));
      if (nextData.props?.pageProps) {
        console.log('pageProps keys: ' + Object.keys(nextData.props.pageProps).join(', '));

        // Look for products
        const pp = nextData.props.pageProps;
        if (pp.products) {
          console.log('\nFound products: ' + pp.products.length);
          if (pp.products[0]) {
            console.log('Product fields: ' + Object.keys(pp.products[0]).join(', '));
            console.log('\nSample:\n' + JSON.stringify(pp.products[0], null, 2));
          }
        }
        if (pp.initialProducts) {
          console.log('\nFound initialProducts: ' + pp.initialProducts.length);
        }
        if (pp.data) {
          console.log('\nFound data: ' + (Array.isArray(pp.data) ? pp.data.length + ' items' : typeof pp.data));
        }
      }
    }

    // Also check window object
    console.log('\n=== WINDOW GLOBALS ===\n');

    const windowData = await page.evaluate(() => {
      const win = window as any;
      const result: any = {};

      // Common patterns for storing product data
      const patterns = ['products', 'items', 'data', 'state', 'store', 'redux', 'apollo'];

      Object.keys(win).forEach((key) => {
        const lowerKey = key.toLowerCase();
        if (patterns.some((p) => lowerKey.includes(p))) {
          try {
            const val = win[key];
            if (typeof val === 'object' && val !== null) {
              // Only a summary is returned: kind, first 10 keys, array length.
              result[key] = {
                type: Array.isArray(val) ? 'array' : 'object',
                keys: Object.keys(val).slice(0, 10),
                length: Array.isArray(val) ? val.length : undefined,
              };
            }
          } catch {
            // Reading some window properties can throw; skip those globals.
          }
        }
      });

      return result;
    });

    console.log('Window globals with data-like names:');
    Object.entries(windowData).forEach(([k, v]: [string, any]) => {
      console.log(' ' + k + ': ' + v.type + (v.length ? ' (' + v.length + ')' : '') + ' - keys: ' + v.keys?.join(', '));
    });

    // Try to find React state
    console.log('\n=== EXTRACTING FROM DOM ===\n');

    const domProducts = await page.evaluate(() => {
      const products: any[] = [];

      document.querySelectorAll('a[href*="/product/"]').forEach((card: Element) => {
        const product: any = {};

        product.href = card.getAttribute('href');
        product.name = card.querySelector('h3, h4, h5')?.textContent?.trim();

        // Get all text
        const allText = card.textContent || '';

        // Extract THC % (first number followed by a percent sign)
        const thcMatch = allText.match(/(\d+(?:\.\d+)?)\s*%/);
        if (thcMatch) product.thc = thcMatch[1];

        // Extract price (first number preceded by a dollar sign)
        const priceMatch = allText.match(/\$(\d+(?:\.\d+)?)/);
        if (priceMatch) product.price = priceMatch[1];

        // Extract weight (first number followed by "g" or "G")
        const weightMatch = allText.match(/(\d+(?:\.\d+)?)\s*[gG]/);
        if (weightMatch) product.weight = weightMatch[1] + 'g';

        // Get brand from card
        const brandEl = card.querySelector('[class*="brand"]');
        product.brand = brandEl?.textContent?.trim();

        // Get strain type; when several names occur in the text, the one
        // latest in this list wins because each match overwrites the last.
        const strainTypes = ['Indica', 'Sativa', 'Hybrid', 'I/S', 'S/I', 'CBD'];
        strainTypes.forEach((st) => {
          if (allText.includes(st)) product.strainType = st;
        });

        // Get image
        const img = card.querySelector('img');
        product.image = img?.getAttribute('src');

        products.push(product);
      });

      return products;
    });

    console.log('Products from DOM: ' + domProducts.length);
    if (domProducts.length > 0) {
      console.log('\nSample:\n' + JSON.stringify(domProducts[0], null, 2));

      // Show variety
      console.log('\n=== DATA QUALITY ===');
      const withThc = domProducts.filter((p) => p.thc).length;
      const withPrice = domProducts.filter((p) => p.price).length;
      const withBrand = domProducts.filter((p) => p.brand).length;
      const withStrain = domProducts.filter((p) => p.strainType).length;

      console.log('With THC%: ' + withThc + '/' + domProducts.length);
      console.log('With Price: ' + withPrice + '/' + domProducts.length);
      console.log('With Brand: ' + withBrand + '/' + domProducts.length);
      console.log('With Strain: ' + withStrain + '/' + domProducts.length);
    }
  } finally {
    await browser.close();
  }
}
|
||||||
|
|
||||||
|
// Run the script; without a rejection handler the promise would be left
// floating. Log any failure and exit non-zero so callers can detect it.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
|
||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user