Compare commits
2 Commits
master
...
fix/api-se
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
2513e22171 | ||
|
|
e17b3b225a |
7
.gitignore
vendored
7
.gitignore
vendored
@@ -51,10 +51,3 @@ coverage/
|
||||
*.tmp
|
||||
*.temp
|
||||
llm-scraper/
|
||||
|
||||
# Claude Code
|
||||
.claude/
|
||||
|
||||
# Test/debug scripts
|
||||
backend/scripts/test-*.ts
|
||||
backend/scripts/run-*.ts
|
||||
|
||||
181
.woodpecker.yml
181
.woodpecker.yml
@@ -3,7 +3,7 @@ steps:
|
||||
# PR VALIDATION: Parallel type checks (PRs only)
|
||||
# ===========================================
|
||||
typecheck-backend:
|
||||
image: node:22
|
||||
image: code.cannabrands.app/creationshop/node:20
|
||||
commands:
|
||||
- cd backend
|
||||
- npm ci --prefer-offline
|
||||
@@ -13,7 +13,7 @@ steps:
|
||||
event: pull_request
|
||||
|
||||
typecheck-cannaiq:
|
||||
image: node:22
|
||||
image: code.cannabrands.app/creationshop/node:20
|
||||
commands:
|
||||
- cd cannaiq
|
||||
- npm ci --prefer-offline
|
||||
@@ -23,7 +23,7 @@ steps:
|
||||
event: pull_request
|
||||
|
||||
typecheck-findadispo:
|
||||
image: node:22
|
||||
image: code.cannabrands.app/creationshop/node:20
|
||||
commands:
|
||||
- cd findadispo/frontend
|
||||
- npm ci --prefer-offline
|
||||
@@ -33,7 +33,7 @@ steps:
|
||||
event: pull_request
|
||||
|
||||
typecheck-findagram:
|
||||
image: node:22
|
||||
image: code.cannabrands.app/creationshop/node:20
|
||||
commands:
|
||||
- cd findagram/frontend
|
||||
- npm ci --prefer-offline
|
||||
@@ -58,7 +58,7 @@ steps:
|
||||
-H "Authorization: token $GITEA_TOKEN" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"Do":"merge"}' \
|
||||
"https://git.spdy.io/api/v1/repos/Creationshop/cannaiq/pulls/${CI_COMMIT_PULL_REQUEST}/merge"
|
||||
"https://code.cannabrands.app/api/v1/repos/Creationshop/dispensary-scraper/pulls/${CI_COMMIT_PULL_REQUEST}/merge"
|
||||
depends_on:
|
||||
- typecheck-backend
|
||||
- typecheck-cannaiq
|
||||
@@ -68,122 +68,127 @@ steps:
|
||||
event: pull_request
|
||||
|
||||
# ===========================================
|
||||
# DOCKER: Multi-stage builds with layer caching
|
||||
# MASTER DEPLOY: Parallel Docker builds
|
||||
# NOTE: cache_from/cache_to removed due to plugin bug splitting on commas
|
||||
# ===========================================
|
||||
docker-backend:
|
||||
image: gcr.io/kaniko-project/executor:debug
|
||||
commands:
|
||||
- /kaniko/executor
|
||||
--context=/woodpecker/src/git.spdy.io/Creationshop/cannaiq/backend
|
||||
--dockerfile=/woodpecker/src/git.spdy.io/Creationshop/cannaiq/backend/Dockerfile
|
||||
--destination=registry.spdy.io/cannaiq/backend:latest
|
||||
--destination=registry.spdy.io/cannaiq/backend:sha-${CI_COMMIT_SHA:0:8}
|
||||
--build-arg=APP_BUILD_VERSION=sha-${CI_COMMIT_SHA:0:8}
|
||||
--build-arg=APP_GIT_SHA=${CI_COMMIT_SHA}
|
||||
--build-arg=APP_BUILD_TIME=${CI_PIPELINE_CREATED}
|
||||
--cache=true
|
||||
--cache-repo=registry.spdy.io/cannaiq/cache-backend
|
||||
--cache-ttl=168h
|
||||
image: woodpeckerci/plugin-docker-buildx
|
||||
settings:
|
||||
registry: code.cannabrands.app
|
||||
repo: code.cannabrands.app/creationshop/dispensary-scraper
|
||||
tags:
|
||||
- latest
|
||||
- ${CI_COMMIT_SHA:0:8}
|
||||
dockerfile: backend/Dockerfile
|
||||
context: backend
|
||||
username:
|
||||
from_secret: registry_username
|
||||
password:
|
||||
from_secret: registry_password
|
||||
platforms: linux/amd64
|
||||
provenance: false
|
||||
build_args:
|
||||
APP_BUILD_VERSION: ${CI_COMMIT_SHA:0:8}
|
||||
APP_GIT_SHA: ${CI_COMMIT_SHA}
|
||||
APP_BUILD_TIME: ${CI_PIPELINE_CREATED}
|
||||
CONTAINER_IMAGE_TAG: ${CI_COMMIT_SHA:0:8}
|
||||
depends_on: []
|
||||
when:
|
||||
branch: [master, develop]
|
||||
branch: master
|
||||
event: push
|
||||
|
||||
docker-cannaiq:
|
||||
image: gcr.io/kaniko-project/executor:debug
|
||||
commands:
|
||||
- /kaniko/executor
|
||||
--context=/woodpecker/src/git.spdy.io/Creationshop/cannaiq/cannaiq
|
||||
--dockerfile=/woodpecker/src/git.spdy.io/Creationshop/cannaiq/cannaiq/Dockerfile
|
||||
--destination=registry.spdy.io/cannaiq/frontend:latest
|
||||
--destination=registry.spdy.io/cannaiq/frontend:sha-${CI_COMMIT_SHA:0:8}
|
||||
--cache=true
|
||||
--cache-repo=registry.spdy.io/cannaiq/cache-cannaiq
|
||||
--cache-ttl=168h
|
||||
image: woodpeckerci/plugin-docker-buildx
|
||||
settings:
|
||||
registry: code.cannabrands.app
|
||||
repo: code.cannabrands.app/creationshop/cannaiq-frontend
|
||||
tags:
|
||||
- latest
|
||||
- ${CI_COMMIT_SHA:0:8}
|
||||
dockerfile: cannaiq/Dockerfile
|
||||
context: cannaiq
|
||||
username:
|
||||
from_secret: registry_username
|
||||
password:
|
||||
from_secret: registry_password
|
||||
platforms: linux/amd64
|
||||
provenance: false
|
||||
depends_on: []
|
||||
when:
|
||||
branch: [master, develop]
|
||||
branch: master
|
||||
event: push
|
||||
|
||||
docker-findadispo:
|
||||
image: gcr.io/kaniko-project/executor:debug
|
||||
commands:
|
||||
- /kaniko/executor
|
||||
--context=/woodpecker/src/git.spdy.io/Creationshop/cannaiq/findadispo/frontend
|
||||
--dockerfile=/woodpecker/src/git.spdy.io/Creationshop/cannaiq/findadispo/frontend/Dockerfile
|
||||
--destination=registry.spdy.io/cannaiq/findadispo:latest
|
||||
--destination=registry.spdy.io/cannaiq/findadispo:sha-${CI_COMMIT_SHA:0:8}
|
||||
--cache=true
|
||||
--cache-repo=registry.spdy.io/cannaiq/cache-findadispo
|
||||
--cache-ttl=168h
|
||||
image: woodpeckerci/plugin-docker-buildx
|
||||
settings:
|
||||
registry: code.cannabrands.app
|
||||
repo: code.cannabrands.app/creationshop/findadispo-frontend
|
||||
tags:
|
||||
- latest
|
||||
- ${CI_COMMIT_SHA:0:8}
|
||||
dockerfile: findadispo/frontend/Dockerfile
|
||||
context: findadispo/frontend
|
||||
username:
|
||||
from_secret: registry_username
|
||||
password:
|
||||
from_secret: registry_password
|
||||
platforms: linux/amd64
|
||||
provenance: false
|
||||
depends_on: []
|
||||
when:
|
||||
branch: [master, develop]
|
||||
branch: master
|
||||
event: push
|
||||
|
||||
docker-findagram:
|
||||
image: gcr.io/kaniko-project/executor:debug
|
||||
commands:
|
||||
- /kaniko/executor
|
||||
--context=/woodpecker/src/git.spdy.io/Creationshop/cannaiq/findagram/frontend
|
||||
--dockerfile=/woodpecker/src/git.spdy.io/Creationshop/cannaiq/findagram/frontend/Dockerfile
|
||||
--destination=registry.spdy.io/cannaiq/findagram:latest
|
||||
--destination=registry.spdy.io/cannaiq/findagram:sha-${CI_COMMIT_SHA:0:8}
|
||||
--cache=true
|
||||
--cache-repo=registry.spdy.io/cannaiq/cache-findagram
|
||||
--cache-ttl=168h
|
||||
image: woodpeckerci/plugin-docker-buildx
|
||||
settings:
|
||||
registry: code.cannabrands.app
|
||||
repo: code.cannabrands.app/creationshop/findagram-frontend
|
||||
tags:
|
||||
- latest
|
||||
- ${CI_COMMIT_SHA:0:8}
|
||||
dockerfile: findagram/frontend/Dockerfile
|
||||
context: findagram/frontend
|
||||
username:
|
||||
from_secret: registry_username
|
||||
password:
|
||||
from_secret: registry_password
|
||||
platforms: linux/amd64
|
||||
provenance: false
|
||||
depends_on: []
|
||||
when:
|
||||
branch: [master, develop]
|
||||
branch: master
|
||||
event: push
|
||||
|
||||
# ===========================================
|
||||
# DEPLOY: Pull from local registry
|
||||
# STAGE 3: Deploy and Run Migrations
|
||||
# ===========================================
|
||||
deploy:
|
||||
image: bitnami/kubectl:latest
|
||||
environment:
|
||||
K8S_TOKEN:
|
||||
from_secret: k8s_token
|
||||
KUBECONFIG_CONTENT:
|
||||
from_secret: kubeconfig_data
|
||||
commands:
|
||||
- mkdir -p ~/.kube
|
||||
- |
|
||||
cat > ~/.kube/config << KUBEEOF
|
||||
apiVersion: v1
|
||||
kind: Config
|
||||
clusters:
|
||||
- cluster:
|
||||
certificate-authority-data: LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUJkakNDQVIyZ0F3SUJBZ0lCQURBS0JnZ3Foa2pPUFFRREFqQWpNU0V3SHdZRFZRUUREQmhyTTNNdGMyVnkKZG1WeUxXTmhRREUzTmpVM05UUTNPRE13SGhjTk1qVXhNakUwTWpNeU5qSXpXaGNOTXpVeE1qRXlNak15TmpJegpXakFqTVNFd0h3WURWUVFEREJock0zTXRjMlZ5ZG1WeUxXTmhRREUzTmpVM05UUTNPRE13V1RBVEJnY3Foa2pPClBRSUJCZ2dxaGtqT1BRTUJCd05DQUFRWDRNdFJRTW5lWVJVV0s2cjZ3VEV2WjAxNnV4T3NUR3JJZ013TXVnNGwKajQ1bHZ6ZkM1WE1NY1pESnUxZ0t1dVJhVGxlb0xVOVJnSERIUUI4TUwzNTJvMEl3UURBT0JnTlZIUThCQWY4RQpCQU1DQXFRd0R3WURWUjBUQVFIL0JBVXdBd0VCL3pBZEJnTlZIUTRFRmdRVXIzNDZpNE42TFhzaEZsREhvSlU0CjJ1RjZseGN3Q2dZSUtvWkl6ajBFQXdJRFJ3QXdSQUlnVUtqdWRFQWJyS1JDVHROVXZTc1Rmb3FEaHFSeDM5MkYKTFFSVWlKK0hCVElDSUJqOFIxbG1zSnFSRkRHMEpwMGN4OG5ZZnFCaElRQzh6WWdRdTdBZmR4L3IKLS0tLS1FTkQgQ0VSVElGSUNBVEUtLS0tLQo=
|
||||
server: https://10.100.6.10:6443
|
||||
name: spdy-k3s
|
||||
contexts:
|
||||
- context:
|
||||
cluster: spdy-k3s
|
||||
namespace: cannaiq
|
||||
user: cannaiq-admin
|
||||
name: cannaiq
|
||||
current-context: cannaiq
|
||||
users:
|
||||
- name: cannaiq-admin
|
||||
user:
|
||||
token: $K8S_TOKEN
|
||||
KUBEEOF
|
||||
- echo "$KUBECONFIG_CONTENT" | tr -d '[:space:]' | base64 -d > ~/.kube/config
|
||||
- chmod 600 ~/.kube/config
|
||||
# Apply manifests to ensure probes and resource limits are set
|
||||
- kubectl apply -f /woodpecker/src/git.spdy.io/Creationshop/cannaiq/k8s/scraper.yaml
|
||||
- kubectl apply -f /woodpecker/src/git.spdy.io/Creationshop/cannaiq/k8s/scraper-worker.yaml
|
||||
- kubectl set image deployment/scraper scraper=registry.spdy.io/cannaiq/backend:sha-${CI_COMMIT_SHA:0:8} -n cannaiq
|
||||
- kubectl rollout status deployment/scraper -n cannaiq --timeout=300s
|
||||
- kubectl set image deployment/scraper-worker worker=registry.spdy.io/cannaiq/backend:sha-${CI_COMMIT_SHA:0:8} -n cannaiq
|
||||
- kubectl set image deployment/cannaiq-frontend cannaiq-frontend=registry.spdy.io/cannaiq/frontend:sha-${CI_COMMIT_SHA:0:8} -n cannaiq
|
||||
- kubectl set image deployment/findadispo-frontend findadispo-frontend=registry.spdy.io/cannaiq/findadispo:sha-${CI_COMMIT_SHA:0:8} -n cannaiq
|
||||
- kubectl set image deployment/findagram-frontend findagram-frontend=registry.spdy.io/cannaiq/findagram:sha-${CI_COMMIT_SHA:0:8} -n cannaiq
|
||||
- kubectl rollout status deployment/cannaiq-frontend -n cannaiq --timeout=300s
|
||||
# Deploy backend first
|
||||
- kubectl set image deployment/scraper scraper=code.cannabrands.app/creationshop/dispensary-scraper:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
|
||||
- kubectl rollout status deployment/scraper -n dispensary-scraper --timeout=300s
|
||||
# Note: Migrations run automatically at startup via auto-migrate
|
||||
# Deploy remaining services
|
||||
# Resilience: ensure workers are scaled up if at 0
|
||||
- REPLICAS=$(kubectl get deployment scraper-worker -n dispensary-scraper -o jsonpath='{.spec.replicas}'); if [ "$REPLICAS" = "0" ]; then echo "Scaling workers from 0 to 5"; kubectl scale deployment/scraper-worker --replicas=5 -n dispensary-scraper; fi
|
||||
- kubectl set image deployment/scraper-worker worker=code.cannabrands.app/creationshop/dispensary-scraper:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
|
||||
- kubectl set image deployment/cannaiq-frontend cannaiq-frontend=code.cannabrands.app/creationshop/cannaiq-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
|
||||
- kubectl set image deployment/findadispo-frontend findadispo-frontend=code.cannabrands.app/creationshop/findadispo-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
|
||||
- kubectl set image deployment/findagram-frontend findagram-frontend=code.cannabrands.app/creationshop/findagram-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
|
||||
- kubectl rollout status deployment/cannaiq-frontend -n dispensary-scraper --timeout=120s
|
||||
depends_on:
|
||||
- docker-backend
|
||||
- docker-cannaiq
|
||||
- docker-findadispo
|
||||
- docker-findagram
|
||||
when:
|
||||
branch: [master, develop]
|
||||
branch: master
|
||||
event: push
|
||||
|
||||
191
.woodpecker/ci.yml
Normal file
191
.woodpecker/ci.yml
Normal file
@@ -0,0 +1,191 @@
|
||||
steps:
|
||||
# ===========================================
|
||||
# PR VALIDATION: Only typecheck changed projects
|
||||
# ===========================================
|
||||
typecheck-backend:
|
||||
image: code.cannabrands.app/creationshop/node:20
|
||||
commands:
|
||||
- npm config set cache /npm-cache/backend --global
|
||||
- cd backend
|
||||
- npm ci --prefer-offline
|
||||
- npx tsc --noEmit
|
||||
volumes:
|
||||
- npm-cache:/npm-cache
|
||||
depends_on: []
|
||||
when:
|
||||
event: pull_request
|
||||
path:
|
||||
include: ['backend/**']
|
||||
|
||||
typecheck-cannaiq:
|
||||
image: code.cannabrands.app/creationshop/node:20
|
||||
commands:
|
||||
- npm config set cache /npm-cache/cannaiq --global
|
||||
- cd cannaiq
|
||||
- npm ci --prefer-offline
|
||||
- npx tsc --noEmit
|
||||
volumes:
|
||||
- npm-cache:/npm-cache
|
||||
depends_on: []
|
||||
when:
|
||||
event: pull_request
|
||||
path:
|
||||
include: ['cannaiq/**']
|
||||
|
||||
# findadispo/findagram typechecks skipped - they have || true anyway
|
||||
|
||||
# ===========================================
|
||||
# AUTO-MERGE: Merge PR after all checks pass
|
||||
# ===========================================
|
||||
auto-merge:
|
||||
image: alpine:latest
|
||||
environment:
|
||||
GITEA_TOKEN:
|
||||
from_secret: gitea_token
|
||||
commands:
|
||||
- apk add --no-cache curl
|
||||
- |
|
||||
echo "Merging PR #${CI_COMMIT_PULL_REQUEST}..."
|
||||
curl -s -X POST \
|
||||
-H "Authorization: token $GITEA_TOKEN" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"Do":"merge"}' \
|
||||
"https://code.cannabrands.app/api/v1/repos/Creationshop/dispensary-scraper/pulls/${CI_COMMIT_PULL_REQUEST}/merge"
|
||||
depends_on:
|
||||
- typecheck-backend
|
||||
- typecheck-cannaiq
|
||||
when:
|
||||
event: pull_request
|
||||
|
||||
# ===========================================
|
||||
# MASTER DEPLOY: Parallel Docker builds
|
||||
# ===========================================
|
||||
docker-backend:
|
||||
image: woodpeckerci/plugin-docker-buildx
|
||||
settings:
|
||||
registry: code.cannabrands.app
|
||||
repo: code.cannabrands.app/creationshop/dispensary-scraper
|
||||
tags:
|
||||
- latest
|
||||
- ${CI_COMMIT_SHA:0:8}
|
||||
dockerfile: backend/Dockerfile
|
||||
context: backend
|
||||
username:
|
||||
from_secret: registry_username
|
||||
password:
|
||||
from_secret: registry_password
|
||||
platforms: linux/amd64
|
||||
provenance: false
|
||||
cache_from: type=registry,ref=code.cannabrands.app/creationshop/dispensary-scraper:cache
|
||||
cache_to: type=registry,ref=code.cannabrands.app/creationshop/dispensary-scraper:cache,mode=max
|
||||
build_args:
|
||||
APP_BUILD_VERSION: ${CI_COMMIT_SHA:0:8}
|
||||
APP_GIT_SHA: ${CI_COMMIT_SHA}
|
||||
APP_BUILD_TIME: ${CI_PIPELINE_CREATED}
|
||||
CONTAINER_IMAGE_TAG: ${CI_COMMIT_SHA:0:8}
|
||||
depends_on: []
|
||||
when:
|
||||
branch: master
|
||||
event: push
|
||||
|
||||
docker-cannaiq:
|
||||
image: woodpeckerci/plugin-docker-buildx
|
||||
settings:
|
||||
registry: code.cannabrands.app
|
||||
repo: code.cannabrands.app/creationshop/cannaiq-frontend
|
||||
tags:
|
||||
- latest
|
||||
- ${CI_COMMIT_SHA:0:8}
|
||||
dockerfile: cannaiq/Dockerfile
|
||||
context: cannaiq
|
||||
username:
|
||||
from_secret: registry_username
|
||||
password:
|
||||
from_secret: registry_password
|
||||
platforms: linux/amd64
|
||||
provenance: false
|
||||
cache_from: type=registry,ref=code.cannabrands.app/creationshop/cannaiq-frontend:cache
|
||||
cache_to: type=registry,ref=code.cannabrands.app/creationshop/cannaiq-frontend:cache,mode=max
|
||||
depends_on: []
|
||||
when:
|
||||
branch: master
|
||||
event: push
|
||||
|
||||
docker-findadispo:
|
||||
image: woodpeckerci/plugin-docker-buildx
|
||||
settings:
|
||||
registry: code.cannabrands.app
|
||||
repo: code.cannabrands.app/creationshop/findadispo-frontend
|
||||
tags:
|
||||
- latest
|
||||
- ${CI_COMMIT_SHA:0:8}
|
||||
dockerfile: findadispo/frontend/Dockerfile
|
||||
context: findadispo/frontend
|
||||
username:
|
||||
from_secret: registry_username
|
||||
password:
|
||||
from_secret: registry_password
|
||||
platforms: linux/amd64
|
||||
provenance: false
|
||||
cache_from: type=registry,ref=code.cannabrands.app/creationshop/findadispo-frontend:cache
|
||||
cache_to: type=registry,ref=code.cannabrands.app/creationshop/findadispo-frontend:cache,mode=max
|
||||
depends_on: []
|
||||
when:
|
||||
branch: master
|
||||
event: push
|
||||
|
||||
docker-findagram:
|
||||
image: woodpeckerci/plugin-docker-buildx
|
||||
settings:
|
||||
registry: code.cannabrands.app
|
||||
repo: code.cannabrands.app/creationshop/findagram-frontend
|
||||
tags:
|
||||
- latest
|
||||
- ${CI_COMMIT_SHA:0:8}
|
||||
dockerfile: findagram/frontend/Dockerfile
|
||||
context: findagram/frontend
|
||||
username:
|
||||
from_secret: registry_username
|
||||
password:
|
||||
from_secret: registry_password
|
||||
platforms: linux/amd64
|
||||
provenance: false
|
||||
cache_from: type=registry,ref=code.cannabrands.app/creationshop/findagram-frontend:cache
|
||||
cache_to: type=registry,ref=code.cannabrands.app/creationshop/findagram-frontend:cache,mode=max
|
||||
depends_on: []
|
||||
when:
|
||||
branch: master
|
||||
event: push
|
||||
|
||||
# ===========================================
|
||||
# STAGE 3: Deploy and Run Migrations
|
||||
# ===========================================
|
||||
deploy:
|
||||
image: bitnami/kubectl:latest
|
||||
environment:
|
||||
KUBECONFIG_CONTENT:
|
||||
from_secret: kubeconfig_data
|
||||
commands:
|
||||
- mkdir -p ~/.kube
|
||||
- echo "$KUBECONFIG_CONTENT" | tr -d '[:space:]' | base64 -d > ~/.kube/config
|
||||
- chmod 600 ~/.kube/config
|
||||
# Deploy backend first
|
||||
- kubectl set image deployment/scraper scraper=code.cannabrands.app/creationshop/dispensary-scraper:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
|
||||
- kubectl rollout status deployment/scraper -n dispensary-scraper --timeout=300s
|
||||
# Note: Migrations run automatically at startup via auto-migrate
|
||||
# Deploy remaining services
|
||||
# Resilience: ensure workers are scaled up if at 0
|
||||
- REPLICAS=$(kubectl get deployment scraper-worker -n dispensary-scraper -o jsonpath='{.spec.replicas}'); if [ "$REPLICAS" = "0" ]; then echo "Scaling workers from 0 to 5"; kubectl scale deployment/scraper-worker --replicas=5 -n dispensary-scraper; fi
|
||||
- kubectl set image deployment/scraper-worker worker=code.cannabrands.app/creationshop/dispensary-scraper:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
|
||||
- kubectl set image deployment/cannaiq-frontend cannaiq-frontend=code.cannabrands.app/creationshop/cannaiq-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
|
||||
- kubectl set image deployment/findadispo-frontend findadispo-frontend=code.cannabrands.app/creationshop/findadispo-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
|
||||
- kubectl set image deployment/findagram-frontend findagram-frontend=code.cannabrands.app/creationshop/findagram-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
|
||||
- kubectl rollout status deployment/cannaiq-frontend -n dispensary-scraper --timeout=120s
|
||||
depends_on:
|
||||
- docker-backend
|
||||
- docker-cannaiq
|
||||
- docker-findadispo
|
||||
- docker-findagram
|
||||
when:
|
||||
branch: master
|
||||
event: push
|
||||
300
CLAUDE.md
300
CLAUDE.md
@@ -1,8 +1,5 @@
|
||||
# Claude Guidelines for CannaiQ
|
||||
|
||||
## CURRENT ENVIRONMENT: PRODUCTION
|
||||
**We are working in PRODUCTION only.** All database queries and API calls should target the remote production environment, not localhost. Use kubectl port-forward or remote DB connections as needed.
|
||||
|
||||
## PERMANENT RULES (NEVER VIOLATE)
|
||||
|
||||
### 1. NO DELETE
|
||||
@@ -20,72 +17,6 @@ Never deploy unless user explicitly says: "CLAUDE — DEPLOYMENT IS NOW AUTHORIZ
|
||||
### 5. DB POOL ONLY
|
||||
Never import `src/db/migrate.ts` at runtime. Use `src/db/pool.ts` for DB access.
|
||||
|
||||
### 6. CI/CD DEPLOYMENT — BATCH CHANGES, PUSH ONCE
|
||||
**Never manually deploy or check deployment status.** The project uses Woodpecker CI.
|
||||
|
||||
**CRITICAL: Each CI build takes 30 minutes. NEVER push incrementally.**
|
||||
|
||||
**Workflow:**
|
||||
1. Make ALL related code changes first
|
||||
2. Test locally if possible (./setup-local.sh)
|
||||
3. ONE commit with all changes
|
||||
4. ONE push to master
|
||||
5. **STOP** - CI handles the rest
|
||||
6. Wait for user to confirm deployment worked
|
||||
|
||||
**DO NOT:**
|
||||
- Push multiple small commits (each triggers 30-min build)
|
||||
- Run `kubectl rollout status` to check deployment
|
||||
- Run `kubectl logs` to verify new code is running
|
||||
- Manually restart pods
|
||||
- Check CI pipeline status
|
||||
|
||||
Batch everything, push once, wait for user feedback.
|
||||
|
||||
### 7. K8S — DEPLOY AND FORGET
|
||||
**DO NOT run kubectl commands.** The system is self-managing.
|
||||
|
||||
**Operational Model:**
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────┐
|
||||
│ DEPLOY ONCE → WORKERS RUN FOREVER → CREATE TASKS ONLY │
|
||||
└─────────────────────────────────────────────────────────┘
|
||||
|
||||
1. CI deploys code changes (automatic on push)
|
||||
2. K8s maintains 8 pods (self-healing)
|
||||
3. Workers poll DB for tasks (autonomous)
|
||||
4. Create tasks via API or DB → workers pick them up
|
||||
5. Never touch K8s directly
|
||||
```
|
||||
|
||||
**Fixed Configuration (NEVER CHANGE):**
|
||||
- **8 replicas** — locked in `k8s/scraper-worker.yaml`
|
||||
- **MAX_CONCURRENT_TASKS=3** — 3 browsers per pod (memory safe)
|
||||
- **Total capacity:** 8 pods × 3 = 24 concurrent tasks
|
||||
|
||||
**DO NOT:**
|
||||
- Run `kubectl` commands (scale, rollout, logs, get pods, etc.)
|
||||
- Manually restart pods
|
||||
- Change replica count
|
||||
- Check deployment status
|
||||
|
||||
**To interact with the system:**
|
||||
- Create tasks in DB → workers pick them up automatically
|
||||
- Check task status via DB queries or API
|
||||
- View worker status via dashboard (cannaiq.co)
|
||||
|
||||
**Why no kubectl?**
|
||||
- K8s auto-restarts crashed pods
|
||||
- Workers self-heal (reconnect to DB, retry failed tasks)
|
||||
- No manual intervention needed in steady state
|
||||
- Only CI touches K8s (on code deployments)
|
||||
|
||||
**Scaling Decision:**
|
||||
- Monitor pool drain rate via dashboard/DB queries
|
||||
- If pool drains too slowly, manually increase replicas in `k8s/scraper-worker.yaml`
|
||||
- Commit + push → CI deploys new replica count
|
||||
- No runtime kubectl scaling — all changes via code
|
||||
|
||||
---
|
||||
|
||||
## Quick Reference
|
||||
@@ -272,216 +203,57 @@ All other browsers are filtered out. Uses `intoli/user-agents` library for reali
|
||||
|
||||
These binaries mimic real browser TLS fingerprints to avoid detection.
|
||||
|
||||
### Evomi Residential Proxy API
|
||||
|
||||
Workers use Evomi's residential proxy API for geo-targeted proxies on-demand.
|
||||
|
||||
**Priority Order**:
|
||||
1. Evomi API (if EVOMI_USER/EVOMI_PASS configured)
|
||||
2. DB proxies (fallback if Evomi not configured)
|
||||
|
||||
**Environment Variables**:
|
||||
| Variable | Description | Default |
|
||||
|----------|-------------|---------|
|
||||
| `EVOMI_USER` | API username | - |
|
||||
| `EVOMI_PASS` | API key | - |
|
||||
| `EVOMI_HOST` | Proxy host | `rpc.evomi.com` |
|
||||
| `EVOMI_PORT` | Proxy port | `1000` |
|
||||
|
||||
**K8s Secret**: Credentials stored in `scraper-secrets`:
|
||||
```bash
|
||||
kubectl get secret scraper-secrets -n cannaiq -o jsonpath='{.data.EVOMI_PASS}' | base64 -d
|
||||
```
|
||||
|
||||
**Proxy URL Format**: `http://{user}_{session}_{geo}:{pass}@{host}:{port}`
|
||||
- `session`: Worker ID for sticky sessions
|
||||
- `geo`: State code (e.g., `arizona`, `california`)
|
||||
|
||||
**Files**:
|
||||
- `src/services/crawl-rotator.ts` - `getEvomiConfig()`, `buildEvomiProxyUrl()`
|
||||
- `src/tasks/task-worker.ts` - Proxy initialization order
|
||||
|
||||
---
|
||||
|
||||
## Bulk Task Workflow (Updated 2025-12-13)
|
||||
## Worker Architecture (Kubernetes)
|
||||
|
||||
### Overview
|
||||
Tasks are created with `scheduled_for = NOW()` by default. Worker-level controls handle pacing - no task-level staggering needed.
|
||||
### Persistent Workers (StatefulSet)
|
||||
|
||||
### How It Works
|
||||
```
|
||||
1. Task created with scheduled_for = NOW()
|
||||
2. Worker claims task only when scheduled_for <= NOW()
|
||||
3. Worker runs preflight on EVERY task claim (proxy health check)
|
||||
4. If preflight passes, worker executes task
|
||||
5. If preflight fails, task released back to pending for another worker
|
||||
6. Worker finishes task, polls for next available task
|
||||
7. Repeat - preflight runs on each new task claim
|
||||
```
|
||||
Workers run as a **StatefulSet** with 8 persistent pods. They maintain identity across restarts.
|
||||
|
||||
### Worker-Level Throttling
|
||||
These controls pace task execution - no staggering at task creation time:
|
||||
**Pod Names**: `scraper-worker-0` through `scraper-worker-7`
|
||||
|
||||
| Control | Purpose |
|
||||
|---------|---------|
|
||||
| `MAX_CONCURRENT_TASKS` | Limits concurrent tasks per pod (default: 3) |
|
||||
| Working hours | Restricts when tasks run (configurable per schedule) |
|
||||
| Preflight checks | Ensures proxy health before each task |
|
||||
| Per-store locking | Only one active task per dispensary |
|
||||
**Key Properties**:
|
||||
- `updateStrategy: OnDelete` - Pods only update when manually deleted (no automatic restarts)
|
||||
- `podManagementPolicy: Parallel` - All pods start simultaneously
|
||||
- Workers register with their pod name as identity
|
||||
|
||||
### Key Points
|
||||
- **Preflight is per-task, not per-startup**: Each task claim triggers a new preflight check
|
||||
- **Worker controls pacing**: Tasks scheduled for NOW() but claimed based on worker capacity
|
||||
- **Optional staggering**: Pass `stagger_seconds > 0` if you need explicit delays
|
||||
**K8s Manifest**: `backend/k8s/scraper-worker-statefulset.yaml`
|
||||
|
||||
### API Endpoints
|
||||
```bash
|
||||
# Create bulk tasks for specific dispensary IDs
|
||||
POST /api/tasks/batch/staggered
|
||||
{
|
||||
"dispensary_ids": [1, 2, 3, 4],
|
||||
"role": "product_refresh", # or "product_discovery"
|
||||
"stagger_seconds": 0, # default: 0 (all NOW)
|
||||
"platform": "dutchie", # default: "dutchie"
|
||||
"method": null # "curl" | "http" | null
|
||||
}
|
||||
### Worker Lifecycle
|
||||
|
||||
# Create bulk tasks for all stores in a state
|
||||
POST /api/tasks/crawl-state/:stateCode
|
||||
{
|
||||
"stagger_seconds": 0, # default: 0 (all NOW)
|
||||
"method": "http" # default: "http"
|
||||
}
|
||||
```
|
||||
1. **Startup**: Worker registers in `worker_registry` table with pod name
|
||||
2. **Preflight**: Runs dual-transport preflights (curl + http), reports IPs and fingerprint
|
||||
3. **Task Loop**: Polls for tasks, executes them, reports status
|
||||
4. **Shutdown**: Graceful 60-second termination period
|
||||
|
||||
### Example: Tasks for AZ Stores
|
||||
```bash
|
||||
curl -X POST http://localhost:3010/api/tasks/crawl-state/AZ \
|
||||
-H "Content-Type: application/json"
|
||||
```
|
||||
### NEVER Restart Workers Unnecessarily
|
||||
|
||||
### Related Files
|
||||
| File | Purpose |
|
||||
|------|---------|
|
||||
| `src/tasks/task-service.ts` | `createStaggeredTasks()` method |
|
||||
| `src/routes/tasks.ts` | API endpoints for batch task creation |
|
||||
| `src/tasks/task-worker.ts` | Worker task claiming and preflight logic |
|
||||
**Claude must NOT**:
|
||||
- Restart workers unless explicitly requested
|
||||
- Use `kubectl rollout restart` on workers
|
||||
- Use `kubectl set image` on workers (this triggers restart)
|
||||
|
||||
---
|
||||
**To update worker code** (only when user authorizes):
|
||||
1. Build and push new image with version tag
|
||||
2. Update StatefulSet image reference
|
||||
3. Manually delete pods one at a time when ready: `kubectl delete pod scraper-worker-0 -n dispensary-scraper`
|
||||
|
||||
## Wasabi S3 Storage (Payload Archive)
|
||||
### Worker Registry API
|
||||
|
||||
Raw crawl payloads are archived to Wasabi S3 for long-term storage and potential reprocessing.
|
||||
**Endpoint**: `GET /api/worker-registry/workers`
|
||||
|
||||
### Configuration
|
||||
|
||||
| Variable | Description | Default |
|
||||
|----------|-------------|---------|
|
||||
| `WASABI_ACCESS_KEY` | Wasabi access key ID | - |
|
||||
| `WASABI_SECRET_KEY` | Wasabi secret access key | - |
|
||||
| `WASABI_BUCKET` | Bucket name | `cannaiq` |
|
||||
| `WASABI_REGION` | Wasabi region | `us-west-2` |
|
||||
| `WASABI_ENDPOINT` | S3 endpoint URL | `https://s3.us-west-2.wasabisys.com` |
|
||||
|
||||
### Storage Path Format
|
||||
```
|
||||
payloads/{state}/{YYYY-MM-DD}/{dispensary_id}/{platform}_{timestamp}.json.gz
|
||||
```
|
||||
|
||||
Example: `payloads/AZ/2025-12-16/123/dutchie_2025-12-16T10-30-00-000Z.json.gz`
|
||||
|
||||
### Features
|
||||
- **Gzip compression**: ~70% size reduction on JSON payloads
|
||||
- **Automatic archival**: Every crawl is archived (not just daily baselines)
|
||||
- **Metadata**: taskId, productCount, platform stored with each object
|
||||
- **Graceful fallback**: If Wasabi not configured, archival is skipped (no task failure)
|
||||
|
||||
### Files
|
||||
| File | Purpose |
|
||||
|------|---------|
|
||||
| `src/services/wasabi-storage.ts` | S3 client and storage functions |
|
||||
| `src/tasks/handlers/product-discovery-dutchie.ts` | Archives Dutchie payloads |
|
||||
| `src/tasks/handlers/product-discovery-jane.ts` | Archives Jane payloads |
|
||||
| `src/tasks/handlers/product-discovery-treez.ts` | Archives Treez payloads |
|
||||
|
||||
### K8s Secret Setup
|
||||
```bash
|
||||
kubectl patch secret scraper-secrets -n cannaiq -p '{"stringData":{
|
||||
"WASABI_ACCESS_KEY": "<access-key>",
|
||||
"WASABI_SECRET_KEY": "<secret-key>"
|
||||
}}'
|
||||
```
|
||||
|
||||
### Usage in Code
|
||||
```typescript
|
||||
import { storePayload, getPayload, listPayloads } from '../services/wasabi-storage';
|
||||
|
||||
// Store a payload
|
||||
const result = await storePayload(dispensaryId, 'AZ', 'dutchie', rawPayload);
|
||||
console.log(result.path); // payloads/AZ/2025-12-16/123/dutchie_...
|
||||
console.log(result.compressedBytes); // Size after gzip
|
||||
|
||||
// Retrieve a payload
|
||||
const payload = await getPayload(result.path);
|
||||
|
||||
// List payloads for a store on a date
|
||||
const paths = await listPayloads(123, 'AZ', '2025-12-16');
|
||||
```
|
||||
|
||||
### Estimated Storage
|
||||
- ~100KB per crawl (compressed)
|
||||
- ~200 stores × 12 crawls/day = 240MB/day
|
||||
- ~7.2GB/month
|
||||
- 5TB capacity = ~57 years of storage at this rate (5,000GB ÷ 7.2GB/month ≈ 694 months)
|
||||
|
||||
---
|
||||
|
||||
## Real-Time Inventory Tracking
|
||||
|
||||
High-frequency crawling for sales velocity and inventory analytics.
|
||||
|
||||
### Crawl Intervals
|
||||
|
||||
| State | Interval | Jitter | Effective Range |
|
||||
|-------|----------|--------|-----------------|
|
||||
| AZ | 5 min | ±3 min | 2-8 min |
|
||||
| Others | 60 min | ±3 min | 57-63 min |
|
||||
|
||||
### Delta-Only Snapshots
|
||||
|
||||
Only store inventory changes, not full state. Reduces storage by ~95%.
|
||||
|
||||
**Change Types**:
|
||||
- `sale`: quantity decreased (qty_delta < 0)
|
||||
- `restock`: quantity increased (qty_delta > 0)
|
||||
- `price_change`: price changed, quantity same
|
||||
- `oos`: went out of stock (qty → 0)
|
||||
- `back_in_stock`: returned to stock (0 → qty)
|
||||
- `new_product`: first time seeing product
|
||||
|
||||
### Revenue Calculation
|
||||
```
|
||||
revenue = ABS(qty_delta) × effective_price
|
||||
effective_price = sale_price if on_special else regular_price
|
||||
```
|
||||
|
||||
### Key Views
|
||||
| View | Purpose |
|
||||
|------|---------|
|
||||
| `v_hourly_sales` | Sales aggregated by hour |
|
||||
| `v_daily_store_sales` | Daily revenue by store |
|
||||
| `v_daily_brand_sales` | Daily brand performance |
|
||||
| `v_product_velocity` | Hot/steady/slow/stale rankings |
|
||||
| `v_stock_out_prediction` | Days until OOS based on velocity |
|
||||
| `v_brand_variants` | SKU counts per brand |
|
||||
|
||||
### Files
|
||||
| File | Purpose |
|
||||
|------|---------|
|
||||
| `src/services/inventory-snapshots.ts` | Delta calculation and storage |
|
||||
| `src/services/task-scheduler.ts` | High-frequency scheduling with jitter |
|
||||
| `migrations/125_delta_only_snapshots.sql` | Delta columns and views |
|
||||
| `migrations/126_az_high_frequency.sql` | AZ 5-min intervals |
|
||||
**Response Fields**:
|
||||
| Field | Description |
|
||||
|-------|-------------|
|
||||
| `pod_name` | Kubernetes pod name |
|
||||
| `worker_id` | Internal worker UUID |
|
||||
| `status` | active, idle, offline |
|
||||
| `curl_ip` | IP from curl preflight |
|
||||
| `http_ip` | IP from Puppeteer preflight |
|
||||
| `preflight_status` | pending, passed, failed |
|
||||
| `preflight_at` | Timestamp of last preflight |
|
||||
| `fingerprint_data` | Browser fingerprint JSON |
|
||||
|
||||
---
|
||||
|
||||
|
||||
@@ -1,33 +1,17 @@
|
||||
# Build stage
|
||||
# Image: git.spdy.io/creationshop/dispensary-scraper
|
||||
FROM node:22-slim AS builder
|
||||
|
||||
# Install build tools for native modules (bcrypt, sharp)
|
||||
RUN apt-get update && apt-get install -y \
|
||||
python3 \
|
||||
build-essential \
|
||||
--no-install-recommends \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
# Image: code.cannabrands.app/creationshop/dispensary-scraper
|
||||
FROM code.cannabrands.app/creationshop/node:20-slim AS builder
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
COPY package*.json ./
|
||||
|
||||
# Install dependencies with retry and fallback registry
|
||||
RUN npm config set fetch-retries 3 && \
|
||||
npm config set fetch-retry-mintimeout 20000 && \
|
||||
npm config set fetch-retry-maxtimeout 120000 && \
|
||||
npm install || \
|
||||
(npm config set registry https://registry.npmmirror.com && npm install)
|
||||
RUN npm install
|
||||
|
||||
COPY . .
|
||||
RUN npm run build
|
||||
|
||||
# Prune dev dependencies for smaller production image
|
||||
RUN npm prune --production
|
||||
|
||||
# Production stage
|
||||
FROM node:22-slim
|
||||
FROM code.cannabrands.app/creationshop/node:20-slim
|
||||
|
||||
# Build arguments for version info
|
||||
ARG APP_BUILD_VERSION=dev
|
||||
@@ -60,7 +44,8 @@ ENV PUPPETEER_EXECUTABLE_PATH=/usr/bin/chromium
|
||||
WORKDIR /app
|
||||
|
||||
COPY package*.json ./
|
||||
COPY --from=builder /app/node_modules ./node_modules
|
||||
RUN npm install --omit=dev
|
||||
|
||||
COPY --from=builder /app/dist ./dist
|
||||
|
||||
# Copy migrations for auto-migrate on startup
|
||||
|
||||
175
backend/docs/API_SECURITY.md
Normal file
175
backend/docs/API_SECURITY.md
Normal file
@@ -0,0 +1,175 @@
|
||||
# API Security Documentation
|
||||
|
||||
This document describes the authentication and authorization configuration for all CannaiQ API endpoints.
|
||||
|
||||
## Authentication Methods
|
||||
|
||||
### 1. Trusted Origins (No Token Required)
|
||||
|
||||
Requests from trusted sources are automatically authenticated with the `internal` role:
|
||||
|
||||
**Trusted IPs:**
|
||||
- `127.0.0.1` (localhost IPv4)
|
||||
- `::1` (localhost IPv6)
|
||||
- `::ffff:127.0.0.1` (IPv4-mapped IPv6)
|
||||
|
||||
**Trusted Domains:**
|
||||
- `https://cannaiq.co`
|
||||
- `https://www.cannaiq.co`
|
||||
- `https://findadispo.com`
|
||||
- `https://www.findadispo.com`
|
||||
- `https://findagram.co`
|
||||
- `https://www.findagram.co`
|
||||
- `http://localhost:3010`
|
||||
- `http://localhost:8080`
|
||||
- `http://localhost:5173`
|
||||
|
||||
**Trusted Patterns:**
|
||||
- `*.cannabrands.app`
|
||||
- `*.cannaiq.co`
|
||||
|
||||
**Internal Header:**
|
||||
- `X-Internal-Request` header matching `INTERNAL_REQUEST_SECRET` env var
|
||||
|
||||
### 2. Bearer Token Authentication
|
||||
|
||||
External requests must include a valid token:
|
||||
|
||||
```
|
||||
Authorization: Bearer <token>
|
||||
```
|
||||
|
||||
**Token Types:**
|
||||
- **JWT Token**: User session tokens (7-day expiry)
|
||||
- **API Token**: Long-lived tokens for integrations (stored in `api_tokens` table)
|
||||
|
||||
## Authorization Levels
|
||||
|
||||
### Public (No Auth)
|
||||
Routes accessible without authentication:
|
||||
- `GET /health` - Health check
|
||||
- `GET /api/health/*` - Comprehensive health endpoints
|
||||
- `GET /outbound-ip` - Server's outbound IP
|
||||
- `GET /api/v1/deals` - Public deals endpoint
|
||||
|
||||
### Authenticated (Trusted Origin or Token)
|
||||
Routes requiring authentication but no specific role:
|
||||
|
||||
| Route | Description |
|
||||
|-------|-------------|
|
||||
| `/api/payloads/*` | Raw crawl payload access |
|
||||
| `/api/workers/*` | Worker monitoring |
|
||||
| `/api/worker-registry/*` | Worker registration and heartbeats |
|
||||
| `/api/stores/*` | Store CRUD |
|
||||
| `/api/products/*` | Product listing |
|
||||
| `/api/dispensaries/*` | Dispensary data |
|
||||
|
||||
### Admin Only (Requires `admin` or `superadmin` role)
|
||||
Routes restricted to administrators:
|
||||
|
||||
| Route | Description |
|
||||
|-------|-------------|
|
||||
| `/api/job-queue/*` | Job queue management |
|
||||
| `/api/k8s/*` | Kubernetes control (scaling) |
|
||||
| `/api/pipeline/*` | Pipeline stage transitions |
|
||||
| `/api/tasks/*` | Task queue management |
|
||||
| `/api/admin/orchestrator/*` | Orchestrator dashboard |
|
||||
| `/api/admin/trusted-origins/*` | Manage trusted origins |
|
||||
| `/api/admin/debug/*` | Debug endpoints |
|
||||
|
||||
**Note:** The `internal` role (localhost/trusted origins) bypasses role checks, granting automatic admin access for local development and internal services.
|
||||
|
||||
## Endpoint Security Matrix
|
||||
|
||||
| Endpoint Group | Auth Required | Role Required | Notes |
|
||||
|----------------|---------------|---------------|-------|
|
||||
| `/api/payloads/*` | Yes | None | Query API for raw crawl data |
|
||||
| `/api/job-queue/*` | Yes | admin | Legacy job queue (deprecated) |
|
||||
| `/api/workers/*` | Yes | None | Worker status monitoring |
|
||||
| `/api/worker-registry/*` | Yes | None | Workers register via trusted IPs |
|
||||
| `/api/k8s/*` | Yes | admin | K8s scaling controls |
|
||||
| `/api/pipeline/*` | Yes | admin | Store pipeline transitions |
|
||||
| `/api/tasks/*` | Yes | admin | Task queue CRUD |
|
||||
| `/api/admin/orchestrator/*` | Yes | admin | Orchestrator metrics/alerts |
|
||||
| `/api/admin/trusted-origins/*` | Yes | admin | Auth bypass management |
|
||||
| `/api/v1/*` | Varies | Varies | Public API (per-endpoint) |
|
||||
| `/api/consumer/*` | Varies | Varies | Consumer features |
|
||||
|
||||
## Implementation Details
|
||||
|
||||
### Middleware Stack
|
||||
|
||||
```typescript
|
||||
// Authentication middleware - validates token or trusted origin
|
||||
import { authMiddleware } from '../auth/middleware';
|
||||
|
||||
// Role requirement middleware - checks user role
|
||||
import { requireRole } from '../auth/middleware';
|
||||
|
||||
// Usage in route files:
|
||||
router.use(authMiddleware); // All routes need auth
|
||||
router.use(requireRole('admin', 'superadmin')); // Admin-only routes
|
||||
```
|
||||
|
||||
### Auth Middleware Flow
|
||||
|
||||
```
|
||||
Request → Check Bearer Token
|
||||
├─ Valid JWT → Set user from token → Continue
|
||||
├─ Valid API Token → Set user as api_token role → Continue
|
||||
└─ No Token → Check Trusted Origin
|
||||
├─ Trusted → Set user as internal role → Continue
|
||||
└─ Not Trusted → 401 Unauthorized
|
||||
```
|
||||
|
||||
### Role Check Flow
|
||||
|
||||
```
|
||||
Request → authMiddleware → requireRole('admin', 'superadmin')
|
||||
├─ role === 'internal' → Continue (bypass)
|
||||
├─ role in ['admin', 'superadmin'] → Continue
|
||||
└─ else → 403 Forbidden
|
||||
```
|
||||
|
||||
## Worker Pod Authentication
|
||||
|
||||
Worker pods (in Kubernetes) authenticate via:
|
||||
|
||||
1. **Internal IP**: Pods communicate via cluster IPs, which are trusted
|
||||
2. **Internal Header**: Optional `X-Internal-Request` header for explicit trust
|
||||
|
||||
Endpoints used by workers:
|
||||
- `POST /api/worker-registry/register` - Report for duty
|
||||
- `POST /api/worker-registry/heartbeat` - Stay alive
|
||||
- `POST /api/worker-registry/deregister` - Graceful shutdown
|
||||
- `POST /api/worker-registry/task-completed` - Report task completion
|
||||
|
||||
## API Token Management
|
||||
|
||||
API tokens are managed via:
|
||||
- `GET /api/api-tokens` - List tokens
|
||||
- `POST /api/api-tokens` - Create token
|
||||
- `DELETE /api/api-tokens/:id` - Revoke token
|
||||
|
||||
Token properties:
|
||||
- `token`: The bearer token value
|
||||
- `name`: Human-readable identifier
|
||||
- `rate_limit`: Requests per minute
|
||||
- `expires_at`: Optional expiration
|
||||
- `active`: Enable/disable toggle
|
||||
- `allowed_endpoints`: Optional endpoint restrictions
|
||||
|
||||
## Security Best Practices
|
||||
|
||||
1. **Never expose tokens in URLs** - Use Authorization header
|
||||
2. **Use HTTPS in production** - All traffic encrypted
|
||||
3. **Rotate API tokens periodically** - Set expiration dates
|
||||
4. **Monitor rate limits** - Prevent abuse
|
||||
5. **Audit access logs** - Track API usage via `api_usage_logs` table
|
||||
|
||||
## Related Files
|
||||
|
||||
- `src/auth/middleware.ts` - Auth middleware implementation
|
||||
- `src/routes/api-tokens.ts` - Token management endpoints
|
||||
- `src/middleware/apiTokenTracker.ts` - Usage tracking
|
||||
- `src/middleware/trustedDomains.ts` - Domain trust markers
|
||||
@@ -99,60 +99,10 @@ src/scraper-v2/*.ts # Entire directory deprecated
|
||||
|------|---------|--------|
|
||||
| `src/tasks/handlers/payload-fetch.ts` | Fetch products from Dutchie | **PRIMARY** |
|
||||
| `src/tasks/handlers/product-refresh.ts` | Process payload into DB | **PRIMARY** |
|
||||
| `src/tasks/handlers/entry-point-discovery.ts` | Resolve platform IDs (auto-healing) | **PRIMARY** |
|
||||
| `src/tasks/handlers/menu-detection.ts` | Detect menu type | ACTIVE |
|
||||
| `src/tasks/handlers/id-resolution.ts` | Resolve platform IDs (legacy) | LEGACY |
|
||||
| `src/tasks/handlers/id-resolution.ts` | Resolve platform IDs | ACTIVE |
|
||||
| `src/tasks/handlers/image-download.ts` | Download product images | ACTIVE |
|
||||
|
||||
---
|
||||
|
||||
## Transport Rules (CRITICAL)
|
||||
|
||||
**Browser-based (Puppeteer) is the DEFAULT transport. curl is ONLY allowed when explicitly specified.**
|
||||
|
||||
### Transport Selection
|
||||
| `task.method` | Transport Used | Notes |
|
||||
|---------------|----------------|-------|
|
||||
| `null` | Browser (Puppeteer) | DEFAULT - use this for most tasks |
|
||||
| `'http'` | Browser (Puppeteer) | Explicit browser request |
|
||||
| `'curl'` | curl-impersonate | ONLY when explicitly needed |
|
||||
|
||||
### Why Browser-First?
|
||||
1. **Anti-detection**: Puppeteer with StealthPlugin evades bot detection
|
||||
2. **Session cookies**: Browser maintains session state automatically
|
||||
3. **Fingerprinting**: Real browser fingerprint (TLS, headers, etc.)
|
||||
4. **Age gates**: Browser can click through age verification
|
||||
|
||||
### Entry Point Discovery Auto-Healing
|
||||
The `entry_point_discovery` handler uses a healing strategy:
|
||||
|
||||
```
|
||||
1. FIRST: Check dutchie_discovery_locations for existing platform_location_id
|
||||
- By linked dutchie_discovery_id
|
||||
- By slug match in discovery data
|
||||
→ If found, NO network call needed
|
||||
|
||||
2. SECOND: Browser-based GraphQL (Puppeteer)
|
||||
- 5x retries for network/proxy failures
|
||||
- On HTTP 403: rotate proxy and retry
|
||||
- On HTTP 404 after 2 attempts: mark as 'removed'
|
||||
|
||||
3. HARD FAILURE: After exhausting options → 'needs_investigation'
|
||||
```
|
||||
|
||||
### DO NOT Use curl Unless:
|
||||
- Task explicitly has `method = 'curl'`
|
||||
- You're testing curl-impersonate binaries
|
||||
- The API explicitly requires curl fingerprinting
|
||||
|
||||
### Files
|
||||
| File | Transport | Purpose |
|
||||
|------|-----------|---------|
|
||||
| `src/services/puppeteer-preflight.ts` | Browser | Preflight check |
|
||||
| `src/services/curl-preflight.ts` | curl | Preflight check |
|
||||
| `src/tasks/handlers/entry-point-discovery.ts` | Browser | Platform ID resolution |
|
||||
| `src/tasks/handlers/payload-fetch.ts` | Both | Product fetching |
|
||||
|
||||
### Database
|
||||
| File | Purpose | Status |
|
||||
|------|---------|--------|
|
||||
|
||||
@@ -1,343 +0,0 @@
|
||||
# CannaiQ Query API
|
||||
|
||||
Query raw crawl payload data with flexible filters, sorting, and aggregation.
|
||||
|
||||
## Base URL
|
||||
|
||||
```
|
||||
https://cannaiq.co/api/payloads
|
||||
```
|
||||
|
||||
## Authentication
|
||||
|
||||
Include your API key in the header:
|
||||
```
|
||||
X-API-Key: your-api-key
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Endpoints
|
||||
|
||||
### 1. Query Products
|
||||
|
||||
Filter and search products from a store's latest crawl data.
|
||||
|
||||
```
|
||||
GET /api/payloads/store/{dispensaryId}/query
|
||||
```
|
||||
|
||||
#### Query Parameters
|
||||
|
||||
| Parameter | Type | Description |
|
||||
|-----------|------|-------------|
|
||||
| `brand` | string | Filter by brand name (partial match) |
|
||||
| `category` | string | Filter by category (flower, vape, edible, etc.) |
|
||||
| `subcategory` | string | Filter by subcategory |
|
||||
| `strain_type` | string | Filter by strain (indica, sativa, hybrid, cbd) |
|
||||
| `in_stock` | boolean | Filter by stock status (true/false) |
|
||||
| `price_min` | number | Minimum price |
|
||||
| `price_max` | number | Maximum price |
|
||||
| `thc_min` | number | Minimum THC percentage |
|
||||
| `thc_max` | number | Maximum THC percentage |
|
||||
| `search` | string | Search product name (partial match) |
|
||||
| `fields` | string | Comma-separated fields to return |
|
||||
| `limit` | number | Max results (default 100, max 1000) |
|
||||
| `offset` | number | Skip results for pagination |
|
||||
| `sort` | string | Sort by: name, price, thc, brand |
|
||||
| `order` | string | Sort order: asc, desc |
|
||||
|
||||
#### Available Fields
|
||||
|
||||
When using `fields` parameter, you can request:
|
||||
- `id` - Product ID
|
||||
- `name` - Product name
|
||||
- `brand` - Brand name
|
||||
- `category` - Product category
|
||||
- `subcategory` - Product subcategory
|
||||
- `strain_type` - Indica/Sativa/Hybrid/CBD
|
||||
- `price` - Current price
|
||||
- `price_med` - Medical price
|
||||
- `price_rec` - Recreational price
|
||||
- `thc` - THC percentage
|
||||
- `cbd` - CBD percentage
|
||||
- `weight` - Product weight/size
|
||||
- `status` - Stock status
|
||||
- `in_stock` - Boolean in-stock flag
|
||||
- `image_url` - Product image
|
||||
- `description` - Product description
|
||||
|
||||
#### Examples
|
||||
|
||||
**Get all flower products under $40:**
|
||||
```
|
||||
GET /api/payloads/store/112/query?category=flower&price_max=40
|
||||
```
|
||||
|
||||
**Search for "Blue Dream" with high THC:**
|
||||
```
|
||||
GET /api/payloads/store/112/query?search=blue+dream&thc_min=20
|
||||
```
|
||||
|
||||
**Get only name, price, and THC for Alien Labs products:**
|
||||
```
|
||||
GET /api/payloads/store/112/query?brand=Alien+Labs&fields=name,price,thc
|
||||
```
|
||||
|
||||
**Get top 10 highest THC products:**
|
||||
```
|
||||
GET /api/payloads/store/112/query?sort=thc&order=desc&limit=10
|
||||
```
|
||||
|
||||
**Paginate through in-stock products:**
|
||||
```
|
||||
GET /api/payloads/store/112/query?in_stock=true&limit=50&offset=0
|
||||
GET /api/payloads/store/112/query?in_stock=true&limit=50&offset=50
|
||||
```
|
||||
|
||||
#### Response
|
||||
|
||||
```json
|
||||
{
|
||||
"success": true,
|
||||
"dispensaryId": 112,
|
||||
"payloadId": 45,
|
||||
"fetchedAt": "2025-12-11T10:30:00Z",
|
||||
"query": {
|
||||
"filters": {
|
||||
"brand": "Alien Labs",
|
||||
"category": null,
|
||||
"price_max": null
|
||||
},
|
||||
"sort": "price",
|
||||
"order": "asc",
|
||||
"limit": 100,
|
||||
"offset": 0
|
||||
},
|
||||
"pagination": {
|
||||
"total": 15,
|
||||
"returned": 15,
|
||||
"limit": 100,
|
||||
"offset": 0,
|
||||
"has_more": false
|
||||
},
|
||||
"products": [
|
||||
{
|
||||
"id": "507f1f77bcf86cd799439011",
|
||||
"name": "Alien Labs - Baklava 3.5g",
|
||||
"brand": "Alien Labs",
|
||||
"category": "flower",
|
||||
"strain_type": "hybrid",
|
||||
"price": 55,
|
||||
"thc": "28.5",
|
||||
"in_stock": true
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 2. Aggregate Data
|
||||
|
||||
Group products and calculate metrics.
|
||||
|
||||
```
|
||||
GET /api/payloads/store/{dispensaryId}/aggregate
|
||||
```
|
||||
|
||||
#### Query Parameters
|
||||
|
||||
| Parameter | Type | Description |
|
||||
|-----------|------|-------------|
|
||||
| `group_by` | string | **Required.** Field to group by: brand, category, subcategory, strain_type |
|
||||
| `metrics` | string | Comma-separated metrics (default: count) |
|
||||
|
||||
#### Available Metrics
|
||||
|
||||
- `count` - Number of products
|
||||
- `avg_price` - Average price
|
||||
- `min_price` - Lowest price
|
||||
- `max_price` - Highest price
|
||||
- `avg_thc` - Average THC percentage
|
||||
- `in_stock_count` - Number of in-stock products
|
||||
|
||||
#### Examples
|
||||
|
||||
**Count products by brand:**
|
||||
```
|
||||
GET /api/payloads/store/112/aggregate?group_by=brand
|
||||
```
|
||||
|
||||
**Get price stats by category:**
|
||||
```
|
||||
GET /api/payloads/store/112/aggregate?group_by=category&metrics=count,avg_price,min_price,max_price
|
||||
```
|
||||
|
||||
**Get THC averages by strain type:**
|
||||
```
|
||||
GET /api/payloads/store/112/aggregate?group_by=strain_type&metrics=count,avg_thc
|
||||
```
|
||||
|
||||
**Brand analysis with stock info:**
|
||||
```
|
||||
GET /api/payloads/store/112/aggregate?group_by=brand&metrics=count,avg_price,in_stock_count
|
||||
```
|
||||
|
||||
#### Response
|
||||
|
||||
```json
|
||||
{
|
||||
"success": true,
|
||||
"dispensaryId": 112,
|
||||
"payloadId": 45,
|
||||
"fetchedAt": "2025-12-11T10:30:00Z",
|
||||
"groupBy": "brand",
|
||||
"metrics": ["count", "avg_price"],
|
||||
"totalProducts": 450,
|
||||
"groupCount": 85,
|
||||
"aggregations": [
|
||||
{
|
||||
"brand": "Alien Labs",
|
||||
"count": 15,
|
||||
"avg_price": 52.33
|
||||
},
|
||||
{
|
||||
"brand": "Connected",
|
||||
"count": 12,
|
||||
"avg_price": 48.50
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 3. Compare Stores (Price Comparison)
|
||||
|
||||
Query the same data from multiple stores and compare in your app:
|
||||
|
||||
```javascript
|
||||
// Get flower prices from Store A
|
||||
const storeA = await fetch('/api/payloads/store/112/query?category=flower&fields=name,brand,price');
|
||||
|
||||
// Get flower prices from Store B
|
||||
const storeB = await fetch('/api/payloads/store/115/query?category=flower&fields=name,brand,price');
|
||||
|
||||
// Compare in your app
|
||||
const dataA = await storeA.json();
|
||||
const dataB = await storeB.json();
|
||||
|
||||
// Find matching products and compare prices
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 4. Price History
|
||||
|
||||
For historical price data, use the snapshots endpoint:
|
||||
|
||||
```
|
||||
GET /api/v1/products/{productId}/history?days=30
|
||||
```
|
||||
|
||||
Or compare payloads over time:
|
||||
|
||||
```
|
||||
GET /api/payloads/store/{dispensaryId}/diff?from={payloadId1}&to={payloadId2}
|
||||
```
|
||||
|
||||
The diff endpoint shows:
|
||||
- Products added
|
||||
- Products removed
|
||||
- Price changes
|
||||
- Stock changes
|
||||
|
||||
---
|
||||
|
||||
### 5. List Stores
|
||||
|
||||
Get available dispensaries to query:
|
||||
|
||||
```
|
||||
GET /api/stores
|
||||
```
|
||||
|
||||
Returns all stores with their IDs, names, and locations.
|
||||
|
||||
---
|
||||
|
||||
## Use Cases
|
||||
|
||||
### Price Comparison App
|
||||
|
||||
```javascript
|
||||
// 1. Get stores in Arizona
|
||||
const stores = await fetch('/api/stores?state=AZ').then(r => r.json());
|
||||
|
||||
// 2. Query flower prices from each store
|
||||
const prices = await Promise.all(
|
||||
stores.map(store =>
|
||||
fetch(`/api/payloads/store/${store.id}/query?category=flower&fields=name,brand,price`)
|
||||
.then(r => r.json())
|
||||
)
|
||||
);
|
||||
|
||||
// 3. Build comparison matrix in your app
|
||||
```
|
||||
|
||||
### Brand Analytics Dashboard
|
||||
|
||||
```javascript
|
||||
// Get brand presence across stores
|
||||
const brandData = await Promise.all(
|
||||
storeIds.map(id =>
|
||||
fetch(`/api/payloads/store/${id}/aggregate?group_by=brand&metrics=count,avg_price`)
|
||||
.then(r => r.json())
|
||||
)
|
||||
);
|
||||
|
||||
// Aggregate brand presence across all stores
|
||||
```
|
||||
|
||||
### Deal Finder
|
||||
|
||||
```javascript
|
||||
// Find high-THC flower under $30
|
||||
const deals = await fetch(
|
||||
'/api/payloads/store/112/query?category=flower&price_max=30&thc_min=20&in_stock=true&sort=thc&order=desc'
|
||||
).then(r => r.json());
|
||||
```
|
||||
|
||||
### Inventory Tracker
|
||||
|
||||
```javascript
|
||||
// Get products that went out of stock
|
||||
const diff = await fetch('/api/payloads/store/112/diff').then(r => r.json());
|
||||
|
||||
const outOfStock = diff.details.stockChanges.filter(
|
||||
p => p.newStatus !== 'Active'
|
||||
);
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Rate Limits
|
||||
|
||||
- Default: 100 requests/minute per API key
|
||||
- Contact support for higher limits
|
||||
|
||||
## Error Responses
|
||||
|
||||
```json
|
||||
{
|
||||
"success": false,
|
||||
"error": "Error message here"
|
||||
}
|
||||
```
|
||||
|
||||
Common errors:
|
||||
- `404` - Store or payload not found
|
||||
- `400` - Missing required parameter
|
||||
- `401` - Invalid or missing API key
|
||||
- `429` - Rate limit exceeded
|
||||
@@ -504,103 +504,6 @@ The Workers Dashboard shows:
|
||||
| `src/routes/worker-registry.ts:148-195` | Heartbeat endpoint handling |
|
||||
| `cannaiq/src/pages/WorkersDashboard.tsx:233-305` | UI components for resources |
|
||||
|
||||
## Browser Task Memory Limits (Updated 2025-12)
|
||||
|
||||
Browser-based tasks (Puppeteer/Chrome) have strict memory constraints that limit concurrency.
|
||||
|
||||
### Why Browser Tasks Are Different
|
||||
|
||||
Each browser task launches a Chrome process. Unlike I/O-bound API calls, browsers consume significant RAM:
|
||||
|
||||
| Component | RAM Usage |
|
||||
|-----------|-----------|
|
||||
| Node.js runtime | ~150 MB |
|
||||
| Chrome browser (base) | ~200-250 MB |
|
||||
| Dutchie menu page (loaded) | ~100-150 MB |
|
||||
| **Per browser total** | **~350-450 MB** |
|
||||
|
||||
### Memory Math for Pod Limits
|
||||
|
||||
```
|
||||
Pod memory limit: 2 GB (2000 MB)
|
||||
Node.js runtime: -150 MB
|
||||
Safety buffer: -100 MB
|
||||
────────────────────────────────
|
||||
Available for browsers: 1750 MB
|
||||
|
||||
Per browser + page: ~400 MB
|
||||
|
||||
Max browsers: 1750 ÷ 400 = ~4 browsers
|
||||
|
||||
Recommended: 3 browsers (leaves headroom for spikes)
|
||||
```
|
||||
|
||||
### MAX_CONCURRENT_TASKS for Browser Tasks
|
||||
|
||||
| Browsers per Pod | RAM Used | Risk Level |
|
||||
|------------------|----------|------------|
|
||||
| 1 | ~500 MB | Very safe |
|
||||
| 2 | ~900 MB | Safe |
|
||||
| **3** | **~1.3 GB** | **Recommended** |
|
||||
| 4 | ~1.7 GB | Tight (may OOM) |
|
||||
| 5+ | >2 GB | Will OOM crash |
|
||||
|
||||
**CRITICAL**: `MAX_CONCURRENT_TASKS=3` is the maximum safe value for browser tasks with current pod limits.
|
||||
|
||||
### Scaling Strategy
|
||||
|
||||
Scale **horizontally** (more pods) rather than vertically (more concurrency per pod):
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────────────────────┐
|
||||
│ Cluster: 8 pods × 3 browsers = 24 concurrent tasks │
|
||||
│ │
|
||||
│ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │
|
||||
│ │ Pod 0 │ │ Pod 1 │ │ Pod 2 │ │ Pod 3 │ │
|
||||
│ │ 3 browsers │ │ 3 browsers │ │ 3 browsers │ │ 3 browsers │ │
|
||||
│ └─────────────┘ └─────────────┘ └─────────────┘ └─────────────┘ │
|
||||
│ │
|
||||
│ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │
|
||||
│ │ Pod 4 │ │ Pod 5 │ │ Pod 6 │ │ Pod 7 │ │
|
||||
│ │ 3 browsers │ │ 3 browsers │ │ 3 browsers │ │ 3 browsers │ │
|
||||
│ └─────────────┘ └─────────────┘ └─────────────┘ └─────────────┘ │
|
||||
└─────────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
### Browser Lifecycle Per Task
|
||||
|
||||
Each task gets a fresh browser with fresh IP/identity:
|
||||
|
||||
```
|
||||
1. Claim task from queue
|
||||
2. Get fresh proxy from pool
|
||||
3. Launch browser with proxy
|
||||
4. Run preflight (verify IP)
|
||||
5. Execute scrape
|
||||
6. Close browser
|
||||
7. Repeat
|
||||
```
|
||||
|
||||
This ensures:
|
||||
- Fresh IP per task (proxy rotation)
|
||||
- Fresh fingerprint per task (UA rotation)
|
||||
- No cookie/session bleed between tasks
|
||||
- Predictable memory usage
|
||||
|
||||
### Increasing Capacity
|
||||
|
||||
To handle more concurrent tasks:
|
||||
|
||||
1. **Add more pods** (up to the 8-pod limit set in CLAUDE.md)
|
||||
2. **Increase pod memory** (allows 4 browsers per pod):
|
||||
```yaml
|
||||
resources:
|
||||
limits:
|
||||
memory: "2.5Gi" # from 2Gi
|
||||
```
|
||||
|
||||
**DO NOT** simply increase `MAX_CONCURRENT_TASKS` without also increasing pod memory limits.
|
||||
|
||||
## Monitoring
|
||||
|
||||
### Logs
|
||||
|
||||
@@ -2,7 +2,7 @@ apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: scraper-worker
|
||||
namespace: cannaiq
|
||||
namespace: dispensary-scraper
|
||||
labels:
|
||||
app: scraper-worker
|
||||
spec:
|
||||
@@ -17,7 +17,7 @@ apiVersion: apps/v1
|
||||
kind: StatefulSet
|
||||
metadata:
|
||||
name: scraper-worker
|
||||
namespace: cannaiq
|
||||
namespace: dispensary-scraper
|
||||
spec:
|
||||
serviceName: scraper-worker
|
||||
replicas: 8
|
||||
@@ -37,7 +37,7 @@ spec:
|
||||
- name: regcred
|
||||
containers:
|
||||
- name: worker
|
||||
image: git.spdy.io/creationshop/cannaiq:latest
|
||||
image: code.cannabrands.app/creationshop/dispensary-scraper:latest
|
||||
imagePullPolicy: Always
|
||||
command: ["node"]
|
||||
args: ["dist/tasks/task-worker.js"]
|
||||
|
||||
@@ -1,59 +0,0 @@
|
||||
-- Migration 085: Trusted Origins Management
|
||||
-- Allows admin to manage trusted IPs and domains via UI instead of hardcoded values
|
||||
|
||||
-- Trusted origins table (IPs and domains that bypass API key auth)
|
||||
CREATE TABLE IF NOT EXISTS trusted_origins (
|
||||
id SERIAL PRIMARY KEY,
|
||||
|
||||
-- Origin type: 'ip', 'domain', 'pattern'
|
||||
origin_type VARCHAR(20) NOT NULL CHECK (origin_type IN ('ip', 'domain', 'pattern')),
|
||||
|
||||
-- The actual value
|
||||
-- For ip: '127.0.0.1', '::1', '192.168.1.0/24'
|
||||
-- For domain: 'cannaiq.co', 'findadispo.com'
|
||||
-- For pattern: '^https://.*\.cannabrands\.app$' (regex)
|
||||
origin_value VARCHAR(255) NOT NULL,
|
||||
|
||||
-- Description for admin reference
|
||||
description TEXT,
|
||||
|
||||
-- Active flag
|
||||
active BOOLEAN DEFAULT true,
|
||||
|
||||
-- Audit
|
||||
created_at TIMESTAMPTZ DEFAULT NOW(),
|
||||
created_by INTEGER REFERENCES users(id),
|
||||
updated_at TIMESTAMPTZ DEFAULT NOW(),
|
||||
|
||||
UNIQUE(origin_type, origin_value)
|
||||
);
|
||||
|
||||
-- Index for quick lookups
|
||||
CREATE INDEX IF NOT EXISTS idx_trusted_origins_active ON trusted_origins(active) WHERE active = true;
|
||||
CREATE INDEX IF NOT EXISTS idx_trusted_origins_type ON trusted_origins(origin_type, active);
|
||||
|
||||
-- Seed with current hardcoded values
|
||||
INSERT INTO trusted_origins (origin_type, origin_value, description) VALUES
|
||||
-- Trusted IPs (localhost)
|
||||
('ip', '127.0.0.1', 'Localhost IPv4'),
|
||||
('ip', '::1', 'Localhost IPv6'),
|
||||
('ip', '::ffff:127.0.0.1', 'Localhost IPv4-mapped IPv6'),
|
||||
|
||||
-- Trusted domains
|
||||
('domain', 'cannaiq.co', 'CannaiQ production'),
|
||||
('domain', 'www.cannaiq.co', 'CannaiQ production (www)'),
|
||||
('domain', 'findadispo.com', 'FindADispo production'),
|
||||
('domain', 'www.findadispo.com', 'FindADispo production (www)'),
|
||||
('domain', 'findagram.co', 'Findagram production'),
|
||||
('domain', 'www.findagram.co', 'Findagram production (www)'),
|
||||
('domain', 'localhost:3010', 'Local backend dev'),
|
||||
('domain', 'localhost:8080', 'Local admin dev'),
|
||||
('domain', 'localhost:5173', 'Local Vite dev'),
|
||||
|
||||
-- Pattern-based (regex)
|
||||
('pattern', '^https://.*\.cannabrands\.app$', 'All cannabrands.app subdomains'),
|
||||
('pattern', '^https://.*\.cannaiq\.co$', 'All cannaiq.co subdomains')
|
||||
ON CONFLICT (origin_type, origin_value) DO NOTHING;
|
||||
|
||||
-- Add comment
|
||||
COMMENT ON TABLE trusted_origins IS 'IPs and domains that bypass API key authentication. Managed via /admin.';
|
||||
@@ -1,10 +0,0 @@
|
||||
-- Migration 086: Add proxy_url column for alternative URL formats
|
||||
-- Some proxy providers use non-standard URL formats (e.g., host:port:user:pass)
|
||||
-- This column allows storing the raw URL directly
|
||||
|
||||
-- Add proxy_url column - if set, used directly instead of constructing from parts
|
||||
ALTER TABLE proxies
|
||||
ADD COLUMN IF NOT EXISTS proxy_url TEXT;
|
||||
|
||||
-- Add comment
|
||||
COMMENT ON COLUMN proxies.proxy_url IS 'Raw proxy URL (if provider uses non-standard format). Takes precedence over constructed URL from host/port/user/pass.';
|
||||
@@ -1,30 +0,0 @@
|
||||
-- Migration 088: Extend raw_crawl_payloads for discovery payloads
|
||||
--
|
||||
-- Enables saving raw store data from Dutchie discovery crawls.
|
||||
-- Store discovery returns raw dispensary objects - save them for historical analysis.
|
||||
|
||||
-- Add payload_type to distinguish product crawls from discovery crawls
|
||||
ALTER TABLE raw_crawl_payloads
|
||||
ADD COLUMN IF NOT EXISTS payload_type VARCHAR(32) NOT NULL DEFAULT 'product';
|
||||
|
||||
-- Add state_code for discovery payloads (null for product payloads)
|
||||
ALTER TABLE raw_crawl_payloads
|
||||
ADD COLUMN IF NOT EXISTS state_code VARCHAR(10);
|
||||
|
||||
-- Add store_count for discovery payloads (alternative to product_count)
|
||||
ALTER TABLE raw_crawl_payloads
|
||||
ADD COLUMN IF NOT EXISTS store_count INTEGER;
|
||||
|
||||
-- Make dispensary_id nullable for discovery payloads
|
||||
ALTER TABLE raw_crawl_payloads
|
||||
ALTER COLUMN dispensary_id DROP NOT NULL;
|
||||
|
||||
-- Add index for discovery payload queries
|
||||
CREATE INDEX IF NOT EXISTS idx_raw_crawl_payloads_type_state
|
||||
ON raw_crawl_payloads(payload_type, state_code)
|
||||
WHERE payload_type = 'store_discovery';
|
||||
|
||||
-- Comments
|
||||
COMMENT ON COLUMN raw_crawl_payloads.payload_type IS 'Type: product (default), store_discovery';
|
||||
COMMENT ON COLUMN raw_crawl_payloads.state_code IS 'State code for discovery payloads (e.g., AZ, MI)';
|
||||
COMMENT ON COLUMN raw_crawl_payloads.store_count IS 'Number of stores in discovery payload';
|
||||
@@ -1,105 +0,0 @@
|
||||
-- Migration 089: Immutable Schedules with Per-State Product Discovery
|
||||
--
|
||||
-- Key changes:
|
||||
-- 1. Add is_immutable column - schedules can be edited but not deleted
|
||||
-- 2. Add method column - all tasks use 'http' (Puppeteer transport)
|
||||
-- 3. Store discovery weekly (168h)
|
||||
-- 4. Per-state product_discovery schedules (4h default)
|
||||
-- 5. Remove old payload_fetch schedules
|
||||
|
||||
-- =====================================================
|
||||
-- 1) Add new columns to task_schedules
|
||||
-- =====================================================
|
||||
-- is_immutable: marks schedules that may be edited but not deleted (defaults to FALSE)
-- method:       transport used by the schedule (defaults to 'http')
ALTER TABLE task_schedules
  ADD COLUMN IF NOT EXISTS is_immutable BOOLEAN DEFAULT FALSE,
  ADD COLUMN IF NOT EXISTS method VARCHAR(10) DEFAULT 'http';
|
||||
|
||||
-- =====================================================
|
||||
-- 2) Update store_discovery to weekly and immutable
|
||||
-- =====================================================
|
||||
-- Move any existing store-discovery schedule (either name) to the weekly, immutable setup
UPDATE task_schedules
   SET interval_hours = 168, -- 168h = 7 days
       is_immutable   = TRUE,
       method         = 'http',
       description    = 'Discover new Dutchie stores weekly (HTTP transport)'
 WHERE name = 'store_discovery_dutchie'
    OR name = 'Store Discovery';

-- Seed the 'Store Discovery' schedule; if it already exists, overwrite the same four fields
INSERT INTO task_schedules (name, role, interval_hours, priority, description, is_immutable, method, platform, next_run_at)
VALUES (
  'Store Discovery',
  'store_discovery',
  168,
  5,
  'Discover new Dutchie stores weekly (HTTP transport)',
  TRUE,
  'http',
  'dutchie',
  NOW()
)
ON CONFLICT (name) DO UPDATE SET
  interval_hours = EXCLUDED.interval_hours,
  is_immutable   = EXCLUDED.is_immutable,
  method         = EXCLUDED.method,
  description    = EXCLUDED.description;
|
||||
|
||||
-- =====================================================
|
||||
-- 3) Remove old payload_fetch and product_refresh_all schedules
|
||||
-- =====================================================
|
||||
-- Drop the legacy payload_fetch_all / product_refresh_all schedules
DELETE FROM task_schedules
 WHERE name = 'payload_fetch_all'
    OR name = 'product_refresh_all';
|
||||
|
||||
-- =====================================================
|
||||
-- 4) Create per-state product_discovery schedules
|
||||
-- =====================================================
|
||||
-- One schedule per state that has dispensaries with active cannabis programs
|
||||
-- Seed one product_discovery schedule per state that has at least one crawl-enabled dispensary.
-- Rows that already exist (matched on name) only get is_immutable, method and description refreshed.
INSERT INTO task_schedules (name, role, state_code, interval_hours, priority, description, is_immutable, method, enabled, next_run_at)
SELECT
  'product_discovery_' || lower(st.code)                                   AS name,
  'product_discovery'                                                      AS role,
  st.code                                                                  AS state_code,
  4                                                                        AS interval_hours, -- hours; default, editable
  10                                                                       AS priority,
  'Product discovery for ' || st.name || ' dispensaries (HTTP transport)'  AS description,
  TRUE                                                                     AS is_immutable,   -- editable, not deletable
  'http'                                                                   AS method,
  COALESCE(st.is_active, FALSE)                                            AS enabled,        -- NULL is_active => disabled
  -- Stagger first runs: 5 minutes apart, in state-code order
  NOW() + (ROW_NUMBER() OVER (ORDER BY st.code) * INTERVAL '5 minutes')    AS next_run_at
FROM states st
WHERE st.id IN (
  SELECT disp.state_id
  FROM dispensaries disp
  WHERE disp.crawl_enabled
)
ON CONFLICT (name) DO UPDATE SET
  is_immutable = TRUE,
  method       = 'http',
  description  = EXCLUDED.description;
|
||||
|
||||
-- Also create schedules for states that might have stores discovered later
|
||||
-- For every state that still has no product_discovery schedule, create a disabled one
-- whose first run is one hour out. Name collisions are ignored.
INSERT INTO task_schedules (name, role, state_code, interval_hours, priority, description, is_immutable, method, enabled, next_run_at)
SELECT
  'product_discovery_' || lower(st.code)                                   AS name,
  'product_discovery'                                                      AS role,
  st.code                                                                  AS state_code,
  4                                                                        AS interval_hours,
  10                                                                       AS priority,
  'Product discovery for ' || st.name || ' dispensaries (HTTP transport)'  AS description,
  TRUE                                                                     AS is_immutable,
  'http'                                                                   AS method,
  FALSE                                                                    AS enabled,        -- disabled until stores exist
  NOW() + INTERVAL '1 hour'                                                AS next_run_at
FROM states st
LEFT JOIN task_schedules sched
       ON sched.name = 'product_discovery_' || lower(st.code)
WHERE sched.name IS NULL -- anti-join: keep only states without a schedule
ON CONFLICT (name) DO NOTHING;
|
||||
|
||||
-- =====================================================
|
||||
-- 5) Make analytics_refresh immutable
|
||||
-- =====================================================
|
||||
-- Mark the analytics_refresh schedule immutable and record its method
UPDATE task_schedules
   SET method       = 'http',
       is_immutable = TRUE
 WHERE name = 'analytics_refresh';

-- =====================================================
-- 6) Index for per-state schedule lookups
-- =====================================================
-- Partial index: rows without a state_code are left out
CREATE INDEX IF NOT EXISTS idx_task_schedules_state_code
  ON task_schedules (state_code)
  WHERE state_code IS NOT NULL;

-- Catalog comments
COMMENT ON COLUMN task_schedules.is_immutable
  IS 'If TRUE, schedule cannot be deleted (only edited)';
COMMENT ON COLUMN task_schedules.method
  IS 'Transport method: http (Puppeteer/browser) or curl (axios)';
|
||||
@@ -1,66 +0,0 @@
|
||||
-- Migration 090: Add modification tracking columns
|
||||
--
|
||||
-- Tracks when records were last modified and by which task.
|
||||
-- Enables debugging, auditing, and understanding data freshness.
|
||||
--
|
||||
-- Columns added:
|
||||
-- last_modified_at - When the record was last modified by a task
|
||||
-- last_modified_by_task - Which task role modified it (e.g., 'product_refresh')
|
||||
-- last_modified_task_id - The specific task ID that modified it
|
||||
|
||||
-- ============================================================
|
||||
-- dispensaries table
|
||||
-- ============================================================
|
||||
-- Modification-tracking columns: when, by which task role, and by which task id
ALTER TABLE dispensaries
  ADD COLUMN IF NOT EXISTS last_modified_at TIMESTAMPTZ,
  ADD COLUMN IF NOT EXISTS last_modified_by_task VARCHAR(50),
  ADD COLUMN IF NOT EXISTS last_modified_task_id INTEGER;

-- Newest-first index over rows that have been modified at least once
CREATE INDEX IF NOT EXISTS idx_dispensaries_last_modified
  ON dispensaries (last_modified_at DESC)
  WHERE last_modified_at IS NOT NULL;

-- Lookup by the task role that last touched the row
CREATE INDEX IF NOT EXISTS idx_dispensaries_modified_by_task
  ON dispensaries (last_modified_by_task)
  WHERE last_modified_by_task IS NOT NULL;

COMMENT ON COLUMN dispensaries.last_modified_at
  IS 'Timestamp when this record was last modified by a task';
COMMENT ON COLUMN dispensaries.last_modified_by_task
  IS 'Task role that last modified this record (e.g., store_discovery_state, entry_point_discovery)';
COMMENT ON COLUMN dispensaries.last_modified_task_id
  IS 'ID of the worker_tasks record that last modified this';
|
||||
|
||||
-- ============================================================
|
||||
-- store_products table
|
||||
-- ============================================================
|
||||
-- Modification-tracking columns: when, by which task role, and by which task id
ALTER TABLE store_products
  ADD COLUMN IF NOT EXISTS last_modified_at TIMESTAMPTZ,
  ADD COLUMN IF NOT EXISTS last_modified_by_task VARCHAR(50),
  ADD COLUMN IF NOT EXISTS last_modified_task_id INTEGER;

-- Newest-first index over products that have been modified at least once
CREATE INDEX IF NOT EXISTS idx_store_products_last_modified
  ON store_products (last_modified_at DESC)
  WHERE last_modified_at IS NOT NULL;

-- Lookup by the task role that last touched the product
CREATE INDEX IF NOT EXISTS idx_store_products_modified_by_task
  ON store_products (last_modified_by_task)
  WHERE last_modified_by_task IS NOT NULL;

-- Composite index keyed by dispensary, then modification time (newest first).
-- NOTE(review): despite its name, this index does not include a task column.
CREATE INDEX IF NOT EXISTS idx_store_products_task_modified
  ON store_products (dispensary_id, last_modified_at DESC)
  WHERE last_modified_at IS NOT NULL;

COMMENT ON COLUMN store_products.last_modified_at
  IS 'Timestamp when this record was last modified by a task';
COMMENT ON COLUMN store_products.last_modified_by_task
  IS 'Task role that last modified this record (e.g., product_refresh, product_discovery)';
COMMENT ON COLUMN store_products.last_modified_task_id
  IS 'ID of the worker_tasks record that last modified this';
|
||||
@@ -1,26 +0,0 @@
|
||||
-- Migration 091: Add store discovery tracking columns
|
||||
-- Per auto-healing scheme (2025-12-12):
|
||||
-- Track when store_discovery last updated each dispensary
|
||||
-- Track when last payload was saved
|
||||
|
||||
-- Add last_store_discovery_at to track when store_discovery updated this record
|
||||
-- last_store_discovery_at: when store_discovery last updated the record
-- last_payload_at:         when the last product payload was saved
--                          (complements last_fetch_at, which tracks API fetch time)
ALTER TABLE dispensaries
  ADD COLUMN IF NOT EXISTS last_store_discovery_at TIMESTAMPTZ,
  ADD COLUMN IF NOT EXISTS last_payload_at TIMESTAMPTZ;

-- Find stale discovery data among crawl-enabled dispensaries (never-discovered rows sort last)
CREATE INDEX IF NOT EXISTS idx_dispensaries_store_discovery_at
  ON dispensaries (last_store_discovery_at DESC NULLS LAST)
  WHERE crawl_enabled = true;

-- Find crawl-enabled dispensaries without recent payloads (never-saved rows sort last)
CREATE INDEX IF NOT EXISTS idx_dispensaries_payload_at
  ON dispensaries (last_payload_at DESC NULLS LAST)
  WHERE crawl_enabled = true;

COMMENT ON COLUMN dispensaries.last_store_discovery_at
  IS 'When store_discovery task last updated this record';
COMMENT ON COLUMN dispensaries.last_payload_at
  IS 'When last product payload was saved for this dispensary';
|
||||
@@ -1,30 +0,0 @@
|
||||
-- Fix 3 dispensaries (Nirvana Center, Trulieve, Harvest) with incorrect menu URLs
|
||||
-- These records have NULL or mismatched platform_dispensary_id so store_discovery
|
||||
-- ON CONFLICT can't update them automatically
|
||||
|
||||
-- Point each affected dispensary at its correct Dutchie menu URL
UPDATE dispensaries SET menu_url = 'https://dutchie.com/dispensary/svaccha-llc-nirvana-center-apache-junction', updated_at = NOW() WHERE id = 224;
UPDATE dispensaries SET menu_url = 'https://dutchie.com/dispensary/trulieve-of-phoenix-tatum', updated_at = NOW() WHERE id = 76;
UPDATE dispensaries SET menu_url = 'https://dutchie.com/dispensary/harvest-of-havasu', updated_at = NOW() WHERE id = 403;

-- Queue one entry_point_discovery task per store to resolve its platform_dispensary_id.
-- method='http' ensures only workers that passed http preflight can claim these.
INSERT INTO worker_tasks (role, dispensary_id, priority, scheduled_for, method)
SELECT 'entry_point_discovery', target_id, 5, NOW(), 'http'
FROM unnest(ARRAY[224, 76, 403]) AS target_id
ON CONFLICT DO NOTHING;
|
||||
@@ -1,35 +0,0 @@
|
||||
-- Migration 092: Store Intelligence Cache
|
||||
-- Pre-computed store intelligence data refreshed by analytics_refresh task
|
||||
-- Eliminates costly aggregation queries on /intelligence/stores endpoint
|
||||
|
||||
-- One cache row per dispensary; the row is removed when its dispensary is deleted.
CREATE TABLE IF NOT EXISTS store_intelligence_cache (
  dispensary_id   INTEGER       PRIMARY KEY REFERENCES dispensaries(id) ON DELETE CASCADE,

  -- Counts
  sku_count       INTEGER       NOT NULL DEFAULT 0,
  brand_count     INTEGER       NOT NULL DEFAULT 0,
  snapshot_count  INTEGER       NOT NULL DEFAULT 0,

  -- Price statistics
  avg_price_rec   NUMERIC(10,2),
  avg_price_med   NUMERIC(10,2),
  min_price       NUMERIC(10,2),
  max_price       NUMERIC(10,2),

  -- Per-category product counts, kept as JSONB for flexibility
  category_counts JSONB         DEFAULT '{}'::jsonb,

  -- Freshness
  last_crawl_at   TIMESTAMPTZ,
  last_refresh_at TIMESTAMPTZ   NOT NULL DEFAULT NOW(),

  -- Row metadata
  created_at      TIMESTAMPTZ   NOT NULL DEFAULT NOW()
);

-- Newest-refreshed-first index
CREATE INDEX IF NOT EXISTS idx_store_intelligence_cache_refresh
  ON store_intelligence_cache (last_refresh_at DESC);

COMMENT ON TABLE store_intelligence_cache
  IS 'Pre-computed store intelligence metrics, refreshed by analytics_refresh task';
COMMENT ON COLUMN store_intelligence_cache.category_counts
  IS 'JSON object mapping category_raw to product count';
|
||||
@@ -1,43 +0,0 @@
|
||||
-- Migration: 093_fix_mv_state_metrics.sql
|
||||
-- Purpose: Fix mv_state_metrics to use brand_name_raw and show correct store counts
|
||||
-- Issues fixed:
|
||||
-- 1. unique_brands used brand_id (often NULL), now uses brand_name_raw
|
||||
-- 2. Added out_of_stock_products column
|
||||
-- 3. dispensary_count now correctly named
|
||||
|
||||
-- Drop and recreate the materialized view with correct definition
|
||||
-- Rebuild the per-state metrics view from scratch (dropping it also drops its indexes).
DROP MATERIALIZED VIEW IF EXISTS mv_state_metrics;

CREATE MATERIALIZED VIEW mv_state_metrics AS
SELECT
  d.state,
  s.name AS state_name,

  -- Store counts (DISTINCT because each dispensary repeats once per joined product)
  COUNT(DISTINCT d.id)                                        AS dispensary_count,
  COUNT(DISTINCT d.id) FILTER (WHERE d.menu_type = 'dutchie') AS dutchie_stores,
  COUNT(DISTINCT d.id) FILTER (WHERE d.crawl_enabled = true)  AS active_stores,

  -- Product counts; a NULL is_in_stock is counted as in stock
  COUNT(sp.id)                                                      AS total_products,
  COUNT(sp.id) FILTER (WHERE COALESCE(sp.is_in_stock, true))        AS in_stock_products,
  COUNT(sp.id) FILTER (WHERE sp.is_in_stock = false)                AS out_of_stock_products,
  COUNT(sp.id) FILTER (WHERE sp.is_on_special)                      AS on_special_products,

  -- Brand/category variety, based on the raw names
  COUNT(DISTINCT sp.brand_name_raw)
    FILTER (WHERE sp.brand_name_raw IS NOT NULL AND sp.brand_name_raw <> '') AS unique_brands,
  COUNT(DISTINCT sp.category_raw)
    FILTER (WHERE sp.category_raw IS NOT NULL)                               AS unique_categories,

  -- Recreational price statistics; non-positive prices are ignored
  ROUND(AVG(sp.price_rec) FILTER (WHERE sp.price_rec > 0)::NUMERIC, 2) AS avg_price_rec,
  MIN(sp.price_rec) FILTER (WHERE sp.price_rec > 0)                    AS min_price_rec,
  MAX(sp.price_rec) FILTER (WHERE sp.price_rec > 0)                    AS max_price_rec,

  NOW() AS refreshed_at
FROM dispensaries d
LEFT JOIN states s          ON s.code = d.state
LEFT JOIN store_products sp ON sp.dispensary_id = d.id
WHERE d.state IS NOT NULL
GROUP BY d.state, s.name;

-- REFRESH ... CONCURRENTLY requires a unique index on the view
CREATE UNIQUE INDEX idx_mv_state_metrics_state
  ON mv_state_metrics (state);
|
||||
|
||||
-- Update refresh function
|
||||
-- Refreshes mv_state_metrics without blocking readers of the view.
CREATE OR REPLACE FUNCTION refresh_state_metrics()
RETURNS void
LANGUAGE plpgsql
AS $fn$
BEGIN
  REFRESH MATERIALIZED VIEW CONCURRENTLY mv_state_metrics;
END;
$fn$;
|
||||
@@ -1,516 +0,0 @@
|
||||
-- Migration: Import 500 Evomi residential proxies
|
||||
-- These are sticky-session rotating proxies; the session ID is embedded in each password
|
||||
-- Every row is inserted with active = false; run "Test All" to verify the proxies and activate them
|
||||
|
||||
-- First, drop the old unique constraint that doesn't account for username/password
|
||||
-- Guarded with IF EXISTS, so this is a no-op when the old constraint is already gone.
ALTER TABLE proxies DROP CONSTRAINT IF EXISTS proxies_host_port_protocol_key;
|
||||
|
||||
-- Add new unique constraint that includes username and password
|
||||
-- This allows multiple entries for the same host:port with different credentials (sessions)
|
||||
-- NOTE(review): unlike the DROP above, this ADD CONSTRAINT is unguarded, so running
-- the migration a second time fails once the constraint exists — confirm that
-- migrations are applied exactly once.
-- NOTE(review): PostgreSQL UNIQUE constraints treat NULLs as distinct by default, so
-- rows with a NULL username or password would not be deduplicated — confirm that
-- is acceptable for credential-less proxies.
ALTER TABLE proxies ADD CONSTRAINT proxies_host_port_protocol_username_password_key
|
||||
UNIQUE(host, port, protocol, username, password);
|
||||
|
||||
-- Now insert all 500 proxies
|
||||
INSERT INTO proxies (host, port, protocol, username, password, active, max_connections)
|
||||
VALUES
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-4XRRPF1UQ', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-5UNGX7N7K', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-9PSKYP1GU', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-GZBKKYL2S', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-YHJHM0XZU', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ESDYQ34CJ', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-GAXUMFKQI', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-2FF66K4CI', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-SUYM0R49B', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-A8VHZMEFP', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-WNRLH6NXR', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-SPSB3IUX6', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-85N76UU5Q', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-189P3LH2F', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-47DQOAGWY', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-IBT0QO7M2', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-UPXOUOH8X', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-BFQ1PH75D', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-KNTFKRY1J', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-5L8IG6DZX', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-9YE13X0BA', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-6KBHCHF0I', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-CETHHFHZ6', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-A06J8ST3I', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-YFS93P1YR', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-RB74B3R6C', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-2JW27O3EU', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-KCUX84BL0', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-1A2KSG6HO', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-4QW8ILV0E', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-0Q09GH2VL', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-16BRXBCYC', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-9W02B3R4L', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-CVAEH76YT', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-CATOG0Q5I', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-F81625L74', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-DO4AVTPK4', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-SBZPXORD5', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-JA1AWOX03', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-0FUJTRSYT', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-CM1R2RSTB', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-EHPJZCK1S', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ZYLKORNAF', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-05A8BUD25', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-RHM1Q6O4M', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ES5VPCE6Z', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-P0JEGLP4O', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-OC4AX88D0', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-3BN54IEBV', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ABSC7S550', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-LNIJU6R2V', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-OYGQPPCOV', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-32YBOHQWR', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-7KGEMK4SL', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-FAW8T2EBW', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-GPV69KI9T', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-JPBHSN8M2', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-VZ1JQOF15', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-7DJXXPK1E', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-JXKQ7JVZ1', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-88Q5UQX3B', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-HAI5K0JFO', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-65SUKG0QH', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-1XFJETX1F', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-7ZNUCVCBW', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-O1DCK15LA', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-WLTEA65WB', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-KCHAFNK2P', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-6ODSZ6CUT', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-SZ8R2EFH4', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-9EPPYQREC', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-MPCBES7UI', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-FCCPL0XWZ', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-GJ23UYEGI', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-RQT80689I', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-TDQO2AP5E', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-D5Q5SEUEO', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-DZN4ZTENM', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-4HVQ33VK9', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-F1HJ7GPHA', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-RM708QD2Y', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-K36N27GM5', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-O73TS0DAE', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-54QXRWEA8', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-1P6LP0365', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-WMZ2ST34E', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-175UYF58T', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-W0HTK6F28', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-D5275CTIM', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-IH2IWVZOH', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-C4VFW7GSA', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-O9XGULSNA', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-PJ1W1P5L9', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-MQQU30KPC', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-BNPIBZTYV', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-7BNRCH922', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-5AZLU117B', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-3PPJ49VJC', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-FMC8CQO74', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-VCHW23CXJ', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-1S4749PCB', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-0T9DJFZPK', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-L0RMV65W3', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-FZ1ZZUQNA', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-6IFJD23DI', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ZKUEP5XM0', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-Z8KU62CLT', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-LO77J78X1', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-27FBKYRJ4', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-0TDQTESGW', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-IMKI89WQ1', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ANS65MIJS', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-O3T2OTT0Y', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-MWW6Z1QVM', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-TT47MX0BB', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-59CFKTM14', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-DOD61TVZN', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-RH9Y298WS', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-X98AATJ7B', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-C3UMES1W8', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-8O3J7G3PT', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-3K4OH78OJ', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-N4A3JMVL1', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-HK1SRLAC9', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-Y9VLJJXVU', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-KTTH7R0EC', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-JKVX01E8T', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-HW2VPAHJO', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-7WZ9UHBH8', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-JTKFK0CP7', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-G3F27NXG5', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-K7I2JWYSP', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-CTUU8UQ0T', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ISHMAP6RQ', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-LVWNZ1LHP', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-N5CQ1YG2Z', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-XL2XY2SLZ', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-UCRZVFIV1', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-VLGQFYNEL', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-YPCDM9O5Y', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-R6VA2S25E', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-4W8X8BBUL', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-5INDC8M80', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-Q8RKKOF29', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-B5ED3EFBC', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-8IC5ZXAX1', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-KCGM25D75', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-1MO06IRID', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-4QWGUGN6W', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-5T9M5KEHT', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-9KG7W7NZF', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-NYGN5R2CL', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-H61OXFCJ2', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-30WSQ4EFH', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-J36NG6MY2', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-TZU34ZA7A', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-LPWNYL74G', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-DDJTXOS4Z', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-HFOS4S185', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-2MLGIFL1M', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-CI5AHX0TC', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-WSXVCH1WN', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-8F0C3D06T', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-3YZR0664F', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-1L2VMWTM0', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-KPMCB57O7', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-N6QXQDZV3', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-35FAYFWDP', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-TVZWE2JR8', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-0WK86IKLF', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-8WBU6ESHJ', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-XGU6UNM01', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-86CXNEQZC', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-NZ4LFCHE3', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ZKB6D72RF', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-BKXNG77NS', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-3MJ332POD', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-SL9VEYNJ0', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-LY8KO43Z8', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-8KGF1XR1L', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-WT6FB54HW', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-7UQ9JMG5E', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-KX3L2040U', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-HL809F9WU', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-T9GU40ERH', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-I5O2NX3G9', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-RVOUYU3NO', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-2T3ETNUKS', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-SW0B93DZZ', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-PQ55UF3K6', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-VNRWWHHJB', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-8Q26FZ7EP', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ZWD9FA90J', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-QSGMQX3RZ', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-83NZ9MEAC', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-Q9QQ4AL37', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-QBE9KD60Z', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-NRNUXUO44', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-8F0XKQ9P8', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-095JV1CJN', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-WRRSIRUTZ', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-DTUD7IDQI', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ASCEAI9LD', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-YOUM7BJZH', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-PEG2ZH9J3', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-WAUW31F78', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-GIBZ6U7AQ', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-63TD9LFBG', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-0MH1N9MJB', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-YFP9RNQIK', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-SW4N5162D', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-53MWFB2MP', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-QWLUKBMIN', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-JHS6QIX9G', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-6R04HZ5UD', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-OUJLT31VN', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-6BMKW933S', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-R4GG84E4Q', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-00XAP630X', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-AK97MC2A0', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-NBS2GKGO5', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-NVFEWK4S5', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-MTV3WSYS1', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-JS8RM4JGW', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-6NL4QR1XN', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-4BUUQVSN6', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-56WEAAU3M', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-WCA56PFTF', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-TK1QAZP0B', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-SYZ5ADFXP', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-S3VLOUW6G', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-V2K1V1JWJ', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-MZ6VHV5PQ', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-DRZDQDPN3', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-231VVRYYA', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-06G3MC88G', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-WS52I2ZVD', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-3QTNQD55U', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-EX7ALECU3', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-DQN8TVQY6', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-FJT54OQFI', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-BLTYUF7QR', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-8DL2JXDSO', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-KBAOXIJ4Y', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ZYL28R5UW', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-NCRDA8LYB', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-BQYKXQLXU', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-PSHCS65MR', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-90Y1WFVYZ', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-4GG33NUPW', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-5Y0A79GED', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-RMZHTAD6J', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-XBSOJ5I36', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-AAJW53VNE', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-9NYSPSEL6', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-94WMY337S', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-35Y3BJQFW', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-R7WY3TMRC', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-RXAQVH0F3', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-EFQ2AVFSB', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-XPOUJSAVD', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-RSHPF5NTT', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-Z9402336V', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-OI36C5WOJ', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-XEOGV1LVS', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-QIQDXG9NC', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-9IY242GGT', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-PQTEUT52E', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-18NKI3WPS', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-34U3QAA49', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-S05TYKBBF', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-B4J8WCWDD', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-HR377WC28', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-PNRR7S1T2', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-UNR0N0KJ9', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-NARQQANBE', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-8PUL1MYUU', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-KJPCT1FP3', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-XGC80N0AM', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-8Y1JN8DH3', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-Y56M31T07', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-NHYHXQSV1', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-V30RZVG7L', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-CR6V2GSOU', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-VSAF5O0LJ', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-4F4BF2LFH', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ERSMQHXNX', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-Q0TFLZQWS', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ZXCS6SMHD', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-JHXYAUGRA', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-IT2XYWES2', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-22UCD94OG', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-VGDLQ3K35', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-O8AFL8RGX', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-9RBIZ8G9X', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-9JIU0SVBV', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-PWRBG0GWU', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ZME1MX12T', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-A7LWRKSJP', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-5XISX0HD4', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-5T6EXKD3Z', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-10ILV351B', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-FDULBZDIY', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-SFVR6I980', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-FKV8DCZGT', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ECRK3M3IZ', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-WMKSLOF39', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-HGE60O6AL', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-RGCWDJOT8', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-DESWK5KVN', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-RD593HJ92', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-XWNCAO39B', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-AQ4XGDLX8', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-2ZOVEA1PL', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-JF4FUX83X', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-CQ228GK3B', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-XCTMU9I7U', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-M3F37T22W', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ASZUXM9M9', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-CJVHX24WW', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-KZT4T898V', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-RI128R5TE', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-HCAG6X9MJ', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-XOQENWBP7', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-1LTQGM497', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ZLVZT4O1G', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-FTIXTXCIA', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-O2YE6QNHY', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-0JPDDBF47', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-H1FP1IFJI', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-FYBPBMY5B', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-F7BWDVC97', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-MLENB1LQ4', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-FT9YNU8UP', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-5W21Q2O5L', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-YM61QWPR3', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-XXFQJJHZM', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-H52YKCM9X', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-NT56ZNZ54', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-DRJY7BMB5', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-P6886RPXX', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-PBXW2EY5K', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-5VQCJTM36', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-NMM3GGM1J', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-1JQQ0CDSA', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-R89YI91K4', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-7L7L9MXOT', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-50Z7MXKZS', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-EGADRZTIB', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-1DR7H46H6', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-O28QZL994', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-EYTRWVERM', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-HAJZAUWJV', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-AGYO3AB89', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-V224329ZM', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-4YTMSFWYK', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-QP40RL1N1', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-CB1BVAMAH', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-9VGXUY02O', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-BCPVVKCZ3', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-VDC3CWZX7', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-7HWLI21FA', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-5QWIUJEFM', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-4C3PBMAIZ', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-3QC7DM7PH', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-A6R5G3FWV', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-3A6WDE12Z', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-0F2LZA9RU', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-XGBJXMXRX', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-5YOGR8PQ1', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-LPBFBUF3N', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-TUSPGR2AY', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-G05I8M2FQ', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-H5NDXJIAQ', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-X8FJL8WQZ', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-KIB2FQRUP', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-VNV0OYWR7', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-GKBPM3PB2', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-XVPI30KE7', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-Y3PRMJP51', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-KEPP5SBML', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-0PDUZ6QEQ', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-1GHWWFLLE', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-149S2TO8O', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-1ZB6FSIGE', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-VCRQTXDZL', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-645JVC3XL', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-2HJ00JBSR', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-7FZDG2W65', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-HD6ANE3LN', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-2HS1B1J8V', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-IHOHYMDF5', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ZYZMAFEKF', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-JO85WX5JE', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-RURJDCURW', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-FZC3BLXPJ', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-B0YR2LOZ1', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-6ZFP58ZRK', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-UMZDLHQ78', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-8A2IHDXY3', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-EDYEPWUMT', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-X3TM99R12', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-DLV0UTQ72', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-SFU0ZYIM0', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-YAJ6A66NH', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-X8CFU41AU', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-CJ3Z4WP32', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-UJBLRQKXA', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-T78R8EBGH', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-DDIH55GNZ', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-F1SSD4NWF', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-4BE55FKRD', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-BG2DFBL46', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-MKVMNR7W4', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-C3Z4JUGU5', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-NVP8EEEGQ', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-MQFWP2LU7', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-BH873JG6H', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-3D76651SM', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-KZ7V6KWMP', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-CD8NEJFJN', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-PWXE9L30H', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-1RT95F5LR', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-Q7CEEROE5', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-Q08APOAEG', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-NNKREGLXE', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-YQEG33MKX', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-VRD9G7H5K', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-68R86GQ1G', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-BXZUKQL2M', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-QM13UD73C', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-I7OOGJLNS', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-GXDBO1IQJ', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-JJZPRFMWN', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-DBTDFITGW', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-VYHL6ASIJ', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-F61NNU332', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-6Z9H72KMC', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-WVOONDMA9', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-CXTSTBXN3', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-CSMZLC921', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-3FTBSARZJ', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ESHGKBXLY', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-E0YLXW5H4', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-3QFI6UMWE', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-23VOWHO88', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-02Q9U5QCH', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-3POMNSMB0', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-NTT8OWUFQ', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-MT5XEHJWX', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ILDOY0PCQ', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-MN9HU4DGO', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-1YOPU7GLL', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ZC5BM5MYB', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-UD3FXK3I9', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-LMDJOV52Y', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-N45X16BSL', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-1CBY3Z7QC', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-F0D3AO9E6', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-YQA8GUOD1', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-2EE999233', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-D6GD5WT2Y', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-7DFBMLTMY', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-J6TJKC6VJ', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-2AWQ3ZRF4', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-4KOVIF5W3', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-3489SXI1U', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-F37VKUHVE', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-GHBMAVCE4', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-W64U46547', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-1GUJV1MGQ', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-M13IOZVI9', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-TX7EVZN1Z', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-2PTS2ML8J', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-VTG83RVX7', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-2IOE6BR66', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-I68XZMR23', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-Q940UN6MU', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-8Y9NFR0N0', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-MYP341DZ8', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-WJ68VGKAZ', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-819MSDR9H', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-27CGND4VG', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-YYDOD47BF', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-YU7F6J8G5', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-HMY16WTCA', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-FPWEBRLG2', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-FGE79X0DE', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-551LMZ84R', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-UWMBDCTX4', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-BNHQXW9HY', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-WB0P5LCN6', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-Z4P9E1SVG', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-UVW2G9IRN', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-OO93WVLB0', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-NTRIK82TG', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-8TXV42S74', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-Z74LKL50G', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-QQEXNIPTR', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-WGK2VD34L', false, 1)
|
||||
ON CONFLICT DO NOTHING;
|
||||
@@ -1,81 +0,0 @@
|
||||
-- Migration: Auto-retry failed proxies after cooldown period
|
||||
-- Proxies that fail will be retried after a configurable interval
|
||||
|
||||
-- Add last_failed_at column to track when proxy last failed
|
||||
ALTER TABLE proxies ADD COLUMN IF NOT EXISTS last_failed_at TIMESTAMP;
|
||||
|
||||
-- Add retry settings
|
||||
INSERT INTO settings (key, value, description)
|
||||
VALUES
|
||||
('proxy_retry_interval_hours', '4', 'Hours to wait before retrying a failed proxy'),
|
||||
('proxy_max_failures_before_permanent', '10', 'Max failures before proxy is permanently disabled')
|
||||
ON CONFLICT (key) DO NOTHING;
|
||||
|
||||
-- get_eligible_proxy_ids(): ids of proxies that may be used right now.
--
-- A proxy is eligible when it is active, OR when it is inactive but
--   * has a recorded failure time (last_failed_at),
--   * that failure is older than the retry interval, and
--   * its failure_count is still below the permanent-disable threshold.
--
-- Settings read (both optional):
--   proxy_retry_interval_hours            default 4
--   proxy_max_failures_before_permanent   default 10
--
-- Returns: one row per eligible proxy; active proxies first, then fewer
-- failures first, ties broken randomly.
CREATE OR REPLACE FUNCTION get_eligible_proxy_ids()
RETURNS TABLE(proxy_id INT) AS $$
DECLARE
    retry_hours INT;
    max_failures INT;
BEGIN
    -- COALESCE has to wrap a scalar subquery: "SELECT COALESCE(...) INTO var"
    -- leaves var NULL when the settings row does not exist, which would make
    -- every comparison below NULL and silently exclude all failed proxies.
    retry_hours := COALESCE(
        (SELECT s.value::int FROM settings s WHERE s.key = 'proxy_retry_interval_hours'),
        4
    );

    -- Same threshold auto_reenable_proxies() uses, instead of a hard-coded 10.
    max_failures := COALESCE(
        (SELECT s.value::int FROM settings s WHERE s.key = 'proxy_max_failures_before_permanent'),
        10
    );

    RETURN QUERY
    SELECT p.id
    FROM proxies p
    WHERE p.active = true
       OR (
            p.active = false
            AND p.last_failed_at IS NOT NULL
            AND p.last_failed_at < NOW() - (retry_hours * INTERVAL '1 hour')
            AND p.failure_count < max_failures  -- Don't retry if too many failures
       )
    ORDER BY
        p.active DESC,         -- Prefer active proxies
        p.failure_count ASC,   -- Then prefer proxies with fewer failures
        RANDOM();
END;
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- auto_reenable_proxies(): re-activate failed proxies whose cooldown has passed.
--
-- This migration only defines the function; it does not schedule it. It is
-- meant to be called periodically (e.g. hourly) by an external scheduler.
--
-- Settings read (both optional):
--   proxy_retry_interval_hours            default 4
--   proxy_max_failures_before_permanent   default 10
--
-- Returns: number of proxies flipped back to active = true.
CREATE OR REPLACE FUNCTION auto_reenable_proxies()
RETURNS INT AS $$
DECLARE
    retry_hours INT;
    max_failures INT;
    reenabled_count INT;
BEGIN
    -- COALESCE wraps a scalar subquery so the default also applies when the
    -- settings row is absent (a bare SELECT ... INTO would yield NULL and the
    -- UPDATE below would then never match any row).
    retry_hours := COALESCE(
        (SELECT s.value::int FROM settings s WHERE s.key = 'proxy_retry_interval_hours'),
        4
    );

    max_failures := COALESCE(
        (SELECT s.value::int FROM settings s WHERE s.key = 'proxy_max_failures_before_permanent'),
        10
    );

    -- Re-enable proxies that have cooled down
    UPDATE proxies
    SET active = true,
        updated_at = NOW()
    WHERE active = false
      AND last_failed_at IS NOT NULL
      AND last_failed_at < NOW() - (retry_hours * INTERVAL '1 hour')
      AND failure_count < max_failures;

    GET DIAGNOSTICS reenabled_count = ROW_COUNT;

    IF reenabled_count > 0 THEN
        RAISE NOTICE 'Auto-reenabled % proxies after % hour cooldown', reenabled_count, retry_hours;
    END IF;

    RETURN reenabled_count;
END;
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- Add index for efficient querying
|
||||
CREATE INDEX IF NOT EXISTS idx_proxies_retry
|
||||
ON proxies(active, last_failed_at, failure_count);
|
||||
|
||||
COMMENT ON COLUMN proxies.last_failed_at IS 'Timestamp of last failure - used for auto-retry logic';
|
||||
COMMENT ON FUNCTION auto_reenable_proxies() IS 'Call periodically to re-enable failed proxies that have cooled down';
|
||||
@@ -1,20 +0,0 @@
|
||||
-- Migration: Add trigram indexes for fast ILIKE product searches
|
||||
-- Enables fast searches on name_raw, brand_name_raw, and description
|
||||
|
||||
-- Enable pg_trgm extension if not already enabled
|
||||
CREATE EXTENSION IF NOT EXISTS pg_trgm;
|
||||
|
||||
-- Create GIN trigram indexes for fast ILIKE searches
|
||||
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_store_products_name_trgm
|
||||
ON store_products USING gin (name_raw gin_trgm_ops);
|
||||
|
||||
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_store_products_brand_name_trgm
|
||||
ON store_products USING gin (brand_name_raw gin_trgm_ops);
|
||||
|
||||
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_store_products_description_trgm
|
||||
ON store_products USING gin (description gin_trgm_ops);
|
||||
|
||||
-- Add comment
|
||||
COMMENT ON INDEX idx_store_products_name_trgm IS 'Trigram index for fast ILIKE searches on product name';
|
||||
COMMENT ON INDEX idx_store_products_brand_name_trgm IS 'Trigram index for fast ILIKE searches on brand name';
|
||||
COMMENT ON INDEX idx_store_products_description_trgm IS 'Trigram index for fast ILIKE searches on description';
|
||||
@@ -1,11 +0,0 @@
|
||||
-- Migration: Add indexes for dashboard performance
|
||||
-- Speeds up the tasks listing query with ORDER BY and JOIN
|
||||
|
||||
-- Index for JOIN with worker_registry
|
||||
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_worker_tasks_worker_id
|
||||
ON worker_tasks(worker_id)
|
||||
WHERE worker_id IS NOT NULL;
|
||||
|
||||
-- Index for ORDER BY created_at DESC (dashboard listing)
|
||||
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_worker_tasks_created_at_desc
|
||||
ON worker_tasks(created_at DESC);
|
||||
@@ -1,13 +0,0 @@
|
||||
-- Migration: Add stage tracking columns to dispensaries table
|
||||
-- Required for stage checkpoint feature in task handlers
|
||||
|
||||
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS consecutive_successes INTEGER DEFAULT 0;
|
||||
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS consecutive_failures INTEGER DEFAULT 0;
|
||||
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS last_successful_crawl_at TIMESTAMPTZ;
|
||||
|
||||
-- Indexes for finding stores by status
|
||||
CREATE INDEX IF NOT EXISTS idx_dispensaries_consecutive_successes
|
||||
ON dispensaries(consecutive_successes) WHERE consecutive_successes > 0;
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_dispensaries_consecutive_failures
|
||||
ON dispensaries(consecutive_failures) WHERE consecutive_failures > 0;
|
||||
@@ -1,68 +0,0 @@
|
||||
-- Migration: 099_working_hours.sql
|
||||
-- Description: Working hours profiles for natural traffic pattern simulation
|
||||
-- Created: 2024-12-13
|
||||
|
||||
-- Working hours table: defines hourly activity weights to mimic natural traffic
|
||||
CREATE TABLE IF NOT EXISTS working_hours (
|
||||
id SERIAL PRIMARY KEY,
|
||||
name VARCHAR(50) UNIQUE NOT NULL,
|
||||
description TEXT,
|
||||
|
||||
-- Hour weights: {"0": 15, "1": 5, ..., "18": 100, ...}
|
||||
-- Value = percent chance to trigger activity that hour (0-100)
|
||||
hour_weights JSONB NOT NULL,
|
||||
|
||||
-- Day-of-week multipliers (0=Sunday, 6=Saturday)
|
||||
-- Optional adjustment for weekend vs weekday patterns
|
||||
dow_weights JSONB DEFAULT '{"0": 90, "1": 100, "2": 100, "3": 100, "4": 100, "5": 110, "6": 95}',
|
||||
|
||||
timezone VARCHAR(50) DEFAULT 'America/Phoenix',
|
||||
enabled BOOLEAN DEFAULT true,
|
||||
created_at TIMESTAMPTZ DEFAULT NOW(),
|
||||
updated_at TIMESTAMPTZ DEFAULT NOW()
|
||||
);
|
||||
|
||||
-- Seed: Natural traffic pattern based on internet usage research
|
||||
-- Optimized for cannabis dispensary browsing (lunch + after-work peaks)
|
||||
INSERT INTO working_hours (name, description, timezone, hour_weights) VALUES (
|
||||
'natural_traffic',
|
||||
'Mimics natural user browsing patterns - peaks at lunch and 5-7 PM',
|
||||
'America/Phoenix',
|
||||
'{
|
||||
"0": 15,
|
||||
"1": 5,
|
||||
"2": 5,
|
||||
"3": 5,
|
||||
"4": 5,
|
||||
"5": 10,
|
||||
"6": 20,
|
||||
"7": 30,
|
||||
"8": 35,
|
||||
"9": 45,
|
||||
"10": 50,
|
||||
"11": 60,
|
||||
"12": 75,
|
||||
"13": 65,
|
||||
"14": 60,
|
||||
"15": 70,
|
||||
"16": 80,
|
||||
"17": 95,
|
||||
"18": 100,
|
||||
"19": 100,
|
||||
"20": 90,
|
||||
"21": 70,
|
||||
"22": 45,
|
||||
"23": 25
|
||||
}'::jsonb
|
||||
) ON CONFLICT (name) DO UPDATE SET
|
||||
hour_weights = EXCLUDED.hour_weights,
|
||||
description = EXCLUDED.description,
|
||||
updated_at = NOW();
|
||||
|
||||
-- Index for quick lookups
|
||||
CREATE INDEX IF NOT EXISTS idx_working_hours_name ON working_hours(name);
|
||||
CREATE INDEX IF NOT EXISTS idx_working_hours_enabled ON working_hours(enabled);
|
||||
|
||||
COMMENT ON TABLE working_hours IS 'Activity profiles for natural traffic simulation. Hour weights are percent chance (0-100) to trigger activity.';
|
||||
COMMENT ON COLUMN working_hours.hour_weights IS 'JSON object mapping hour (0-23) to percent chance (0-100). 100 = always run, 0 = never run.';
|
||||
COMMENT ON COLUMN working_hours.dow_weights IS 'Optional day-of-week multipliers. 0=Sunday. Applied as (hour_weight * dow_weight / 100).';
|
||||
@@ -1,19 +0,0 @@
|
||||
-- Migration: 100_worker_timezone.sql
|
||||
-- Description: Add timezone column to worker_registry for working hours support
|
||||
-- Created: 2024-12-13
|
||||
|
||||
-- Add timezone column to worker_registry
|
||||
-- Populated from preflight IP geolocation (e.g., 'America/New_York')
|
||||
ALTER TABLE worker_registry
|
||||
ADD COLUMN IF NOT EXISTS timezone VARCHAR(50);
|
||||
|
||||
-- Add working_hours_id to link worker to a specific working hours profile
|
||||
-- NULL means use default 'natural_traffic' profile
|
||||
ALTER TABLE worker_registry
|
||||
ADD COLUMN IF NOT EXISTS working_hours_id INTEGER REFERENCES working_hours(id);
|
||||
|
||||
-- Index for workers by timezone (useful for capacity planning)
|
||||
CREATE INDEX IF NOT EXISTS idx_worker_registry_timezone ON worker_registry(timezone);
|
||||
|
||||
COMMENT ON COLUMN worker_registry.timezone IS 'IANA timezone from preflight IP geolocation (e.g., America/New_York)';
|
||||
COMMENT ON COLUMN worker_registry.working_hours_id IS 'Reference to working_hours profile. NULL uses default natural_traffic.';
|
||||
@@ -1,78 +0,0 @@
|
||||
-- Migration: 101_worker_preflight_timezone.sql
|
||||
-- Description: Update update_worker_preflight to extract timezone from fingerprint
|
||||
-- Created: 2024-12-13
|
||||
|
||||
-- Records the outcome of one preflight check ('curl' or 'http') for a worker
-- and then recomputes the worker's combined preflight_status.
--
-- Parameters:
--   p_worker_id    worker_registry.worker_id to update
--   p_transport    'curl' or 'http'; any other value records nothing but the
--                  combined status is still recomputed
--   p_status       result stored for that transport (compared against
--                  'passed' / 'failed' below)
--   p_ip           IP stored for that transport
--   p_response_ms  response time stored for that transport
--   p_error        error text stored for that transport
--   p_fingerprint  http only: stored in fingerprint_data when non-NULL; its
--                  "detectedTimezone" key, when present, overwrites timezone
--
-- Combined status: both passed -> 'passed'; one passed -> 'partial';
-- otherwise any failed -> 'failed'; otherwise 'pending'.
CREATE OR REPLACE FUNCTION public.update_worker_preflight(
    p_worker_id character varying,
    p_transport character varying,
    p_status character varying,
    p_ip character varying DEFAULT NULL,
    p_response_ms integer DEFAULT NULL,
    p_error text DEFAULT NULL,
    p_fingerprint jsonb DEFAULT NULL
)
RETURNS void
LANGUAGE plpgsql
AS $function$
DECLARE
    v_curl VARCHAR(20);
    v_http VARCHAR(20);
    v_overall VARCHAR(20);
BEGIN
    CASE p_transport
        WHEN 'curl' THEN
            UPDATE worker_registry
            SET preflight_curl_status = p_status,
                preflight_curl_at     = NOW(),
                preflight_curl_ms     = p_response_ms,
                preflight_curl_error  = p_error,
                curl_ip               = p_ip,
                updated_at            = NOW()
            WHERE worker_id = p_worker_id;

        WHEN 'http' THEN
            UPDATE worker_registry
            SET preflight_http_status = p_status,
                preflight_http_at     = NOW(),
                preflight_http_ms     = p_response_ms,
                preflight_http_error  = p_error,
                http_ip               = p_ip,
                -- keep the previous fingerprint / timezone when none is supplied
                fingerprint_data      = COALESCE(p_fingerprint, fingerprint_data),
                timezone              = COALESCE(p_fingerprint->>'detectedTimezone', timezone),
                updated_at            = NOW()
            WHERE worker_id = p_worker_id;

        ELSE
            -- unknown (or NULL) transport: nothing to record
            NULL;
    END CASE;

    -- Re-read both per-transport results so the combined status reflects
    -- whatever was just written plus the other transport's last result.
    SELECT wr.preflight_curl_status, wr.preflight_http_status
    INTO v_curl, v_http
    FROM worker_registry wr
    WHERE wr.worker_id = p_worker_id;

    v_overall := CASE
        WHEN v_curl = 'passed' AND v_http = 'passed' THEN 'passed'
        WHEN v_curl = 'passed' OR  v_http = 'passed' THEN 'partial'
        WHEN v_curl = 'failed' OR  v_http = 'failed' THEN 'failed'
        ELSE 'pending'
    END;

    UPDATE worker_registry
    SET preflight_status = v_overall,
        preflight_at     = NOW()
    WHERE worker_id = p_worker_id;
END;
$function$;
|
||||
|
||||
COMMENT ON FUNCTION update_worker_preflight(varchar, varchar, varchar, varchar, integer, text, jsonb)
|
||||
IS 'Updates worker preflight status and extracts timezone from fingerprint for working hours';
|
||||
@@ -1,114 +0,0 @@
|
||||
-- Migration: 102_check_working_hours.sql
|
||||
-- Description: Function to check if worker should be available based on working hours
|
||||
-- Created: 2024-12-13
|
||||
|
||||
-- check_working_hours(): probabilistic "should this worker run now?" check.
--
-- Looks up the worker's timezone, derives the local hour and day of week,
-- reads the matching weights from the named working_hours profile and rolls
-- a random number 0-99 against (hour_weight * dow_weight / 100).
--
-- Parameters:
--   p_worker_id     worker_registry.worker_id
--   p_profile_name  working_hours.name to use (only enabled profiles match)
--
-- Returns exactly one row:
--   is_available     TRUE when roll < final weight, or when no enabled profile
--                    with that name exists
--   current_hour     local hour (0-23) in the timezone actually used
--   hour_weight      final weight after the day-of-week multiplier
--                    (100 when no profile was found)
--   worker_timezone  timezone actually used
--   roll             the random roll (0 when no profile was found)
--   reason           human-readable explanation
CREATE OR REPLACE FUNCTION check_working_hours(
    p_worker_id VARCHAR,
    p_profile_name VARCHAR DEFAULT 'natural_traffic'
)
RETURNS TABLE (
    is_available BOOLEAN,
    current_hour INTEGER,
    hour_weight INTEGER,
    worker_timezone VARCHAR,
    roll INTEGER,
    reason TEXT
)
LANGUAGE plpgsql
AS $function$
DECLARE
    v_timezone VARCHAR(50);
    v_local_now TIMESTAMP;
    v_hour INTEGER;
    v_weight INTEGER;
    v_dow INTEGER;
    v_dow_weight INTEGER;
    v_final_weight INTEGER;
    v_roll INTEGER;
    v_hour_weights JSONB;
    v_dow_weights JSONB;
BEGIN
    -- Get worker's timezone (from preflight)
    SELECT wr.timezone INTO v_timezone
    FROM worker_registry wr
    WHERE wr.worker_id = p_worker_id;

    -- Default to America/Phoenix if no timezone set
    v_timezone := COALESCE(v_timezone, 'America/Phoenix');

    -- The stored timezone originates from fingerprint data and is not
    -- validated on write. An unrecognised zone name makes AT TIME ZONE raise
    -- invalid_parameter_value; fall back to the default instead of failing
    -- the whole availability check.
    BEGIN
        v_local_now := NOW() AT TIME ZONE v_timezone;
    EXCEPTION
        WHEN invalid_parameter_value THEN
            v_timezone := 'America/Phoenix';
            v_local_now := NOW() AT TIME ZONE v_timezone;
    END;

    -- Local hour and day of week (0=Sunday) in the worker's timezone
    v_hour := EXTRACT(HOUR FROM v_local_now)::INTEGER;
    v_dow := EXTRACT(DOW FROM v_local_now)::INTEGER;

    -- Get working hours profile (enabled profiles only)
    SELECT wh.hour_weights, wh.dow_weights
    INTO v_hour_weights, v_dow_weights
    FROM working_hours wh
    WHERE wh.name = p_profile_name AND wh.enabled = true;

    -- If profile not found or disabled, always available
    IF v_hour_weights IS NULL THEN
        RETURN QUERY SELECT
            TRUE::BOOLEAN,
            v_hour,
            100::INTEGER,
            v_timezone,
            0::INTEGER,
            'Profile not found or disabled - defaulting to available'::TEXT;
        RETURN;
    END IF;

    -- Get hour weight (default to 50 if hour not specified)
    v_weight := COALESCE((v_hour_weights->>v_hour::TEXT)::INTEGER, 50);

    -- Get day-of-week weight (default to 100)
    v_dow_weight := COALESCE((v_dow_weights->>v_dow::TEXT)::INTEGER, 100);

    -- Calculate final weight (hour_weight * dow_weight / 100), integer division
    v_final_weight := (v_weight * v_dow_weight / 100);

    -- Roll the dice (0-99)
    v_roll := floor(random() * 100)::INTEGER;

    -- Return result
    RETURN QUERY SELECT
        (v_roll < v_final_weight)::BOOLEAN AS is_available,
        v_hour AS current_hour,
        v_final_weight AS hour_weight,
        v_timezone AS worker_timezone,
        v_roll AS roll,
        CASE
            WHEN v_roll < v_final_weight THEN
                format('Available: rolled %s < %s%% threshold', v_roll, v_final_weight)
            ELSE
                format('Sleeping: rolled %s >= %s%% threshold', v_roll, v_final_weight)
        END AS reason;
END;
$function$;
|
||||
|
||||
-- Boolean-only convenience wrapper around check_working_hours().
-- Returns that function's is_available flag for the given worker/profile,
-- treating a missing or NULL result as TRUE (available).
CREATE OR REPLACE FUNCTION is_worker_available(
    p_worker_id VARCHAR,
    p_profile_name VARCHAR DEFAULT 'natural_traffic'
)
RETURNS BOOLEAN
LANGUAGE plpgsql
AS $function$
BEGIN
    RETURN COALESCE(
        (SELECT cwh.is_available
           FROM check_working_hours(p_worker_id, p_profile_name) AS cwh),
        TRUE
    );
END;
$function$;
|
||||
|
||||
COMMENT ON FUNCTION check_working_hours(VARCHAR, VARCHAR) IS
|
||||
'Check if worker should be available based on working hours profile. Returns detailed info.';
|
||||
COMMENT ON FUNCTION is_worker_available(VARCHAR, VARCHAR) IS
|
||||
'Simple boolean check if worker passes working hours probability roll.';
|
||||
@@ -1,12 +0,0 @@
|
||||
-- Migration: 103_schedule_dispensary_id.sql
|
||||
-- Description: Add dispensary_id to task_schedules for per-store schedules
|
||||
-- Created: 2025-12-13
|
||||
|
||||
-- Add dispensary_id column for single-store schedules
|
||||
ALTER TABLE task_schedules
|
||||
ADD COLUMN IF NOT EXISTS dispensary_id INTEGER REFERENCES dispensaries(id);
|
||||
|
||||
-- Index for quick lookups
|
||||
CREATE INDEX IF NOT EXISTS idx_task_schedules_dispensary_id ON task_schedules(dispensary_id);
|
||||
|
||||
COMMENT ON COLUMN task_schedules.dispensary_id IS 'For single-store schedules. If set, only this store is refreshed. If NULL, uses state_code for all stores in state.';
|
||||
@@ -1,25 +0,0 @@
|
||||
-- Migration 104: Add source tracking to worker_tasks
|
||||
-- Purpose: Track WHERE tasks are created from (schedule vs API endpoint)
|
||||
--
|
||||
-- All automated task creation should be visible in task_schedules.
|
||||
-- This column helps identify "phantom" tasks created outside the schedule system.
|
||||
|
||||
-- Add source column to worker_tasks
|
||||
ALTER TABLE worker_tasks
|
||||
ADD COLUMN IF NOT EXISTS source VARCHAR(100);
|
||||
|
||||
-- Add source_schedule_id column (references task_schedules.id when the task was created by a schedule)
|
||||
ALTER TABLE worker_tasks
|
||||
ADD COLUMN IF NOT EXISTS source_schedule_id INTEGER REFERENCES task_schedules(id);
|
||||
|
||||
-- Add request metadata (IP, user agent) for debugging
|
||||
ALTER TABLE worker_tasks
|
||||
ADD COLUMN IF NOT EXISTS source_metadata JSONB;
|
||||
|
||||
-- Create index for querying by source
|
||||
CREATE INDEX IF NOT EXISTS idx_worker_tasks_source ON worker_tasks(source);
|
||||
|
||||
-- Comment explaining source values
|
||||
COMMENT ON COLUMN worker_tasks.source IS 'Task creation source: schedule, api_run_now, api_crawl_state, api_batch_staggered, api_batch_az_stores, task_chain, manual';
|
||||
COMMENT ON COLUMN worker_tasks.source_schedule_id IS 'ID of the schedule that created this task (if source=schedule or source=api_run_now)';
|
||||
COMMENT ON COLUMN worker_tasks.source_metadata IS 'Request metadata: {ip, user_agent, endpoint, timestamp}';
|
||||
@@ -1,25 +0,0 @@
|
||||
-- Migration 105: Add indexes for dashboard performance
|
||||
-- Purpose: Speed up the /dashboard and /national/summary endpoints
|
||||
--
|
||||
-- These queries were identified as slow:
|
||||
-- 1. COUNT(*) FROM store_product_snapshots WHERE captured_at >= NOW() - INTERVAL '24 hours'
|
||||
-- 2. National summary aggregate queries
|
||||
|
||||
-- Index for snapshot counts by time (used in dashboard)
|
||||
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_store_product_snapshots_captured_at
|
||||
ON store_product_snapshots(captured_at DESC);
|
||||
|
||||
-- Index for crawl traces by time and success (used in dashboard)
|
||||
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_crawl_traces_started_success
|
||||
ON crawl_orchestration_traces(started_at DESC, success);
|
||||
|
||||
-- Partial index for recent failed crawls (faster for dashboard alerts)
|
||||
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_crawl_traces_recent_failures
|
||||
ON crawl_orchestration_traces(started_at DESC)
|
||||
WHERE success = false;
|
||||
|
||||
-- Composite index for store_products aggregations by dispensary
|
||||
-- Helps with national summary state metrics query
|
||||
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_store_products_dispensary_brand
|
||||
ON store_products(dispensary_id, brand_name_raw)
|
||||
WHERE brand_name_raw IS NOT NULL;
|
||||
@@ -1,10 +0,0 @@
|
||||
-- Migration: 106_rename_store_discovery_schedule.sql
|
||||
-- Description: Rename store_discovery_dutchie to 'Store Discovery'
|
||||
-- Created: 2025-12-13
|
||||
|
||||
-- Update the schedule name for better display
|
||||
-- The platform='dutchie' field is preserved for badge display in UI
|
||||
UPDATE task_schedules
|
||||
SET name = 'Store Discovery',
|
||||
updated_at = NOW()
|
||||
WHERE name = 'store_discovery_dutchie';
|
||||
@@ -1,23 +0,0 @@
|
||||
-- Migration: 107_proxy_tracking.sql
|
||||
-- Description: Add proxy tracking columns to worker_tasks for geo-targeting visibility
|
||||
-- Created: 2025-12-13
|
||||
|
||||
-- Add proxy tracking columns to worker_tasks
|
||||
ALTER TABLE worker_tasks
|
||||
ADD COLUMN IF NOT EXISTS proxy_ip VARCHAR(45);
|
||||
|
||||
ALTER TABLE worker_tasks
|
||||
ADD COLUMN IF NOT EXISTS proxy_geo VARCHAR(100);
|
||||
|
||||
ALTER TABLE worker_tasks
|
||||
ADD COLUMN IF NOT EXISTS proxy_source VARCHAR(10);
|
||||
|
||||
-- Comments
|
||||
COMMENT ON COLUMN worker_tasks.proxy_ip IS 'IP address of proxy used for this task';
|
||||
COMMENT ON COLUMN worker_tasks.proxy_geo IS 'Geo target used (e.g., "arizona", "phoenix, arizona")';
|
||||
COMMENT ON COLUMN worker_tasks.proxy_source IS 'Source of proxy: "api" (Evomi dynamic) or "static" (fallback table)';
|
||||
|
||||
-- Index for proxy analysis
|
||||
CREATE INDEX IF NOT EXISTS idx_worker_tasks_proxy_ip
|
||||
ON worker_tasks(proxy_ip)
|
||||
WHERE proxy_ip IS NOT NULL;
|
||||
@@ -1,231 +0,0 @@
|
||||
-- Migration: 108_worker_geo_sessions.sql
|
||||
-- Description: Add geo session tracking to worker_registry for state-based task assignment
|
||||
-- Created: 2025-12-13
|
||||
|
||||
-- Worker geo session columns
|
||||
-- Worker qualifies with a geo (state/city), then only claims tasks matching that geo
|
||||
ALTER TABLE worker_registry
|
||||
ADD COLUMN IF NOT EXISTS current_state VARCHAR(2);
|
||||
|
||||
ALTER TABLE worker_registry
|
||||
ADD COLUMN IF NOT EXISTS current_city VARCHAR(100);
|
||||
|
||||
ALTER TABLE worker_registry
|
||||
ADD COLUMN IF NOT EXISTS geo_session_started_at TIMESTAMPTZ;
|
||||
|
||||
ALTER TABLE worker_registry
|
||||
ADD COLUMN IF NOT EXISTS session_task_count INT DEFAULT 0;
|
||||
|
||||
ALTER TABLE worker_registry
|
||||
ADD COLUMN IF NOT EXISTS session_max_tasks INT DEFAULT 7;
|
||||
|
||||
ALTER TABLE worker_registry
|
||||
ADD COLUMN IF NOT EXISTS proxy_geo VARCHAR(100);
|
||||
|
||||
-- Comments
|
||||
COMMENT ON COLUMN worker_registry.current_state IS 'Worker''s current geo assignment (US state code, e.g., AZ)';
|
||||
COMMENT ON COLUMN worker_registry.current_city IS 'Worker''s current city assignment (optional, e.g., phoenix)';
|
||||
COMMENT ON COLUMN worker_registry.geo_session_started_at IS 'When worker''s current geo session started';
|
||||
COMMENT ON COLUMN worker_registry.session_task_count IS 'Number of tasks completed in current geo session';
|
||||
COMMENT ON COLUMN worker_registry.session_max_tasks IS 'Max tasks per geo session before re-qualification (default 7)';
|
||||
COMMENT ON COLUMN worker_registry.proxy_geo IS 'Geo target string used for proxy (e.g., "arizona" or "phoenix, arizona")';
|
||||
|
||||
-- Index for finding workers by state
|
||||
CREATE INDEX IF NOT EXISTS idx_worker_registry_current_state
|
||||
ON worker_registry(current_state)
|
||||
WHERE current_state IS NOT NULL;
|
||||
|
||||
-- ============================================================
-- UPDATED claim_task FUNCTION
-- Now filters by worker's geo session state
-- ============================================================
-- Atomically claims the next pending task for a worker.
--
-- Parameters:
--   p_role         task role to claim
--   p_worker_id    worker_registry.worker_id of the claiming worker
--   p_curl_passed  worker passed the curl preflight (needed for method = 'curl')
--   p_http_passed  worker passed the http preflight (needed for method = 'http')
--
-- Returns: the claimed worker_tasks row, or NULL when
--   * the worker has no geo session, the session is older than 60 minutes,
--     or the session's task budget (session_max_tasks, default 7) is used up;
--   * no matching pending task is currently claimable.
--
-- Side effects: marks the task 'claimed' and increments the worker's
-- session_task_count.
CREATE OR REPLACE FUNCTION claim_task(
    p_role VARCHAR(50),
    p_worker_id VARCHAR(100),
    p_curl_passed BOOLEAN DEFAULT TRUE,
    p_http_passed BOOLEAN DEFAULT FALSE
) RETURNS worker_tasks AS $$
DECLARE
    claimed_task worker_tasks;
    worker_state VARCHAR(2);
    session_valid BOOLEAN;
    session_tasks INT;
    max_tasks INT;
BEGIN
    -- Get worker's current geo session info
    SELECT
        wr.current_state,
        wr.session_task_count,
        wr.session_max_tasks,
        (wr.geo_session_started_at IS NOT NULL
         AND wr.geo_session_started_at > NOW() - INTERVAL '60 minutes')
    INTO worker_state, session_tasks, max_tasks, session_valid
    FROM worker_registry wr
    WHERE wr.worker_id = p_worker_id;

    -- If no valid geo session, or session exhausted, worker can't claim tasks.
    -- Worker must re-qualify first. (An unknown worker_id leaves worker_state
    -- NULL and therefore also ends up here.)
    IF worker_state IS NULL OR NOT session_valid OR session_tasks >= COALESCE(max_tasks, 7) THEN
        RETURN NULL;
    END IF;

    -- Claim task matching worker's state
    UPDATE worker_tasks
    SET
        status = 'claimed',
        worker_id = p_worker_id,
        claimed_at = NOW(),
        updated_at = NOW()
    WHERE id = (
        SELECT wt.id
        FROM worker_tasks wt
        -- Inner join: only tasks tied to a dispensary are claimable here.
        JOIN dispensaries d ON wt.dispensary_id = d.id
        WHERE wt.role = p_role
          AND wt.status = 'pending'
          AND (wt.scheduled_for IS NULL OR wt.scheduled_for <= NOW())
          -- GEO FILTER: Task's dispensary must match worker's state
          AND d.state = worker_state
          -- Method compatibility: worker must have passed the required preflight
          AND (
                wt.method IS NULL  -- No preference, any worker can claim
                OR (wt.method = 'curl' AND p_curl_passed = TRUE)
                OR (wt.method = 'http' AND p_http_passed = TRUE)
          )
          -- Exclude stores that already have an active task
          AND NOT EXISTS (
                SELECT 1
                FROM worker_tasks busy
                WHERE busy.dispensary_id = wt.dispensary_id
                  AND busy.status IN ('claimed', 'running')
          )
        ORDER BY wt.priority DESC, wt.created_at ASC
        LIMIT 1
        -- Lock ONLY the task row. A bare FOR UPDATE would also lock the joined
        -- dispensaries row: tasks would be skipped whenever that dispensary row
        -- is locked by another transaction, and every claim would hold a
        -- dispensary row lock until commit.
        FOR UPDATE OF wt SKIP LOCKED
    )
    RETURNING * INTO claimed_task;

    -- If task claimed, increment session task count.
    -- Note: Use claimed_task.id IS NOT NULL (not claimed_task IS NOT NULL):
    -- a composite value is only "IS NOT NULL" when every field is non-NULL.
    IF claimed_task.id IS NOT NULL THEN
        UPDATE worker_registry
        SET session_task_count = session_task_count + 1
        WHERE worker_id = p_worker_id;
    END IF;

    RETURN claimed_task;
END;
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- ============================================================
-- FUNCTION: assign_worker_geo
-- Assigns a geo session to a worker based on demand
-- Returns the assigned state, or NULL if no tasks available
-- ============================================================
-- Picks the state with the most pending tasks (ties: fewest workers with a
-- live geo session), stores it on the worker and starts a fresh session
-- (session_task_count reset to 0, geo_session_started_at = NOW()).
--
-- Parameters:
--   p_worker_id  worker_registry.worker_id to assign
--
-- Returns: the assigned state code, or NULL when no dispensary with a known
-- state and a platform_dispensary_id has pending tasks.
CREATE OR REPLACE FUNCTION assign_worker_geo(
    p_worker_id VARCHAR(100)
) RETURNS VARCHAR(2) AS $$
DECLARE
    assigned_state VARCHAR(2);
BEGIN
    -- Find state with highest demand (pending tasks) and lowest coverage (workers)
    SELECT d.state INTO assigned_state
    FROM dispensaries d
    JOIN worker_tasks wt ON wt.dispensary_id = d.id
    LEFT JOIN worker_registry wr
           ON wr.current_state = d.state
          AND wr.status = 'active'
          AND wr.geo_session_started_at > NOW() - INTERVAL '60 minutes'
    WHERE wt.status = 'pending'
      AND d.platform_dispensary_id IS NOT NULL
      -- A NULL-state group must never win: it would be indistinguishable from
      -- "no pending tasks" and no worker could claim those tasks anyway.
      AND d.state IS NOT NULL
    GROUP BY d.state
    ORDER BY
        -- DISTINCT is required: the LEFT JOIN repeats every task row once per
        -- worker in the state, which would otherwise inflate demand for states
        -- that already have workers.
        COUNT(DISTINCT wt.id) DESC,        -- Most pending tasks first
        COUNT(DISTINCT wr.worker_id) ASC   -- Fewest workers second
    LIMIT 1;

    -- If no pending tasks anywhere, return NULL
    IF assigned_state IS NULL THEN
        RETURN NULL;
    END IF;

    -- Assign the state to this worker
    UPDATE worker_registry
    SET
        current_state = assigned_state,
        current_city = NULL,  -- City assigned later if available
        geo_session_started_at = NOW(),
        session_task_count = 0
    WHERE worker_id = p_worker_id;

    RETURN assigned_state;
END;
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- ============================================================
-- FUNCTION: check_worker_geo_session
-- Reports the state of a worker's current geo session
-- ============================================================
-- Parameters:
--   p_worker_id  worker_registry.worker_id to inspect
--
-- Returns one row for a known worker (no rows otherwise):
--   current_state / current_city   geo currently stored on the worker
--   session_valid                  TRUE while the session is under 60 minutes old
--   session_tasks_remaining        session_max_tasks - session_task_count, floored at 0
--   session_minutes_remaining      minutes until the 60-minute window ends, floored at 0
CREATE OR REPLACE FUNCTION check_worker_geo_session(
    p_worker_id VARCHAR(100)
) RETURNS TABLE (
    current_state VARCHAR(2),
    current_city VARCHAR(100),
    session_valid BOOLEAN,
    session_tasks_remaining INT,
    session_minutes_remaining INT
) AS $$
BEGIN
    RETURN QUERY
    SELECT
        wr.current_state,
        wr.current_city,
        -- NULL start time (no session yet) counts as "not valid"
        COALESCE(sess.expires_at > NOW(), FALSE),
        GREATEST(0, wr.session_max_tasks - wr.session_task_count),
        GREATEST(0, EXTRACT(EPOCH FROM (sess.expires_at - NOW())) / 60)::INT
    FROM worker_registry AS wr
    CROSS JOIN LATERAL (
        -- moment the 60-minute session window closes
        SELECT wr.geo_session_started_at + INTERVAL '60 minutes' AS expires_at
    ) AS sess
    WHERE wr.worker_id = p_worker_id;
END;
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- View: per-state comparison of pending work against worker capacity.
-- States are derived from the dispensaries table, so no separate states
-- lookup table is needed.
--
-- Columns:
--   state / state_name   state code (the code doubles as the name)
--   pending_tasks        pending worker_tasks for dispensaries in the state
--   workers_on_state     active/idle workers with a passed http preflight and
--                        a geo session younger than 60 minutes on that state
--   remaining_capacity   sum of those workers' unused session task budget
--   status               'no_coverage' (work but zero capacity),
--                        'thin' (capacity < pending), otherwise 'ok'
CREATE OR REPLACE VIEW worker_state_capacity AS
WITH served_states AS (
    -- Unique states of dispensaries that have a platform ID
    SELECT DISTINCT disp.state AS code
    FROM dispensaries disp
    WHERE disp.state IS NOT NULL
      AND disp.platform_dispensary_id IS NOT NULL
),
backlog AS (
    SELECT disp.state, COUNT(*) AS count
    FROM worker_tasks wt
    JOIN dispensaries disp ON wt.dispensary_id = disp.id
    WHERE wt.status = 'pending'
      AND disp.state IS NOT NULL
    GROUP BY disp.state
),
coverage AS (
    SELECT
        reg.current_state,
        COUNT(*) AS count,
        SUM(GREATEST(0, reg.session_max_tasks - reg.session_task_count)) AS remaining_capacity
    FROM worker_registry reg
    WHERE reg.status IN ('active', 'idle')  -- Include both active and idle workers
      AND reg.preflight_http_status = 'passed'
      AND reg.current_state IS NOT NULL
      AND reg.geo_session_started_at > NOW() - INTERVAL '60 minutes'
    GROUP BY reg.current_state
)
SELECT
    st.code AS state,
    st.code AS state_name,  -- Use code as name since there is no states lookup table
    COALESCE(bl.count, 0) AS pending_tasks,
    COALESCE(cv.count, 0) AS workers_on_state,
    COALESCE(cv.remaining_capacity, 0) AS remaining_capacity,
    CASE
        WHEN COALESCE(cv.remaining_capacity, 0) = 0 AND COALESCE(bl.count, 0) > 0 THEN 'no_coverage'
        WHEN COALESCE(cv.remaining_capacity, 0) < COALESCE(bl.count, 0) THEN 'thin'
        ELSE 'ok'
    END AS status
FROM served_states st
LEFT JOIN backlog bl ON bl.state = st.code
LEFT JOIN coverage cv ON cv.current_state = st.code
ORDER BY COALESCE(bl.count, 0) DESC;
|
||||
@@ -1,354 +0,0 @@
|
||||
-- Migration: 109_worker_identity_pool.sql
|
||||
-- Description: Identity pool for diverse IP/fingerprint rotation
|
||||
-- Created: 2025-12-14
|
||||
--
|
||||
-- Workers claim identities (IP + fingerprint) from pool.
|
||||
-- Each identity used for 3-5 tasks, then cools down 2-3 hours.
|
||||
-- This creates natural browsing patterns - same person doesn't hit 20 stores.
|
||||
|
||||
-- ============================================================
|
||||
-- IDENTITY POOL TABLE
|
||||
-- ============================================================
|
||||
-- One row per reusable identity (proxy session + browser fingerprint).
CREATE TABLE IF NOT EXISTS worker_identities (
    id                     SERIAL PRIMARY KEY,

    -- Evomi session id; the session controls which IP is used
    session_id             VARCHAR(100) NOT NULL UNIQUE,
    -- IP detected for this session
    ip_address             INET,

    -- Geo targeting (city gives city-level diversity)
    state_code             VARCHAR(2) NOT NULL,
    city                   VARCHAR(100),

    -- Fingerprint data (UA, timezone, locale, device, etc.)
    fingerprint            JSONB NOT NULL,

    -- Timestamps
    created_at             TIMESTAMPTZ DEFAULT NOW(),
    last_used_at           TIMESTAMPTZ,
    cooldown_until         TIMESTAMPTZ,            -- not reusable before this time

    -- Usage stats
    total_tasks_completed  INTEGER DEFAULT 0,
    total_sessions         INTEGER DEFAULT 1,      -- how many times this identity has been used

    -- Current claim state
    is_active              BOOLEAN DEFAULT FALSE,  -- currently claimed by a worker
    active_worker_id       VARCHAR(100),           -- which worker holds it

    -- Health tracking
    consecutive_failures   INTEGER DEFAULT 0,
    is_healthy             BOOLEAN DEFAULT TRUE    -- set false if the IP gets blocked
);

-- Geo lookup
CREATE INDEX IF NOT EXISTS idx_worker_identities_state_city
    ON worker_identities (state_code, city);

-- Claimable identities: healthy rows only
CREATE INDEX IF NOT EXISTS idx_worker_identities_available
    ON worker_identities (state_code, is_active, cooldown_until)
    WHERE is_healthy = TRUE;

-- Cooldown scans over healthy, unclaimed rows
CREATE INDEX IF NOT EXISTS idx_worker_identities_cooldown
    ON worker_identities (cooldown_until)
    WHERE is_healthy = TRUE AND is_active = FALSE;
|
||||
|
||||
-- ============================================================
|
||||
-- METRO AREA MAPPING
|
||||
-- For fallback when exact city not available
|
||||
-- ============================================================
|
||||
-- Maps each city to the metro area it belongs to; a city appears at most once
-- per state. Used as a fallback when no identity exists for the exact city.
CREATE TABLE IF NOT EXISTS metro_areas (
    id          SERIAL PRIMARY KEY,
    metro_name  VARCHAR(100) NOT NULL,
    state_code  VARCHAR(2)   NOT NULL,
    city        VARCHAR(100) NOT NULL,
    is_primary  BOOLEAN DEFAULT FALSE,  -- TRUE for the metro's primary city
    UNIQUE (state_code, city)
);
|
||||
|
||||
-- Seed Arizona metro areas. Re-running is safe: rows whose (state_code, city)
-- already exists are skipped.
INSERT INTO metro_areas (metro_name, state_code, city, is_primary) VALUES
    -- Phoenix Metro Area
    ('Phoenix Metro',  'AZ', 'Phoenix',         TRUE),
    ('Phoenix Metro',  'AZ', 'Mesa',            FALSE),
    ('Phoenix Metro',  'AZ', 'Glendale',        FALSE),
    ('Phoenix Metro',  'AZ', 'Tempe',           FALSE),
    ('Phoenix Metro',  'AZ', 'Scottsdale',      FALSE),
    ('Phoenix Metro',  'AZ', 'Chandler',        FALSE),
    ('Phoenix Metro',  'AZ', 'Peoria',          FALSE),
    ('Phoenix Metro',  'AZ', 'El Mirage',       FALSE),
    ('Phoenix Metro',  'AZ', 'Tolleson',        FALSE),
    ('Phoenix Metro',  'AZ', 'Sun City',        FALSE),
    ('Phoenix Metro',  'AZ', 'Apache Junction', FALSE),
    ('Phoenix Metro',  'AZ', 'Cave Creek',      FALSE),
    ('Phoenix Metro',  'AZ', 'Gilbert',         FALSE),
    ('Phoenix Metro',  'AZ', 'Surprise',        FALSE),
    ('Phoenix Metro',  'AZ', 'Avondale',        FALSE),
    ('Phoenix Metro',  'AZ', 'Goodyear',        FALSE),
    ('Phoenix Metro',  'AZ', 'Buckeye',         FALSE),
    ('Phoenix Metro',  'AZ', 'Queen Creek',     FALSE),
    -- Tucson Metro Area
    ('Tucson Metro',   'AZ', 'Tucson',          TRUE),
    ('Tucson Metro',   'AZ', 'Oro Valley',      FALSE),
    ('Tucson Metro',   'AZ', 'Marana',          FALSE),
    ('Tucson Metro',   'AZ', 'Sahuarita',       FALSE),
    ('Tucson Metro',   'AZ', 'South Tucson',    FALSE),
    -- Flagstaff Area
    ('Flagstaff Area', 'AZ', 'Flagstaff',       TRUE),
    ('Flagstaff Area', 'AZ', 'Sedona',          FALSE),
    -- Prescott Area
    ('Prescott Area',  'AZ', 'Prescott',        TRUE),
    ('Prescott Area',  'AZ', 'Prescott Valley', FALSE)
ON CONFLICT (state_code, city) DO NOTHING;
|
||||
|
||||
-- ============================================================
|
||||
-- FUNCTION: claim_identity
|
||||
-- Claims an available identity for a worker
|
||||
-- Tries: exact city -> metro area -> any in state -> create new
|
||||
-- ============================================================
|
||||
-- claim_identity(p_worker_id, p_state_code, p_city)
--
-- Marks one available identity as claimed by p_worker_id and returns it.
-- An identity is available when it is not active, is healthy, and has no
-- cooldown or an expired one. Candidates are tried in this order, least
-- recently used first (never-used identities before all others):
--   1. same state and exact city           (only when p_city is given)
--   2. any city in the same metro area     (only when p_city maps to a metro)
--   3. any identity in the state
-- When nothing is available the returned row has all fields NULL; the caller
-- is expected to create a new identity.
--
-- Candidate rows are picked with FOR UPDATE SKIP LOCKED so concurrent callers
-- do not wait on, or double-claim, the same identity.
CREATE OR REPLACE FUNCTION claim_identity(
    p_worker_id VARCHAR(100),
    p_state_code VARCHAR(2),
    p_city VARCHAR(100) DEFAULT NULL
) RETURNS worker_identities AS $$
DECLARE
    claimed_identity worker_identities;
    metro_name_val VARCHAR(100);
BEGIN
    IF p_city IS NOT NULL THEN
        -- 1. Exact city match
        UPDATE worker_identities
        SET is_active = TRUE,
            active_worker_id = p_worker_id,
            last_used_at = NOW()
        WHERE id = (
            SELECT id FROM worker_identities
            WHERE state_code = p_state_code
              AND city = p_city
              AND is_active = FALSE
              AND is_healthy = TRUE
              AND (cooldown_until IS NULL OR cooldown_until < NOW())
            ORDER BY last_used_at ASC NULLS FIRST
            LIMIT 1
            FOR UPDATE SKIP LOCKED
        )
        RETURNING * INTO claimed_identity;

        IF claimed_identity.id IS NOT NULL THEN
            RETURN claimed_identity;
        END IF;

        -- 2. Metro area fallback: find the metro this city belongs to
        SELECT ma.metro_name INTO metro_name_val
        FROM metro_areas ma
        WHERE ma.state_code = p_state_code AND ma.city = p_city;

        IF metro_name_val IS NOT NULL THEN
            UPDATE worker_identities wi
            SET is_active = TRUE,
                active_worker_id = p_worker_id,
                last_used_at = NOW()
            WHERE wi.id = (
                SELECT wi2.id FROM worker_identities wi2
                JOIN metro_areas ma ON wi2.city = ma.city AND wi2.state_code = ma.state_code
                WHERE ma.metro_name = metro_name_val
                  AND wi2.is_active = FALSE
                  AND wi2.is_healthy = TRUE
                  AND (wi2.cooldown_until IS NULL OR wi2.cooldown_until < NOW())
                ORDER BY wi2.last_used_at ASC NULLS FIRST
                LIMIT 1
                -- Lock only the identity row. A bare FOR UPDATE on a join also
                -- locks the joined metro_areas row, and with SKIP LOCKED a
                -- concurrent claimer would then skip every identity in that city.
                FOR UPDATE OF wi2 SKIP LOCKED
            )
            RETURNING * INTO claimed_identity;

            IF claimed_identity.id IS NOT NULL THEN
                RETURN claimed_identity;
            END IF;
        END IF;
    END IF;

    -- 3. Any identity in the state
    UPDATE worker_identities
    SET is_active = TRUE,
        active_worker_id = p_worker_id,
        last_used_at = NOW()
    WHERE id = (
        SELECT id FROM worker_identities
        WHERE state_code = p_state_code
          AND is_active = FALSE
          AND is_healthy = TRUE
          AND (cooldown_until IS NULL OR cooldown_until < NOW())
        ORDER BY last_used_at ASC NULLS FIRST
        LIMIT 1
        FOR UPDATE SKIP LOCKED
    )
    RETURNING * INTO claimed_identity;

    -- Return whatever we got (all-NULL row if nothing available - caller should create new)
    RETURN claimed_identity;
END;
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- ============================================================
|
||||
-- FUNCTION: release_identity
|
||||
-- Releases an identity back to pool with cooldown
|
||||
-- ============================================================
|
||||
-- release_identity(p_identity_id, p_tasks_completed, p_failed)
--
-- Returns an identity to the pool: clears the claim, adds p_tasks_completed to
-- the running total, bumps total_sessions, and starts a random 2-3 hour
-- cooldown.
--
-- Failure tracking: p_failed = TRUE increments consecutive_failures; anything
-- else resets it to 0. The identity is healthy while the counter *after* this
-- release is below 3, so the third consecutive failed release marks it
-- unhealthy and a successful release marks it healthy again.
CREATE OR REPLACE FUNCTION release_identity(
    p_identity_id INT,
    p_tasks_completed INT DEFAULT 0,
    p_failed BOOLEAN DEFAULT FALSE
) RETURNS VOID AS $$
DECLARE
    cooldown_hours FLOAT;
BEGIN
    -- Random cooldown between 2-3 hours for diversity
    cooldown_hours := 2 + random();  -- 2.0 to 3.0 hours

    UPDATE worker_identities
    SET is_active = FALSE,
        active_worker_id = NULL,
        -- COALESCE keeps the running total intact if a caller passes NULL
        total_tasks_completed = total_tasks_completed + COALESCE(p_tasks_completed, 0),
        total_sessions = total_sessions + 1,
        cooldown_until = NOW() + cooldown_hours * INTERVAL '1 hour',
        consecutive_failures = CASE WHEN p_failed THEN consecutive_failures + 1 ELSE 0 END,
        -- Column references on the right-hand side of SET see the row's OLD
        -- values, so the new counter is recomputed here instead of reading
        -- consecutive_failures directly.
        is_healthy = (CASE WHEN p_failed THEN consecutive_failures + 1 ELSE 0 END) < 3
    WHERE id = p_identity_id;
END;
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- ============================================================
|
||||
-- FUNCTION: get_pending_tasks_by_geo
|
||||
-- Gets pending tasks grouped by state/city for identity assignment
|
||||
-- ============================================================
|
||||
-- get_pending_tasks_by_geo(p_limit)
--
-- Returns up to p_limit (state, city) groups of pending tasks, busiest first.
-- Each row also carries the number of identities currently claimable for that
-- geo: same state, same city or no city at all, not active, healthy, and not
-- cooling down. Dispensaries without a state are ignored.
CREATE OR REPLACE FUNCTION get_pending_tasks_by_geo(
    p_limit INT DEFAULT 10
) RETURNS TABLE (
    state_code VARCHAR(2),
    city VARCHAR(100),
    pending_count BIGINT,
    available_identities BIGINT
) AS $$
BEGIN
    RETURN QUERY
    WITH pending_geo AS (
        SELECT d.state     AS geo_state,
               d.city      AS geo_city,
               COUNT(t.id) AS task_count
        FROM worker_tasks t
        JOIN dispensaries d ON d.id = t.dispensary_id
        WHERE d.state IS NOT NULL
          AND t.status = 'pending'
        GROUP BY d.state, d.city
    )
    SELECT
        pg.geo_state,
        pg.geo_city,
        pg.task_count,
        (
            SELECT COUNT(*)
            FROM worker_identities wi
            WHERE wi.state_code = pg.geo_state
              AND (wi.city = pg.geo_city OR wi.city IS NULL)
              AND wi.is_active = FALSE
              AND wi.is_healthy = TRUE
              AND (wi.cooldown_until IS NULL OR wi.cooldown_until < NOW())
        )
    FROM pending_geo pg
    ORDER BY pg.task_count DESC
    LIMIT p_limit;
END;
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- ============================================================
|
||||
-- FUNCTION: get_tasks_for_identity
|
||||
-- Gets tasks matching an identity's geo (same city or metro)
|
||||
-- ============================================================
|
||||
-- get_tasks_for_identity(p_state_code, p_city, p_limit)
--
-- Lists up to p_limit pending tasks in p_state_code that suit an identity
-- located in p_city:
--   * if p_city belongs to a metro area: tasks in p_city or any other city of
--     that metro;
--   * if p_city has no metro mapping (or is NULL): every pending task in the state.
-- Exact-city tasks sort first, then higher priority, then older tasks.
-- This function only reads; it does not claim the tasks.
CREATE OR REPLACE FUNCTION get_tasks_for_identity(
    p_state_code VARCHAR(2),
    p_city VARCHAR(100),
    p_limit INT DEFAULT 5
) RETURNS TABLE (
    task_id INT,
    dispensary_id INT,
    dispensary_name VARCHAR(255),
    dispensary_city VARCHAR(100),
    role VARCHAR(50)
) AS $$
DECLARE
    metro_name_val VARCHAR(100);
BEGIN
    -- Resolve the metro area (if any) the identity's city belongs to
    SELECT ma.metro_name
      INTO metro_name_val
      FROM metro_areas ma
     WHERE ma.city = p_city
       AND ma.state_code = p_state_code;

    RETURN QUERY
    SELECT t.id,
           d.id,
           d.name,
           d.city,
           t.role
    FROM worker_tasks t
    JOIN dispensaries d ON d.id = t.dispensary_id
    WHERE d.state = p_state_code
      AND t.status = 'pending'
      AND (
            -- No metro known: anything in the state qualifies
            metro_name_val IS NULL
            -- Exact city match
            OR d.city = p_city
            -- Another city of the same metro
            OR d.city IN (
                SELECT ma.city
                FROM metro_areas ma
                WHERE ma.metro_name = metro_name_val
            )
          )
    ORDER BY
        (d.city = p_city) IS NOT TRUE,  -- FALSE sorts first: exact-city tasks lead
        t.priority DESC,
        t.created_at ASC
    LIMIT p_limit;
END;
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- ============================================================
|
||||
-- VIEW: identity_pool_status
|
||||
-- Overview of identity pool health and availability
|
||||
-- ============================================================
|
||||
-- identity_pool_status: one row per (state_code, city) summarising the pool.
--   active        identities currently claimed
--   available     unclaimed, healthy, and not cooling down
--   cooling_down  unclaimed with a cooldown still in the future
--   unhealthy     identities flagged is_healthy = FALSE
CREATE OR REPLACE VIEW identity_pool_status AS
SELECT
    wi.state_code,
    wi.city,
    COUNT(*) AS total_identities,
    COUNT(*) FILTER (WHERE wi.is_active) AS active,
    COUNT(*) FILTER (
        WHERE NOT wi.is_active
          AND wi.is_healthy
          AND (wi.cooldown_until IS NULL OR wi.cooldown_until < NOW())
    ) AS available,
    COUNT(*) FILTER (
        WHERE NOT wi.is_active
          AND wi.cooldown_until > NOW()
    ) AS cooling_down,
    COUNT(*) FILTER (WHERE NOT wi.is_healthy) AS unhealthy,
    SUM(wi.total_tasks_completed) AS total_tasks,
    CAST(AVG(wi.total_tasks_completed) AS INT) AS avg_tasks_per_identity
FROM worker_identities AS wi
GROUP BY wi.state_code, wi.city
ORDER BY wi.state_code, wi.city;
|
||||
|
||||
-- ============================================================
|
||||
-- Comments
|
||||
-- ============================================================
|
||||
-- Catalog comments. Functions are addressed with their full argument lists so
-- the statements keep working if a function name is ever overloaded and on
-- servers that require the signature.
COMMENT ON TABLE worker_identities IS 'Pool of IP/fingerprint identities for worker rotation';
COMMENT ON TABLE metro_areas IS 'City groupings for geographic fallback matching';
COMMENT ON FUNCTION claim_identity(VARCHAR, VARCHAR, VARCHAR) IS 'Claim an available identity: exact city -> metro -> state -> NULL (create new)';
COMMENT ON FUNCTION release_identity(INT, INT, BOOLEAN) IS 'Release identity with 2-3 hour random cooldown';
COMMENT ON FUNCTION get_pending_tasks_by_geo(INT) IS 'Get pending task counts by state/city';
COMMENT ON FUNCTION get_tasks_for_identity(VARCHAR, VARCHAR, INT) IS 'Get tasks matching identity geo (city or metro area)';
|
||||
@@ -1,92 +0,0 @@
|
||||
-- Migration: 110_trusted_origins.sql
|
||||
-- Description: Trusted origins for API access without token
|
||||
-- Created: 2024-12-14
|
||||
--
|
||||
-- Manages which domains, IPs, and patterns can access the API without a Bearer token.
|
||||
-- Used by auth middleware to grant 'internal' role to trusted requests.
|
||||
|
||||
-- ============================================================
|
||||
-- TRUSTED ORIGINS TABLE
|
||||
-- ============================================================
|
||||
-- Origins allowed to call the API without a Bearer token.
CREATE TABLE IF NOT EXISTS trusted_origins (
    id            SERIAL PRIMARY KEY,

    -- Origin identification
    name          VARCHAR(100) NOT NULL,   -- friendly name (e.g., "CannaIQ Production")
    origin_type   VARCHAR(20)  NOT NULL,   -- 'domain', 'ip', or 'pattern'
    origin_value  VARCHAR(255) NOT NULL,   -- the actual value to match

    -- Metadata
    description   TEXT,                    -- optional notes
    active        BOOLEAN DEFAULT TRUE,

    -- Tracking
    created_at    TIMESTAMPTZ DEFAULT NOW(),
    updated_at    TIMESTAMPTZ DEFAULT NOW(),
    created_by    INTEGER REFERENCES users (id),

    -- Constraints
    CONSTRAINT valid_origin_type
        CHECK (origin_type IN ('domain', 'ip', 'pattern')),
    UNIQUE (origin_type, origin_value)
);

-- Partial index over active rows only (the lookup the auth middleware performs)
CREATE INDEX IF NOT EXISTS idx_trusted_origins_active
    ON trusted_origins (active)
    WHERE active = TRUE;
|
||||
|
||||
-- Updated at trigger
|
||||
-- Trigger function: stamps updated_at with the current time on every row update.
CREATE OR REPLACE FUNCTION update_trusted_origins_updated_at()
RETURNS TRIGGER
AS $$
BEGIN
    NEW.updated_at := NOW();
    RETURN NEW;
END;
$$ LANGUAGE plpgsql;

-- Recreate the trigger so the migration can be re-run safely.
DROP TRIGGER IF EXISTS trusted_origins_updated_at ON trusted_origins;

CREATE TRIGGER trusted_origins_updated_at
    BEFORE UPDATE ON trusted_origins
    FOR EACH ROW
    EXECUTE FUNCTION update_trusted_origins_updated_at();
|
||||
|
||||
-- ============================================================
|
||||
-- SEED DEFAULT TRUSTED ORIGINS
|
||||
-- These match the hardcoded fallbacks in middleware.ts
|
||||
-- ============================================================
|
||||
|
||||
-- Seed rows. Every statement skips rows whose (origin_type, origin_value)
-- already exists, so the migration is safe to re-run.

-- Production domains
INSERT INTO trusted_origins (name, origin_type, origin_value, description) VALUES
    ('CannaIQ Production', 'domain', 'https://cannaiq.co', 'Main CannaIQ dashboard'),
    ('CannaIQ Production (www)', 'domain', 'https://www.cannaiq.co', 'Main CannaIQ dashboard with www'),
    ('FindADispo Production', 'domain', 'https://findadispo.com', 'Consumer dispensary finder'),
    ('FindADispo Production (www)', 'domain', 'https://www.findadispo.com', 'Consumer dispensary finder with www'),
    ('Findagram Production', 'domain', 'https://findagram.co', 'Instagram-style cannabis discovery'),
    ('Findagram Production (www)', 'domain', 'https://www.findagram.co', 'Instagram-style cannabis discovery with www')
ON CONFLICT (origin_type, origin_value) DO NOTHING;

-- Wildcard patterns (regular expressions).
-- Written as E'' escape strings so that '\\' is stored as ONE backslash and the
-- regex contains '\.' (a literal dot). In a plain '...' literal with
-- standard_conforming_strings on, '\\.' is stored as two backslashes plus a
-- dot, which as a regex means "a backslash followed by any character" and
-- would not match real subdomain origins.
INSERT INTO trusted_origins (name, origin_type, origin_value, description) VALUES
    ('CannaBrands Subdomains', 'pattern', E'^https://.*\\.cannabrands\\.app$', 'All *.cannabrands.app subdomains'),
    ('CannaIQ Subdomains', 'pattern', E'^https://.*\\.cannaiq\\.co$', 'All *.cannaiq.co subdomains')
ON CONFLICT (origin_type, origin_value) DO NOTHING;

-- Local development
INSERT INTO trusted_origins (name, origin_type, origin_value, description) VALUES
    ('Local API', 'domain', 'http://localhost:3010', 'Local backend API'),
    ('Local Admin', 'domain', 'http://localhost:8080', 'Local admin dashboard'),
    ('Local Vite Dev', 'domain', 'http://localhost:5173', 'Vite dev server')
ON CONFLICT (origin_type, origin_value) DO NOTHING;

-- Trusted IPs (localhost)
-- NOTE(review): if the API sits behind a reverse proxy on the same host,
-- requests may all appear to come from these addresses - confirm how the
-- middleware derives the client IP before relying on these rows in production.
INSERT INTO trusted_origins (name, origin_type, origin_value, description) VALUES
    ('Localhost IPv4', 'ip', '127.0.0.1', 'Local machine'),
    ('Localhost IPv6', 'ip', '::1', 'Local machine IPv6'),
    ('Localhost IPv6 Mapped', 'ip', '::ffff:127.0.0.1', 'IPv6-mapped IPv4 localhost')
ON CONFLICT (origin_type, origin_value) DO NOTHING;
|
||||
|
||||
-- ============================================================
|
||||
-- COMMENTS
|
||||
-- ============================================================
|
||||
COMMENT ON TABLE trusted_origins IS 'Domains, IPs, and patterns that can access API without token';
|
||||
COMMENT ON COLUMN trusted_origins.origin_type IS 'domain = exact URL match, ip = IP address, pattern = regex pattern';
|
||||
COMMENT ON COLUMN trusted_origins.origin_value IS 'For domain: full URL. For ip: IP address. For pattern: regex string';
|
||||
@@ -1,35 +0,0 @@
|
||||
-- Migration: 111_system_settings.sql
|
||||
-- Description: System settings table for runtime configuration
|
||||
-- Created: 2024-12-14
|
||||
|
||||
-- Key/value store for runtime configuration; values are kept as text.
CREATE TABLE IF NOT EXISTS system_settings (
    key          VARCHAR(100) PRIMARY KEY,
    value        TEXT NOT NULL,
    description  TEXT,
    updated_at   TIMESTAMPTZ DEFAULT NOW(),
    updated_by   INTEGER REFERENCES users (id)
);

-- Task pool gate: controls whether workers may claim tasks.
-- An existing value is left untouched on re-run.
INSERT INTO system_settings (key, value, description)
VALUES (
    'task_pool_open',
    'true',
    'When false, workers cannot claim new tasks from the pool'
)
ON CONFLICT (key) DO NOTHING;
|
||||
|
||||
-- Updated at trigger
|
||||
-- Trigger function: stamps updated_at with the current time on every row update.
CREATE OR REPLACE FUNCTION update_system_settings_updated_at()
RETURNS TRIGGER
AS $$
BEGIN
    NEW.updated_at := NOW();
    RETURN NEW;
END;
$$ LANGUAGE plpgsql;

-- Recreate the trigger so the migration can be re-run safely.
DROP TRIGGER IF EXISTS system_settings_updated_at ON system_settings;

CREATE TRIGGER system_settings_updated_at
    BEFORE UPDATE ON system_settings
    FOR EACH ROW
    EXECUTE FUNCTION update_system_settings_updated_at();
|
||||
|
||||
COMMENT ON TABLE system_settings IS 'Runtime configuration settings';
|
||||
COMMENT ON COLUMN system_settings.key IS 'Setting name (e.g., task_pool_open)';
|
||||
COMMENT ON COLUMN system_settings.value IS 'Setting value as string';
|
||||
@@ -1,390 +0,0 @@
|
||||
-- Migration 112: Worker Session Pool
|
||||
-- Tracks IP/fingerprint sessions with exclusive locks and cooldowns
|
||||
-- Each worker claims up to 6 tasks, uses one IP/fingerprint for those tasks,
|
||||
-- then retires the session (8hr cooldown before IP can be reused)
|
||||
|
||||
-- Drop old identity pool tables if they exist (replacing with simpler session model)
|
||||
DROP TABLE IF EXISTS worker_identity_claims CASCADE;
|
||||
DROP TABLE IF EXISTS worker_identities CASCADE;
|
||||
|
||||
-- worker_sessions: one row per IP/fingerprint pair, either locked to a worker
-- ('active'), waiting out its cooldown ('cooldown'), or free ('available').
CREATE TABLE IF NOT EXISTS worker_sessions (
    id                SERIAL PRIMARY KEY,

    -- IP and fingerprint used for this session
    ip_address        VARCHAR(45) NOT NULL,
    fingerprint_hash  VARCHAR(64) NOT NULL,
    fingerprint_data  JSONB,

    -- Geo the session is locked to
    state_code        VARCHAR(2) NOT NULL,
    city              VARCHAR(100),

    -- Owning worker; NULL while in cooldown
    worker_id         VARCHAR(255),

    -- 'active' (locked to worker), 'cooldown' (8hr wait), 'available'
    status            VARCHAR(20) NOT NULL DEFAULT 'available',

    -- Task tracking
    tasks_claimed     INTEGER NOT NULL DEFAULT 0,
    tasks_completed   INTEGER NOT NULL DEFAULT 0,
    tasks_failed      INTEGER NOT NULL DEFAULT 0,
    max_tasks         INTEGER NOT NULL DEFAULT 6,

    -- Timestamps
    created_at        TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    locked_at         TIMESTAMPTZ,   -- when a worker locked this session
    retired_at        TIMESTAMPTZ,   -- when the session was retired (cooldown starts)
    cooldown_until    TIMESTAMPTZ,   -- when the session becomes available again

    CONSTRAINT valid_status
        CHECK (status IN ('active', 'cooldown', 'available'))
);

-- Lookup indexes
CREATE INDEX IF NOT EXISTS idx_worker_sessions_ip
    ON worker_sessions (ip_address);

CREATE INDEX IF NOT EXISTS idx_worker_sessions_status
    ON worker_sessions (status);

CREATE INDEX IF NOT EXISTS idx_worker_sessions_worker
    ON worker_sessions (worker_id)
    WHERE worker_id IS NOT NULL;

CREATE INDEX IF NOT EXISTS idx_worker_sessions_geo
    ON worker_sessions (state_code, city);

CREATE INDEX IF NOT EXISTS idx_worker_sessions_cooldown
    ON worker_sessions (cooldown_until)
    WHERE status = 'cooldown';

-- At most one ACTIVE session may exist per IP
CREATE UNIQUE INDEX IF NOT EXISTS idx_worker_sessions_active_ip
    ON worker_sessions (ip_address)
    WHERE status = 'active';
|
||||
|
||||
-- is_ip_available(check_ip)
-- TRUE when no session for check_ip is 'active' and none is in a cooldown
-- whose cooldown_until is still in the future.
CREATE OR REPLACE FUNCTION is_ip_available(check_ip VARCHAR(45))
RETURNS BOOLEAN AS $$
DECLARE
    v_in_use BOOLEAN;
BEGIN
    SELECT EXISTS (
        SELECT 1
        FROM worker_sessions ws
        WHERE ws.ip_address = check_ip
          AND (
                ws.status = 'active'
                OR (ws.status = 'cooldown' AND ws.cooldown_until > NOW())
              )
    )
    INTO v_in_use;

    RETURN NOT v_in_use;
END;
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- lock_worker_session(p_worker_id, p_ip_address, p_state_code, p_city,
--                     p_fingerprint_hash, p_fingerprint_data)
--
-- Locks an IP/fingerprint session to a worker and returns the session row.
-- Returns NULL when the IP is not available: it is active, still cooling
-- down, or another worker locked it concurrently.
--
-- An existing 'available' row for the IP is reused (counters reset, geo and
-- fingerprint refreshed); otherwise a new row is inserted. When no
-- fingerprint hash is supplied for a new row, a random md5 value is used.
CREATE OR REPLACE FUNCTION lock_worker_session(
    p_worker_id VARCHAR(255),
    p_ip_address VARCHAR(45),
    p_state_code VARCHAR(2),
    p_city VARCHAR(100) DEFAULT NULL,
    p_fingerprint_hash VARCHAR(64) DEFAULT NULL,
    p_fingerprint_data JSONB DEFAULT NULL
) RETURNS worker_sessions AS $$
DECLARE
    v_session worker_sessions;
BEGIN
    -- First check if IP is available
    IF NOT is_ip_available(p_ip_address) THEN
        RETURN NULL;
    END IF;

    -- Try to find an existing available session for this IP
    SELECT * INTO v_session
    FROM worker_sessions
    WHERE ip_address = p_ip_address
      AND status = 'available'
    LIMIT 1
    FOR UPDATE SKIP LOCKED;

    IF v_session.id IS NOT NULL THEN
        -- Reuse existing session
        UPDATE worker_sessions SET
            worker_id = p_worker_id,
            status = 'active',
            state_code = p_state_code,
            city = p_city,
            fingerprint_hash = COALESCE(p_fingerprint_hash, fingerprint_hash),
            fingerprint_data = COALESCE(p_fingerprint_data, fingerprint_data),
            tasks_claimed = 0,
            tasks_completed = 0,
            tasks_failed = 0,
            locked_at = NOW(),
            retired_at = NULL,
            cooldown_until = NULL
        WHERE id = v_session.id
        RETURNING * INTO v_session;
    ELSE
        -- Create new session
        INSERT INTO worker_sessions (
            ip_address, fingerprint_hash, fingerprint_data,
            state_code, city, worker_id, status, locked_at
        ) VALUES (
            p_ip_address, COALESCE(p_fingerprint_hash, md5(random()::text)),
            p_fingerprint_data, p_state_code, p_city, p_worker_id, 'active', NOW()
        )
        RETURNING * INTO v_session;
    END IF;

    RETURN v_session;
EXCEPTION
    WHEN unique_violation THEN
        -- Two workers can both pass the availability check for the same IP.
        -- The partial unique index on active sessions rejects the loser;
        -- report that as "IP not available" instead of raising.
        RETURN NULL;
END;
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- retire_worker_session(p_worker_id)
-- Moves the worker's active session(s) into an 8 hour cooldown and detaches
-- the worker. Returns TRUE when at least one session was retired.
CREATE OR REPLACE FUNCTION retire_worker_session(p_worker_id VARCHAR(255))
RETURNS BOOLEAN AS $$
BEGIN
    UPDATE worker_sessions
       SET status         = 'cooldown',
           worker_id      = NULL,
           retired_at     = NOW(),
           cooldown_until = NOW() + INTERVAL '8 hours'
     WHERE status = 'active'
       AND worker_id = p_worker_id;

    -- FOUND is TRUE when the UPDATE touched at least one row
    RETURN FOUND;
END;
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- release_expired_sessions()
-- Flips every 'cooldown' session whose cooldown_until has passed back to
-- 'available'. Returns the number of sessions released.
CREATE OR REPLACE FUNCTION release_expired_sessions()
RETURNS INTEGER AS $$
DECLARE
    v_released INTEGER;
BEGIN
    WITH released AS (
        UPDATE worker_sessions
           SET status = 'available'
         WHERE cooldown_until <= NOW()
           AND status = 'cooldown'
        RETURNING 1
    )
    SELECT COUNT(*) INTO v_released FROM released;

    RETURN v_released;
END;
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- get_worker_session(p_worker_id)
-- Returns the worker's active session row (one row at most), or NULL if the
-- worker holds no active session.
CREATE OR REPLACE FUNCTION get_worker_session(p_worker_id VARCHAR(255))
RETURNS worker_sessions AS $$
    SELECT ws.*
      FROM worker_sessions AS ws
     WHERE ws.status = 'active'
       AND ws.worker_id = p_worker_id
     LIMIT 1;
$$ LANGUAGE sql;
|
||||
|
||||
-- session_task_completed(p_worker_id)
-- Adds one to tasks_completed on the worker's active session.
-- Returns TRUE when an active session was updated.
CREATE OR REPLACE FUNCTION session_task_completed(p_worker_id VARCHAR(255))
RETURNS BOOLEAN AS $$
DECLARE
    v_rows INTEGER;
BEGIN
    UPDATE worker_sessions
       SET tasks_completed = tasks_completed + 1
     WHERE status = 'active'
       AND worker_id = p_worker_id;

    GET DIAGNOSTICS v_rows = ROW_COUNT;
    RETURN v_rows > 0;
END;
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- session_task_failed(p_worker_id)
-- Adds one to tasks_failed on the worker's active session.
-- Returns TRUE when an active session was updated.
CREATE OR REPLACE FUNCTION session_task_failed(p_worker_id VARCHAR(255))
RETURNS BOOLEAN AS $$
DECLARE
    v_rows INTEGER;
BEGIN
    UPDATE worker_sessions
       SET tasks_failed = tasks_failed + 1
     WHERE status = 'active'
       AND worker_id = p_worker_id;

    GET DIAGNOSTICS v_rows = ROW_COUNT;
    RETURN v_rows > 0;
END;
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- session_task_claimed(p_worker_id, p_count)
-- Adds p_count (default 1) to tasks_claimed on the worker's active session.
-- Returns TRUE when an active session was updated.
CREATE OR REPLACE FUNCTION session_task_claimed(p_worker_id VARCHAR(255), p_count INTEGER DEFAULT 1)
RETURNS BOOLEAN AS $$
DECLARE
    v_rows INTEGER;
BEGIN
    UPDATE worker_sessions
       SET tasks_claimed = tasks_claimed + p_count
     WHERE status = 'active'
       AND worker_id = p_worker_id;

    GET DIAGNOSTICS v_rows = ROW_COUNT;
    RETURN v_rows > 0;
END;
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- Scheduled job hint: Run release_expired_sessions() every 5 minutes
|
||||
COMMENT ON FUNCTION release_expired_sessions() IS
|
||||
'Run periodically to release sessions from cooldown. Suggest: every 5 minutes.';
|
||||
|
||||
-- =============================================================================
|
||||
-- ATOMIC TASK CLAIMING
|
||||
-- Worker claims up to 6 tasks for same geo in one transaction
|
||||
-- =============================================================================
|
||||
|
||||
-- claim_tasks_batch(p_worker_id, p_max_tasks, p_role)
--
-- Claims up to p_max_tasks pending tasks that share one geo, in a single
-- statement, and returns them with their dispensary details.
--
-- The target geo is the (state, city) group with the most pending tasks,
-- restricted to p_role when given. Groups whose dispensary has no state are
-- ignored; otherwise such a group could win the selection and make the
-- function return nothing while claimable tasks exist elsewhere.
-- If the winning group has a NULL city, tasks from any city in that state
-- are eligible.
--
-- Tasks are taken highest priority first, then oldest first, using
-- FOR UPDATE SKIP LOCKED so concurrent workers never claim the same task.
-- Returns no rows when there is nothing to claim.
CREATE OR REPLACE FUNCTION claim_tasks_batch(
    p_worker_id VARCHAR(255),
    p_max_tasks INTEGER DEFAULT 6,
    p_role VARCHAR(50) DEFAULT NULL  -- Optional role filter
) RETURNS TABLE (
    task_id INTEGER,
    role VARCHAR(50),
    dispensary_id INTEGER,
    dispensary_name VARCHAR(255),
    city VARCHAR(100),
    state_code VARCHAR(2),
    platform VARCHAR(50),
    method VARCHAR(20)
) AS $$
DECLARE
    v_target_state VARCHAR(2);
    v_target_city VARCHAR(100);
BEGIN
    -- First, find the geo with most pending tasks to target
    SELECT d.state, d.city INTO v_target_state, v_target_city
    FROM worker_tasks t
    JOIN dispensaries d ON t.dispensary_id = d.id
    WHERE t.status = 'pending'
      AND d.state IS NOT NULL
      AND (p_role IS NULL OR t.role = p_role)
    GROUP BY d.state, d.city
    ORDER BY COUNT(*) DESC
    LIMIT 1;

    -- No pending tasks
    IF v_target_state IS NULL THEN
        RETURN;
    END IF;

    -- Claim up to p_max_tasks for this geo
    RETURN QUERY
    WITH claimed AS (
        UPDATE worker_tasks t SET
            status = 'claimed',
            worker_id = p_worker_id,
            claimed_at = NOW()
        FROM (
            SELECT t2.id
            FROM worker_tasks t2
            JOIN dispensaries d ON t2.dispensary_id = d.id
            WHERE t2.status = 'pending'
              AND d.state = v_target_state
              AND (v_target_city IS NULL OR d.city = v_target_city)
              AND (p_role IS NULL OR t2.role = p_role)
            ORDER BY t2.priority DESC, t2.created_at ASC
            LIMIT p_max_tasks
            -- Lock only the task rows, not the joined dispensary rows
            FOR UPDATE OF t2 SKIP LOCKED
        ) sub
        WHERE t.id = sub.id
        RETURNING t.id, t.role, t.dispensary_id, t.method
    )
    SELECT
        c.id as task_id,
        c.role,
        c.dispensary_id,
        d.name as dispensary_name,
        d.city,
        d.state as state_code,
        d.platform,
        c.method
    FROM claimed c
    JOIN dispensaries d ON c.dispensary_id = d.id;
END;
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- release_claimed_tasks(p_worker_id)
-- Returns every task the worker holds in status 'claimed' or 'running' to
-- 'pending' (for a failed worker or cleanup) and detaches the worker.
-- started_at is cleared as well, matching what fail_task does on retry, so a
-- pending task never carries a start time from an abandoned run.
-- Returns the number of tasks released.
CREATE OR REPLACE FUNCTION release_claimed_tasks(p_worker_id VARCHAR(255))
RETURNS INTEGER AS $$
DECLARE
    v_released INTEGER;
BEGIN
    UPDATE worker_tasks SET
        status = 'pending',
        worker_id = NULL,
        claimed_at = NULL,
        started_at = NULL
    WHERE worker_id = p_worker_id
      AND status IN ('claimed', 'running');

    GET DIAGNOSTICS v_released = ROW_COUNT;
    RETURN v_released;
END;
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- start_task(p_task_id, p_worker_id)
-- Moves a task from 'claimed' to 'running' and records started_at.
-- Only succeeds for the worker that holds the claim; returns TRUE on success.
CREATE OR REPLACE FUNCTION start_task(p_task_id INTEGER, p_worker_id VARCHAR(255))
RETURNS BOOLEAN AS $$
DECLARE
    v_rows INTEGER;
BEGIN
    UPDATE worker_tasks
       SET status     = 'running',
           started_at = NOW()
     WHERE status = 'claimed'
       AND worker_id = p_worker_id
       AND id = p_task_id;

    GET DIAGNOSTICS v_rows = ROW_COUNT;
    RETURN v_rows > 0;
END;
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- complete_task(p_task_id, p_worker_id, p_result)
-- Moves a task from 'running' to 'completed', recording completed_at and the
-- optional result payload. Only succeeds for the worker running the task;
-- returns TRUE on success.
CREATE OR REPLACE FUNCTION complete_task(
    p_task_id INTEGER,
    p_worker_id VARCHAR(255),
    p_result JSONB DEFAULT NULL
) RETURNS BOOLEAN AS $$
DECLARE
    v_rows INTEGER;
BEGIN
    UPDATE worker_tasks
       SET status       = 'completed',
           completed_at = NOW(),
           result       = p_result
     WHERE status = 'running'
       AND worker_id = p_worker_id
       AND id = p_task_id;

    GET DIAGNOSTICS v_rows = ROW_COUNT;
    RETURN v_rows > 0;
END;
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- fail_task(p_task_id, p_worker_id, p_error, p_max_retries)
--
-- Records a failure for a task held by p_worker_id.
--   * retry_count < p_max_retries: the task goes back to 'pending' (worker,
--     claimed_at and started_at cleared) so it can be retried.
--   * otherwise: the task is marked permanently 'failed' with completed_at set.
-- In both cases error_message is stored and retry_count is incremented.
--
-- Only tasks currently 'claimed' or 'running' by that worker are affected, so
-- a late or duplicate call cannot push an already completed or failed task
-- back into the pool. The row is locked while the retry count is read so the
-- decision and the update see the same value.
-- Returns TRUE when the task was updated, FALSE otherwise.
CREATE OR REPLACE FUNCTION fail_task(
    p_task_id INTEGER,
    p_worker_id VARCHAR(255),
    p_error TEXT DEFAULT NULL,
    p_max_retries INTEGER DEFAULT 3
) RETURNS BOOLEAN AS $$
DECLARE
    v_retry_count INTEGER;
BEGIN
    -- Get current retry count, locking the row for the rest of the transaction
    SELECT COALESCE(retry_count, 0) INTO v_retry_count
    FROM worker_tasks
    WHERE id = p_task_id
      AND worker_id = p_worker_id
      AND status IN ('claimed', 'running')
    FOR UPDATE;

    IF NOT FOUND THEN
        -- Unknown task, not owned by this worker, or not in flight
        RETURN FALSE;
    END IF;

    IF v_retry_count >= p_max_retries THEN
        -- Max retries exceeded - mark as permanently failed
        UPDATE worker_tasks SET
            status = 'failed',
            completed_at = NOW(),
            error_message = p_error,
            retry_count = v_retry_count + 1
        WHERE id = p_task_id
          AND worker_id = p_worker_id;
    ELSE
        -- Return to pending for retry
        UPDATE worker_tasks SET
            status = 'pending',
            worker_id = NULL,
            claimed_at = NULL,
            started_at = NULL,
            error_message = p_error,
            retry_count = v_retry_count + 1
        WHERE id = p_task_id
          AND worker_id = p_worker_id;
    END IF;

    RETURN FOUND;
END;
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- Add the retry_count and claimed_at columns used by the task functions.
-- ADD COLUMN IF NOT EXISTS checks the table being altered directly, so the
-- result does not depend on a same-named table in another schema (the
-- previous information_schema lookup was not schema-qualified).
ALTER TABLE worker_tasks
    ADD COLUMN IF NOT EXISTS retry_count INTEGER NOT NULL DEFAULT 0,
    ADD COLUMN IF NOT EXISTS claimed_at TIMESTAMPTZ;
|
||||
@@ -1,381 +0,0 @@
|
||||
-- Task Pools: Group tasks by geo area for worker assignment
|
||||
-- Workers claim a pool, get proxy for that geo, then pull tasks from pool
|
||||
|
||||
-- ----------------------------------------------------------------------------
-- TASK POOLS TABLE
-- ----------------------------------------------------------------------------
-- One row per metro area (for example Phoenix, AZ with a 100 mile radius).
-- Dispensaries are attached to a pool by location, and workers claim a whole
-- pool rather than individual tasks.
CREATE TABLE IF NOT EXISTS task_pools (
    id            SERIAL,
    name          VARCHAR(100)   NOT NULL,      -- e.g., 'phoenix_az'
    display_name  VARCHAR(100)   NOT NULL,      -- e.g., 'Phoenix, AZ'
    state_code    VARCHAR(2)     NOT NULL,      -- e.g., 'AZ'
    city          VARCHAR(100)   NOT NULL,      -- e.g., 'Phoenix'
    latitude      DECIMAL(10, 6) NOT NULL,      -- latitude of the pool center
    longitude     DECIMAL(10, 6) NOT NULL,      -- longitude of the pool center
    radius_miles  INTEGER        DEFAULT 100,   -- pool radius in miles
    timezone      VARCHAR(50)    NOT NULL,      -- e.g., 'America/Phoenix'
    is_active     BOOLEAN        DEFAULT true,
    created_at    TIMESTAMPTZ    DEFAULT NOW(),
    updated_at    TIMESTAMPTZ    DEFAULT NOW(),
    PRIMARY KEY (id),
    UNIQUE (name)
);

-- Partial index that covers only the active pools
CREATE INDEX IF NOT EXISTS idx_task_pools_active
    ON task_pools (is_active)
    WHERE is_active = true;
|
||||
|
||||
-- ----------------------------------------------------------------------------
-- LINK DISPENSARIES TO POOLS
-- ----------------------------------------------------------------------------
-- Each dispensary may reference the task pool it belongs to.
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS pool_id INTEGER REFERENCES task_pools(id);

-- Pool membership lookups; rows without a pool are left out of the index
CREATE INDEX IF NOT EXISTS idx_dispensaries_pool
    ON dispensaries (pool_id)
    WHERE pool_id IS NOT NULL;
|
||||
|
||||
-- ----------------------------------------------------------------------------
-- WORKER POOL ASSIGNMENT
-- ----------------------------------------------------------------------------
-- Columns recording which pool a worker currently holds, when it claimed the
-- pool, how many stores it has visited in it, and its store budget.
ALTER TABLE worker_registry ADD COLUMN IF NOT EXISTS current_pool_id INTEGER REFERENCES task_pools(id);
ALTER TABLE worker_registry ADD COLUMN IF NOT EXISTS pool_claimed_at TIMESTAMPTZ;
ALTER TABLE worker_registry ADD COLUMN IF NOT EXISTS pool_stores_visited INTEGER DEFAULT 0;
ALTER TABLE worker_registry ADD COLUMN IF NOT EXISTS pool_max_stores INTEGER DEFAULT 6;
|
||||
|
||||
-- ----------------------------------------------------------------------------
-- SEED INITIAL POOLS
-- ----------------------------------------------------------------------------
-- Major cannabis markets with approximate center coordinates.
-- Rows whose name already exists are left untouched (ON CONFLICT DO NOTHING).
INSERT INTO task_pools
    (name, display_name, state_code, city, latitude, longitude, timezone, radius_miles)
VALUES
    -- Arizona
    ('phoenix_az',       'Phoenix, AZ',       'AZ', 'Phoenix',       33.4484, -112.0740, 'America/Phoenix',     100),
    ('tucson_az',        'Tucson, AZ',        'AZ', 'Tucson',        32.2226, -110.9747, 'America/Phoenix',     75),
    -- California
    ('los_angeles_ca',   'Los Angeles, CA',   'CA', 'Los Angeles',   34.0522, -118.2437, 'America/Los_Angeles', 100),
    ('san_francisco_ca', 'San Francisco, CA', 'CA', 'San Francisco', 37.7749, -122.4194, 'America/Los_Angeles', 75),
    ('san_diego_ca',     'San Diego, CA',     'CA', 'San Diego',     32.7157, -117.1611, 'America/Los_Angeles', 75),
    ('sacramento_ca',    'Sacramento, CA',    'CA', 'Sacramento',    38.5816, -121.4944, 'America/Los_Angeles', 75),
    -- Colorado
    ('denver_co',        'Denver, CO',        'CO', 'Denver',        39.7392, -104.9903, 'America/Denver',      100),
    -- Illinois
    ('chicago_il',       'Chicago, IL',       'IL', 'Chicago',       41.8781, -87.6298,  'America/Chicago',     100),
    -- Massachusetts
    ('boston_ma',        'Boston, MA',        'MA', 'Boston',        42.3601, -71.0589,  'America/New_York',    75),
    -- Michigan
    ('detroit_mi',       'Detroit, MI',       'MI', 'Detroit',       42.3314, -83.0458,  'America/Detroit',     100),
    -- Nevada
    ('las_vegas_nv',     'Las Vegas, NV',     'NV', 'Las Vegas',     36.1699, -115.1398, 'America/Los_Angeles', 75),
    ('reno_nv',          'Reno, NV',          'NV', 'Reno',          39.5296, -119.8138, 'America/Los_Angeles', 50),
    -- New Jersey
    ('newark_nj',        'Newark, NJ',        'NJ', 'Newark',        40.7357, -74.1724,  'America/New_York',    75),
    -- New York
    ('new_york_ny',      'New York, NY',      'NY', 'New York',      40.7128, -74.0060,  'America/New_York',    75),
    -- Oklahoma
    ('oklahoma_city_ok', 'Oklahoma City, OK', 'OK', 'Oklahoma City', 35.4676, -97.5164,  'America/Chicago',     100),
    ('tulsa_ok',         'Tulsa, OK',         'OK', 'Tulsa',         36.1540, -95.9928,  'America/Chicago',     75),
    -- Oregon
    ('portland_or',      'Portland, OR',      'OR', 'Portland',      45.5152, -122.6784, 'America/Los_Angeles', 75),
    -- Washington
    ('seattle_wa',       'Seattle, WA',       'WA', 'Seattle',       47.6062, -122.3321, 'America/Los_Angeles', 100)
ON CONFLICT (name) DO NOTHING;
|
||||
|
||||
-- ============================================================================
-- FUNCTION: Assign dispensary to nearest pool
-- ============================================================================
-- Looks up the dispensary's coordinates, finds the closest active pool whose
-- radius contains it, stores that pool on the dispensary row and returns the
-- pool id.
--
-- Parameters:
--   disp_id - dispensaries.id of the store to assign
--
-- Returns the assigned pool id, or NULL when the dispensary is missing, has no
-- coordinates, or lies outside every active pool's radius.
CREATE OR REPLACE FUNCTION assign_dispensary_to_pool(disp_id INTEGER)
RETURNS INTEGER AS $$
DECLARE
    disp_lat DECIMAL(10,6);
    disp_lng DECIMAL(10,6);
    nearest_pool_id INTEGER;
BEGIN
    -- Get dispensary coordinates
    SELECT d.latitude, d.longitude INTO disp_lat, disp_lng
    FROM dispensaries d
    WHERE d.id = disp_id;

    IF disp_lat IS NULL OR disp_lng IS NULL THEN
        RETURN NULL;
    END IF;

    -- Great-circle distance via the spherical law of cosines, with 3959 as the
    -- Earth radius in miles. The cosine term is clamped to [-1, 1]: rounding
    -- can push it just past 1 when the dispensary sits on (or very near) a
    -- pool center, and acos() raises "input is out of range" for such values.
    SELECT candidate.id INTO nearest_pool_id
    FROM (
        SELECT
            tp.id,
            tp.radius_miles,
            3959 * acos(
                LEAST(1.0::double precision, GREATEST(-1.0::double precision,
                    cos(radians(tp.latitude)) * cos(radians(disp_lat)) *
                    cos(radians(disp_lng) - radians(tp.longitude)) +
                    sin(radians(tp.latitude)) * sin(radians(disp_lat))
                ))
            ) AS distance_miles
        FROM task_pools tp
        WHERE tp.is_active = true
    ) AS candidate
    WHERE candidate.distance_miles <= candidate.radius_miles
    ORDER BY candidate.distance_miles
    LIMIT 1;

    -- Update dispensary
    IF nearest_pool_id IS NOT NULL THEN
        UPDATE dispensaries SET pool_id = nearest_pool_id WHERE id = disp_id;
    END IF;

    RETURN nearest_pool_id;
END;
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- ============================================================================
-- FUNCTION: Assign all dispensaries to pools (batch)
-- ============================================================================
-- Calls assign_dispensary_to_pool() for every dispensary that has a latitude
-- but no pool yet.
--
-- Returns a single row:
--   assigned   - number of dispensaries that received a pool
--   unassigned - number of dispensaries for which no pool was found
CREATE OR REPLACE FUNCTION assign_all_dispensaries_to_pools()
RETURNS TABLE(assigned INTEGER, unassigned INTEGER) AS $$
DECLARE
    assigned_count INTEGER := 0;
    unassigned_count INTEGER := 0;
    disp RECORD;
    -- Deliberately not named pool_id: a variable with the same name as the
    -- dispensaries.pool_id column makes the loop query below fail with an
    -- ambiguous column reference.
    assigned_pool_id INTEGER;
BEGIN
    FOR disp IN
        SELECT d.id
        FROM dispensaries d
        WHERE d.pool_id IS NULL
          AND d.latitude IS NOT NULL
    LOOP
        assigned_pool_id := assign_dispensary_to_pool(disp.id);
        IF assigned_pool_id IS NOT NULL THEN
            assigned_count := assigned_count + 1;
        ELSE
            unassigned_count := unassigned_count + 1;
        END IF;
    END LOOP;

    RETURN QUERY SELECT assigned_count, unassigned_count;
END;
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- ----------------------------------------------------------------------------
-- FUNCTION: Get pools with pending tasks
-- ----------------------------------------------------------------------------
-- Lists every active pool that has at least one pending task, with the number
-- of pending tasks and the number of distinct stores those tasks belong to.
-- Pools with the most pending tasks come first.
CREATE OR REPLACE FUNCTION get_pools_with_pending_tasks()
RETURNS TABLE(
    pool_id INTEGER,
    pool_name VARCHAR(100),
    display_name VARCHAR(100),
    state_code VARCHAR(2),
    city VARCHAR(100),
    timezone VARCHAR(50),
    pending_count BIGINT,
    store_count BIGINT
) AS $$
BEGIN
    RETURN QUERY
    WITH pending_by_pool AS (
        -- Aggregate pending work per pool first, then attach pool details
        SELECT
            d.pool_id            AS pid,
            COUNT(DISTINCT t.id) AS n_pending,
            COUNT(DISTINCT d.id) AS n_stores
        FROM dispensaries d
        JOIN tasks t
          ON t.dispensary_id = d.id
         AND t.status = 'pending'
        GROUP BY d.pool_id
        HAVING COUNT(DISTINCT t.id) > 0
    )
    SELECT
        tp.id,
        tp.name,
        tp.display_name,
        tp.state_code,
        tp.city,
        tp.timezone,
        agg.n_pending,
        agg.n_stores
    FROM task_pools tp
    JOIN pending_by_pool agg ON agg.pid = tp.id
    WHERE tp.is_active = true
    ORDER BY agg.n_pending DESC;
END;
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- ----------------------------------------------------------------------------
-- FUNCTION: Worker claims a pool
-- ----------------------------------------------------------------------------
-- Records a pool on the worker's worker_registry row and returns that pool's
-- details.
--
-- Parameters:
--   p_worker_id - worker_registry.worker_id of the claiming worker
--   p_pool_id   - pool to claim; when NULL the active pool with the most
--                 pending tasks is chosen
--
-- Returns one row describing the claimed pool, or no rows when no pool was
-- given and no active pool has pending tasks.
-- NOTE(review): the worker UPDATE result is not checked, so pool details are
-- returned even if no worker_registry row matched - confirm this is intended.
CREATE OR REPLACE FUNCTION worker_claim_pool(
    p_worker_id VARCHAR(100),
    p_pool_id INTEGER DEFAULT NULL
)
RETURNS TABLE(
    pool_id INTEGER,
    pool_name VARCHAR(100),
    display_name VARCHAR(100),
    state_code VARCHAR(2),
    city VARCHAR(100),
    latitude DECIMAL(10,6),
    longitude DECIMAL(10,6),
    timezone VARCHAR(50)
) AS $$
DECLARE
    target_pool_id INTEGER := p_pool_id;
BEGIN
    -- Without an explicit pool, fall back to the busiest active pool
    IF target_pool_id IS NULL THEN
        SELECT tp.id INTO target_pool_id
        FROM task_pools tp
        JOIN dispensaries d ON d.pool_id = tp.id
        JOIN tasks t ON t.dispensary_id = d.id AND t.status = 'pending'
        WHERE tp.is_active = true
        GROUP BY tp.id
        ORDER BY COUNT(DISTINCT t.id) DESC
        LIMIT 1;
    END IF;

    -- Nothing to claim
    IF target_pool_id IS NULL THEN
        RETURN;
    END IF;

    -- Record the claim and reset the per-pool store counters
    UPDATE worker_registry
    SET pool_claimed_at = NOW(),
        current_pool_id = target_pool_id,
        pool_max_stores = 6,
        pool_stores_visited = 0,
        updated_at = NOW()
    WHERE worker_id = p_worker_id;

    -- Hand the pool details back to the caller
    RETURN QUERY
    SELECT tp.id, tp.name, tp.display_name, tp.state_code,
           tp.city, tp.latitude, tp.longitude, tp.timezone
    FROM task_pools tp
    WHERE tp.id = target_pool_id;
END;
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- ============================================================================
-- FUNCTION: Pull tasks from worker's pool (up to 6 stores)
-- ============================================================================
-- Claims at most one pending, due task per dispensary in the worker's current
-- pool, limited to the worker's remaining store budget, and returns the
-- claimed tasks. The worker's pool_stores_visited counter is increased by the
-- number of tasks claimed in this call.
--
-- Parameters:
--   p_worker_id  - worker_registry.worker_id of the caller
--   p_max_stores - fallback store budget, used only when the worker row has no
--                  pool_max_stores value
--
-- Raises an exception when the worker has no pool assigned.
-- NOTE(review): stores are picked in dispensary id order (DISTINCT ON + LIMIT),
-- not by task priority across stores - confirm that is the intended fairness.
CREATE OR REPLACE FUNCTION pull_tasks_from_pool(
    p_worker_id VARCHAR(100),
    p_max_stores INTEGER DEFAULT 6
)
RETURNS TABLE(
    task_id INTEGER,
    dispensary_id INTEGER,
    dispensary_name VARCHAR(255),
    role VARCHAR(50),
    platform VARCHAR(50),
    method VARCHAR(20)
) AS $$
DECLARE
    worker_pool_id INTEGER;
    stores_visited INTEGER;
    max_stores INTEGER;
    stores_remaining INTEGER;
    newly_claimed INTEGER;
BEGIN
    -- Get worker's current pool and store count
    SELECT wr.current_pool_id, wr.pool_stores_visited, wr.pool_max_stores
    INTO worker_pool_id, stores_visited, max_stores
    FROM worker_registry wr
    WHERE wr.worker_id = p_worker_id;

    IF worker_pool_id IS NULL THEN
        RAISE EXCEPTION 'Worker % has no pool assigned', p_worker_id;
    END IF;

    -- A NULL budget would turn the LIMIT below into "no limit", so fall back to
    -- p_max_stores and treat a missing visit counter as zero.
    stores_remaining := COALESCE(max_stores, p_max_stores) - COALESCE(stores_visited, 0);
    IF stores_remaining IS NULL OR stores_remaining <= 0 THEN
        RETURN;  -- Worker exhausted
    END IF;

    -- Claim tasks from pool (one task per store, up to remaining capacity).
    -- CTE column names differ from the function's output columns, and every
    -- reference is qualified, so no name can be read as an output variable.
    RETURN QUERY
    WITH available_stores AS (
        SELECT DISTINCT ON (d.id)
            t.id       AS picked_task_id,
            d.id       AS store_id,
            d.name     AS store_name,
            t.role     AS task_role,
            t.platform AS task_platform,
            t.method   AS task_method
        FROM tasks t
        JOIN dispensaries d ON d.id = t.dispensary_id
        WHERE d.pool_id = worker_pool_id
          AND t.status = 'pending'
          AND t.scheduled_for <= NOW()
        ORDER BY d.id, t.priority DESC, t.created_at ASC
        LIMIT stores_remaining
    ),
    claimed AS (
        UPDATE tasks AS tk
        SET
            status = 'claimed',
            claimed_by = p_worker_id,
            claimed_at = NOW()
        WHERE tk.id IN (SELECT av.picked_task_id FROM available_stores av)
          -- Recheck the status on the row being updated so a task that another
          -- worker claimed concurrently is skipped instead of claimed twice.
          AND tk.status = 'pending'
        RETURNING tk.id
    )
    SELECT
        av.picked_task_id,
        av.store_id,
        av.store_name,
        av.task_role,
        av.task_platform,
        av.task_method
    FROM available_stores av
    WHERE av.picked_task_id IN (SELECT c.id FROM claimed c);

    -- Rows returned above = tasks claimed by this call = stores claimed, since
    -- at most one task is taken per store.
    GET DIAGNOSTICS newly_claimed = ROW_COUNT;

    -- Update worker store count with this call's claims only
    UPDATE worker_registry
    SET
        pool_stores_visited = COALESCE(pool_stores_visited, 0) + newly_claimed,
        updated_at = NOW()
    WHERE worker_id = p_worker_id;
END;
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- ----------------------------------------------------------------------------
-- FUNCTION: Worker releases pool (exhausted or done)
-- ----------------------------------------------------------------------------
-- Clears the pool assignment, the visited-store counter and the current
-- state/city on the worker's worker_registry row.
--
-- Parameters:
--   p_worker_id - worker_registry.worker_id of the releasing worker
--
-- Always returns true, whether or not a worker row matched.
CREATE OR REPLACE FUNCTION worker_release_pool(p_worker_id VARCHAR(100))
RETURNS BOOLEAN AS $$
BEGIN
    UPDATE worker_registry
    SET current_state = NULL,
        current_city = NULL,
        current_pool_id = NULL,
        pool_claimed_at = NULL,
        pool_stores_visited = 0,
        updated_at = NOW()
    WHERE worker_id = p_worker_id;

    RETURN true;
END;
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- ----------------------------------------------------------------------------
-- RUN: Assign existing dispensaries to pools
-- ----------------------------------------------------------------------------
-- One-off backfill executed as part of this migration.
SELECT assigned, unassigned
FROM assign_all_dispensaries_to_pools();
|
||||
@@ -1,10 +0,0 @@
|
||||
-- Migration 114: Add pool_id to task_schedules
|
||||
-- Allows schedules to target specific geo pools
|
||||
|
||||
-- Optional link from a schedule to a geo pool
ALTER TABLE task_schedules ADD COLUMN IF NOT EXISTS pool_id INTEGER REFERENCES task_pools(id);

-- Supports pool-based schedule queries; schedules without a pool are not indexed
CREATE INDEX IF NOT EXISTS idx_task_schedules_pool
    ON task_schedules (pool_id)
    WHERE pool_id IS NOT NULL;

COMMENT ON COLUMN task_schedules.pool_id
    IS 'Optional geo pool filter. NULL = all pools/dispensaries matching state_code';
|
||||
@@ -1,17 +0,0 @@
|
||||
-- Migration: Add proxy_ip tracking to worker_tasks
|
||||
-- Purpose: Prevent same IP from hitting multiple stores on same platform simultaneously
|
||||
--
|
||||
-- Anti-detection measure: Dutchie/Jane may flag if same IP makes requests
|
||||
-- for multiple different stores. This column lets us track and prevent that.
|
||||
|
||||
-- Proxy IP used for each task
ALTER TABLE worker_tasks
    ADD COLUMN IF NOT EXISTS proxy_ip VARCHAR(45);

-- Lookup of in-flight tasks (claimed or running) by proxy IP and platform,
-- used to answer "is this IP already hitting another store?"
CREATE INDEX IF NOT EXISTS idx_worker_tasks_proxy_ip_active ON worker_tasks (proxy_ip, platform)
    WHERE status IN ('claimed', 'running') AND proxy_ip IS NOT NULL;

-- Column documentation
COMMENT ON COLUMN worker_tasks.proxy_ip
    IS 'Proxy IP assigned to this task. Used to prevent same IP hitting multiple stores on same platform.';
|
||||
@@ -1,16 +0,0 @@
|
||||
-- Migration: Add source tracking columns to worker_tasks
|
||||
-- Purpose: Track where tasks originated from (schedule, API, manual)
|
||||
|
||||
-- Columns recording where a task came from
ALTER TABLE worker_tasks
    ADD COLUMN IF NOT EXISTS source VARCHAR(50),
    ADD COLUMN IF NOT EXISTS source_schedule_id INTEGER REFERENCES task_schedules(id),
    ADD COLUMN IF NOT EXISTS source_metadata JSONB;

-- Find tasks created by a given schedule; tasks without a schedule are not indexed
CREATE INDEX IF NOT EXISTS idx_worker_tasks_source_schedule ON worker_tasks (source_schedule_id)
    WHERE source_schedule_id IS NOT NULL;

-- Column documentation
COMMENT ON COLUMN worker_tasks.source
    IS 'Origin of task: schedule, api, manual, chain';
COMMENT ON COLUMN worker_tasks.source_schedule_id
    IS 'ID of schedule that created this task';
COMMENT ON COLUMN worker_tasks.source_metadata
    IS 'Additional metadata about task origin';
|
||||
@@ -1,32 +0,0 @@
|
||||
-- Migration 117: Per-store crawl interval scheduling
|
||||
-- Adds columns for configurable per-store crawl intervals
|
||||
-- Part of Real-Time Inventory Tracking feature
|
||||
|
||||
-- Scheduling and change-tracking columns for per-store crawl intervals
ALTER TABLE dispensaries
    -- Per-store crawl interval (NULL = use state schedule default 4h)
    ADD COLUMN IF NOT EXISTS crawl_interval_minutes INT DEFAULT NULL,
    -- Next time this store is due for a crawl (read by the high-frequency scheduler)
    ADD COLUMN IF NOT EXISTS next_crawl_at TIMESTAMPTZ DEFAULT NULL,
    -- Time of the last request, for enforcing minimum spacing
    ADD COLUMN IF NOT EXISTS last_crawl_started_at TIMESTAMPTZ DEFAULT NULL,
    -- Change tracking
    ADD COLUMN IF NOT EXISTS last_inventory_hash TEXT DEFAULT NULL,
    ADD COLUMN IF NOT EXISTS last_price_hash TEXT DEFAULT NULL,
    ADD COLUMN IF NOT EXISTS inventory_changes_24h INT DEFAULT 0,
    ADD COLUMN IF NOT EXISTS price_changes_24h INT DEFAULT 0;

-- Scheduler query: stores with a custom interval that are enabled for crawling
CREATE INDEX IF NOT EXISTS idx_dispensaries_next_crawl ON dispensaries (next_crawl_at)
    WHERE crawl_interval_minutes IS NOT NULL AND crawl_enabled = TRUE;

-- Column documentation
COMMENT ON COLUMN dispensaries.crawl_interval_minutes
    IS 'Custom crawl interval in minutes. NULL = use state schedule (4h default). Set to 15/30/60 for high-frequency tracking.';
COMMENT ON COLUMN dispensaries.next_crawl_at
    IS 'When this store should next be crawled. Updated after each crawl with interval + jitter.';
COMMENT ON COLUMN dispensaries.last_crawl_started_at
    IS 'When the last crawl task was created. Used to enforce minimum spacing.';
COMMENT ON COLUMN dispensaries.last_inventory_hash
    IS 'Hash of inventory state from last crawl. Used to detect changes and skip unchanged payloads.';
COMMENT ON COLUMN dispensaries.last_price_hash
    IS 'Hash of price state from last crawl. Used to detect price changes.';
COMMENT ON COLUMN dispensaries.inventory_changes_24h
    IS 'Number of inventory changes detected in last 24h. Indicates store volatility.';
COMMENT ON COLUMN dispensaries.price_changes_24h
    IS 'Number of price changes detected in last 24h.';
|
||||
@@ -1,48 +0,0 @@
|
||||
-- Migration 118: Inventory snapshots table
|
||||
-- Lightweight per-product tracking for sales velocity estimation
|
||||
-- Part of Real-Time Inventory Tracking feature
|
||||
|
||||
-- Per-product inventory snapshot captured on each crawl.
-- Every statement below is guarded with IF NOT EXISTS so the migration can be
-- re-run without failing on objects that already exist.
CREATE TABLE IF NOT EXISTS inventory_snapshots (
    id BIGSERIAL PRIMARY KEY,
    dispensary_id INT NOT NULL REFERENCES dispensaries(id) ON DELETE CASCADE,
    product_id TEXT NOT NULL,              -- provider_product_id (normalized across platforms)
    captured_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),

    -- Platform (for debugging/filtering)
    platform TEXT NOT NULL,                -- 'dutchie' | 'jane' | 'treez'

    -- Inventory fields (normalized from all platforms)
    quantity_available INT,                -- Dutchie: quantityAvailable, Jane: quantity, Treez: quantityAvailable
    is_below_threshold BOOLEAN,            -- Dutchie: isBelowThreshold, Jane: computed, Treez: lowInventory
    status TEXT,                           -- Active/Inactive/available

    -- Price fields (normalized)
    price_rec NUMERIC(10,2),               -- recreational price
    price_med NUMERIC(10,2),               -- medical price (if different)

    -- Denormalized for fast queries
    brand_name TEXT,
    category TEXT,
    product_name TEXT
);

-- Primary query: get snapshots for a store over time
CREATE INDEX IF NOT EXISTS idx_inv_snap_store_time
    ON inventory_snapshots(dispensary_id, captured_at DESC);

-- Delta calculation: get consecutive snapshots for a product
CREATE INDEX IF NOT EXISTS idx_inv_snap_product_time
    ON inventory_snapshots(dispensary_id, product_id, captured_at DESC);

-- Brand-level analytics
CREATE INDEX IF NOT EXISTS idx_inv_snap_brand_time
    ON inventory_snapshots(brand_name, captured_at DESC) WHERE brand_name IS NOT NULL;

-- Platform filtering
CREATE INDEX IF NOT EXISTS idx_inv_snap_platform
    ON inventory_snapshots(platform, captured_at DESC);

-- Retention cleanup (30 days) - simple index, cleanup job handles the WHERE
CREATE INDEX IF NOT EXISTS idx_inv_snap_cleanup ON inventory_snapshots(captured_at);

-- Comments
COMMENT ON TABLE inventory_snapshots IS 'Lightweight inventory snapshots for sales velocity tracking. Retained 30 days.';
COMMENT ON COLUMN inventory_snapshots.product_id IS 'Provider product ID, normalized across platforms';
COMMENT ON COLUMN inventory_snapshots.platform IS 'Menu platform: dutchie, jane, or treez';
COMMENT ON COLUMN inventory_snapshots.quantity_available IS 'Current quantity in stock (Dutchie: quantityAvailable, Jane: quantity)';
|
||||
@@ -1,53 +0,0 @@
|
||||
-- Migration 119: Product visibility events table
|
||||
-- Tracks OOS, brand drops, and other notable events for alerts
|
||||
-- Part of Real-Time Inventory Tracking feature
|
||||
|
||||
-- Notable inventory events (out of stock, brand drops, price changes) kept for
-- alerting. Every statement below is guarded with IF NOT EXISTS so the
-- migration can be re-run without failing on objects that already exist.
CREATE TABLE IF NOT EXISTS product_visibility_events (
    id SERIAL PRIMARY KEY,
    dispensary_id INT NOT NULL REFERENCES dispensaries(id) ON DELETE CASCADE,

    -- Product identification (null for brand-level events)
    product_id TEXT,                       -- provider_product_id
    product_name TEXT,                     -- For display in alerts

    -- Brand. The column is nullable; the brand index below skips NULL brands.
    brand_name TEXT,

    -- Event details
    event_type TEXT NOT NULL,              -- 'oos', 'back_in_stock', 'brand_dropped', 'brand_added', 'price_change'
    detected_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),

    -- Context
    previous_quantity INT,                 -- For OOS events: what quantity was before
    previous_price NUMERIC(10,2),          -- For price change events
    new_price NUMERIC(10,2),               -- For price change events
    price_change_pct NUMERIC(5,2),         -- Percentage change (e.g., -15.5 for 15.5% decrease)

    -- Platform
    platform TEXT,                         -- 'dutchie' | 'jane' | 'treez'

    -- Alert status
    notified BOOLEAN DEFAULT FALSE,        -- Has external system been notified?
    acknowledged_at TIMESTAMPTZ,           -- When user acknowledged the alert
    acknowledged_by TEXT                   -- User who acknowledged
);

-- Primary query: recent events by store
CREATE INDEX IF NOT EXISTS idx_vis_events_store_time
    ON product_visibility_events(dispensary_id, detected_at DESC);

-- Alert queries: unnotified events
CREATE INDEX IF NOT EXISTS idx_vis_events_unnotified
    ON product_visibility_events(notified, detected_at DESC) WHERE notified = FALSE;

-- Event type filtering
CREATE INDEX IF NOT EXISTS idx_vis_events_type
    ON product_visibility_events(event_type, detected_at DESC);

-- Brand-level queries
CREATE INDEX IF NOT EXISTS idx_vis_events_brand
    ON product_visibility_events(brand_name, event_type, detected_at DESC) WHERE brand_name IS NOT NULL;

-- Cleanup (90 days retention) - simple index, cleanup job handles the WHERE
CREATE INDEX IF NOT EXISTS idx_vis_events_cleanup ON product_visibility_events(detected_at);

-- Comments
COMMENT ON TABLE product_visibility_events IS 'Notable inventory events for alerting. OOS, brand drops, significant price changes. Retained 90 days.';
COMMENT ON COLUMN product_visibility_events.event_type IS 'Event type: oos (out of stock), back_in_stock, brand_dropped, brand_added, price_change';
COMMENT ON COLUMN product_visibility_events.notified IS 'Whether external systems (other apps) have been notified of this event';
|
||||
@@ -1,13 +0,0 @@
|
||||
-- Migration 120: Daily baseline tracking
|
||||
-- Track when each store's daily baseline payload was last saved
|
||||
-- Part of Real-Time Inventory Tracking feature
|
||||
|
||||
-- When the store's daily baseline payload was last saved
ALTER TABLE dispensaries
    ADD COLUMN IF NOT EXISTS last_baseline_at TIMESTAMPTZ DEFAULT NULL;

-- Find crawl-enabled stores by baseline time
CREATE INDEX IF NOT EXISTS idx_dispensaries_baseline
    ON dispensaries (last_baseline_at)
    WHERE crawl_enabled = TRUE;

-- Column documentation
COMMENT ON COLUMN dispensaries.last_baseline_at
    IS 'Timestamp of last daily baseline payload save. Baselines saved once per day between 12:01 AM - 3:00 AM.';
|
||||
@@ -1,383 +0,0 @@
|
||||
-- Migration 121: Sales Analytics Materialized Views
|
||||
-- Pre-computed views for sales velocity, brand market share, and store performance
|
||||
|
||||
-- ============================================================
|
||||
-- VIEW 1: Daily Sales Estimates (per product/store)
|
||||
-- Calculates delta between consecutive snapshots
|
||||
-- ============================================================
|
||||
-- Estimated daily sales per (dispensary, product), derived from the change in
-- quantity_available between consecutive inventory snapshots of the last 30 days.
--   units_sold       : sum of quantity decreases between consecutive snapshots
--   units_restocked  : sum of quantity increases between consecutive snapshots
--   revenue_estimate : quantity decreases * price_rec (a NULL price counts as 0)
--   snapshot_count   : number of snapshot-to-snapshot comparisons in the day
--
-- Rows are grouped by exactly the key of the unique index below
-- (dispensary_id, product_id, sale_date). brand_name and category are collapsed
-- with MAX() so that a product whose brand/category text differs between
-- snapshots on the same day still produces a single row; grouping by them would
-- create duplicate keys and break both the unique index and
-- REFRESH MATERIALIZED VIEW CONCURRENTLY, which depends on it.
--
-- NOTE: because of IF NOT EXISTS, a view that already exists keeps its old definition.
-- NOTE(review): DATE(captured_at) presumably follows the session time zone when
-- captured_at is timestamptz - confirm against the refresh job's settings.
CREATE MATERIALIZED VIEW IF NOT EXISTS mv_daily_sales_estimates AS
WITH qty_deltas AS (
    SELECT
        dispensary_id,
        product_id,
        brand_name,
        category,
        DATE(captured_at) AS sale_date,
        price_rec,
        quantity_available,
        -- Quantity seen in the previous snapshot of the same product at the same store
        LAG(quantity_available) OVER (
            PARTITION BY dispensary_id, product_id
            ORDER BY captured_at
        ) AS prev_quantity
    FROM inventory_snapshots
    WHERE quantity_available IS NOT NULL
      AND captured_at >= NOW() - INTERVAL '30 days'
)
SELECT
    dispensary_id,
    product_id,
    MAX(brand_name) AS brand_name,
    MAX(category) AS category,
    sale_date,
    AVG(price_rec) AS avg_price,
    SUM(GREATEST(0, prev_quantity - quantity_available)) AS units_sold,
    SUM(GREATEST(0, quantity_available - prev_quantity)) AS units_restocked,
    SUM(GREATEST(0, prev_quantity - quantity_available) * COALESCE(price_rec, 0)) AS revenue_estimate,
    COUNT(*) AS snapshot_count
FROM qty_deltas
-- The first snapshot in the window has nothing to compare against
WHERE prev_quantity IS NOT NULL
GROUP BY dispensary_id, product_id, sale_date;

-- Unique key; required by REFRESH MATERIALIZED VIEW CONCURRENTLY
CREATE UNIQUE INDEX IF NOT EXISTS idx_mv_daily_sales_pk
    ON mv_daily_sales_estimates(dispensary_id, product_id, sale_date);

CREATE INDEX IF NOT EXISTS idx_mv_daily_sales_brand
    ON mv_daily_sales_estimates(brand_name, sale_date);

CREATE INDEX IF NOT EXISTS idx_mv_daily_sales_category
    ON mv_daily_sales_estimates(category, sale_date);

CREATE INDEX IF NOT EXISTS idx_mv_daily_sales_date
    ON mv_daily_sales_estimates(sale_date DESC);
|
||||
|
||||
|
||||
-- ============================================================
|
||||
-- VIEW 2: Brand Market Share by State
|
||||
-- Weighted distribution across stores
|
||||
-- ============================================================
|
||||
-- Brand presence per state: how many stores carry each brand, how many SKUs
-- it has, and what share of the state's stores it reaches (penetration_pct).
--
-- Both sides of the penetration ratio are restricted to crawl-enabled stores.
-- Previously stores_carrying counted every store with products while
-- total_stores counted only crawl-enabled ones, so penetration_pct could
-- exceed 100 when a disabled store still had store_products rows.
--
-- NOTE: because of IF NOT EXISTS, a view that already exists keeps its old definition.
CREATE MATERIALIZED VIEW IF NOT EXISTS mv_brand_market_share AS
WITH brand_presence AS (
    SELECT
        sp.brand AS brand_name,
        d.state AS state_code,
        COUNT(DISTINCT sp.dispensary_id) AS stores_carrying,
        COUNT(*) AS sku_count,
        SUM(CASE WHEN sp.is_in_stock THEN 1 ELSE 0 END) AS in_stock_skus,
        AVG(sp.price_rec) AS avg_price
    FROM store_products sp
    JOIN dispensaries d ON d.id = sp.dispensary_id
    WHERE sp.brand IS NOT NULL
      AND d.state IS NOT NULL
      AND d.crawl_enabled = TRUE  -- same store population as state_totals.total_stores
    GROUP BY sp.brand, d.state
),
state_totals AS (
    SELECT
        d.state AS state_code,
        COUNT(DISTINCT d.id) FILTER (WHERE d.crawl_enabled) AS total_stores
    FROM dispensaries d
    WHERE d.state IS NOT NULL
    GROUP BY d.state
)
SELECT
    bp.brand_name,
    bp.state_code,
    bp.stores_carrying,
    st.total_stores,
    -- NULLIF guards the division; penetration is NULL when a state has no counted stores
    ROUND(bp.stores_carrying::NUMERIC * 100 / NULLIF(st.total_stores, 0), 2) AS penetration_pct,
    bp.sku_count,
    bp.in_stock_skus,
    bp.avg_price,
    NOW() AS calculated_at
FROM brand_presence bp
JOIN state_totals st ON st.state_code = bp.state_code;

-- Unique key; required by REFRESH MATERIALIZED VIEW CONCURRENTLY
CREATE UNIQUE INDEX IF NOT EXISTS idx_mv_brand_market_pk
    ON mv_brand_market_share(brand_name, state_code);

CREATE INDEX IF NOT EXISTS idx_mv_brand_market_state
    ON mv_brand_market_share(state_code);

CREATE INDEX IF NOT EXISTS idx_mv_brand_market_penetration
    ON mv_brand_market_share(penetration_pct DESC);
|
||||
|
||||
|
||||
-- ============================================================
|
||||
-- VIEW 3: SKU Velocity (30-day rolling)
|
||||
-- Average daily units sold per SKU
|
||||
-- ============================================================
|
||||
-- 30-day rolling velocity per (dispensary, product), built on mv_daily_sales_estimates.
--   days_with_sales : number of distinct sale_date rows present for the SKU
--                     (days that have estimate rows, including days with 0 units sold)
--   avg_daily_units : total units / days_with_sales, rounded to 2 decimals
--   velocity_tier   : 'hot' (>= 5/day), 'steady' (>= 1), 'slow' (>= 0.1), else 'stale'
--                     (tier thresholds are applied to the unrounded ratio)
--
-- Rows are grouped by exactly the key of the unique index below
-- (dispensary_id, product_id); d.name and d.state are determined by dispensary_id.
-- brand_name and category are collapsed with MAX() because a product whose
-- brand/category text changed during the window would otherwise produce two rows
-- for the same key and break the unique index / concurrent refresh.
--
-- NOTE: because of IF NOT EXISTS, a view that already exists keeps its old definition.
CREATE MATERIALIZED VIEW IF NOT EXISTS mv_sku_velocity AS
WITH per_sku AS (
    SELECT
        dse.product_id,
        MAX(dse.brand_name) AS brand_name,
        MAX(dse.category) AS category,
        dse.dispensary_id,
        d.name AS dispensary_name,
        d.state AS state_code,
        SUM(dse.units_sold) AS total_units_30d,
        SUM(dse.revenue_estimate) AS total_revenue_30d,
        COUNT(DISTINCT dse.sale_date) AS days_with_sales,
        -- Unrounded daily rate, computed once and reused for both output columns
        SUM(dse.units_sold)::NUMERIC / NULLIF(COUNT(DISTINCT dse.sale_date), 0) AS daily_units_raw,
        AVG(dse.avg_price) AS avg_price
    FROM mv_daily_sales_estimates dse
    JOIN dispensaries d ON d.id = dse.dispensary_id
    WHERE dse.sale_date >= CURRENT_DATE - INTERVAL '30 days'
    GROUP BY dse.product_id, dse.dispensary_id, d.name, d.state
)
SELECT
    product_id,
    brand_name,
    category,
    dispensary_id,
    dispensary_name,
    state_code,
    total_units_30d,
    total_revenue_30d,
    days_with_sales,
    ROUND(daily_units_raw, 2) AS avg_daily_units,
    avg_price,
    CASE
        WHEN daily_units_raw >= 5 THEN 'hot'
        WHEN daily_units_raw >= 1 THEN 'steady'
        WHEN daily_units_raw >= 0.1 THEN 'slow'
        ELSE 'stale'
    END AS velocity_tier,
    NOW() AS calculated_at
FROM per_sku;

-- Unique key; required by REFRESH MATERIALIZED VIEW CONCURRENTLY
CREATE UNIQUE INDEX IF NOT EXISTS idx_mv_sku_velocity_pk
    ON mv_sku_velocity(dispensary_id, product_id);

CREATE INDEX IF NOT EXISTS idx_mv_sku_velocity_brand
    ON mv_sku_velocity(brand_name);

CREATE INDEX IF NOT EXISTS idx_mv_sku_velocity_tier
    ON mv_sku_velocity(velocity_tier);

CREATE INDEX IF NOT EXISTS idx_mv_sku_velocity_state
    ON mv_sku_velocity(state_code);

CREATE INDEX IF NOT EXISTS idx_mv_sku_velocity_units
    ON mv_sku_velocity(total_units_30d DESC);
|
||||
|
||||
|
||||
-- ============================================================
|
||||
-- VIEW 4: Store Performance Rankings
|
||||
-- Revenue estimates and brand diversity per store
|
||||
-- ============================================================
|
||||
-- One row per crawl-enabled dispensary: 30-day sales totals taken from
-- mv_daily_sales_estimates plus catalogue statistics from store_products.
-- Sales are pre-aggregated per store before joining so the product join
-- cannot multiply the revenue and unit totals.
CREATE MATERIALIZED VIEW IF NOT EXISTS mv_store_performance AS
WITH recent_sales AS (
    SELECT
        dispensary_id,
        SUM(revenue_estimate) AS total_revenue_30d,
        SUM(units_sold) AS total_units_30d
    FROM mv_daily_sales_estimates
    WHERE sale_date >= CURRENT_DATE - INTERVAL '30 days'
    GROUP BY dispensary_id
)
SELECT
    d.id AS dispensary_id,
    d.name AS dispensary_name,
    d.city,
    d.state AS state_code,

    -- Sales totals; stores without any estimate rows report 0
    COALESCE(rs.total_revenue_30d, 0) AS total_revenue_30d,
    COALESCE(rs.total_units_30d, 0) AS total_units_30d,

    -- Catalogue size
    COUNT(DISTINCT sp.id) AS total_skus,
    COUNT(DISTINCT sp.id) FILTER (WHERE sp.is_in_stock) AS in_stock_skus,

    -- Assortment breadth
    COUNT(DISTINCT sp.brand) AS unique_brands,
    COUNT(DISTINCT sp.category) AS unique_categories,

    -- Average listed price across the store's products
    AVG(sp.price_rec) AS avg_price,

    -- Most recent product update and the time this row was computed
    MAX(sp.updated_at) AS last_updated,
    NOW() AS calculated_at
FROM dispensaries d
LEFT JOIN store_products sp
       ON sp.dispensary_id = d.id
LEFT JOIN recent_sales rs
       ON rs.dispensary_id = d.id
WHERE d.crawl_enabled = TRUE
GROUP BY
    d.id,
    d.name,
    d.city,
    d.state,
    rs.total_revenue_30d,
    rs.total_units_30d;

-- Unique key; required by REFRESH MATERIALIZED VIEW CONCURRENTLY
CREATE UNIQUE INDEX IF NOT EXISTS idx_mv_store_perf_pk
    ON mv_store_performance (dispensary_id);

CREATE INDEX IF NOT EXISTS idx_mv_store_perf_state
    ON mv_store_performance (state_code);

CREATE INDEX IF NOT EXISTS idx_mv_store_perf_revenue
    ON mv_store_performance (total_revenue_30d DESC);
|
||||
|
||||
|
||||
-- ============================================================
|
||||
-- VIEW 5: Weekly Category Trends
|
||||
-- Category performance over time
|
||||
-- ============================================================
|
||||
-- Weekly totals per (category, state) over the last 90 days of
-- mv_daily_sales_estimates. Rows without a category are left out.
CREATE MATERIALIZED VIEW IF NOT EXISTS mv_category_weekly_trends AS
SELECT
    est.category,
    disp.state AS state_code,
    DATE_TRUNC('week', est.sale_date)::DATE AS week_start,
    COUNT(DISTINCT est.product_id) AS sku_count,
    COUNT(DISTINCT est.dispensary_id) AS store_count,
    SUM(est.units_sold) AS total_units,
    SUM(est.revenue_estimate) AS total_revenue,
    AVG(est.avg_price) AS avg_price,
    NOW() AS calculated_at
FROM mv_daily_sales_estimates est
JOIN dispensaries disp
  ON disp.id = est.dispensary_id
WHERE est.category IS NOT NULL
  AND est.sale_date >= CURRENT_DATE - INTERVAL '90 days'
GROUP BY
    est.category,
    disp.state,
    DATE_TRUNC('week', est.sale_date);

-- Unique key; required by REFRESH MATERIALIZED VIEW CONCURRENTLY
CREATE UNIQUE INDEX IF NOT EXISTS idx_mv_cat_weekly_pk
    ON mv_category_weekly_trends (category, state_code, week_start);

CREATE INDEX IF NOT EXISTS idx_mv_cat_weekly_state
    ON mv_category_weekly_trends (state_code, week_start);

CREATE INDEX IF NOT EXISTS idx_mv_cat_weekly_date
    ON mv_category_weekly_trends (week_start DESC);
|
||||
|
||||
|
||||
-- ============================================================
|
||||
-- VIEW 6: Product Intelligence (Hoodie-style per-product metrics)
|
||||
-- Includes stock diff, days since OOS, days until stockout
|
||||
-- ============================================================
|
||||
-- Per-product metrics for every product of a crawl-enabled dispensary:
--   stock_diff_120       : newest minus oldest quantity_available among the
--                          snapshots captured in the last 120 days (0 when unknown)
--   days_since_oos       : whole days since the most recent 'oos' visibility event
--   days_until_stock_out : stock_quantity / avg_daily_units, only when both are > 0
-- Reads mv_sku_velocity, so that view has to be refreshed before this one.
CREATE MATERIALIZED VIEW IF NOT EXISTS mv_product_intelligence AS
WITH
-- Oldest and newest quantity per product inside the 120-day window.
-- The frame spans the whole partition, so every row of a product carries the same pair.
snapshot_bounds AS (
    SELECT
        dispensary_id,
        product_id,
        FIRST_VALUE(quantity_available) OVER by_capture AS qty_120d_ago,
        LAST_VALUE(quantity_available) OVER by_capture AS qty_current
    FROM inventory_snapshots
    WHERE captured_at >= NOW() - INTERVAL '120 days'
    WINDOW by_capture AS (
        PARTITION BY dispensary_id, product_id
        ORDER BY captured_at ASC
        ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
    )
),
-- Collapse the repeated per-snapshot rows into one row per product
stock_change AS (
    SELECT DISTINCT
        dispensary_id,
        product_id,
        qty_current - COALESCE(qty_120d_ago, qty_current) AS stock_diff_120
    FROM snapshot_bounds
),
-- Timestamp of the most recent out-of-stock event per product
latest_oos AS (
    SELECT
        dispensary_id,
        product_id,
        MAX(detected_at) AS last_oos_date
    FROM product_visibility_events
    WHERE event_type = 'oos'
    GROUP BY dispensary_id, product_id
)
SELECT
    sp.dispensary_id,
    d.name AS dispensary_name,
    d.state AS state_code,
    d.city,
    sp.provider_product_id AS sku,
    sp.name_raw AS product_name,
    sp.brand_name_raw AS brand,
    sp.category_raw AS category,
    sp.is_in_stock,
    sp.stock_status,
    sp.stock_quantity,
    sp.price_rec AS price,
    sp.first_seen_at AS first_seen,
    sp.last_seen_at AS last_seen,

    -- Derived metrics
    COALESCE(chg.stock_diff_120, 0) AS stock_diff_120,
    -- NULL when the product has no recorded 'oos' event
    EXTRACT(DAY FROM NOW() - oos.last_oos_date)::INT AS days_since_oos,
    -- Current stock divided by the daily burn rate; NULL without positive stock and velocity
    CASE
        WHEN vel.avg_daily_units > 0 AND sp.stock_quantity > 0
            THEN ROUND(sp.stock_quantity::NUMERIC / vel.avg_daily_units)::INT
    END AS days_until_stock_out,
    vel.avg_daily_units,
    NOW() AS calculated_at
FROM store_products sp
JOIN dispensaries d
  ON d.id = sp.dispensary_id
LEFT JOIN stock_change chg
       ON chg.dispensary_id = sp.dispensary_id
      AND chg.product_id = sp.provider_product_id
LEFT JOIN latest_oos oos
       ON oos.dispensary_id = sp.dispensary_id
      AND oos.product_id = sp.provider_product_id
LEFT JOIN mv_sku_velocity vel
       ON vel.dispensary_id = sp.dispensary_id
      AND vel.product_id = sp.provider_product_id
WHERE d.crawl_enabled = TRUE;

-- Unique key; required by REFRESH MATERIALIZED VIEW CONCURRENTLY
CREATE UNIQUE INDEX IF NOT EXISTS idx_mv_prod_intel_pk
    ON mv_product_intelligence (dispensary_id, sku);

CREATE INDEX IF NOT EXISTS idx_mv_prod_intel_brand
    ON mv_product_intelligence (brand);

CREATE INDEX IF NOT EXISTS idx_mv_prod_intel_state
    ON mv_product_intelligence (state_code);

CREATE INDEX IF NOT EXISTS idx_mv_prod_intel_stock_out
    ON mv_product_intelligence (days_until_stock_out ASC NULLS LAST);

CREATE INDEX IF NOT EXISTS idx_mv_prod_intel_oos
    ON mv_product_intelligence (days_since_oos DESC NULLS LAST);
|
||||
|
||||
|
||||
-- ============================================================
|
||||
-- REFRESH FUNCTION
|
||||
-- ============================================================
|
||||
-- Refreshes every sales analytics materialized view concurrently and returns
-- one (view_name, rows_affected) row per view, where rows_affected is the
-- view's row count after its refresh.
CREATE OR REPLACE FUNCTION refresh_sales_analytics_views()
RETURNS TABLE(view_name TEXT, rows_affected BIGINT) AS $$
DECLARE
    -- Refresh order (dependencies first):
    --   mv_daily_sales_estimates   base view built from inventory snapshots
    --   mv_brand_market_share      reads base tables only
    --   mv_sku_velocity            reads mv_daily_sales_estimates
    --   mv_store_performance       reads mv_daily_sales_estimates
    --   mv_category_weekly_trends  reads mv_daily_sales_estimates
    --   mv_product_intelligence    reads mv_sku_velocity, so it goes last
    refresh_order CONSTANT TEXT[] := ARRAY[
        'mv_daily_sales_estimates',
        'mv_brand_market_share',
        'mv_sku_velocity',
        'mv_store_performance',
        'mv_category_weekly_trends',
        'mv_product_intelligence'
    ];
    mv TEXT;
BEGIN
    FOREACH mv IN ARRAY refresh_order LOOP
        EXECUTE format('REFRESH MATERIALIZED VIEW CONCURRENTLY %I', mv);
        EXECUTE format('SELECT COUNT(*) FROM %I', mv) INTO rows_affected;
        view_name := mv;
        RETURN NEXT;
    END LOOP;
END;
$$ LANGUAGE plpgsql;

COMMENT ON FUNCTION refresh_sales_analytics_views IS
    'Refresh all sales analytics materialized views. Call hourly via scheduler.';
|
||||
|
||||
|
||||
-- ============================================================
|
||||
-- INITIAL REFRESH (populate views)
|
||||
-- ============================================================
|
||||
-- Note: Initial refresh must be non-concurrent (no unique index yet populated)
|
||||
-- Run these manually after migration:
|
||||
-- REFRESH MATERIALIZED VIEW mv_daily_sales_estimates;
|
||||
-- REFRESH MATERIALIZED VIEW mv_brand_market_share;
|
||||
-- REFRESH MATERIALIZED VIEW mv_sku_velocity;
|
||||
-- REFRESH MATERIALIZED VIEW mv_store_performance;
|
||||
-- REFRESH MATERIALIZED VIEW mv_category_weekly_trends;
-- REFRESH MATERIALIZED VIEW mv_product_intelligence;  -- last: reads mv_sku_velocity
|
||||
@@ -1,359 +0,0 @@
|
||||
-- Migration 122: Market Intelligence Schema
|
||||
-- Separate schema for external market data ingestion
|
||||
-- Supports product, brand, and dispensary data from third-party sources
|
||||
|
||||
-- Create the dedicated schema that holds the market intelligence tables and views (no-op if it already exists)
|
||||
CREATE SCHEMA IF NOT EXISTS market_intel;
|
||||
|
||||
-- ============================================================
|
||||
-- BRANDS: Brand/Company Intelligence
|
||||
-- ============================================================
|
||||
-- One row per brand ingested from an external source.
CREATE TABLE IF NOT EXISTS market_intel.brands (
    id                  SERIAL PRIMARY KEY,

    -- Identity
    brand_name          VARCHAR(255) NOT NULL,
    parent_brand        VARCHAR(255),
    parent_company      VARCHAR(255),
    slug                VARCHAR(255),
    -- objectID from source
    external_id         VARCHAR(255) UNIQUE,

    -- Details
    brand_description   TEXT,
    brand_logo_url      TEXT,
    brand_url           TEXT,
    linkedin_url        TEXT,

    -- Presence
    -- Array of state names
    states              JSONB DEFAULT '[]',
    active_variants     INTEGER DEFAULT 0,
    all_variants        INTEGER DEFAULT 0,

    -- Metadata
    source              VARCHAR(50) DEFAULT 'external',
    fetched_at          TIMESTAMPTZ DEFAULT NOW(),
    created_at          TIMESTAMPTZ DEFAULT NOW(),
    updated_at          TIMESTAMPTZ DEFAULT NOW()
);

CREATE INDEX IF NOT EXISTS idx_brands_name
    ON market_intel.brands (brand_name);
CREATE INDEX IF NOT EXISTS idx_brands_parent
    ON market_intel.brands (parent_brand);
-- NOTE(review): external_id already has an index through its UNIQUE constraint,
-- so this one looks redundant - confirm nothing references it by name before dropping.
CREATE INDEX IF NOT EXISTS idx_brands_external
    ON market_intel.brands (external_id);
CREATE INDEX IF NOT EXISTS idx_brands_states
    ON market_intel.brands USING GIN (states);
|
||||
|
||||
|
||||
-- ============================================================
|
||||
-- DISPENSARIES: Dispensary/Store Intelligence
|
||||
-- ============================================================
|
||||
-- One row per dispensary ingested from an external source, including location,
-- licensing, sales estimates and customer demographics.
CREATE TABLE IF NOT EXISTS market_intel.dispensaries (
    id                          SERIAL PRIMARY KEY,

    -- Identity
    dispensary_name             VARCHAR(255) NOT NULL,
    dispensary_company_name     VARCHAR(255),
    dispensary_company_id       VARCHAR(255),
    slug                        VARCHAR(255),
    -- objectID from source
    external_id                 VARCHAR(255) UNIQUE,

    -- Location
    street_address              VARCHAR(255),
    city                        VARCHAR(100),
    state                       VARCHAR(100),
    postal_code                 VARCHAR(20),
    county_name                 VARCHAR(100),
    country_code                VARCHAR(10) DEFAULT 'USA',
    full_address                TEXT,
    latitude                    NUMERIC(10, 7),
    longitude                   NUMERIC(10, 7),
    timezone                    VARCHAR(50),
    -- Urban, Suburban, Rural
    urbanicity                  VARCHAR(50),

    -- Contact
    phone                       VARCHAR(50),
    email                       VARCHAR(255),
    website                     TEXT,
    linkedin_url                TEXT,

    -- License
    license_number              VARCHAR(100),
    license_type                VARCHAR(100),

    -- Store Type
    is_medical                  BOOLEAN DEFAULT FALSE,
    is_recreational             BOOLEAN DEFAULT FALSE,
    delivery_enabled            BOOLEAN DEFAULT FALSE,
    curbside_pickup             BOOLEAN DEFAULT FALSE,
    instore_pickup              BOOLEAN DEFAULT FALSE,
    -- RETAIL, DELIVERY, etc.
    location_type               VARCHAR(50),

    -- Sales Estimates
    estimated_daily_sales       NUMERIC(12, 2),
    estimated_sales             NUMERIC(12, 2),
    avg_daily_sales             NUMERIC(12, 2),
    state_sales_bucket          INTEGER,

    -- Customer Demographics
    -- Array of affluency segments
    affluency                   JSONB DEFAULT '[]',
    -- Array of age brackets
    age_skew                    JSONB DEFAULT '[]',
    -- Array of segment names
    customer_segments           JSONB DEFAULT '[]',

    -- Inventory Stats
    menus_count                 INTEGER DEFAULT 0,
    menus_count_med             INTEGER DEFAULT 0,
    menus_count_rec             INTEGER DEFAULT 0,
    parent_brands               JSONB DEFAULT '[]',
    brand_company_names         JSONB DEFAULT '[]',

    -- Business Info
    -- Chain/banner name
    banner                      VARCHAR(255),
    -- MSO, Independent, etc.
    business_type               VARCHAR(50),
    pos_system                  VARCHAR(100),
    atm_presence                BOOLEAN DEFAULT FALSE,
    tax_included                BOOLEAN DEFAULT FALSE,

    -- Ratings
    rating                      NUMERIC(3, 2),
    reviews_count               INTEGER DEFAULT 0,

    -- Status
    is_closed                   BOOLEAN DEFAULT FALSE,
    open_date                   TIMESTAMPTZ,
    last_updated_at             TIMESTAMPTZ,

    -- Media
    logo_url                    TEXT,
    cover_url                   TEXT,

    -- Metadata
    source                      VARCHAR(50) DEFAULT 'external',
    fetched_at                  TIMESTAMPTZ DEFAULT NOW(),
    created_at                  TIMESTAMPTZ DEFAULT NOW(),
    updated_at                  TIMESTAMPTZ DEFAULT NOW()
);

CREATE INDEX IF NOT EXISTS idx_dispensaries_name
    ON market_intel.dispensaries (dispensary_name);
CREATE INDEX IF NOT EXISTS idx_dispensaries_state
    ON market_intel.dispensaries (state);
CREATE INDEX IF NOT EXISTS idx_dispensaries_city
    ON market_intel.dispensaries (city);
-- NOTE(review): external_id already has an index through its UNIQUE constraint,
-- so this one looks redundant - confirm nothing references it by name before dropping.
CREATE INDEX IF NOT EXISTS idx_dispensaries_external
    ON market_intel.dispensaries (external_id);
CREATE INDEX IF NOT EXISTS idx_dispensaries_banner
    ON market_intel.dispensaries (banner);
CREATE INDEX IF NOT EXISTS idx_dispensaries_business_type
    ON market_intel.dispensaries (business_type);
CREATE INDEX IF NOT EXISTS idx_dispensaries_geo
    ON market_intel.dispensaries (latitude, longitude);
CREATE INDEX IF NOT EXISTS idx_dispensaries_segments
    ON market_intel.dispensaries USING GIN (customer_segments);
|
||||
|
||||
|
||||
-- ============================================================
|
||||
-- PRODUCTS: Product/SKU Intelligence
|
||||
-- ============================================================
|
||||
-- One row per product ingested from an external source, with category,
-- cannabinoid data and denormalized context of the dispensary it was seen at.
CREATE TABLE IF NOT EXISTS market_intel.products (
    id                      SERIAL PRIMARY KEY,

    -- Identity
    name                    VARCHAR(500) NOT NULL,
    brand                   VARCHAR(255),
    brand_id                VARCHAR(255),
    brand_company_name      VARCHAR(255),
    parent_brand            VARCHAR(255),
    -- objectID from source
    external_id             VARCHAR(255) UNIQUE,
    -- Canonical menu ID
    cm_id                   VARCHAR(100),

    -- Category Hierarchy
    -- Top level: Flower, Edibles, Vapes
    category_0              VARCHAR(100),
    -- Mid level: Flower > Pre-Rolls
    category_1              VARCHAR(255),
    -- Detailed: Flower > Pre-Rolls > Singles
    category_2              VARCHAR(500),

    -- Cannabis Classification
    -- SATIVA, INDICA, HYBRID
    cannabis_type           VARCHAR(50),
    strain                  VARCHAR(255),
    flavor                  VARCHAR(255),
    pack_size               VARCHAR(100),
    description             TEXT,

    -- Cannabinoids
    thc_mg                  NUMERIC(10, 2),
    cbd_mg                  NUMERIC(10, 2),
    percent_thc             NUMERIC(5, 2),
    percent_cbd             NUMERIC(5, 2),

    -- Dispensary Context (denormalized for query performance)
    master_dispensary_name  VARCHAR(255),
    master_dispensary_id    VARCHAR(255),
    -- How many stores carry this
    dispensary_count        INTEGER DEFAULT 0,
    d_state                 VARCHAR(100),
    d_city                  VARCHAR(100),
    d_banner                VARCHAR(255),
    d_business_type         VARCHAR(50),
    d_medical               BOOLEAN,
    d_recreational          BOOLEAN,

    -- Customer Demographics (from dispensary)
    d_customer_segments     JSONB DEFAULT '[]',
    d_age_skew              JSONB DEFAULT '[]',
    d_affluency             JSONB DEFAULT '[]',
    d_urbanicity            VARCHAR(50),

    -- Stock Status
    in_stock                BOOLEAN DEFAULT TRUE,
    last_seen_at            DATE,
    last_seen_at_ts         BIGINT,

    -- Media
    img_url                 TEXT,
    product_url             TEXT,
    menu_slug               VARCHAR(500),

    -- Geo
    latitude                NUMERIC(10, 7),
    longitude               NUMERIC(10, 7),

    -- Metadata
    source                  VARCHAR(50) DEFAULT 'external',
    fetched_at              TIMESTAMPTZ DEFAULT NOW(),
    created_at              TIMESTAMPTZ DEFAULT NOW(),
    updated_at              TIMESTAMPTZ DEFAULT NOW()
);

CREATE INDEX IF NOT EXISTS idx_products_name
    ON market_intel.products (name);
CREATE INDEX IF NOT EXISTS idx_products_brand
    ON market_intel.products (brand);
-- NOTE(review): external_id already has an index through its UNIQUE constraint,
-- so this one looks redundant - confirm nothing references it by name before dropping.
CREATE INDEX IF NOT EXISTS idx_products_external
    ON market_intel.products (external_id);
CREATE INDEX IF NOT EXISTS idx_products_category
    ON market_intel.products (category_0, category_1);
CREATE INDEX IF NOT EXISTS idx_products_cannabis_type
    ON market_intel.products (cannabis_type);
CREATE INDEX IF NOT EXISTS idx_products_strain
    ON market_intel.products (strain);
CREATE INDEX IF NOT EXISTS idx_products_state
    ON market_intel.products (d_state);
CREATE INDEX IF NOT EXISTS idx_products_in_stock
    ON market_intel.products (in_stock);
CREATE INDEX IF NOT EXISTS idx_products_dispensary_count
    ON market_intel.products (dispensary_count DESC);
CREATE INDEX IF NOT EXISTS idx_products_segments
    ON market_intel.products USING GIN (d_customer_segments);
|
||||
|
||||
|
||||
-- ============================================================
|
||||
-- PRODUCT_VARIANTS: Variant-Level Data (Pricing, Stock)
|
||||
-- ============================================================
|
||||
-- Variant-level rows (pricing, presentation, stock intelligence) belonging to
-- market_intel.products; variants are deleted together with their product.
CREATE TABLE IF NOT EXISTS market_intel.product_variants (
    id                      SERIAL PRIMARY KEY,
    -- NOTE(review): product_id is nullable, and rows with a NULL product_id are
    -- not covered by the UNIQUE (product_id, variant_id) constraint - confirm intended.
    product_id              INTEGER REFERENCES market_intel.products(id) ON DELETE CASCADE,

    -- Identity
    variant_id              VARCHAR(255) NOT NULL,
    pos_sku                 VARCHAR(255),
    pos_product_id          VARCHAR(255),
    pos_system              VARCHAR(100),

    -- Pricing
    actual_price            NUMERIC(10, 2),
    original_price          NUMERIC(10, 2),
    discounted_price        NUMERIC(10, 2),

    -- Presentation
    -- "100.00 mg", "3.5g", etc.
    product_presentation    VARCHAR(255),
    quantity                NUMERIC(10, 2),
    -- mg, g, oz, each
    unit                    VARCHAR(50),

    -- Availability
    is_medical              BOOLEAN DEFAULT FALSE,
    is_recreational         BOOLEAN DEFAULT FALSE,
    is_active               BOOLEAN DEFAULT TRUE,

    -- Stock Intelligence
    -- In Stock, Low Stock, Out of Stock
    stock_status            VARCHAR(50),
    -- 120-day stock change
    stock_diff_120          NUMERIC(10, 2),
    days_since_oos          INTEGER,
    days_until_stock_out    INTEGER,

    -- Timestamps
    first_seen_at_ts        BIGINT,
    first_seen_at           TIMESTAMPTZ,
    last_seen_at            DATE,

    -- Metadata
    fetched_at              TIMESTAMPTZ DEFAULT NOW(),
    created_at              TIMESTAMPTZ DEFAULT NOW(),
    updated_at              TIMESTAMPTZ DEFAULT NOW(),

    UNIQUE (product_id, variant_id)
);

CREATE INDEX IF NOT EXISTS idx_variants_product
    ON market_intel.product_variants (product_id);
CREATE INDEX IF NOT EXISTS idx_variants_sku
    ON market_intel.product_variants (pos_sku);
CREATE INDEX IF NOT EXISTS idx_variants_stock_status
    ON market_intel.product_variants (stock_status);
CREATE INDEX IF NOT EXISTS idx_variants_price
    ON market_intel.product_variants (actual_price);
CREATE INDEX IF NOT EXISTS idx_variants_days_out
    ON market_intel.product_variants (days_until_stock_out);
|
||||
|
||||
|
||||
-- ============================================================
|
||||
-- FETCH_LOG: Track data fetches
|
||||
-- ============================================================
|
||||
-- One row per data fetch: what was fetched, how many records were
-- fetched/inserted/updated, how long it took and any error message.
CREATE TABLE IF NOT EXISTS market_intel.fetch_log (
    id                  SERIAL PRIMARY KEY,
    -- brands, dispensaries, products
    fetch_type          VARCHAR(50) NOT NULL,
    state_code          VARCHAR(10),
    query_params        JSONB,
    records_fetched     INTEGER DEFAULT 0,
    records_inserted    INTEGER DEFAULT 0,
    records_updated     INTEGER DEFAULT 0,
    duration_ms         INTEGER,
    error_message       TEXT,
    started_at          TIMESTAMPTZ DEFAULT NOW(),
    completed_at        TIMESTAMPTZ
);

CREATE INDEX IF NOT EXISTS idx_fetch_log_type
    ON market_intel.fetch_log (fetch_type);
CREATE INDEX IF NOT EXISTS idx_fetch_log_state
    ON market_intel.fetch_log (state_code);
CREATE INDEX IF NOT EXISTS idx_fetch_log_started
    ON market_intel.fetch_log (started_at DESC);
|
||||
|
||||
|
||||
-- ============================================================
|
||||
-- HELPER VIEWS
|
||||
-- ============================================================
|
||||
|
||||
-- Brand market presence summary: variant counts and number of states per brand,
-- ordered by active variant count (largest first).
--   state_count is only computed when states holds a JSON array;
--   jsonb_array_length() raises an error on any other JSON value (object,
--   scalar, JSON null), which would make the whole view unreadable because of
--   a single malformed row. Non-array values now yield NULL instead.
--   NULLS LAST keeps brands with an unknown variant count from sorting to the
--   top, consistent with the ranking in v_dispensary_rankings.
CREATE OR REPLACE VIEW market_intel.v_brand_presence AS
SELECT
    b.brand_name,
    b.parent_company,
    b.active_variants,
    b.all_variants,
    CASE
        WHEN jsonb_typeof(b.states) = 'array' THEN jsonb_array_length(b.states)
    END AS state_count,
    b.states,
    b.fetched_at
FROM market_intel.brands b
ORDER BY b.active_variants DESC NULLS LAST;
|
||||
|
||||
-- Ranks dispensaries that are not closed within their state by estimated daily
-- sales; stores without an estimate rank after all stores that have one.
CREATE OR REPLACE VIEW market_intel.v_dispensary_rankings AS
SELECT
    disp.dispensary_name,
    disp.city,
    disp.state,
    disp.banner,
    disp.business_type,
    disp.estimated_daily_sales,
    disp.menus_count,
    disp.is_medical,
    disp.is_recreational,
    disp.customer_segments,
    RANK() OVER state_by_sales AS state_rank
FROM market_intel.dispensaries disp
WHERE NOT disp.is_closed
WINDOW state_by_sales AS (
    PARTITION BY disp.state
    ORDER BY disp.estimated_daily_sales DESC NULLS LAST
);
|
||||
|
||||
-- Product counts per (brand, state, top-level category), with the in-stock
-- subset, the average dispensary_count and the number of distinct master dispensaries.
CREATE OR REPLACE VIEW market_intel.v_product_distribution AS
SELECT
    prod.brand,
    prod.d_state                                AS state,
    prod.category_0                             AS category,
    COUNT(*)                                    AS product_count,
    COUNT(*) FILTER (WHERE prod.in_stock)       AS in_stock_count,
    AVG(prod.dispensary_count)                  AS avg_store_count,
    COUNT(DISTINCT prod.master_dispensary_id)   AS unique_stores
FROM market_intel.products prod
GROUP BY
    prod.brand,
    prod.d_state,
    prod.category_0;
|
||||
|
||||
|
||||
-- ============================================================
|
||||
-- COMMENTS
|
||||
-- ============================================================
|
||||
-- Catalog descriptions for the schema and each of its tables
COMMENT ON SCHEMA market_intel IS
    'Market intelligence data from external sources';
COMMENT ON TABLE market_intel.brands IS
    'Brand/company data with multi-state presence';
COMMENT ON TABLE market_intel.dispensaries IS
    'Dispensary data with sales estimates and demographics';
COMMENT ON TABLE market_intel.products IS
    'Product/SKU data with cannabinoid and category info';
COMMENT ON TABLE market_intel.product_variants IS
    'Variant-level pricing and stock data';
COMMENT ON TABLE market_intel.fetch_log IS
    'Log of data fetches for monitoring';
|
||||
@@ -1,159 +0,0 @@
|
||||
-- Migration 123: Extract unmapped fields from provider_data
|
||||
-- These fields exist in our crawl payloads but weren't being stored in columns
|
||||
|
||||
-- ============================================================
|
||||
-- ADD NEW COLUMNS TO store_products
|
||||
-- ============================================================
|
||||
|
||||
-- Cannabis classification (SATIVA, INDICA, HYBRID, CBD)
|
||||
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS cannabis_type VARCHAR(50);
|
||||
|
||||
-- Canonical IDs from POS systems
|
||||
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS canonical_strain_id VARCHAR(100);
|
||||
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS canonical_vendor_id VARCHAR(100);
|
||||
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS canonical_brand_id VARCHAR(100);
|
||||
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS canonical_category_id VARCHAR(100);
|
||||
|
||||
-- Lab results
|
||||
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS lab_result_url TEXT;
|
||||
|
||||
-- Flavors (extracted from JSONB to text array for easier querying)
|
||||
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS flavors_list TEXT[];
|
||||
|
||||
-- ============================================================
|
||||
-- BACKFILL FROM provider_data
|
||||
-- ============================================================
|
||||
|
||||
-- Backfill cannabis_type from classification
|
||||
-- Populate cannabis_type from the provider's raw 'classification' value for
-- rows that have a classification but no cannabis_type yet.
-- Matching is done on the upper-cased, trimmed value so that variants such as
-- 'Hybrid' or ' indica ' normalize the same way as 'HYBRID' / 'INDICA'
-- (the strain_type fallback that follows is case-insensitive as well).
-- Values that match no known code are stored unchanged.
UPDATE store_products
SET cannabis_type = CASE
        WHEN UPPER(BTRIM(provider_data->>'classification')) IN ('HYBRID', 'H') THEN 'HYBRID'
        WHEN UPPER(BTRIM(provider_data->>'classification')) IN ('INDICA', 'I') THEN 'INDICA'
        WHEN UPPER(BTRIM(provider_data->>'classification')) IN ('SATIVA', 'S') THEN 'SATIVA'
        WHEN UPPER(BTRIM(provider_data->>'classification')) = 'I/S' THEN 'INDICA_DOMINANT'
        WHEN UPPER(BTRIM(provider_data->>'classification')) = 'S/I' THEN 'SATIVA_DOMINANT'
        WHEN UPPER(BTRIM(provider_data->>'classification')) = 'CBD' THEN 'CBD'
        ELSE provider_data->>'classification'
    END
WHERE provider_data->>'classification' IS NOT NULL
  AND cannabis_type IS NULL;
|
||||
|
||||
-- Also backfill from strain_type if cannabis_type still null
|
||||
UPDATE store_products
|
||||
SET cannabis_type = CASE
|
||||
WHEN strain_type ILIKE '%indica%hybrid%' OR strain_type ILIKE '%hybrid%indica%' THEN 'INDICA_DOMINANT'
|
||||
WHEN strain_type ILIKE '%sativa%hybrid%' OR strain_type ILIKE '%hybrid%sativa%' THEN 'SATIVA_DOMINANT'
|
||||
WHEN strain_type ILIKE '%indica%' THEN 'INDICA'
|
||||
WHEN strain_type ILIKE '%sativa%' THEN 'SATIVA'
|
||||
WHEN strain_type ILIKE '%hybrid%' THEN 'HYBRID'
|
||||
WHEN strain_type ILIKE '%cbd%' THEN 'CBD'
|
||||
ELSE NULL
|
||||
END
|
||||
WHERE strain_type IS NOT NULL
|
||||
AND cannabis_type IS NULL;
|
||||
|
||||
-- Backfill canonical IDs from POSMetaData
|
||||
UPDATE store_products
|
||||
SET
|
||||
canonical_strain_id = provider_data->'POSMetaData'->>'canonicalStrainId',
|
||||
canonical_vendor_id = provider_data->'POSMetaData'->>'canonicalVendorId',
|
||||
canonical_brand_id = provider_data->'POSMetaData'->>'canonicalBrandId',
|
||||
canonical_category_id = provider_data->'POSMetaData'->>'canonicalCategoryId'
|
||||
WHERE provider_data->'POSMetaData' IS NOT NULL
|
||||
AND canonical_strain_id IS NULL;
|
||||
|
||||
-- Backfill lab result URLs
|
||||
UPDATE store_products
|
||||
SET lab_result_url = provider_data->'POSMetaData'->>'canonicalLabResultUrl'
|
||||
WHERE provider_data->'POSMetaData'->>'canonicalLabResultUrl' IS NOT NULL
|
||||
AND lab_result_url IS NULL;
|
||||
|
||||
-- ============================================================
|
||||
-- INDEXES
|
||||
-- ============================================================
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_store_products_cannabis_type ON store_products(cannabis_type);
|
||||
CREATE INDEX IF NOT EXISTS idx_store_products_vendor_id ON store_products(canonical_vendor_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_store_products_strain_id ON store_products(canonical_strain_id);
|
||||
|
||||
-- ============================================================
|
||||
-- ADD MSO FLAG TO DISPENSARIES
|
||||
-- ============================================================
|
||||
|
||||
-- Multi-State Operator flag (calculated from chain presence in multiple states)
|
||||
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS is_mso BOOLEAN DEFAULT FALSE;
|
||||
|
||||
-- Update MSO flag based on chain presence in multiple states
|
||||
WITH mso_chains AS (
|
||||
SELECT chain_id
|
||||
FROM dispensaries
|
||||
WHERE chain_id IS NOT NULL
|
||||
GROUP BY chain_id
|
||||
HAVING COUNT(DISTINCT state) > 1
|
||||
)
|
||||
UPDATE dispensaries d
|
||||
SET is_mso = TRUE
|
||||
WHERE d.chain_id IN (SELECT chain_id FROM mso_chains);
|
||||
|
||||
-- Index for MSO queries
|
||||
CREATE INDEX IF NOT EXISTS idx_dispensaries_is_mso ON dispensaries(is_mso) WHERE is_mso = TRUE;
|
||||
|
||||
-- ============================================================
|
||||
-- PRODUCT DISTRIBUTION VIEW
|
||||
-- ============================================================
|
||||
|
||||
-- View: How many stores carry each product (by brand + canonical name)
|
||||
CREATE OR REPLACE VIEW v_product_distribution AS
|
||||
SELECT
|
||||
sp.brand_name_raw as brand,
|
||||
sp.c_name as product_canonical_name,
|
||||
COUNT(DISTINCT sp.dispensary_id) as store_count,
|
||||
COUNT(DISTINCT d.state) as state_count,
|
||||
ARRAY_AGG(DISTINCT d.state) as states,
|
||||
AVG(sp.price_rec) as avg_price,
|
||||
MIN(sp.price_rec) as min_price,
|
||||
MAX(sp.price_rec) as max_price
|
||||
FROM store_products sp
|
||||
JOIN dispensaries d ON d.id = sp.dispensary_id
|
||||
WHERE sp.c_name IS NOT NULL
|
||||
AND sp.brand_name_raw IS NOT NULL
|
||||
AND sp.is_in_stock = TRUE
|
||||
GROUP BY sp.brand_name_raw, sp.c_name
|
||||
HAVING COUNT(DISTINCT sp.dispensary_id) > 1
|
||||
ORDER BY store_count DESC;
|
||||
|
||||
-- ============================================================
|
||||
-- MSO SUMMARY VIEW
|
||||
-- ============================================================
|
||||
|
||||
CREATE OR REPLACE VIEW v_mso_summary AS
|
||||
SELECT
|
||||
c.name as chain_name,
|
||||
COUNT(DISTINCT d.id) as store_count,
|
||||
COUNT(DISTINCT d.state) as state_count,
|
||||
ARRAY_AGG(DISTINCT d.state ORDER BY d.state) as states,
|
||||
SUM(d.product_count) as total_products,
|
||||
TRUE as is_mso
|
||||
FROM dispensaries d
|
||||
JOIN chains c ON c.id = d.chain_id
|
||||
WHERE d.chain_id IN (
|
||||
SELECT chain_id
|
||||
FROM dispensaries
|
||||
WHERE chain_id IS NOT NULL
|
||||
GROUP BY chain_id
|
||||
HAVING COUNT(DISTINCT state) > 1
|
||||
)
|
||||
GROUP BY c.id, c.name
|
||||
ORDER BY state_count DESC, store_count DESC;
|
||||
|
||||
-- ============================================================
|
||||
-- COMMENTS
|
||||
-- ============================================================
|
||||
|
||||
COMMENT ON COLUMN store_products.cannabis_type IS 'Normalized cannabis classification: SATIVA, INDICA, HYBRID, INDICA_DOMINANT, SATIVA_DOMINANT, CBD';
|
||||
COMMENT ON COLUMN store_products.canonical_strain_id IS 'POS system strain identifier for cross-store matching';
|
||||
COMMENT ON COLUMN store_products.canonical_vendor_id IS 'POS system vendor/supplier identifier';
|
||||
COMMENT ON COLUMN store_products.lab_result_url IS 'Link to Certificate of Analysis / lab test results';
|
||||
COMMENT ON COLUMN dispensaries.is_mso IS 'Multi-State Operator: chain operates in 2+ states';
|
||||
COMMENT ON VIEW v_product_distribution IS 'Shows how many stores carry each product for distribution analysis';
|
||||
COMMENT ON VIEW v_mso_summary IS 'Summary of multi-state operator chains';
|
||||
@@ -1,73 +0,0 @@
|
||||
-- Migration 124: Convert inventory_snapshots to TimescaleDB hypertable
|
||||
-- Requires: CREATE EXTENSION timescaledb; (run after installing TimescaleDB)
|
||||
|
||||
-- ============================================================
|
||||
-- STEP 1: Enable TimescaleDB extension
|
||||
-- ============================================================
|
||||
CREATE EXTENSION IF NOT EXISTS timescaledb;
|
||||
|
||||
-- ============================================================
|
||||
-- STEP 2: Convert to hypertable
|
||||
-- ============================================================
|
||||
-- Note: Table must have a time column and no foreign key constraints
|
||||
|
||||
-- First, drop any foreign keys if they exist
|
||||
ALTER TABLE inventory_snapshots DROP CONSTRAINT IF EXISTS inventory_snapshots_dispensary_id_fkey;
|
||||
|
||||
-- Convert to hypertable, partitioned by captured_at (1 day chunks)
|
||||
SELECT create_hypertable(
|
||||
'inventory_snapshots',
|
||||
'captured_at',
|
||||
chunk_time_interval => INTERVAL '1 day',
|
||||
if_not_exists => TRUE,
|
||||
migrate_data => TRUE
|
||||
);
|
||||
|
||||
-- ============================================================
|
||||
-- STEP 3: Enable compression
|
||||
-- ============================================================
|
||||
-- Compress by dispensary_id and product_id (common query patterns)
|
||||
ALTER TABLE inventory_snapshots SET (
|
||||
timescaledb.compress,
|
||||
timescaledb.compress_segmentby = 'dispensary_id, product_id',
|
||||
timescaledb.compress_orderby = 'captured_at DESC'
|
||||
);
|
||||
|
||||
-- ============================================================
|
||||
-- STEP 4: Compression policy (compress chunks older than 1 day)
|
||||
-- ============================================================
|
||||
-- if_not_exists keeps the migration re-runnable: without it a second run
-- raises an error because the hypertable already has a compression policy.
SELECT add_compression_policy('inventory_snapshots', INTERVAL '1 day', if_not_exists => TRUE);
|
||||
|
||||
-- ============================================================
|
||||
-- STEP 5: Retention policy (optional - drop chunks older than 90 days)
|
||||
-- ============================================================
|
||||
-- Uncomment if you want automatic cleanup:
|
||||
-- SELECT add_retention_policy('inventory_snapshots', INTERVAL '90 days');
|
||||
|
||||
-- ============================================================
|
||||
-- STEP 6: Optimize indexes for time-series queries
|
||||
-- ============================================================
|
||||
-- TimescaleDB automatically creates time-based indexes
|
||||
-- Add composite index for common queries
|
||||
CREATE INDEX IF NOT EXISTS idx_snapshots_disp_prod_time
|
||||
ON inventory_snapshots (dispensary_id, product_id, captured_at DESC);
|
||||
|
||||
-- ============================================================
|
||||
-- VERIFICATION QUERIES (run after migration)
|
||||
-- ============================================================
|
||||
-- Check hypertable status:
|
||||
-- SELECT * FROM timescaledb_information.hypertables WHERE hypertable_name = 'inventory_snapshots';
|
||||
|
||||
-- Check compression status:
|
||||
-- SELECT * FROM timescaledb_information.compression_settings WHERE hypertable_name = 'inventory_snapshots';
|
||||
|
||||
-- Check chunk sizes:
|
||||
-- SELECT chunk_name, pg_size_pretty(before_compression_total_bytes) as before,
|
||||
-- pg_size_pretty(after_compression_total_bytes) as after,
|
||||
-- round(100 - (after_compression_total_bytes::numeric / before_compression_total_bytes * 100), 1) as compression_pct
|
||||
-- FROM chunk_compression_stats('inventory_snapshots');
|
||||
|
||||
-- ============================================================
|
||||
-- COMMENTS
|
||||
-- ============================================================
|
||||
COMMENT ON TABLE inventory_snapshots IS 'TimescaleDB hypertable for inventory time-series data. Compressed after 1 day.';
|
||||
@@ -1,402 +0,0 @@
|
||||
-- Migration 125: Delta-only inventory snapshots
|
||||
-- Only store a row when something meaningful changes
|
||||
-- Revenue calculated as: effective_price × qty_sold
|
||||
|
||||
-- ============================================================
|
||||
-- ADD DELTA TRACKING COLUMNS
|
||||
-- ============================================================
|
||||
|
||||
-- Previous values (to show what changed)
|
||||
ALTER TABLE inventory_snapshots ADD COLUMN IF NOT EXISTS prev_quantity INTEGER;
|
||||
ALTER TABLE inventory_snapshots ADD COLUMN IF NOT EXISTS prev_price_rec DECIMAL(10,2);
|
||||
ALTER TABLE inventory_snapshots ADD COLUMN IF NOT EXISTS prev_price_med DECIMAL(10,2);
|
||||
ALTER TABLE inventory_snapshots ADD COLUMN IF NOT EXISTS prev_status VARCHAR(50);
|
||||
|
||||
-- Calculated deltas
|
||||
ALTER TABLE inventory_snapshots ADD COLUMN IF NOT EXISTS qty_delta INTEGER; -- negative = sold, positive = restocked
|
||||
ALTER TABLE inventory_snapshots ADD COLUMN IF NOT EXISTS price_delta DECIMAL(10,2);
|
||||
|
||||
-- Change type flags
|
||||
ALTER TABLE inventory_snapshots ADD COLUMN IF NOT EXISTS change_type VARCHAR(50); -- 'sale', 'restock', 'price_change', 'oos', 'back_in_stock'
|
||||
|
||||
-- ============================================================
|
||||
-- INDEX FOR CHANGE TYPE QUERIES
|
||||
-- ============================================================
|
||||
CREATE INDEX IF NOT EXISTS idx_snapshots_change_type ON inventory_snapshots(change_type);
|
||||
CREATE INDEX IF NOT EXISTS idx_snapshots_qty_delta ON inventory_snapshots(qty_delta) WHERE qty_delta != 0;
|
||||
|
||||
-- ============================================================
|
||||
-- VIEW: Latest product state (for delta comparison)
|
||||
-- ============================================================
|
||||
CREATE OR REPLACE VIEW v_product_latest_state AS
|
||||
SELECT DISTINCT ON (dispensary_id, product_id)
|
||||
dispensary_id,
|
||||
product_id,
|
||||
quantity_available,
|
||||
price_rec,
|
||||
price_med,
|
||||
status,
|
||||
captured_at
|
||||
FROM inventory_snapshots
|
||||
ORDER BY dispensary_id, product_id, captured_at DESC;
|
||||
|
||||
-- ============================================================
|
||||
-- FUNCTION: Check if product state changed
|
||||
-- ============================================================
|
||||
-- Compare a freshly observed product state with the latest stored snapshot
-- (v_product_latest_state) for the same dispensary/product and report whether
-- a new snapshot row should be written.
--
-- Parameters:
--   p_dispensary_id, p_product_id  identify the product at a store
--   p_quantity                     observed quantity
--   p_price_rec, p_price_med       observed prices
--   p_status                       observed status
--
-- Returns exactly one row:
--   * no previous snapshot -> should_capture = TRUE, change_type = 'new_product',
--                             all prev_* / delta columns NULL
--   * any of quantity / price_rec / price_med / status differs
--                          -> should_capture = TRUE with the previous values,
--                             qty_delta, price_delta (price_rec only) and change_type
--   * nothing differs      -> should_capture = FALSE, all other columns NULL
--
-- change_type precedence: 'oos', 'back_in_stock', 'sale', 'restock',
-- 'price_change', otherwise 'status_change' (a change in price_med alone
-- therefore reports 'status_change').
CREATE OR REPLACE FUNCTION should_capture_snapshot(
    p_dispensary_id INTEGER,
    p_product_id TEXT,
    p_quantity INTEGER,
    p_price_rec DECIMAL,
    p_price_med DECIMAL,
    p_status VARCHAR
) RETURNS TABLE (
    should_capture BOOLEAN,
    prev_quantity INTEGER,
    prev_price_rec DECIMAL,
    prev_price_med DECIMAL,
    prev_status VARCHAR,
    qty_delta INTEGER,
    price_delta DECIMAL,
    change_type VARCHAR
) AS $$
DECLARE
    v_prev RECORD;
BEGIN
    -- Latest stored state for this product at this store.
    SELECT
        ls.quantity_available,
        ls.price_rec,
        ls.price_med,
        ls.status
    INTO v_prev
    FROM v_product_latest_state ls
    WHERE ls.dispensary_id = p_dispensary_id
      AND ls.product_id = p_product_id;

    -- First time seeing this product: always capture, nothing to diff against.
    IF NOT FOUND THEN
        RETURN QUERY SELECT
            TRUE,
            NULL::INTEGER,
            NULL::DECIMAL,
            NULL::DECIMAL,
            NULL::VARCHAR,
            NULL::INTEGER,
            NULL::DECIMAL,
            'new_product'::VARCHAR;
        RETURN;
    END IF;

    -- Nothing changed: report "do not capture".
    -- IS NOT DISTINCT FROM treats NULL as a comparable value.
    IF v_prev.quantity_available IS NOT DISTINCT FROM p_quantity
       AND v_prev.price_rec IS NOT DISTINCT FROM p_price_rec
       AND v_prev.price_med IS NOT DISTINCT FROM p_price_med
       AND v_prev.status IS NOT DISTINCT FROM p_status THEN
        RETURN QUERY SELECT
            FALSE,
            NULL::INTEGER,
            NULL::DECIMAL,
            NULL::DECIMAL,
            NULL::VARCHAR,
            NULL::INTEGER,
            NULL::DECIMAL,
            NULL::VARCHAR;
        RETURN;
    END IF;

    -- Something changed: return previous values, deltas and a classification.
    -- Explicit casts keep every column exactly on the declared RETURNS TABLE
    -- type; an un-cast CASE over string literals resolves to text, which
    -- RETURN QUERY can reject as a result-type mismatch against VARCHAR.
    RETURN QUERY SELECT
        TRUE,
        v_prev.quantity_available::INTEGER,
        v_prev.price_rec::DECIMAL,
        v_prev.price_med::DECIMAL,
        v_prev.status::VARCHAR,
        (COALESCE(p_quantity, 0) - COALESCE(v_prev.quantity_available, 0))::INTEGER,
        (COALESCE(p_price_rec, 0) - COALESCE(v_prev.price_rec, 0))::DECIMAL,
        (CASE
            -- The zero-crossing cases must be tested before the generic
            -- decrease/increase cases; otherwise 'sale' / 'restock' always
            -- match first and 'oos' / 'back_in_stock' can never be returned.
            -- NOTE: a sell-out is therefore labelled 'oos' (qty_delta is still
            -- negative); consumers filtering on change_type = 'sale' alone
            -- will not see those rows.
            WHEN p_quantity = 0 AND v_prev.quantity_available > 0 THEN 'oos'
            WHEN p_quantity > 0 AND v_prev.quantity_available = 0 THEN 'back_in_stock'
            WHEN COALESCE(p_quantity, 0) < COALESCE(v_prev.quantity_available, 0) THEN 'sale'
            WHEN COALESCE(p_quantity, 0) > COALESCE(v_prev.quantity_available, 0) THEN 'restock'
            WHEN p_price_rec IS DISTINCT FROM v_prev.price_rec THEN 'price_change'
            ELSE 'status_change'
        END)::VARCHAR;
END;
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- ============================================================
|
||||
-- REVENUE CALCULATION COLUMNS
|
||||
-- ============================================================
|
||||
-- Effective prices (sale price if on special, otherwise regular)
|
||||
ALTER TABLE inventory_snapshots ADD COLUMN IF NOT EXISTS effective_price_rec DECIMAL(10,2);
|
||||
ALTER TABLE inventory_snapshots ADD COLUMN IF NOT EXISTS effective_price_med DECIMAL(10,2);
|
||||
ALTER TABLE inventory_snapshots ADD COLUMN IF NOT EXISTS is_on_special BOOLEAN DEFAULT FALSE;
|
||||
|
||||
-- Revenue by market type
|
||||
ALTER TABLE inventory_snapshots ADD COLUMN IF NOT EXISTS revenue_rec DECIMAL(10,2);
|
||||
ALTER TABLE inventory_snapshots ADD COLUMN IF NOT EXISTS revenue_med DECIMAL(10,2);
|
||||
|
||||
-- Time between snapshots (for velocity calc)
|
||||
ALTER TABLE inventory_snapshots ADD COLUMN IF NOT EXISTS time_since_last_snapshot INTERVAL;
|
||||
ALTER TABLE inventory_snapshots ADD COLUMN IF NOT EXISTS hours_since_last DECIMAL(10,2);
|
||||
|
||||
-- ============================================================
|
||||
-- VIEW: Hourly Sales Velocity
|
||||
-- ============================================================
|
||||
CREATE OR REPLACE VIEW v_hourly_sales AS
|
||||
SELECT
|
||||
dispensary_id,
|
||||
DATE(captured_at) as sale_date,
|
||||
EXTRACT(HOUR FROM captured_at) as sale_hour,
|
||||
COUNT(*) FILTER (WHERE qty_delta < 0) as transactions,
|
||||
SUM(ABS(qty_delta)) FILTER (WHERE qty_delta < 0) as units_sold,
|
||||
SUM(revenue_estimate) FILTER (WHERE qty_delta < 0) as revenue,
|
||||
COUNT(DISTINCT product_id) FILTER (WHERE qty_delta < 0) as unique_products_sold
|
||||
FROM inventory_snapshots
|
||||
WHERE change_type = 'sale'
|
||||
GROUP BY dispensary_id, DATE(captured_at), EXTRACT(HOUR FROM captured_at);
|
||||
|
||||
-- ============================================================
|
||||
-- VIEW: Daily Sales by Store
|
||||
-- ============================================================
|
||||
CREATE OR REPLACE VIEW v_daily_store_sales AS
|
||||
SELECT
|
||||
s.dispensary_id,
|
||||
d.name as store_name,
|
||||
d.state,
|
||||
DATE(s.captured_at) as sale_date,
|
||||
SUM(ABS(s.qty_delta)) as units_sold,
|
||||
SUM(s.revenue_estimate) as revenue,
|
||||
COUNT(*) as sale_events,
|
||||
COUNT(DISTINCT s.product_id) as unique_products
|
||||
FROM inventory_snapshots s
|
||||
JOIN dispensaries d ON d.id = s.dispensary_id
|
||||
WHERE s.change_type = 'sale'
|
||||
GROUP BY s.dispensary_id, d.name, d.state, DATE(s.captured_at);
|
||||
|
||||
-- ============================================================
|
||||
-- VIEW: Daily Sales by Brand
|
||||
-- ============================================================
|
||||
CREATE OR REPLACE VIEW v_daily_brand_sales AS
|
||||
SELECT
|
||||
s.brand_name,
|
||||
d.state,
|
||||
DATE(s.captured_at) as sale_date,
|
||||
SUM(ABS(s.qty_delta)) as units_sold,
|
||||
SUM(s.revenue_estimate) as revenue,
|
||||
COUNT(DISTINCT s.dispensary_id) as stores_with_sales,
|
||||
COUNT(DISTINCT s.product_id) as unique_skus_sold
|
||||
FROM inventory_snapshots s
|
||||
JOIN dispensaries d ON d.id = s.dispensary_id
|
||||
WHERE s.change_type = 'sale'
|
||||
AND s.brand_name IS NOT NULL
|
||||
GROUP BY s.brand_name, d.state, DATE(s.captured_at);
|
||||
|
||||
-- ============================================================
|
||||
-- VIEW: Product Velocity Rankings
|
||||
-- ============================================================
|
||||
CREATE OR REPLACE VIEW v_product_velocity AS
|
||||
SELECT
|
||||
s.product_id,
|
||||
s.brand_name,
|
||||
s.category,
|
||||
s.dispensary_id,
|
||||
d.name as store_name,
|
||||
d.state,
|
||||
SUM(ABS(s.qty_delta)) as units_sold_30d,
|
||||
SUM(s.revenue_estimate) as revenue_30d,
|
||||
COUNT(*) as sale_events,
|
||||
ROUND(SUM(ABS(s.qty_delta))::NUMERIC / NULLIF(COUNT(DISTINCT DATE(s.captured_at)), 0), 2) as avg_daily_units,
|
||||
ROUND(SUM(s.revenue_estimate) / NULLIF(COUNT(DISTINCT DATE(s.captured_at)), 0), 2) as avg_daily_revenue,
|
||||
CASE
|
||||
WHEN SUM(ABS(s.qty_delta)) / NULLIF(COUNT(DISTINCT DATE(s.captured_at)), 0) >= 10 THEN 'hot'
|
||||
WHEN SUM(ABS(s.qty_delta)) / NULLIF(COUNT(DISTINCT DATE(s.captured_at)), 0) >= 3 THEN 'steady'
|
||||
WHEN SUM(ABS(s.qty_delta)) / NULLIF(COUNT(DISTINCT DATE(s.captured_at)), 0) >= 1 THEN 'slow'
|
||||
ELSE 'stale'
|
||||
END as velocity_tier
|
||||
FROM inventory_snapshots s
|
||||
JOIN dispensaries d ON d.id = s.dispensary_id
|
||||
WHERE s.change_type = 'sale'
|
||||
AND s.captured_at >= NOW() - INTERVAL '30 days'
|
||||
GROUP BY s.product_id, s.brand_name, s.category, s.dispensary_id, d.name, d.state;
|
||||
|
||||
-- ============================================================
|
||||
-- VIEW: Busiest Hours by Store
|
||||
-- ============================================================
|
||||
CREATE OR REPLACE VIEW v_busiest_hours AS
|
||||
SELECT
|
||||
dispensary_id,
|
||||
sale_hour,
|
||||
AVG(units_sold) as avg_units_per_hour,
|
||||
AVG(revenue) as avg_revenue_per_hour,
|
||||
SUM(units_sold) as total_units,
|
||||
SUM(revenue) as total_revenue,
|
||||
COUNT(*) as days_with_data,
|
||||
RANK() OVER (PARTITION BY dispensary_id ORDER BY AVG(revenue) DESC) as hour_rank
|
||||
FROM v_hourly_sales
|
||||
GROUP BY dispensary_id, sale_hour;
|
||||
|
||||
-- ============================================================
|
||||
-- VIEW: Promotion Effectiveness (compare sale vs non-sale prices)
|
||||
-- ============================================================
|
||||
CREATE OR REPLACE VIEW v_promotion_effectiveness AS
|
||||
SELECT
|
||||
s.dispensary_id,
|
||||
d.name as store_name,
|
||||
s.product_id,
|
||||
s.brand_name,
|
||||
DATE(s.captured_at) as sale_date,
|
||||
SUM(ABS(s.qty_delta)) FILTER (WHERE s.price_rec < s.prev_price_rec) as units_on_discount,
|
||||
SUM(ABS(s.qty_delta)) FILTER (WHERE s.price_rec >= COALESCE(s.prev_price_rec, s.price_rec)) as units_full_price,
|
||||
SUM(s.revenue_estimate) FILTER (WHERE s.price_rec < s.prev_price_rec) as revenue_discounted,
|
||||
SUM(s.revenue_estimate) FILTER (WHERE s.price_rec >= COALESCE(s.prev_price_rec, s.price_rec)) as revenue_full_price
|
||||
FROM inventory_snapshots s
|
||||
JOIN dispensaries d ON d.id = s.dispensary_id
|
||||
WHERE s.change_type = 'sale'
|
||||
GROUP BY s.dispensary_id, d.name, s.product_id, s.brand_name, DATE(s.captured_at);
|
||||
|
||||
-- ============================================================
|
||||
-- COMMENTS
|
||||
-- ============================================================
|
||||
COMMENT ON COLUMN inventory_snapshots.qty_delta IS 'Quantity change: negative=sold, positive=restocked';
|
||||
COMMENT ON COLUMN inventory_snapshots.revenue_estimate IS 'Estimated revenue: ABS(qty_delta) * price_rec when qty_delta < 0';
|
||||
COMMENT ON COLUMN inventory_snapshots.change_type IS 'Type of change: sale, restock, price_change, oos, back_in_stock, new_product';
|
||||
COMMENT ON FUNCTION should_capture_snapshot IS 'Returns whether a snapshot should be captured and delta values';
|
||||
COMMENT ON VIEW v_hourly_sales IS 'Sales aggregated by hour - find busiest times';
|
||||
COMMENT ON VIEW v_daily_store_sales IS 'Daily revenue by store';
|
||||
COMMENT ON VIEW v_daily_brand_sales IS 'Daily brand performance by state';
|
||||
COMMENT ON VIEW v_product_velocity IS 'Product sales velocity rankings (hot/steady/slow/stale)';
|
||||
COMMENT ON VIEW v_busiest_hours IS 'Rank hours by sales volume per store';
|
||||
|
||||
-- ============================================================
|
||||
-- VIEW: Days Until Stock Out (Predictive)
|
||||
-- ============================================================
|
||||
CREATE OR REPLACE VIEW v_stock_out_prediction AS
|
||||
WITH velocity AS (
|
||||
SELECT
|
||||
dispensary_id,
|
||||
product_id,
|
||||
brand_name,
|
||||
-- Average units sold per day (last 7 days)
|
||||
ROUND(SUM(ABS(qty_delta))::NUMERIC / NULLIF(COUNT(DISTINCT DATE(captured_at)), 0), 2) as daily_velocity,
|
||||
-- Hours between sales
|
||||
AVG(hours_since_last) FILTER (WHERE qty_delta < 0) as avg_hours_between_sales
|
||||
FROM inventory_snapshots
|
||||
WHERE change_type = 'sale'
|
||||
AND captured_at >= NOW() - INTERVAL '7 days'
|
||||
GROUP BY dispensary_id, product_id, brand_name
|
||||
),
|
||||
current_stock AS (
|
||||
SELECT DISTINCT ON (dispensary_id, product_id)
|
||||
dispensary_id,
|
||||
product_id,
|
||||
quantity_available as current_qty,
|
||||
captured_at as last_seen
|
||||
FROM inventory_snapshots
|
||||
ORDER BY dispensary_id, product_id, captured_at DESC
|
||||
)
|
||||
SELECT
|
||||
cs.dispensary_id,
|
||||
d.name as store_name,
|
||||
cs.product_id,
|
||||
v.brand_name,
|
||||
cs.current_qty,
|
||||
v.daily_velocity,
|
||||
CASE
|
||||
WHEN v.daily_velocity > 0 THEN ROUND(cs.current_qty / v.daily_velocity, 1)
|
||||
ELSE NULL
|
||||
END as days_until_stock_out,
|
||||
CASE
|
||||
WHEN v.daily_velocity > 0 AND cs.current_qty / v.daily_velocity <= 3 THEN 'critical'
|
||||
WHEN v.daily_velocity > 0 AND cs.current_qty / v.daily_velocity <= 7 THEN 'low'
|
||||
WHEN v.daily_velocity > 0 AND cs.current_qty / v.daily_velocity <= 14 THEN 'moderate'
|
||||
ELSE 'healthy'
|
||||
END as stock_health,
|
||||
cs.last_seen
|
||||
FROM current_stock cs
|
||||
JOIN velocity v ON v.dispensary_id = cs.dispensary_id AND v.product_id = cs.product_id
|
||||
JOIN dispensaries d ON d.id = cs.dispensary_id
|
||||
WHERE cs.current_qty > 0
|
||||
AND v.daily_velocity > 0;
|
||||
|
||||
-- ============================================================
|
||||
-- VIEW: Days Since OOS (for products currently out of stock)
|
||||
-- ============================================================
|
||||
CREATE OR REPLACE VIEW v_days_since_oos AS
|
||||
SELECT
|
||||
s.dispensary_id,
|
||||
d.name as store_name,
|
||||
s.product_id,
|
||||
s.brand_name,
|
||||
s.captured_at as went_oos_at,
|
||||
EXTRACT(EPOCH FROM (NOW() - s.captured_at)) / 86400 as days_since_oos,
|
||||
s.prev_quantity as last_known_qty
|
||||
FROM inventory_snapshots s
|
||||
JOIN dispensaries d ON d.id = s.dispensary_id
|
||||
WHERE s.change_type = 'oos'
|
||||
AND NOT EXISTS (
|
||||
-- No back_in_stock event after this OOS
|
||||
SELECT 1 FROM inventory_snapshots s2
|
||||
WHERE s2.dispensary_id = s.dispensary_id
|
||||
AND s2.product_id = s.product_id
|
||||
AND s2.change_type = 'back_in_stock'
|
||||
AND s2.captured_at > s.captured_at
|
||||
);
|
||||
|
||||
-- ============================================================
|
||||
-- VIEW: Brand Variant Counts (track brand growth)
|
||||
-- ============================================================
|
||||
CREATE OR REPLACE VIEW v_brand_variants AS
|
||||
SELECT
|
||||
sp.brand_name_raw as brand_name,
|
||||
d.state,
|
||||
COUNT(DISTINCT sp.id) as total_variants,
|
||||
COUNT(DISTINCT sp.id) FILTER (WHERE sp.is_in_stock = TRUE) as active_variants,
|
||||
COUNT(DISTINCT sp.id) FILTER (WHERE sp.is_in_stock = FALSE) as inactive_variants,
|
||||
COUNT(DISTINCT sp.dispensary_id) as stores_carrying,
|
||||
COUNT(DISTINCT sp.category_raw) as categories,
|
||||
MIN(sp.first_seen_at) as brand_first_seen,
|
||||
MAX(sp.last_seen_at) as brand_last_seen
|
||||
FROM store_products sp
|
||||
JOIN dispensaries d ON d.id = sp.dispensary_id
|
||||
WHERE sp.brand_name_raw IS NOT NULL
|
||||
GROUP BY sp.brand_name_raw, d.state;
|
||||
|
||||
-- ============================================================
|
||||
-- VIEW: Brand Growth (compare variant counts over time)
|
||||
-- ============================================================
|
||||
CREATE OR REPLACE VIEW v_brand_growth AS
|
||||
WITH weekly_counts AS (
|
||||
SELECT
|
||||
brand_name_raw as brand_name,
|
||||
DATE_TRUNC('week', last_seen_at) as week,
|
||||
COUNT(DISTINCT id) as variant_count
|
||||
FROM store_products
|
||||
WHERE brand_name_raw IS NOT NULL
|
||||
AND last_seen_at >= NOW() - INTERVAL '90 days'
|
||||
GROUP BY brand_name_raw, DATE_TRUNC('week', last_seen_at)
|
||||
)
|
||||
SELECT
|
||||
w1.brand_name,
|
||||
w1.week as current_week,
|
||||
w1.variant_count as current_variants,
|
||||
w2.variant_count as prev_week_variants,
|
||||
w1.variant_count - COALESCE(w2.variant_count, 0) as variant_change,
|
||||
CASE
|
||||
WHEN w2.variant_count IS NULL THEN 'new'
|
||||
WHEN w1.variant_count > w2.variant_count THEN 'growing'
|
||||
WHEN w1.variant_count < w2.variant_count THEN 'declining'
|
||||
ELSE 'stable'
|
||||
END as growth_status
|
||||
FROM weekly_counts w1
|
||||
LEFT JOIN weekly_counts w2
|
||||
ON w2.brand_name = w1.brand_name
|
||||
AND w2.week = w1.week - INTERVAL '1 week'
|
||||
ORDER BY w1.brand_name, w1.week DESC;
|
||||
|
||||
COMMENT ON VIEW v_stock_out_prediction IS 'Predict days until stock out based on velocity';
|
||||
COMMENT ON VIEW v_days_since_oos IS 'Products currently OOS and how long they have been out';
|
||||
COMMENT ON VIEW v_brand_variants IS 'Active vs inactive SKU counts per brand per state';
|
||||
COMMENT ON VIEW v_brand_growth IS 'Week-over-week brand variant growth tracking';
|
||||
@@ -1,53 +0,0 @@
|
||||
-- Migration 126: Set AZ stores to 5-minute high-frequency crawls
|
||||
-- Other states default to 60-minute (1 hour) intervals
|
||||
|
||||
-- ============================================================
|
||||
-- SET AZ STORES TO 5-MINUTE INTERVALS (with 3-min jitter)
|
||||
-- ============================================================
|
||||
-- Base interval: 5 minutes
|
||||
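-- Jitter: +/- 3 minutes (so 2-8 minute effective range). NOTE(review): this migration only sets the base interval and a random initial next_crawl_at; the jitter is presumably applied by the scheduler - confirm.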
|
||||
UPDATE dispensaries
|
||||
SET
|
||||
crawl_interval_minutes = 5,
|
||||
next_crawl_at = NOW() + (RANDOM() * INTERVAL '5 minutes') -- Stagger initial crawls
|
||||
WHERE state = 'AZ'
|
||||
AND crawl_enabled = TRUE;
|
||||
|
||||
-- ============================================================
|
||||
-- SET OTHER STATES TO 60-MINUTE INTERVALS (with 3-min jitter)
|
||||
-- ============================================================
|
||||
UPDATE dispensaries
|
||||
SET
|
||||
crawl_interval_minutes = 60,
|
||||
next_crawl_at = NOW() + (RANDOM() * INTERVAL '60 minutes') -- Stagger initial crawls
|
||||
WHERE state != 'AZ'
|
||||
AND crawl_enabled = TRUE
|
||||
AND crawl_interval_minutes IS NULL;
|
||||
|
||||
-- ============================================================
|
||||
-- VERIFY RESULTS
|
||||
-- ============================================================
|
||||
-- SELECT state, crawl_interval_minutes, COUNT(*)
|
||||
-- FROM dispensaries
|
||||
-- WHERE crawl_enabled = TRUE
|
||||
-- GROUP BY state, crawl_interval_minutes
|
||||
-- ORDER BY state;
|
||||
|
||||
-- ============================================================
|
||||
-- CREATE VIEW FOR MONITORING CRAWL LOAD
|
||||
-- ============================================================
|
||||
CREATE OR REPLACE VIEW v_crawl_load AS
|
||||
SELECT
|
||||
state,
|
||||
crawl_interval_minutes,
|
||||
COUNT(*) as store_count,
|
||||
-- Crawls per hour = stores * (60 / interval)
|
||||
ROUND(COUNT(*) * (60.0 / COALESCE(crawl_interval_minutes, 60))) as crawls_per_hour,
|
||||
-- Assuming 30 sec per crawl, workers needed = crawls_per_hour / 120
|
||||
ROUND(COUNT(*) * (60.0 / COALESCE(crawl_interval_minutes, 60)) / 120, 1) as workers_needed
|
||||
FROM dispensaries
|
||||
WHERE crawl_enabled = TRUE
|
||||
GROUP BY state, crawl_interval_minutes
|
||||
ORDER BY crawls_per_hour DESC;
|
||||
|
||||
COMMENT ON VIEW v_crawl_load IS 'Monitor crawl load by state and interval';
|
||||
@@ -1,164 +0,0 @@
|
||||
-- Migration 127: Fix worker task concurrency limit
|
||||
-- Problem: claim_task function checks session_task_count but never increments it
|
||||
-- Solution: Increment on claim, decrement on complete/fail/release
|
||||
|
||||
-- =============================================================================
|
||||
-- STEP 1: Set max tasks to 5 for all workers
|
||||
-- =============================================================================
|
||||
-- Cap every existing worker at 5 tasks.
UPDATE worker_registry
   SET session_max_tasks = 5;

-- Workers registered from now on get the same cap by default.
ALTER TABLE worker_registry
    ALTER COLUMN session_max_tasks SET DEFAULT 5;

-- =============================================================================
-- STEP 2: Reset all session_task_count to match actual active tasks
-- =============================================================================
-- Recount from worker_tasks: a task is "active" while it is claimed or running.
-- Workers with no active task end up with a count of 0.
UPDATE worker_registry AS reg
   SET session_task_count = (
           SELECT COUNT(*)
             FROM worker_tasks AS task
            WHERE task.status IN ('claimed', 'running')
              AND task.worker_id = reg.worker_id
       );
|
||||
|
||||
-- =============================================================================
|
||||
-- STEP 3: Update claim_task function to increment session_task_count
|
||||
-- =============================================================================
|
||||
-- claim_task: atomically hand one pending task to a worker.
--
-- Parameters:
--   p_role         role of the task to claim (matched against worker_tasks.role)
--   p_worker_id    worker_registry.worker_id of the claiming worker
--   p_curl_passed  TRUE if the worker passed the 'curl' preflight
--   p_http_passed  TRUE if the worker passed the 'http' preflight
--
-- Returns the claimed worker_tasks row, or NULL when nothing was claimed:
--   * the worker already holds session_max_tasks tasks (default 5),
--   * the worker has no geo state or its geo session is older than 60 minutes,
--   * no eligible pending task exists.
--
-- On success the worker's session_task_count is incremented by one.
CREATE OR REPLACE FUNCTION claim_task(
    p_role VARCHAR(50),
    p_worker_id VARCHAR(100),
    p_curl_passed BOOLEAN DEFAULT TRUE,
    p_http_passed BOOLEAN DEFAULT FALSE
) RETURNS worker_tasks AS $$
DECLARE
    claimed_task worker_tasks;
    worker_state VARCHAR(2);
    session_valid BOOLEAN;
    session_tasks INT;
    max_tasks INT;
BEGIN
    -- Get worker's current geo session info
    SELECT
        current_state,
        session_task_count,
        session_max_tasks,
        (geo_session_started_at IS NOT NULL
            AND geo_session_started_at > NOW() - INTERVAL '60 minutes')
    INTO worker_state, session_tasks, max_tasks, session_valid
    FROM worker_registry
    WHERE worker_id = p_worker_id;

    -- Check if worker has reached max concurrent tasks (default 5)
    IF session_tasks >= COALESCE(max_tasks, 5) THEN
        RETURN NULL;
    END IF;

    -- If no valid geo session, or session expired, worker can't claim tasks.
    -- Worker must re-qualify first. An unknown worker_id also lands here
    -- because worker_state stays NULL when no registry row matched.
    IF worker_state IS NULL OR NOT session_valid THEN
        RETURN NULL;
    END IF;

    -- Claim task matching worker's state
    UPDATE worker_tasks
    SET
        status = 'claimed',
        worker_id = p_worker_id,
        claimed_at = NOW(),
        updated_at = NOW()
    WHERE id = (
        SELECT wt.id
        FROM worker_tasks wt
        JOIN dispensaries d ON wt.dispensary_id = d.id
        WHERE wt.role = p_role
          AND wt.status = 'pending'
          AND (wt.scheduled_for IS NULL OR wt.scheduled_for <= NOW())
          -- GEO FILTER: Task's dispensary must match worker's state
          AND d.state = worker_state
          -- Method compatibility: worker must have passed the required preflight
          AND (
              wt.method IS NULL  -- No preference, any worker can claim
              OR (wt.method = 'curl' AND p_curl_passed = TRUE)
              OR (wt.method = 'http' AND p_http_passed = TRUE)
          )
          -- Exclude stores that already have an active task.
          -- The previous NOT IN subquery filtered on
          -- "dispensary_id != wt.dispensary_id", so it could never contain the
          -- candidate's own store and the exclusion never took effect.
          AND NOT EXISTS (
              SELECT 1
              FROM worker_tasks active
              WHERE active.dispensary_id = wt.dispensary_id
                AND active.status IN ('claimed', 'running')
          )
        ORDER BY wt.priority DESC, wt.created_at ASC
        LIMIT 1
        -- Lock only the task row; a bare FOR UPDATE would also lock the joined
        -- dispensaries row and skip tasks whenever that row is locked elsewhere.
        FOR UPDATE OF wt SKIP LOCKED
    )
    RETURNING * INTO claimed_task;

    -- INCREMENT session_task_count if we claimed a task
    IF claimed_task.id IS NOT NULL THEN
        UPDATE worker_registry
        SET session_task_count = session_task_count + 1
        WHERE worker_id = p_worker_id;
    END IF;

    RETURN claimed_task;
END;
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- =============================================================================
|
||||
-- STEP 4: Create trigger to decrement on task completion/failure/release
|
||||
-- =============================================================================
|
||||
-- Row trigger body for UPDATEs on worker_tasks.
-- When a task that was held by a worker (status 'claimed' or 'running')
-- moves to 'pending', 'completed' or 'failed', or loses its worker_id,
-- the holding worker's session_task_count is lowered by one (never below 0).
CREATE OR REPLACE FUNCTION decrement_worker_task_count()
RETURNS TRIGGER AS $$
DECLARE
    was_held BOOLEAN;
    now_free BOOLEAN;
BEGIN
    was_held := OLD.worker_id IS NOT NULL
                AND OLD.status IN ('claimed', 'running');
    now_free := NEW.status IN ('pending', 'completed', 'failed')
                OR NEW.worker_id IS NULL;

    -- Both flags must be TRUE; a NULL in either leaves the count untouched.
    IF was_held AND now_free THEN
        UPDATE worker_registry
           SET session_task_count = GREATEST(0, session_task_count - 1)
         WHERE worker_id = OLD.worker_id;
    END IF;

    RETURN NEW;
END;
$$ LANGUAGE plpgsql;

-- Recreate the trigger from scratch so the migration can be re-run.
DROP TRIGGER IF EXISTS trg_decrement_worker_task_count ON worker_tasks;

-- Fires after every row UPDATE (status change or worker_id cleared).
CREATE TRIGGER trg_decrement_worker_task_count
    AFTER UPDATE ON worker_tasks
    FOR EACH ROW
    EXECUTE FUNCTION decrement_worker_task_count();
|
||||
|
||||
-- Also handle DELETE (completed tasks are deleted from pool)
|
||||
-- Row trigger body for DELETEs on worker_tasks.
-- Deleting a task that was still held by a worker (status 'claimed' or
-- 'running') lowers that worker's session_task_count by one (never below 0).
CREATE OR REPLACE FUNCTION decrement_worker_task_count_delete()
RETURNS TRIGGER AS $$
DECLARE
    was_held BOOLEAN;
BEGIN
    was_held := OLD.worker_id IS NOT NULL
                AND OLD.status IN ('claimed', 'running');

    IF was_held THEN
        UPDATE worker_registry
           SET session_task_count = GREATEST(0, session_task_count - 1)
         WHERE worker_id = OLD.worker_id;
    END IF;

    RETURN OLD;
END;
$$ LANGUAGE plpgsql;

-- Recreate the trigger from scratch so the migration can be re-run.
DROP TRIGGER IF EXISTS trg_decrement_worker_task_count_delete ON worker_tasks;

CREATE TRIGGER trg_decrement_worker_task_count_delete
    AFTER DELETE ON worker_tasks
    FOR EACH ROW
    EXECUTE FUNCTION decrement_worker_task_count_delete();
|
||||
|
||||
-- =============================================================================
|
||||
-- STEP 5: Verify current state
|
||||
-- =============================================================================
|
||||
-- For each active worker, show the stored counter next to the number of
-- tasks it actually holds (claimed or running) so mismatches are visible.
SELECT
    reg.worker_id,
    reg.friendly_name,
    reg.session_task_count,
    reg.session_max_tasks,
    (
        SELECT COUNT(*)
          FROM worker_tasks AS task
         WHERE task.status IN ('claimed', 'running')
           AND task.worker_id = reg.worker_id
    ) AS actual_count
FROM worker_registry AS reg
WHERE reg.status = 'active'
ORDER BY reg.friendly_name;
|
||||
@@ -1,109 +0,0 @@
|
||||
-- Migration 128: Pool configuration table
|
||||
-- Controls whether workers can claim tasks from the pool
|
||||
|
||||
-- pool_config: switch that says whether workers may claim tasks.
-- claim_task reads pool_open from this table; the closed_* columns record
-- why, when and by whom the pool was closed.
CREATE TABLE IF NOT EXISTS pool_config (
    id SERIAL PRIMARY KEY,
    pool_open BOOLEAN NOT NULL DEFAULT true,
    closed_reason TEXT,
    closed_at TIMESTAMPTZ,
    closed_by VARCHAR(100),
    opened_at TIMESTAMPTZ DEFAULT NOW(),
    created_at TIMESTAMPTZ DEFAULT NOW(),
    updated_at TIMESTAMPTZ DEFAULT NOW()
);

-- Insert default config (pool open), but only when the table is still empty.
-- "ON CONFLICT DO NOTHING" could not guard this: the only unique key is the
-- SERIAL id, which never collides, so every re-run of the migration would
-- have added another config row.
INSERT INTO pool_config (pool_open, opened_at)
SELECT true, NOW()
WHERE NOT EXISTS (SELECT 1 FROM pool_config);
|
||||
|
||||
-- Update claim_task function to check pool status
|
||||
-- claim_task: atomically hand one pending task to a worker, provided the
-- task pool is open.
--
-- Parameters:
--   p_role         role of the task to claim (matched against worker_tasks.role)
--   p_worker_id    worker_registry.worker_id of the claiming worker
--   p_curl_passed  TRUE if the worker passed the 'curl' preflight
--   p_http_passed  TRUE if the worker passed the 'http' preflight
--
-- Returns the claimed worker_tasks row, or NULL when nothing was claimed:
--   * pool_config says the pool is closed,
--   * the worker already holds session_max_tasks tasks (default 5),
--   * the worker has no geo state or its geo session is older than 60 minutes,
--   * no eligible pending task exists.
--
-- On success the worker's session_task_count is incremented by one.
CREATE OR REPLACE FUNCTION claim_task(
    p_role VARCHAR(50),
    p_worker_id VARCHAR(100),
    p_curl_passed BOOLEAN DEFAULT TRUE,
    p_http_passed BOOLEAN DEFAULT FALSE
) RETURNS worker_tasks AS $$
DECLARE
    claimed_task worker_tasks;
    worker_state VARCHAR(2);
    session_valid BOOLEAN;
    session_tasks INT;
    max_tasks INT;
    is_pool_open BOOLEAN;
BEGIN
    -- Check if pool is open. ORDER BY id makes the lookup deterministic
    -- should the table ever hold more than one row. A missing row counts
    -- as "open".
    SELECT pool_open INTO is_pool_open
    FROM pool_config
    ORDER BY id
    LIMIT 1;

    IF NOT COALESCE(is_pool_open, true) THEN
        RETURN NULL;  -- Pool is closed, no claiming allowed
    END IF;

    -- Get worker's current geo session info
    SELECT
        current_state,
        session_task_count,
        session_max_tasks,
        (geo_session_started_at IS NOT NULL
            AND geo_session_started_at > NOW() - INTERVAL '60 minutes')
    INTO worker_state, session_tasks, max_tasks, session_valid
    FROM worker_registry
    WHERE worker_id = p_worker_id;

    -- Check if worker has reached max concurrent tasks (default 5)
    IF session_tasks >= COALESCE(max_tasks, 5) THEN
        RETURN NULL;
    END IF;

    -- If no valid geo session, or session expired, worker can't claim tasks.
    -- Worker must re-qualify first. An unknown worker_id also lands here
    -- because worker_state stays NULL when no registry row matched.
    IF worker_state IS NULL OR NOT session_valid THEN
        RETURN NULL;
    END IF;

    -- Claim task matching worker's state
    UPDATE worker_tasks
    SET
        status = 'claimed',
        worker_id = p_worker_id,
        claimed_at = NOW(),
        updated_at = NOW()
    WHERE id = (
        SELECT wt.id
        FROM worker_tasks wt
        JOIN dispensaries d ON wt.dispensary_id = d.id
        WHERE wt.role = p_role
          AND wt.status = 'pending'
          AND (wt.scheduled_for IS NULL OR wt.scheduled_for <= NOW())
          -- GEO FILTER: Task's dispensary must match worker's state
          AND d.state = worker_state
          -- Method compatibility: worker must have passed the required preflight
          AND (
              wt.method IS NULL  -- No preference, any worker can claim
              OR (wt.method = 'curl' AND p_curl_passed = TRUE)
              OR (wt.method = 'http' AND p_http_passed = TRUE)
          )
          -- Exclude stores that already have an active task.
          -- The previous NOT IN subquery filtered on
          -- "dispensary_id != wt.dispensary_id", so it could never contain the
          -- candidate's own store and the exclusion never took effect.
          AND NOT EXISTS (
              SELECT 1
              FROM worker_tasks active
              WHERE active.dispensary_id = wt.dispensary_id
                AND active.status IN ('claimed', 'running')
          )
        ORDER BY wt.priority DESC, wt.created_at ASC
        LIMIT 1
        -- Lock only the task row; a bare FOR UPDATE would also lock the joined
        -- dispensaries row and skip tasks whenever that row is locked elsewhere.
        FOR UPDATE OF wt SKIP LOCKED
    )
    RETURNING * INTO claimed_task;

    -- INCREMENT session_task_count if we claimed a task
    IF claimed_task.id IS NOT NULL THEN
        UPDATE worker_registry
        SET session_task_count = session_task_count + 1
        WHERE worker_id = p_worker_id;
    END IF;

    RETURN claimed_task;
END;
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- Verify
|
||||
-- Confirmation message, followed by the current pool configuration.
SELECT 'pool_config table created' AS status;

SELECT *
  FROM pool_config;
|
||||
@@ -1,60 +0,0 @@
|
||||
-- Migration 129: Claim tasks for specific geo
|
||||
-- Used after worker gets IP to claim more tasks for same geo
|
||||
|
||||
-- Function: Claim up to N tasks for a SPECIFIC geo (state/city)
|
||||
-- Different from claim_tasks_batch which picks the geo with most tasks
|
||||
-- claim_tasks_batch_for_geo: claim up to p_max_tasks pending tasks whose
-- dispensary lies in one specific geo, and return them with store details.
--
-- Parameters:
--   p_worker_id   worker that receives the tasks
--   p_max_tasks   upper bound on the number of tasks claimed (default 4)
--   p_state_code  required; dispensaries.state the tasks must belong to
--   p_city        optional; when given, dispensaries.city must match too
--   p_role        optional; when given, only tasks of this role are claimed
--
-- Returns one row per claimed task. Highest priority first, oldest first
-- within a priority; rows locked by another transaction are skipped.
--
-- p_state_code carries DEFAULT NULL only because PostgreSQL requires every
-- parameter after a defaulted one (p_max_tasks) to have a default as well;
-- without it CREATE FUNCTION is rejected. Positional calls are unaffected,
-- and omitting the state raises an error instead of silently matching nothing.
--
-- NOTE(review): unlike claim_task this does not filter on scheduled_for and
-- does not touch worker_registry.session_task_count - confirm that is intended.
CREATE OR REPLACE FUNCTION claim_tasks_batch_for_geo(
    p_worker_id VARCHAR(255),
    p_max_tasks INTEGER DEFAULT 4,
    p_state_code VARCHAR(2) DEFAULT NULL,
    p_city VARCHAR(100) DEFAULT NULL,
    p_role VARCHAR(50) DEFAULT NULL
) RETURNS TABLE (
    task_id INTEGER,
    role VARCHAR(50),
    dispensary_id INTEGER,
    dispensary_name VARCHAR(255),
    city VARCHAR(100),
    state_code VARCHAR(2),
    platform VARCHAR(50),
    method VARCHAR(20)
) AS $$
BEGIN
    IF p_state_code IS NULL THEN
        RAISE EXCEPTION 'claim_tasks_batch_for_geo: p_state_code is required';
    END IF;

    -- Claim up to p_max_tasks for the specified geo
    RETURN QUERY
    WITH claimed AS (
        UPDATE worker_tasks t SET
            status = 'claimed',
            worker_id = p_worker_id,
            claimed_at = NOW(),
            updated_at = NOW()  -- kept in step with claim_task
        FROM (
            SELECT t2.id
            FROM worker_tasks t2
            JOIN dispensaries d ON t2.dispensary_id = d.id
            WHERE t2.status = 'pending'
              AND d.state = p_state_code
              AND (p_city IS NULL OR d.city = p_city)
              AND (p_role IS NULL OR t2.role = p_role)
            ORDER BY t2.priority DESC, t2.created_at ASC
            FOR UPDATE SKIP LOCKED
            LIMIT p_max_tasks
        ) sub
        WHERE t.id = sub.id
        RETURNING t.id, t.role, t.dispensary_id, t.method
    )
    SELECT
        c.id AS task_id,
        c.role,
        c.dispensary_id,
        d.name AS dispensary_name,
        d.city,
        d.state AS state_code,
        d.platform,
        c.method
    FROM claimed c
    JOIN dispensaries d ON c.dispensary_id = d.id;
END;
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- Verify
|
||||
-- Confirmation message for the migration log.
SELECT 'claim_tasks_batch_for_geo function created' AS status;
|
||||
@@ -1,53 +0,0 @@
|
||||
-- Migration 130: Worker qualification badge
|
||||
-- Session-scoped badge showing worker qualification status
|
||||
|
||||
-- Add badge column to worker_registry
|
||||
-- Adds the three qualification columns in a single ALTER TABLE:
--   badge               qualification badge (NULL when not qualified)
--   qualified_at        when the worker last qualified
--   current_session_id  links the worker to its active session
-- Each column is skipped if it already exists.
ALTER TABLE worker_registry
    ADD COLUMN IF NOT EXISTS badge VARCHAR(20) DEFAULT NULL,
    ADD COLUMN IF NOT EXISTS qualified_at TIMESTAMPTZ DEFAULT NULL,
    ADD COLUMN IF NOT EXISTS current_session_id INTEGER DEFAULT NULL;
|
||||
|
||||
-- Badge values:
|
||||
-- 'gold' = preflight passed, actively qualified with valid session
|
||||
-- NULL = not qualified (no active session or session expired)
|
||||
|
||||
-- Function: Set worker badge to gold when qualified
|
||||
-- set_worker_qualified: mark a worker as qualified.
-- Sets badge to 'gold', stamps qualified_at with the current time and stores
-- p_session_id as the worker's current session.
-- Returns TRUE when a worker_registry row matched p_worker_id, else FALSE.
CREATE OR REPLACE FUNCTION set_worker_qualified(
    p_worker_id VARCHAR(255),
    p_session_id INTEGER
) RETURNS BOOLEAN AS $$
DECLARE
    rows_touched INTEGER;
BEGIN
    UPDATE worker_registry
       SET current_session_id = p_session_id,
           qualified_at       = NOW(),
           badge              = 'gold'
     WHERE worker_id = p_worker_id;

    GET DIAGNOSTICS rows_touched = ROW_COUNT;
    RETURN rows_touched > 0;
END;
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- Function: Clear worker badge when session ends
|
||||
-- clear_worker_badge: remove a worker's qualification.
-- Resets badge, qualified_at and current_session_id to NULL.
-- Returns TRUE when a worker_registry row matched p_worker_id, else FALSE.
CREATE OR REPLACE FUNCTION clear_worker_badge(p_worker_id VARCHAR(255))
RETURNS BOOLEAN AS $$
DECLARE
    rows_touched INTEGER;
BEGIN
    UPDATE worker_registry
       SET current_session_id = NULL,
           qualified_at       = NULL,
           badge              = NULL
     WHERE worker_id = p_worker_id;

    GET DIAGNOSTICS rows_touched = ROW_COUNT;
    RETURN rows_touched > 0;
END;
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- Index for finding qualified workers
|
||||
-- Partial index: only rows that currently carry a badge are indexed.
CREATE INDEX IF NOT EXISTS idx_worker_registry_badge
    ON worker_registry (badge)
    WHERE badge IS NOT NULL;

-- Confirmation message for the migration log.
SELECT 'worker_registry badge column added' AS status;
|
||||
1784
backend/node_modules/.package-lock.json
generated
vendored
1784
backend/node_modules/.package-lock.json
generated
vendored
File diff suppressed because it is too large
Load Diff
1789
backend/package-lock.json
generated
1789
backend/package-lock.json
generated
File diff suppressed because it is too large
Load Diff
@@ -22,7 +22,6 @@
|
||||
"seed:dt:cities:bulk": "tsx src/scripts/seed-dt-cities-bulk.ts"
|
||||
},
|
||||
"dependencies": {
|
||||
"@aws-sdk/client-s3": "^3.953.0",
|
||||
"@kubernetes/client-node": "^1.4.0",
|
||||
"@types/bcryptjs": "^3.0.0",
|
||||
"axios": "^1.6.2",
|
||||
@@ -50,8 +49,6 @@
|
||||
"puppeteer-extra-plugin-stealth": "^2.11.2",
|
||||
"sharp": "^0.32.0",
|
||||
"socks-proxy-agent": "^8.0.2",
|
||||
"swagger-jsdoc": "^6.2.8",
|
||||
"swagger-ui-express": "^5.0.1",
|
||||
"user-agents": "^1.1.669",
|
||||
"uuid": "^9.0.1",
|
||||
"zod": "^3.22.4"
|
||||
@@ -64,8 +61,6 @@
|
||||
"@types/node": "^20.10.5",
|
||||
"@types/node-cron": "^3.0.11",
|
||||
"@types/pg": "^8.15.6",
|
||||
"@types/swagger-jsdoc": "^6.0.4",
|
||||
"@types/swagger-ui-express": "^4.1.8",
|
||||
"@types/uuid": "^9.0.7",
|
||||
"tsx": "^4.7.0",
|
||||
"typescript": "^5.3.3"
|
||||
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -1 +1 @@
|
||||
cannaiq-menus-2.3.0.zip
|
||||
cannaiq-menus-1.6.0.zip
|
||||
@@ -1,130 +0,0 @@
|
||||
/**
|
||||
* Count Jane stores - v2: Try Algolia store search
|
||||
* Usage: npx ts-node scripts/count-jane-stores-v2.ts
|
||||
*/
|
||||
|
||||
import puppeteer from 'puppeteer-extra';
|
||||
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
|
||||
|
||||
puppeteer.use(StealthPlugin());
|
||||
|
||||
const STATES = [
|
||||
'AZ', 'CA', 'CO', 'FL', 'IL', 'MA', 'MI', 'NV', 'NJ', 'NY', 'OH', 'PA', 'WA', 'OR'
|
||||
];
|
||||
|
||||
/**
 * Loads the Jane store directory in a headless browser, records every store
 * object seen in JSON responses from iheartjane.com (`stores` or Algolia-style
 * `hits` arrays, de-duplicated by `id`), and prints counts per state plus a
 * short Arizona listing.
 *
 * The browser is always closed, even when navigation or scraping throws.
 */
async function main() {
  console.log('Counting Jane stores by exploring state pages...\n');

  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  const allStores: Map<number, any> = new Map();

  try {
    const page = await browser.newPage();

    // Skip heavy assets; only documents, scripts and XHR are needed.
    await page.setRequestInterception(true);
    page.on('request', (req) => {
      const type = req.resourceType();
      if (['image', 'font', 'media', 'stylesheet'].includes(type)) {
        req.abort();
      } else {
        req.continue();
      }
    });

    page.on('response', async (response) => {
      const url = response.url();
      const contentType = response.headers()['content-type'] || '';
      if (url.includes('iheartjane.com') && contentType.includes('json')) {
        try {
          const json = await response.json();
          // Look for stores in any response
          if (json.stores && Array.isArray(json.stores)) {
            for (const s of json.stores) {
              if (s.id) allStores.set(s.id, s);
            }
          }
          // Also check hits (Algolia format)
          if (json.hits && Array.isArray(json.hits)) {
            for (const s of json.hits) {
              if (s.id) allStores.set(s.id, s);
            }
          }
        } catch {
          // Best effort: bodies that are unavailable or not valid JSON are skipped.
        }
      }
    });

    // First visit the main stores page
    console.log('Visiting main stores page...');
    await page.goto('https://www.iheartjane.com/stores', {
      waitUntil: 'networkidle0',
      timeout: 60000,
    });
    await new Promise(r => setTimeout(r, 3000));

    // Try to scroll to load more stores
    console.log('Scrolling to load more...');
    for (let i = 0; i < 5; i++) {
      await page.evaluate(() => window.scrollBy(0, 1000));
      await new Promise(r => setTimeout(r, 1000));
    }

    // Try clicking "Load More" if it exists.
    // `:has-text()` is not a CSS selector, so the previous combined selector
    // made page.$ throw and the button was never found. Match the button by
    // its text instead, then fall back to the class-name selector.
    try {
      let loadMore = null;
      for (const button of await page.$$('button')) {
        const label = await button.evaluate((el) => el.textContent || '');
        if (label.includes('Load More')) {
          loadMore = button;
          break;
        }
      }
      if (!loadMore) {
        loadMore = await page.$('[class*="load-more"]');
      }
      if (loadMore) {
        console.log('Clicking Load More...');
        await loadMore.click();
        await new Promise(r => setTimeout(r, 3000));
      }
    } catch {
      // Best effort: a missing or unclickable button must not abort the count.
    }

    // Extract stores from DOM as fallback
    const domStores = await page.evaluate(() => {
      const storeElements = document.querySelectorAll('[data-store-id], [class*="StoreCard"], [class*="store-card"]');
      return storeElements.length;
    });

    console.log(`\nStores from DOM elements: ${domStores}`);
  } finally {
    await browser.close();
  }

  // Count by state
  const byState: Record<string, number> = {};
  for (const store of allStores.values()) {
    const state = store.state || 'Unknown';
    byState[state] = (byState[state] || 0) + 1;
  }

  console.log('\n=== JANE STORE COUNTS ===\n');
  console.log(`Unique stores captured: ${allStores.size}`);

  if (allStores.size > 0) {
    console.log('\nBy State:');
    const sorted = Object.entries(byState).sort((a, b) => b[1] - a[1]);
    for (const [state, count] of sorted.slice(0, 20)) {
      console.log(` ${state}: ${count}`);
    }

    // Check Arizona specifically
    const azStores = Array.from(allStores.values()).filter(s =>
      s.state === 'Arizona' || s.state === 'AZ'
    );
    console.log(`\nArizona stores: ${azStores.length}`);
    if (azStores.length > 0) {
      console.log('AZ stores:');
      for (const s of azStores.slice(0, 10)) {
        console.log(` - ${s.name} (ID: ${s.id}) - ${s.city}`);
      }
    }
  }

  // Note about total
  console.log('\n--- Note ---');
  console.log('Jane uses server-side rendering. To get full store count,');
  console.log('you may need to check their public marketing materials or');
  console.log('iterate through known store IDs.');
}
|
||||
|
||||
main().catch(console.error);
|
||||
@@ -1,98 +0,0 @@
|
||||
/**
|
||||
* Count Jane stores by state
|
||||
* Usage: npx ts-node scripts/count-jane-stores.ts
|
||||
*/
|
||||
|
||||
import puppeteer from 'puppeteer-extra';
|
||||
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
|
||||
|
||||
puppeteer.use(StealthPlugin());
|
||||
|
||||
/**
 * Loads the Jane store directory in a headless browser, collects the `stores`
 * arrays found in iheartjane.com responses whose URL contains "stores", and
 * prints the total, a per-state breakdown and a sample of Arizona stores.
 *
 * Stores that carry an `id` are counted once even when several responses
 * repeat them; stores without an `id` are all kept. The browser is always
 * closed, even when navigation throws.
 */
async function main() {
  console.log('Counting Jane stores...\n');

  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  // Capture store data from API
  const captured: any[] = [];
  let pageStoreCount: number | null = null;

  try {
    const page = await browser.newPage();

    // Skip heavy assets; only documents, scripts and XHR are needed.
    await page.setRequestInterception(true);
    page.on('request', (req) => {
      const type = req.resourceType();
      if (['image', 'font', 'media', 'stylesheet'].includes(type)) {
        req.abort();
      } else {
        req.continue();
      }
    });

    page.on('response', async (response) => {
      const url = response.url();
      if (url.includes('iheartjane.com') && url.includes('stores')) {
        try {
          const json = await response.json();
          if (json.stores && Array.isArray(json.stores)) {
            captured.push(...json.stores);
          }
        } catch {
          // Best effort: bodies that are unavailable or not valid JSON are skipped.
        }
      }
    });

    // Visit the store directory
    console.log('Loading Jane store directory...');
    await page.goto('https://www.iheartjane.com/stores', {
      waitUntil: 'networkidle2',
      timeout: 60000,
    });

    // Wait for stores to load
    await new Promise(r => setTimeout(r, 5000));

    // Also try to get store count from page content
    pageStoreCount = await page.evaluate(() => {
      // Look for store count in page text
      const text = document.body.innerText;
      const match = text.match(/(\d+)\s*stores?/i);
      // Explicit radix so the digits are always read as decimal.
      return match ? parseInt(match[1], 10) : null;
    });
  } finally {
    await browser.close();
  }

  // The same endpoint can answer more than once; keep one entry per store id
  // so repeated responses do not inflate the totals.
  const seenIds = new Set<unknown>();
  const stores = captured.filter((s) => {
    if (s == null || s.id == null) return true;
    if (seenIds.has(s.id)) return false;
    seenIds.add(s.id);
    return true;
  });

  // Count by state
  const byState: Record<string, number> = {};
  for (const store of stores) {
    const state = (store && store.state) || 'Unknown';
    byState[state] = (byState[state] || 0) + 1;
  }

  console.log('\n=== JANE STORE COUNTS ===\n');
  console.log(`Total stores captured from API: ${stores.length}`);
  if (pageStoreCount) {
    console.log(`Page claims: ${pageStoreCount} stores`);
  }

  console.log('\nBy State:');
  const sorted = Object.entries(byState).sort((a, b) => b[1] - a[1]);
  for (const [state, count] of sorted) {
    console.log(` ${state}: ${count}`);
  }

  // Check Arizona specifically
  const azStores = stores.filter(s =>
    s != null && (s.state === 'Arizona' || s.state === 'AZ')
  );
  console.log(`\nArizona stores: ${azStores.length}`);
  if (azStores.length > 0) {
    console.log('Sample AZ stores:');
    for (const s of azStores.slice(0, 5)) {
      console.log(` - ${s.name} (ID: ${s.id}) - ${s.city}`);
    }
  }
}
|
||||
|
||||
main().catch(console.error);
|
||||
@@ -1,184 +0,0 @@
|
||||
/**
|
||||
* Explore all Treez page URLs to find the full product catalog
|
||||
*/
|
||||
|
||||
import puppeteer, { Page } from 'puppeteer';
|
||||
|
||||
const STORE_ID = 'best';
|
||||
|
||||
/**
 * Pauses the caller for roughly `ms` milliseconds.
 *
 * @param ms delay handed to `setTimeout`
 * @returns a promise that resolves (with no value) once the timer fires
 */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((wake) => {
    setTimeout(wake, ms);
  });
}
|
||||
|
||||
/**
 * Dismisses the age-gate modal when the page shows one.
 *
 * Does nothing if no `[data-testid="age-gate-modal"]` element exists.
 * Otherwise logs, clicks the submit button if it is present, and waits
 * 2 seconds (whether or not the button was found).
 */
async function bypassAgeGate(page: Page): Promise<void> {
  const modal = await page.$('[data-testid="age-gate-modal"]');
  if (!modal) {
    return;
  }

  console.log(' Age gate detected, bypassing...');
  const submit = await page.$('[data-testid="age-gate-submit-button"]');
  if (submit) {
    await submit.click();
  }
  await sleep(2000);
}
|
||||
|
||||
/**
 * Counts the elements currently in the DOM whose class attribute contains
 * `product_product__`.
 *
 * @returns the number of matching elements at the time of the call
 */
async function countProducts(page: Page): Promise<number> {
  const matches = await page.evaluate(() => {
    const cards = document.querySelectorAll('[class*="product_product__"]');
    return cards.length;
  });
  return matches;
}
|
||||
|
||||
/**
 * Scrolls to the bottom of the page repeatedly, then counts products.
 *
 * Scrolling stops after `maxScrolls` scrolls, or as soon as the document
 * height has matched the previous reading three times in a row. Each scroll
 * is followed by a 1.5 second pause.
 *
 * @param maxScrolls upper bound on the number of scrolls (default 30)
 * @returns the product count after scrolling and the number of scrolls done
 */
async function scrollAndCount(page: Page, maxScrolls: number = 30): Promise<{ products: number; scrolls: number }> {
  let lastHeight = 0;
  let unchangedReads = 0;
  let scrolls = 0;

  for (; scrolls < maxScrolls; scrolls++) {
    const height = await page.evaluate(() => document.body.scrollHeight);

    unchangedReads = height === lastHeight ? unchangedReads + 1 : 0;
    if (unchangedReads >= 3) {
      // Leaving via break skips the loop increment, so this pass is not counted.
      break;
    }

    await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
    await sleep(1500);

    lastHeight = height;
  }

  return { products: await countProducts(page), scrolls };
}
|
||||
|
||||
/**
 * Opens one menu path on the store's Treez site and reports how many
 * products it exposes.
 *
 * If products are visible after load, the page is scrolled and the final
 * count returned. If none are visible, the number of brand/category/card
 * elements is logged (an element matching several selectors is counted once
 * per selector) and the zero product count is returned. Any thrown error is
 * logged and reported through the `error` field instead of propagating.
 *
 * @param path path and query appended to `https://<STORE_ID>.treez.io`
 */
async function testUrl(page: Page, path: string): Promise<{ products: number; scrolls: number; error?: string }> {
  const target = `https://${STORE_ID}.treez.io${path}`;
  console.log(`\nTesting: ${target}`);

  try {
    await page.goto(target, { waitUntil: 'networkidle2', timeout: 30000 });
    await sleep(2000);
    await bypassAgeGate(page);
    await sleep(1000);

    const visibleAtLoad = await countProducts(page);
    console.log(` Initial products: ${visibleAtLoad}`);

    if (visibleAtLoad <= 0) {
      // Check for brand/category cards instead
      const cardTotal = await page.evaluate(() => {
        const cardSelectors = [
          '[class*="brand"]',
          '[class*="Brand"]',
          '[class*="category"]',
          '[class*="Category"]',
          '[class*="card"]',
          'a[href*="/brand/"]',
          'a[href*="/category/"]',
        ];
        let total = 0;
        for (const selector of cardSelectors) {
          total += document.querySelectorAll(selector).length;
        }
        return total;
      });
      console.log(` Cards/links found: ${cardTotal}`);

      return { products: visibleAtLoad, scrolls: 0 };
    }

    const scrolled = await scrollAndCount(page);
    console.log(` After scroll: ${scrolled.products} products (${scrolled.scrolls} scrolls)`);
    return scrolled;
  } catch (error: any) {
    console.log(` Error: ${error.message}`);
    return { products: 0, scrolls: 0, error: error.message };
  }
}
|
||||
|
||||
/**
 * Probes a fixed list of candidate menu URLs on the store's Treez site,
 * lists the `/onlinemenu/` navigation links found on the homepage, and
 * prints the probed paths sorted by product count.
 *
 * The browser is always closed, even when the homepage navigation throws
 * (errors inside `testUrl` are already handled there).
 */
async function main() {
  console.log('='.repeat(60));
  console.log('Exploring Treez Page URLs');
  console.log('='.repeat(60));

  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // Block images to speed up
    await page.setRequestInterception(true);
    page.on('request', (req) => {
      if (['image', 'font', 'media', 'stylesheet'].includes(req.resourceType())) {
        req.abort();
      } else {
        req.continue();
      }
    });

    const urlsToTest = [
      '/onlinemenu/?customerType=ADULT', // Homepage
      '/onlinemenu/brands?customerType=ADULT', // Brands page
      '/onlinemenu/shop?customerType=ADULT', // Shop page?
      '/onlinemenu/products?customerType=ADULT', // Products page?
      '/onlinemenu/menu?customerType=ADULT', // Menu page?
      '/onlinemenu/all?customerType=ADULT', // All products?
      '/onlinemenu/flower?customerType=ADULT', // Flower category
      '/onlinemenu/vapes?customerType=ADULT', // Vapes category
      '/onlinemenu/edibles?customerType=ADULT', // Edibles category
      '/onlinemenu/concentrates?customerType=ADULT', // Concentrates category
    ];

    const results: { path: string; products: number; scrolls: number }[] = [];

    // Sequential on purpose: every probe reuses the same page.
    for (const path of urlsToTest) {
      const result = await testUrl(page, path);
      results.push({ path, ...result });
    }

    // Look for navigation links on the main page
    console.log('\n' + '='.repeat(60));
    console.log('Checking navigation structure on homepage...');
    console.log('='.repeat(60));

    await page.goto(`https://${STORE_ID}.treez.io/onlinemenu/?customerType=ADULT`, {
      waitUntil: 'networkidle2',
      timeout: 30000,
    });
    await sleep(2000);
    await bypassAgeGate(page);
    await sleep(1000);

    const navLinks = await page.evaluate(() => {
      const links: { text: string; href: string }[] = [];
      document.querySelectorAll('a[href*="/onlinemenu/"]').forEach(el => {
        const text = el.textContent?.trim() || '';
        const href = el.getAttribute('href') || '';
        // Keep the first non-empty label seen for each distinct href.
        if (text && !links.some(l => l.href === href)) {
          links.push({ text: text.slice(0, 50), href });
        }
      });
      return links;
    });

    console.log('\nNavigation links found:');
    navLinks.forEach(l => console.log(` "${l.text}" → ${l.href}`));

    // Summary
    console.log('\n' + '='.repeat(60));
    console.log('Summary');
    console.log('='.repeat(60));

    results.sort((a, b) => b.products - a.products);
    results.forEach(r => {
      console.log(`${r.products.toString().padStart(4)} products | ${r.path}`);
    });
  } finally {
    await browser.close();
  }
}
|
||||
|
||||
main().catch(console.error);
|
||||
@@ -1,247 +0,0 @@
|
||||
/**
|
||||
* Explore Treez site structure to find full product catalog
|
||||
*
|
||||
* Usage: npx ts-node scripts/explore-treez-structure.ts
|
||||
*/
|
||||
|
||||
import puppeteer from 'puppeteer';
|
||||
|
||||
const STORE_ID = 'best';
|
||||
|
||||
/**
 * Pauses the caller for roughly `ms` milliseconds.
 *
 * @param ms delay handed to `setTimeout`
 * @returns a promise that resolves (with no value) once the timer fires
 */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((wake) => {
    setTimeout(wake, ms);
  });
}
|
||||
|
||||
/**
 * Explore the Treez online menu for `STORE_ID` and print what it finds:
 * navigation links, category-like elements, the effect of clicking a
 * "Flower"/"All"-style link, infinite-scroll behaviour, and the product count
 * for a handful of guessed URL patterns. A full-page screenshot of the final
 * state is written to /tmp/treez-explore.png.
 *
 * Errors are logged rather than rethrown, and the browser is always closed.
 */
async function main() {
  console.log('='.repeat(60));
  console.log('Exploring Treez Site Structure');
  console.log('='.repeat(60));

  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  // Turn whatever was thrown into a printable message.
  const describeError = (error: unknown): string =>
    error instanceof Error ? error.message : String(error);

  try {
    // Created inside the try so a failure here still reaches browser.close().
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // Number of product cards currently in the DOM (matched by class fragment).
    const countProducts = (): Promise<number> =>
      page.evaluate(() => document.querySelectorAll('[class*="product_product__"]').length);

    // Click through the age gate when its modal is present.
    const bypassAgeGate = async (notice?: string): Promise<void> => {
      const gate = await page.$('[data-testid="age-gate-modal"]');
      if (!gate) return;
      if (notice) console.log(notice);
      const btn = await page.$('[data-testid="age-gate-submit-button"]');
      if (btn) await btn.click();
      await sleep(2000);
    };

    // Navigate to base menu URL
    const baseUrl = `https://${STORE_ID}.treez.io/onlinemenu/?customerType=ADULT`;
    console.log(`\n[1] Navigating to: ${baseUrl}`);
    await page.goto(baseUrl, { waitUntil: 'networkidle2', timeout: 60000 });
    await sleep(3000);

    await bypassAgeGate('[1] Age gate detected, bypassing...');

    // Get all navigation links
    console.log('\n[2] Extracting navigation structure...');
    const navInfo = await page.evaluate(() => {
      const links: { text: string; href: string }[] = [];

      // Anchor-based navigation, de-duplicated by href
      document.querySelectorAll('nav a, [class*="nav"] a, [class*="menu"] a, header a').forEach(el => {
        const text = el.textContent?.trim() || '';
        const href = el.getAttribute('href') || '';
        if (text && href && !links.some(l => l.href === href)) {
          links.push({ text, href });
        }
      });

      // Category tabs/buttons, de-duplicated by label; elements without a
      // href are reported with their class name so they can be clicked later
      document.querySelectorAll('[class*="category"], [class*="tab"], [role="tab"]').forEach(el => {
        const text = el.textContent?.trim() || '';
        const href = el.getAttribute('href') || el.getAttribute('data-href') || '';
        if (text && !links.some(l => l.text === text)) {
          links.push({ text, href: href || `(click: ${el.className})` });
        }
      });

      const currentUrl = window.location.href;
      const productCount = document.querySelectorAll('[class*="product_product__"]').length;

      return { links, currentUrl, productCount };
    });

    console.log(`Current URL: ${navInfo.currentUrl}`);
    console.log(`Products on homepage: ${navInfo.productCount}`);
    console.log('\nNavigation links found:');
    navInfo.links.forEach(l => {
      console.log(` "${l.text}" → ${l.href}`);
    });

    // Look for category buttons/tabs specifically
    console.log('\n[3] Looking for category navigation...');
    const categories = await page.evaluate(() => {
      const cats: { text: string; className: string; tagName: string }[] = [];

      // Anything that might be a clickable category control
      const selectors = [
        '[class*="CategoryNav"]',
        '[class*="category"]',
        '[class*="Category"]',
        '[class*="nav"] button',
        '[class*="tab"]',
        '[role="tablist"] *',
        '.MuiTab-root',
        '[class*="filter"]',
      ];

      selectors.forEach(sel => {
        document.querySelectorAll(sel).forEach(el => {
          const text = el.textContent?.trim() || '';
          // Short labels only; long text is a container, not a control
          if (text && text.length < 50 && !cats.some(c => c.text === text)) {
            cats.push({
              text,
              className: el.className?.toString().slice(0, 80) || '',
              tagName: el.tagName,
            });
          }
        });
      });

      return cats;
    });

    console.log('Category-like elements:');
    categories.forEach(c => {
      console.log(` [${c.tagName}] "${c.text}" (class: ${c.className})`);
    });

    // Try clicking on "Flower" or "All" if found
    console.log('\n[4] Looking for "Flower" or "All Products" link...');
    const clickTargets = ['Flower', 'All', 'All Products', 'Shop All', 'View All'];

    for (const target of clickTargets) {
      const element = await page.evaluate((targetText) => {
        const els = Array.from(document.querySelectorAll('a, button, [role="tab"], [class*="category"]'));
        const match = els.find(el =>
          el.textContent?.trim().toLowerCase() === targetText.toLowerCase()
        );
        if (match) {
          return {
            found: true,
            text: match.textContent?.trim(),
            tag: match.tagName,
          };
        }
        return { found: false };
      }, target);

      if (!element.found) continue;

      console.log(`Found "${element.text}" (${element.tag}), clicking...`);

      // Looked up again in the page because DOM nodes cannot be returned
      // from the previous evaluate call.
      await page.evaluate((targetText) => {
        const els = Array.from(document.querySelectorAll('a, button, [role="tab"], [class*="category"]'));
        const match = els.find(el =>
          el.textContent?.trim().toLowerCase() === targetText.toLowerCase()
        );
        if (match) (match as HTMLElement).click();
      }, target);

      await sleep(3000);

      const newUrl = page.url();
      const newCount = await countProducts();

      console.log(` New URL: ${newUrl}`);
      console.log(` Products after click: ${newCount}`);

      if (newCount > navInfo.productCount) {
        console.log(` ✓ Found more products! (${navInfo.productCount} → ${newCount})`);
      }

      // Only the first matching target is exercised.
      break;
    }

    // Check page height and scroll behavior
    console.log('\n[5] Checking scroll behavior on current page...');
    let previousHeight = 0;
    let scrollCount = 0;
    let previousProductCount = await countProducts();

    while (scrollCount < 10) {
      const currentHeight = await page.evaluate(() => document.body.scrollHeight);

      if (currentHeight === previousHeight) {
        console.log(` Scroll ${scrollCount + 1}: No height change, stopping`);
        break;
      }

      await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
      await sleep(1500);

      const currentProductCount = await countProducts();

      console.log(` Scroll ${scrollCount + 1}: height=${currentHeight}, products=${currentProductCount}`);

      // Give lazy loading a few scrolls before concluding nothing more arrives
      if (currentProductCount === previousProductCount && scrollCount > 2) {
        console.log(' No new products loading, stopping');
        break;
      }

      previousHeight = currentHeight;
      previousProductCount = currentProductCount;
      scrollCount++;
    }

    // Try direct URL patterns
    console.log('\n[6] Testing URL patterns...');
    const urlPatterns = [
      '/onlinemenu/flower?customerType=ADULT',
      '/onlinemenu/all?customerType=ADULT',
      '/onlinemenu?category=flower&customerType=ADULT',
      '/onlinemenu?view=all&customerType=ADULT',
    ];

    for (const pattern of urlPatterns) {
      const testUrl = `https://${STORE_ID}.treez.io${pattern}`;
      console.log(`\nTrying: ${testUrl}`);

      // A timeout or 404 on one guess must not abort the remaining guesses
      // or the final screenshot.
      try {
        await page.goto(testUrl, { waitUntil: 'networkidle2', timeout: 30000 });
        await sleep(2000);

        // Bypass age gate again if needed
        await bypassAgeGate();

        const productCount = await countProducts();
        console.log(` Products found: ${productCount}`);
      } catch (error: unknown) {
        console.log(` Failed: ${describeError(error)}`);
      }
    }

    // Screenshot the final state
    await page.screenshot({ path: '/tmp/treez-explore.png', fullPage: true });
    console.log('\n[7] Screenshot saved to /tmp/treez-explore.png');

  } catch (error: unknown) {
    console.error('Error:', describeError(error));
  } finally {
    await browser.close();
  }
}
|
||||
|
||||
main().catch(console.error);
|
||||
@@ -1,138 +0,0 @@
|
||||
/**
|
||||
* Run Jane product discovery for stores in database
|
||||
* Usage: npx ts-node scripts/run-jane-product-discovery.ts [DISPENSARY_ID]
|
||||
* Example: npx ts-node scripts/run-jane-product-discovery.ts 4220
|
||||
* Or run for all Jane stores: npx ts-node scripts/run-jane-product-discovery.ts all
|
||||
*/
|
||||
|
||||
import { Pool } from 'pg';
|
||||
import { fetchProductsByStoreIdDirect } from '../src/platforms/jane';
|
||||
import { saveRawPayload } from '../src/utils/payload-storage';
|
||||
|
||||
/**
 * Fetch the current product list for one or more Jane dispensaries, store the
 * raw payload, and mark each dispensary as `hydrating`.
 *
 * CLI argument (process.argv[2]):
 *   - `all`     -> every dispensary with platform = 'jane' and a menu_url
 *   - `<id>`    -> the dispensary with that numeric database id
 *   - (omitted) -> the first Jane dispensary by id
 *
 * Per-store failures are counted and reported; a fatal error sets a non-zero
 * exit code. The connection pool is always closed before the process ends.
 */
async function main() {
  const arg = process.argv[2];

  console.log('='.repeat(60));
  console.log('Jane Product Discovery');
  console.log('='.repeat(60));

  const pool = new Pool({
    connectionString: process.env.DATABASE_URL,
  });

  try {
    // Get dispensaries to process
    let dispensaries: any[];

    if (arg === 'all') {
      const result = await pool.query(
        `SELECT id, name, menu_url, platform_dispensary_id
         FROM dispensaries
         WHERE platform = 'jane' AND menu_url IS NOT NULL
         ORDER BY id`
      );
      dispensaries = result.rows;
    } else if (arg) {
      // Reject anything that is not a plain integer instead of letting
      // parseInt turn "foo" into NaN or "12abc" into 12.
      if (!/^\d+$/.test(arg)) {
        throw new Error(`Invalid dispensary ID "${arg}" (expected a number or "all")`);
      }
      const result = await pool.query(
        `SELECT id, name, menu_url, platform_dispensary_id
         FROM dispensaries
         WHERE id = $1`,
        [Number.parseInt(arg, 10)]
      );
      dispensaries = result.rows;
    } else {
      // Default: get first Jane store
      const result = await pool.query(
        `SELECT id, name, menu_url, platform_dispensary_id
         FROM dispensaries
         WHERE platform = 'jane' AND menu_url IS NOT NULL
         ORDER BY id LIMIT 1`
      );
      dispensaries = result.rows;
    }

    if (dispensaries.length === 0) {
      console.log('No Jane dispensaries found');
      return;
    }

    console.log(`Processing ${dispensaries.length} dispensary(ies)...\n`);

    let successCount = 0;
    let failCount = 0;

    for (const disp of dispensaries) {
      console.log(`\n${'─'.repeat(60)}`);
      console.log(`${disp.name} (ID: ${disp.id}, Jane ID: ${disp.platform_dispensary_id})`);
      console.log('─'.repeat(60));

      // None of the queries above require a platform id, so guard here
      // rather than calling the fetcher with null.
      if (disp.platform_dispensary_id == null) {
        console.log(' ✗ Error: dispensary has no platform_dispensary_id');
        failCount++;
        continue;
      }

      try {
        const result = await fetchProductsByStoreIdDirect(disp.platform_dispensary_id);

        if (result.products.length === 0) {
          console.log(' ✗ No products captured');
          failCount++;
          continue;
        }

        console.log(` ✓ Captured ${result.products.length} products`);

        // Build payload
        const rawPayload = {
          hits: result.products.map(p => p.raw),
          store: result.store?.raw || null,
          capturedAt: new Date().toISOString(),
          platform: 'jane',
          dispensaryId: disp.id,
          storeId: disp.platform_dispensary_id,
        };

        // Save payload
        const { id: payloadId, sizeBytes } = await saveRawPayload(
          pool,
          disp.id,
          rawPayload,
          null,
          result.products.length,
          'jane'
        );

        console.log(` ✓ Saved payload ${payloadId} (${Math.round(sizeBytes / 1024)}KB)`);

        // Update dispensary
        await pool.query(
          `UPDATE dispensaries
           SET stage = 'hydrating',
               last_fetch_at = NOW(),
               product_count = $2,
               consecutive_successes = consecutive_successes + 1,
               consecutive_failures = 0,
               updated_at = NOW()
           WHERE id = $1`,
          [disp.id, result.products.length]
        );

        console.log(` ✓ Updated dispensary (product_count: ${result.products.length})`);
        successCount++;

      } catch (error: unknown) {
        const message = error instanceof Error ? error.message : String(error);
        console.log(` ✗ Error: ${message}`);
        failCount++;
      }
    }

    console.log('\n' + '='.repeat(60));
    console.log('RESULTS');
    console.log('='.repeat(60));
    console.log(`Success: ${successCount}`);
    console.log(`Failed: ${failCount}`);

  } catch (error: unknown) {
    console.error('Error:', error instanceof Error ? error.message : String(error));
    // exitCode rather than process.exit(): exit() would terminate before the
    // finally block below gets to close the pool.
    process.exitCode = 1;
  } finally {
    await pool.end();
  }
}
|
||||
|
||||
main();
|
||||
@@ -1,137 +0,0 @@
|
||||
/**
|
||||
* Run Jane store discovery and insert into database
|
||||
* Usage: npx ts-node scripts/run-jane-store-discovery.ts [STATE_CODE]
|
||||
* Example: npx ts-node scripts/run-jane-store-discovery.ts AZ
|
||||
*/
|
||||
|
||||
import { Pool } from 'pg';
|
||||
import { discoverStoresByState } from '../src/platforms/jane';
|
||||
|
||||
/**
 * Turn a store name into a URL-friendly slug.
 *
 * The name is lower-cased, stripped of parentheses and any character that is
 * not a-z, 0-9, whitespace or a hyphen; whitespace runs become a single
 * hyphen, repeated hyphens are collapsed, and one leading/trailing hyphen is
 * dropped.
 *
 * Example: "Hana Meds - Phoenix (REC)" -> "hana-meds-phoenix-rec"
 *
 * @param name - Human-readable store name.
 * @returns The slug (empty when nothing usable remains).
 */
function generateSlug(name: string): string {
  const lowered = name.toLowerCase();
  const withoutParens = lowered.replace(/[()]/g, '');
  const alphanumericOnly = withoutParens.replace(/[^a-z0-9\s-]/g, '');
  const hyphenated = alphanumericOnly.replace(/\s+/g, '-');
  const collapsed = hyphenated.replace(/-+/g, '-');
  return collapsed.replace(/^-|-$/g, '');
}
|
||||
|
||||
/**
 * Discover Jane stores for one state and upsert them into `dispensaries`.
 *
 * CLI argument (process.argv[2]): state code, defaulting to 'AZ'.
 *
 * Each store is inserted with stage 'discovered'; when a row with the same
 * platform_dispensary_id already exists its descriptive columns are refreshed
 * instead. Per-store failures are logged and counted; any failure (per-store
 * or fatal) results in a non-zero exit code. The pool is always closed.
 */
async function main() {
  const stateCode = process.argv[2] || 'AZ';

  console.log('='.repeat(60));
  console.log(`Jane Store Discovery - ${stateCode}`);
  console.log('='.repeat(60));

  // Connect to database
  const pool = new Pool({
    connectionString: process.env.DATABASE_URL,
  });

  try {
    // Doubles as a connectivity check before the slower discovery step
    const testResult = await pool.query('SELECT COUNT(*) FROM dispensaries WHERE platform = $1', ['jane']);
    console.log(`Current Jane stores in DB: ${testResult.rows[0].count}`);

    // Discover stores
    console.log(`\nDiscovering Jane stores in ${stateCode}...`);
    const stores = await discoverStoresByState(stateCode);

    if (stores.length === 0) {
      console.log(`No stores found in ${stateCode}`);
      return;
    }

    console.log(`\nFound ${stores.length} stores. Inserting into database...`);

    // Insert stores
    let inserted = 0;
    let updated = 0;
    let failed = 0;
    const newIds: number[] = [];

    for (const store of stores) {
      const menuUrl = `https://www.iheartjane.com/stores/${store.storeId}/${store.urlSlug || 'menu'}`;
      const slug = generateSlug(store.name);

      try {
        // `xmax = 0` is true only for a freshly inserted row, which lets a
        // single statement report whether it inserted or updated.
        const result = await pool.query(
          `INSERT INTO dispensaries (
             name, slug, address1, city, state, zipcode,
             latitude, longitude, menu_url, menu_type, platform,
             platform_dispensary_id, is_medical, is_recreational,
             stage, created_at, updated_at
           ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, NOW(), NOW())
           ON CONFLICT (platform_dispensary_id) WHERE platform_dispensary_id IS NOT NULL
           DO UPDATE SET
             name = EXCLUDED.name,
             slug = EXCLUDED.slug,
             address1 = EXCLUDED.address1,
             city = EXCLUDED.city,
             latitude = EXCLUDED.latitude,
             longitude = EXCLUDED.longitude,
             menu_url = EXCLUDED.menu_url,
             is_medical = EXCLUDED.is_medical,
             is_recreational = EXCLUDED.is_recreational,
             updated_at = NOW()
           RETURNING id, (xmax = 0) AS is_new`,
          [
            store.name,
            slug,
            store.address,
            store.city,
            stateCode,
            store.zip,
            store.lat,
            store.long,
            menuUrl,
            'embedded', // menu_type: how it's displayed
            'jane', // platform: who provides the menu
            store.storeId,
            store.medical,
            store.recreational,
            'discovered',
          ]
        );

        if (result.rows.length > 0) {
          const { id, is_new } = result.rows[0];
          if (is_new) {
            inserted++;
            newIds.push(id);
            console.log(` + Inserted: ${store.name} (DB ID: ${id}, Jane ID: ${store.storeId})`);
          } else {
            updated++;
            console.log(` ~ Updated: ${store.name} (DB ID: ${id})`);
          }
        }
      } catch (error: unknown) {
        failed++;
        const message = error instanceof Error ? error.message : String(error);
        console.error(` ! Error inserting ${store.name}: ${message}`);
      }
    }

    console.log('\n' + '='.repeat(60));
    console.log('RESULTS');
    console.log('='.repeat(60));
    console.log(`Stores discovered: ${stores.length}`);
    console.log(`New stores inserted: ${inserted}`);
    console.log(`Existing stores updated: ${updated}`);
    console.log(`Failed inserts: ${failed}`);
    console.log(`New dispensary IDs: ${newIds.join(', ') || '(none)'}`);

    // Show final count
    const finalResult = await pool.query('SELECT COUNT(*) FROM dispensaries WHERE platform = $1', ['jane']);
    console.log(`\nTotal Jane stores in DB: ${finalResult.rows[0].count}`);

    // Surface partial failure to the caller without cutting the run short
    if (failed > 0) process.exitCode = 1;

  } catch (error: unknown) {
    console.error('Error:', error instanceof Error ? error.message : String(error));
    // exitCode rather than process.exit(): exit() would terminate before the
    // finally block below gets to close the pool.
    process.exitCode = 1;
  } finally {
    await pool.end();
  }
}
|
||||
|
||||
main();
|
||||
@@ -1,179 +0,0 @@
|
||||
import puppeteer from 'puppeteer';
|
||||
|
||||
/**
 * Pause the caller for a fixed amount of time.
 *
 * @param ms - Delay in milliseconds.
 * @returns A promise that settles once the timer has fired.
 */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
|
||||
|
||||
/**
 * Load every brand on https://shop.bestdispensary.com/brands by scrolling,
 * print the brand list, then visit each brand page and count its distinct
 * products (by image alt text or link text).
 *
 * Images, fonts and media are blocked to speed up loading. A brand page that
 * fails to load is reported and counted as 0 products. The browser is closed
 * even when the run fails part-way.
 */
async function main() {
  console.log('Loading ALL brands from https://shop.bestdispensary.com/brands');

  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  // Without try/finally any navigation error would leave Chromium running.
  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // Skip heavy assets; only the DOM matters here
    await page.setRequestInterception(true);
    page.on('request', (req) => {
      if (['image', 'font', 'media'].includes(req.resourceType())) {
        req.abort();
      } else {
        req.continue();
      }
    });

    await page.goto('https://shop.bestdispensary.com/brands', {
      waitUntil: 'networkidle2',
      timeout: 60000
    });
    await sleep(3000);

    // Bypass age gate
    const ageGate = await page.$('[data-testid="age-gate-modal"]');
    if (ageGate) {
      console.log('Age gate detected, bypassing...');
      const btn = await page.$('[data-testid="age-gate-submit-button"]');
      if (btn) await btn.click();
      await sleep(2000);
    }

    console.log('Current URL:', page.url());

    // Number of distinct brand hrefs currently in the DOM
    const countBrandLinks = (): Promise<number> =>
      page.evaluate(() => {
        const seen = new Set<string>();
        document.querySelectorAll('a[href*="/brand/"]').forEach((a: Element) => {
          const href = a.getAttribute('href');
          if (href) seen.add(href);
        });
        return seen.size;
      });

    // Get initial brand count
    let brandCount = await countBrandLinks();
    console.log(`Initial brand count: ${brandCount}`);

    // Aggressive scrolling
    console.log('\nScrolling to load ALL brands...');
    let previousCount = 0;
    let sameCount = 0;

    for (let i = 0; i < 50; i++) {
      // Scroll to bottom
      await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
      await sleep(1000);

      brandCount = await countBrandLinks();

      if (brandCount === previousCount) {
        // Stop only after five consecutive scrolls with no new brands
        sameCount++;
        if (sameCount >= 5) {
          console.log(` Scroll ${i+1}: ${brandCount} brands (stopping - no change)`);
          break;
        }
      } else {
        sameCount = 0;
        console.log(` Scroll ${i+1}: ${brandCount} brands`);
      }
      previousCount = brandCount;
    }

    // Get all unique brands
    const brands = await page.evaluate(() => {
      const results: { name: string; href: string }[] = [];
      const seen = new Set<string>();

      document.querySelectorAll('a[href*="/brand/"]').forEach((a: Element) => {
        const href = a.getAttribute('href') || '';
        // De-duplicate case-insensitively
        const normalizedHref = href.toLowerCase();
        if (seen.has(normalizedHref)) return;
        seen.add(normalizedHref);

        // Name: heading inside the link, else first line of link text,
        // else the slug from the href
        let name = '';
        const heading = a.querySelector('h3, h4, h5, [class*="name"]');
        if (heading) {
          name = heading.textContent?.trim() || '';
        }
        if (!name) {
          name = a.textContent?.trim().split('\n')[0] || '';
        }
        if (!name) {
          name = href.split('/brand/')[1]?.replace(/-/g, ' ') || '';
        }

        results.push({ name: name.slice(0, 50), href });
      });

      return results.sort((a, b) => a.name.localeCompare(b.name));
    });

    console.log('\n' + '='.repeat(60));
    console.log('TOTAL BRANDS FOUND: ' + brands.length);
    console.log('='.repeat(60));

    brands.forEach((b, i) => {
      const num = (i + 1).toString().padStart(3, ' ');
      console.log(`${num}. ${b.name} (${b.href})`);
    });

    // Now visit each brand page and count products
    console.log('\n' + '='.repeat(60));
    console.log('PRODUCTS PER BRAND');
    console.log('='.repeat(60));

    const brandProducts: { brand: string; products: number }[] = [];

    for (let i = 0; i < brands.length; i++) {
      const brand = brands[i];
      try {
        const brandUrl = brand.href.startsWith('http')
          ? brand.href
          : `https://shop.bestdispensary.com${brand.href}`;

        await page.goto(brandUrl, { waitUntil: 'networkidle2', timeout: 30000 });
        await sleep(1500);

        // Scroll to load products
        for (let j = 0; j < 10; j++) {
          await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
          await sleep(800);
        }

        const productCount = await page.evaluate(() => {
          const seen = new Set<string>();
          document.querySelectorAll('a[href*="/product/"]').forEach((a: Element) => {
            const img = a.querySelector('img');
            const name = img?.getAttribute('alt') || a.textContent?.trim() || '';
            if (name) seen.add(name);
          });
          return seen.size;
        });

        brandProducts.push({ brand: brand.name, products: productCount });
        console.log(`${(i+1).toString().padStart(3)}. ${brand.name}: ${productCount} products`);

      } catch (err: unknown) {
        const message = err instanceof Error ? err.message : String(err);
        console.log(`${(i+1).toString().padStart(3)}. ${brand.name}: ERROR - ${message.slice(0, 30)}`);
        brandProducts.push({ brand: brand.name, products: 0 });
      }
    }

    // Summary
    const totalProducts = brandProducts.reduce((sum, b) => sum + b.products, 0);
    console.log('\n' + '='.repeat(60));
    console.log('SUMMARY');
    console.log('='.repeat(60));
    console.log(`Total brands: ${brands.length}`);
    console.log(`Total products: ${totalProducts}`);
  } finally {
    await browser.close();
  }
}
|
||||
|
||||
main().catch(console.error);
|
||||
@@ -1,92 +0,0 @@
|
||||
import puppeteer from 'puppeteer';
|
||||
|
||||
/**
 * Pause the caller for a fixed amount of time.
 *
 * @param ms - Delay in milliseconds.
 * @returns A promise that settles once the timer has fired.
 */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
|
||||
|
||||
/**
 * Open https://shop.bestdispensary.com/brands, scroll until the page stops
 * growing (at most 20 scrolls), list every distinct brand link, and save a
 * full-page screenshot to /tmp/bestdispensary-brands.png.
 *
 * Images, fonts and media are blocked to speed up loading. The browser is
 * closed even when the run fails part-way.
 */
async function main() {
  console.log('Navigating to https://shop.bestdispensary.com/brands');

  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  // Without try/finally any navigation error would leave Chromium running.
  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // Skip heavy assets; only the DOM matters here
    await page.setRequestInterception(true);
    page.on('request', (req) => {
      if (['image', 'font', 'media'].includes(req.resourceType())) {
        req.abort();
      } else {
        req.continue();
      }
    });

    // Go directly to the brands page
    await page.goto('https://shop.bestdispensary.com/brands', {
      waitUntil: 'networkidle2',
      timeout: 60000
    });
    await sleep(3000);

    // Bypass age gate if present
    const ageGate = await page.$('[data-testid="age-gate-modal"]');
    if (ageGate) {
      console.log('Age gate detected, bypassing...');
      const btn = await page.$('[data-testid="age-gate-submit-button"]');
      if (btn) await btn.click();
      await sleep(2000);
    }

    console.log('Current URL:', page.url());

    // Scroll to load all content
    console.log('\nScrolling to load all brands...');
    let previousHeight = 0;
    for (let i = 0; i < 20; i++) {
      await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
      await sleep(1500);

      // An unchanged document height means the last scroll loaded nothing
      const currentHeight = await page.evaluate(() => document.body.scrollHeight);
      if (currentHeight === previousHeight) {
        console.log(` Scroll ${i+1}: No new content`);
        break;
      }
      previousHeight = currentHeight;

      const brandCount = await page.evaluate(() =>
        document.querySelectorAll('a[href*="/brand/"]').length
      );
      console.log(` Scroll ${i+1}: height=${currentHeight}, brand links=${brandCount}`);
    }

    // Get all brand links, de-duplicated by href
    const brands = await page.evaluate(() => {
      const results: { name: string; href: string }[] = [];
      const seen = new Set<string>();

      document.querySelectorAll('a[href*="/brand/"]').forEach((a: Element) => {
        const href = a.getAttribute('href') || '';
        if (seen.has(href)) return;
        seen.add(href);

        // Fall back to the slug when the link has no text
        const name = a.textContent?.trim() || href.split('/brand/')[1] || '';
        results.push({ name, href });
      });

      return results;
    });

    console.log(`\nFound ${brands.length} brands:`);
    brands.forEach(b => console.log(` - ${b.name} (${b.href})`));

    // Take screenshot
    await page.screenshot({ path: '/tmp/bestdispensary-brands.png', fullPage: true });
    console.log('\nScreenshot saved to /tmp/bestdispensary-brands.png');
  } finally {
    await browser.close();
  }
}
|
||||
|
||||
main().catch(console.error);
|
||||
@@ -1,108 +0,0 @@
|
||||
import puppeteer from 'puppeteer';
|
||||
|
||||
/**
 * Pause the caller for a fixed amount of time.
 *
 * @param ms - Delay in milliseconds.
 * @returns A promise that settles once the timer has fired.
 */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
|
||||
|
||||
/**
 * Probe the "Load More" button on https://shop.bestdispensary.com/brands:
 * print its state, then scroll it into view and click it up to 10 times,
 * reporting the brand count after each click and the final brand list.
 *
 * Images, fonts and media are blocked to speed up loading. The browser is
 * closed even when the run fails part-way.
 */
async function main() {
  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  // Without try/finally any navigation or click error would leave Chromium running.
  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // Skip heavy assets; only the DOM matters here
    await page.setRequestInterception(true);
    page.on('request', (req) => {
      if (['image', 'font', 'media'].includes(req.resourceType())) {
        req.abort();
      } else {
        req.continue();
      }
    });

    await page.goto('https://shop.bestdispensary.com/brands', {
      waitUntil: 'networkidle2',
      timeout: 60000
    });
    await sleep(3000);

    // Bypass age gate
    const ageGate = await page.$('[data-testid="age-gate-modal"]');
    if (ageGate) {
      const btn = await page.$('[data-testid="age-gate-submit-button"]');
      if (btn) await btn.click();
      await sleep(2000);
    }

    // Check Load More button
    const btnInfo = await page.evaluate(() => {
      const btn = document.querySelector('button.collection__load-more');
      if (!btn) return { found: false };

      const rect = btn.getBoundingClientRect();
      return {
        found: true,
        text: btn.textContent?.trim(),
        visible: rect.width > 0 && rect.height > 0,
        top: rect.top,
        disabled: (btn as HTMLButtonElement).disabled,
        class: btn.className,
      };
    });

    console.log('Load More button:', btnInfo);

    // Scroll to button and click
    console.log('\nScrolling to button and clicking...');

    for (let i = 0; i < 10; i++) {
      const btn = await page.$('button.collection__load-more');
      if (!btn) {
        console.log('Button not found');
        break;
      }

      // Scroll button into view
      await page.evaluate((b) => b.scrollIntoView({ behavior: 'smooth', block: 'center' }), btn);
      await sleep(500);

      // The button may have been removed (or emptied) while scrolling;
      // re-check by selector before clicking
      const stillThere = await page.evaluate(() => {
        const b = document.querySelector('button.collection__load-more');
        return b ? b.textContent?.trim() : null;
      });

      if (!stillThere) {
        console.log('Button disappeared - all loaded');
        break;
      }

      // Click it
      await btn.click();
      console.log(`Click ${i+1}...`);
      await sleep(2000);

      const count = await page.evaluate(() =>
        document.querySelectorAll('.brands-page__list a[href*="/brand/"]').length
      );
      console.log(` Brands: ${count}`);
    }

    // Final count
    const brands = await page.evaluate(() => {
      const list: string[] = [];
      document.querySelectorAll('.brands-page__list a[href*="/brand/"]').forEach((a: Element) => {
        list.push(a.textContent?.trim() || '');
      });
      return list;
    });

    console.log(`\nTotal brands: ${brands.length}`);
    console.log(brands.join(', '));
  } finally {
    await browser.close();
  }
}
|
||||
|
||||
main().catch(console.error);
|
||||
@@ -1,157 +0,0 @@
|
||||
import puppeteer from 'puppeteer';
|
||||
|
||||
/**
 * Pause the caller for a fixed amount of time.
 *
 * @param ms - Delay in milliseconds.
 * @returns A promise that settles once the timer has fired.
 */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
|
||||
|
||||
/**
 * Load every brand on https://shop.bestdispensary.com/brands by clicking
 * "Load More" until it disappears, then visit each brand page, expand it the
 * same way, and count its distinct product links. Prints per-brand counts, a
 * summary, and the top 20 brands by product count.
 *
 * Images, fonts and media are blocked to speed up loading. A brand page that
 * fails to load is reported and counted as 0 products. The browser is closed
 * even when the run fails part-way.
 */
async function main() {
  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  // Without try/finally any navigation or click error would leave Chromium running.
  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // Skip heavy assets; only the DOM matters here
    await page.setRequestInterception(true);
    page.on('request', (req) => {
      if (['image', 'font', 'media'].includes(req.resourceType())) {
        req.abort();
      } else {
        req.continue();
      }
    });

    await page.goto('https://shop.bestdispensary.com/brands', {
      waitUntil: 'networkidle2',
      timeout: 60000
    });
    await sleep(3000);

    // Bypass age gate
    const ageGate = await page.$('[data-testid="age-gate-modal"]');
    if (ageGate) {
      console.log('Bypassing age gate...');
      const btn = await page.$('[data-testid="age-gate-submit-button"]');
      if (btn) await btn.click();
      await sleep(2000);
    }

    // Click "LOAD MORE" until all brands are loaded
    console.log('Loading all brands...\n');

    let loadMoreClicks = 0;
    while (true) {
      const loadMoreBtn = await page.$('button.collection__load-more');
      if (!loadMoreBtn) {
        console.log('No more "Load More" button - all brands loaded!');
        break;
      }

      // A zero-sized button is treated as hidden
      const isVisible = await page.evaluate((btn) => {
        const rect = btn.getBoundingClientRect();
        return rect.width > 0 && rect.height > 0;
      }, loadMoreBtn);

      if (!isVisible) {
        console.log('Load More button not visible - all brands loaded!');
        break;
      }

      await loadMoreBtn.click();
      loadMoreClicks++;
      await sleep(1500);

      const brandCount = await page.evaluate(() =>
        document.querySelectorAll('.brands-page__list a[href*="/brand/"]').length
      );
      console.log(` Click ${loadMoreClicks}: ${brandCount} brands loaded`);

      if (loadMoreClicks > 20) break; // Safety limit
    }

    // Get all brands
    const brands = await page.evaluate(() => {
      const results: { name: string; href: string }[] = [];
      document.querySelectorAll('.brands-page__list a[href*="/brand/"]').forEach((a: Element) => {
        const href = a.getAttribute('href') || '';
        const name = a.textContent?.trim() || '';
        if (name && href) {
          results.push({ name, href });
        }
      });
      return results;
    });

    console.log('\n' + '='.repeat(60));
    console.log(`TOTAL BRANDS: ${brands.length}`);
    console.log('='.repeat(60));

    // Visit each brand and count products
    console.log('\nCounting products per brand...\n');

    const results: { brand: string; products: number }[] = [];

    for (let i = 0; i < brands.length; i++) {
      const brand = brands[i];
      // Hrefs may already be absolute; only prefix the origin for relative ones
      const brandUrl = brand.href.startsWith('http')
        ? brand.href
        : `https://shop.bestdispensary.com${brand.href}`;

      try {
        await page.goto(brandUrl, { waitUntil: 'networkidle2', timeout: 30000 });
        await sleep(1000);

        // Click load more on brand page too
        for (let j = 0; j < 10; j++) {
          const loadMore = await page.$('button.collection__load-more');
          if (!loadMore) break;

          const isVisible = await page.evaluate((btn) => {
            const rect = btn.getBoundingClientRect();
            return rect.width > 0 && rect.height > 0;
          }, loadMore);

          if (!isVisible) break;
          await loadMore.click();
          await sleep(1000);
        }

        // Distinct product hrefs on the page
        const productCount = await page.evaluate(() => {
          const seen = new Set<string>();
          document.querySelectorAll('a[href*="/product/"]').forEach((a: Element) => {
            const href = a.getAttribute('href');
            if (href) seen.add(href);
          });
          return seen.size;
        });

        results.push({ brand: brand.name, products: productCount });
        console.log(`${(i+1).toString().padStart(3)}. ${brand.name}: ${productCount} products`);

      } catch (err: unknown) {
        // Keep the reason visible instead of a bare "ERROR"
        const message = err instanceof Error ? err.message : String(err);
        console.log(`${(i+1).toString().padStart(3)}. ${brand.name}: ERROR - ${message.slice(0, 30)}`);
        results.push({ brand: brand.name, products: 0 });
      }
    }

    // Summary
    const totalProducts = results.reduce((sum, r) => sum + r.products, 0);
    const brandsWithProducts = results.filter(r => r.products > 0).length;

    console.log('\n' + '='.repeat(60));
    console.log('SUMMARY');
    console.log('='.repeat(60));
    console.log(`Total brands: ${brands.length}`);
    console.log(`Brands with products: ${brandsWithProducts}`);
    console.log(`Total products: ${totalProducts}`);

    // Top brands by product count (sorted copy; `results` keeps visit order)
    console.log('\nTop 20 brands by product count:');
    [...results]
      .sort((a, b) => b.products - a.products)
      .slice(0, 20)
      .forEach((r, i) => console.log(` ${i+1}. ${r.brand}: ${r.products}`));
  } finally {
    await browser.close();
  }
}
|
||||
|
||||
// Run the script. On an unhandled failure, log it and mark the process as
// failed so shells and CI see a non-zero exit code instead of a silent 0.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
|
||||
@@ -1,108 +0,0 @@
|
||||
import puppeteer from 'puppeteer';
|
||||
|
||||
/**
 * Pause for the given number of milliseconds.
 *
 * @param ms - Delay before the returned promise resolves.
 * @returns A promise that resolves with no value once the timer fires.
 */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((wake) => {
    setTimeout(wake, ms);
  });
}
|
||||
|
||||
/**
 * Count the distinct products listed for every brand on shop.bestdispensary.com.
 *
 * Steps: open the /brands page, dismiss the age gate when present, click
 * "Load More" until the button is gone (at most 15 clicks), collect every
 * brand link, then visit each brand page (expanding it with up to 20
 * "Load More" clicks) and count unique `/product/` hrefs. Per-brand counts and
 * a grand total are printed to stdout.
 *
 * A brand whose page fails is recorded with 0 products and the failure reason
 * is logged. The browser is closed even if the run aborts early.
 */
async function main() {
  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  // try/finally so the browser is released even when a navigation or
  // evaluate call rejects part-way through the run.
  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // Clicks "Load More" from page context until the button is no longer in
    // the DOM or `maxClicks` is reached, pausing `pauseMs` after each click.
    const expandList = async (maxClicks: number, pauseMs: number): Promise<void> => {
      for (let n = 0; n < maxClicks; n++) {
        const clicked = await page.evaluate(() => {
          const btn = document.querySelector<HTMLButtonElement>('button.collection__load-more');
          if (btn) { btn.click(); return true; }
          return false;
        });
        if (!clicked) break;
        await sleep(pauseMs);
      }
    };

    await page.goto('https://shop.bestdispensary.com/brands', {
      waitUntil: 'networkidle2',
      timeout: 60000,
    });
    await sleep(3000);

    // Bypass age gate
    const ageGate = await page.$('[data-testid="age-gate-modal"]');
    if (ageGate) {
      const btn = await page.$('[data-testid="age-gate-submit-button"]');
      if (btn) await btn.click();
      await sleep(2000);
    }

    // Try clicking Load More multiple times with JS
    console.log('Loading all brands...');
    await expandList(15, 2000);

    // Get all brands
    const brands = await page.evaluate(() => {
      const list: { name: string; href: string }[] = [];
      document.querySelectorAll('.brands-page__list a[href*="/brand/"]').forEach((a: Element) => {
        list.push({
          name: a.textContent?.trim() || '',
          href: a.getAttribute('href') || '',
        });
      });
      return list;
    });

    console.log('Total brands found: ' + brands.length + '\n');
    console.log('PRODUCTS PER BRAND');
    console.log('==================\n');

    const results: { brand: string; products: number }[] = [];

    for (let i = 0; i < brands.length; i++) {
      const brand = brands[i];
      const url = 'https://shop.bestdispensary.com' + brand.href;
      const num = (i + 1).toString().padStart(2, ' ');

      try {
        await page.goto(url, { waitUntil: 'networkidle2', timeout: 30000 });
        await sleep(1000);

        // Click load more on brand page
        await expandList(20, 1000);

        // Count unique hrefs: the same product may be linked more than once.
        const productCount = await page.evaluate(() => {
          const seen = new Set<string>();
          document.querySelectorAll('a[href*="/product/"]').forEach((a: Element) => {
            const href = a.getAttribute('href');
            if (href) seen.add(href);
          });
          return seen.size;
        });

        results.push({ brand: brand.name, products: productCount });
        console.log(num + '. ' + brand.name + ': ' + productCount);
      } catch (err: unknown) {
        // Keep going with the next brand, but say why this one failed.
        const reason = err instanceof Error ? err.message : String(err);
        results.push({ brand: brand.name, products: 0 });
        console.log(num + '. ' + brand.name + ': ERROR (' + reason + ')');
      }
    }

    // Summary
    const total = results.reduce((s, r) => s + r.products, 0);
    console.log('\n==================');
    console.log('TOTAL: ' + brands.length + ' brands, ' + total + ' products');
    console.log('==================');
  } finally {
    await browser.close();
  }
}
|
||||
|
||||
// Run the script. On an unhandled failure, log it and mark the process as
// failed so shells and CI see a non-zero exit code instead of a silent 0.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
|
||||
@@ -1,130 +0,0 @@
|
||||
import puppeteer from 'puppeteer';
|
||||
|
||||
/**
 * Wait `ms` milliseconds before continuing.
 *
 * @param ms - How long to wait, in milliseconds.
 * @returns A promise that settles (with no value) after the delay.
 */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
|
||||
|
||||
/**
 * Inspect the DOM structure of the shop.bestdispensary.com /brands page.
 *
 * Loads the page with images, fonts and media blocked, dismisses the age gate
 * when present, then prints three views of the page: every `section` inside
 * `main` with a sample of its children, the links inside the first
 * `main > section`, and any grid-like container holding more than five
 * `/brand/` links.
 *
 * The browser is closed even if a step throws.
 */
async function main() {
  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // Skip heavy resources; only the DOM structure matters here.
    await page.setRequestInterception(true);
    page.on('request', (req) => {
      if (['image', 'font', 'media'].includes(req.resourceType())) {
        req.abort();
      } else {
        req.continue();
      }
    });

    await page.goto('https://shop.bestdispensary.com/brands', {
      waitUntil: 'networkidle2',
      timeout: 60000,
    });
    await sleep(3000);

    // Bypass age gate
    const ageGate = await page.$('[data-testid="age-gate-modal"]');
    if (ageGate) {
      const btn = await page.$('[data-testid="age-gate-submit-button"]');
      if (btn) await btn.click();
      await sleep(2000);
    }

    // Use the selector hint: /html/body/main/section
    console.log('Looking at main > section structure...\n');

    const sectionInfo = await page.evaluate(() => {
      const main = document.querySelector('main');
      if (!main) return { error: 'No main element' };

      const sections = main.querySelectorAll('section');
      const results: {
        index: number;
        class: string;
        childCount: number;
        sampleChildren: string[];
      }[] = [];

      sections.forEach((section, i) => {
        const children = section.children;
        const childInfo: string[] = [];

        for (let j = 0; j < Math.min(children.length, 10); j++) {
          const child = children[j];
          // Read the class attribute rather than `className`: on SVG elements
          // `className` is not a string, so calling `.slice` on it would throw.
          const cls = child.getAttribute('class') ?? '';
          childInfo.push(child.tagName + '.' + cls.slice(0, 30));
        }

        results.push({
          index: i,
          class: (section.getAttribute('class') ?? '').slice(0, 50),
          childCount: children.length,
          sampleChildren: childInfo,
        });
      });

      return results;
    });

    console.log('Sections in main:');
    console.log(JSON.stringify(sectionInfo, null, 2));

    // Look for brand cards within the section
    console.log('\nLooking for brand cards in main > section...');

    const brandCards = await page.evaluate(() => {
      const section = document.querySelector('main > section');
      if (!section) return [];

      // Get all child elements that might be brand cards
      const cards: { tag: string; text: string; href: string }[] = [];

      section.querySelectorAll('a').forEach((a: Element) => {
        const href = a.getAttribute('href') || '';
        const text = a.textContent?.trim().slice(0, 50) || '';
        cards.push({ tag: 'a', text, href });
      });

      return cards;
    });

    console.log(`Found ${brandCards.length} links in section:`);
    brandCards.slice(0, 30).forEach(c => console.log(` ${c.text} -> ${c.href}`));

    // Get the grid of brand cards
    console.log('\nLooking for grid container...');

    const gridCards = await page.evaluate(() => {
      // Look for grid-like containers
      const grids = document.querySelectorAll('[class*="grid"], [class*="Grid"], main section > div');
      const results: { class: string; brandCount: number; brands: string[] }[] = [];

      grids.forEach((grid) => {
        const links = grid.querySelectorAll('a[href*="/brand/"]');
        // Only containers with more than five brand links are reported.
        if (links.length > 5) {
          const brands: string[] = [];
          links.forEach((a: Element) => {
            // First line of the link text, de-duplicated.
            const text = a.textContent?.trim().split('\n')[0] || '';
            if (text && !brands.includes(text)) brands.push(text);
          });
          results.push({
            class: (grid.getAttribute('class') ?? '').slice(0, 40),
            brandCount: brands.length,
            brands: brands.slice(0, 50),
          });
        }
      });

      return results;
    });

    console.log('Grid containers with brands:');
    gridCards.forEach(g => {
      console.log(`\n[${g.brandCount} brands] class="${g.class}"`);
      g.brands.forEach((b: string, i: number) => console.log(` ${i+1}. ${b}`));
    });
  } finally {
    await browser.close();
  }
}
|
||||
|
||||
// Run the script. On an unhandled failure, log it and mark the process as
// failed so shells and CI see a non-zero exit code instead of a silent 0.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
|
||||
@@ -1,188 +0,0 @@
|
||||
/**
|
||||
* One-off script to test iHeartJane scraping
|
||||
* Mimics remote worker: Puppeteer + stealth (no proxy is configured when run locally)
|
||||
*
|
||||
* Usage: npx ts-node scripts/test-iheartjane.ts
|
||||
*/
|
||||
|
||||
import puppeteer from 'puppeteer-extra';
|
||||
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
|
||||
|
||||
// Register the stealth plugin at module load, before main() launches a browser.
puppeteer.use(StealthPlugin());
|
||||
|
||||
// Menu page that main() navigates to while capturing API responses.
const TARGET_URL = 'https://theflowershopusa.com/mesa/menu/';
|
||||
// Store id interpolated into the api.iheartjane.com /v1/stores/{id} request made by main().
const STORE_ID = 2788;
|
||||
|
||||
/**
 * One-off probe of an iHeartJane-powered menu page.
 *
 * Opens TARGET_URL with images, fonts, media and stylesheets blocked, records
 * every JSON response whose URL mentions `iheartjane.com` or `algolia`, fetches
 * the store record for STORE_ID from inside the page, samples product data
 * from inline scripts and the DOM, prints a summary, and writes the captured
 * responses plus a screenshot under /tmp.
 *
 * Errors during the run are logged (with a best-effort error screenshot) and
 * do not propagate. The browser is always closed.
 */
async function main() {
  console.log('[iHeartJane Test] Starting...');

  // No proxy for local testing
  const browser = await puppeteer.launch({
    headless: true,
    args: [
      '--no-sandbox',
      '--disable-setuid-sandbox',
      '--disable-dev-shm-usage',
      '--disable-blink-features=AutomationControlled',
    ],
  });

  // Outer try/finally covers page creation too, so the browser is never leaked.
  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // Intercept network requests to capture API calls
    const apiResponses: { type: string; url: string; data: any }[] = [];

    await page.setRequestInterception(true);
    page.on('request', (req) => {
      // Block heavy resources
      const type = req.resourceType();
      if (['image', 'font', 'media', 'stylesheet'].includes(type)) {
        req.abort();
      } else {
        req.continue();
      }
    });

    page.on('response', async (response) => {
      const url = response.url();
      const contentType = response.headers()['content-type'] || '';

      // Capture any JSON response from iheartjane domains
      if ((url.includes('iheartjane.com') || url.includes('algolia')) && contentType.includes('json')) {
        try {
          const json = await response.json();
          // First matching URL fragment decides the label.
          const type = url.includes('store') ? 'STORE' :
            url.includes('product') ? 'PRODUCT' :
            url.includes('algolia') ? 'ALGOLIA' : 'API';
          apiResponses.push({ type, url, data: json });
          console.log(`[${type}] ${url.substring(0, 120)}...`);
        } catch {
          // Not JSON
        }
      }
    });

    console.log(`[iHeartJane Test] Navigating to ${TARGET_URL}`);

    try {
      await page.goto(TARGET_URL, {
        waitUntil: 'networkidle2',
        timeout: 60000,
      });

      console.log('[iHeartJane Test] Menu page loaded, waiting for data...');

      // Wait a bit for all API calls to complete
      await new Promise(r => setTimeout(r, 3000));

      // Also try to get store info by visiting the store page
      console.log('[iHeartJane Test] Fetching store info...');
      const storeInfoUrl = `https://api.iheartjane.com/v1/stores/${STORE_ID}`;

      // Try to fetch store info via page.evaluate (uses browser context).
      // The URL is passed in so the request and the recorded URL cannot drift apart.
      const storeInfo = await page.evaluate(async (url) => {
        try {
          const resp = await fetch(url);
          if (resp.ok) return await resp.json();
          return { error: resp.status };
        } catch (e) {
          return { error: e instanceof Error ? e.message : String(e) };
        }
      }, storeInfoUrl);

      if (storeInfo && !storeInfo.error) {
        apiResponses.push({ type: 'STORE_DIRECT', url: storeInfoUrl, data: storeInfo });
        console.log('[STORE_DIRECT] Got store info via fetch');
      } else {
        console.log(`[STORE_DIRECT] Failed: ${JSON.stringify(storeInfo)}`);
      }

      console.log('[iHeartJane Test] Processing results...');

      // Wait for products to load
      await page.waitForSelector('[data-testid="product-card"], .product-card, [class*="ProductCard"]', {
        timeout: 30000,
      }).catch(() => console.log('[iHeartJane Test] No product cards found via selector'));

      // Try to extract product data from the page
      const products = await page.evaluate(() => {
        // Look for product data in various places
        const results: any[] = [];

        // Method 1: Look for __INITIAL_STATE__ or similar
        const scripts = Array.from(document.querySelectorAll('script'));
        for (const script of scripts) {
          const text = script.textContent || '';
          if (text.includes('products') && text.includes('price')) {
            // Try to find JSON object; only the first 500 characters are kept.
            const match = text.match(/\{[\s\S]*"products"[\s\S]*\}/);
            if (match) {
              results.push({ source: 'script', data: match[0].substring(0, 500) });
            }
          }
        }

        // Method 2: Look for product elements in DOM (first five only)
        const productElements = document.querySelectorAll('[data-testid="product-card"], .product-card, [class*="product"]');
        for (const el of Array.from(productElements).slice(0, 5)) {
          const name = el.querySelector('[class*="name"], h3, h4')?.textContent;
          const price = el.querySelector('[class*="price"]')?.textContent;
          if (name) {
            results.push({ source: 'dom', name, price });
          }
        }

        return results;
      });

      console.log('\n[iHeartJane Test] === RESULTS ===');
      console.log(`Total API responses captured: ${apiResponses.length}`);

      // Report the in-page extraction as well; it was previously computed and dropped.
      console.log(`Page-extracted product samples: ${products.length}`);
      for (const sample of products) {
        console.log(` ${JSON.stringify(sample)}`);
      }

      // Group by type
      const byType: Record<string, any[]> = {};
      for (const r of apiResponses) {
        byType[r.type] = byType[r.type] || [];
        byType[r.type].push(r);
      }

      for (const [type, items] of Object.entries(byType)) {
        console.log(`\n--- ${type} (${items.length} responses) ---`);
        for (const item of items) {
          console.log(`URL: ${item.url}`);
          // Show structure
          if (item.data.hits) {
            console.log(` Products: ${item.data.hits.length} hits`);
            if (item.data.hits[0]) {
              console.log(` Fields: ${Object.keys(item.data.hits[0]).join(', ')}`);
            }
          } else if (item.data.store) {
            console.log(` Store: ${JSON.stringify(item.data.store, null, 2).substring(0, 1000)}`);
          } else {
            console.log(` Keys: ${Object.keys(item.data).join(', ')}`);
          }
        }
      }

      // Write full data to file
      const fs = await import('fs');
      fs.writeFileSync('/tmp/iheartjane-data.json', JSON.stringify(apiResponses, null, 2));
      console.log('\n[iHeartJane Test] Full data saved to /tmp/iheartjane-data.json');

      // Take screenshot
      await page.screenshot({ path: '/tmp/iheartjane-test.png', fullPage: false });
      console.log('[iHeartJane Test] Screenshot saved to /tmp/iheartjane-test.png');
    } catch (error: unknown) {
      const message = error instanceof Error ? error.message : String(error);
      console.error('[iHeartJane Test] Error:', message);
      // Best-effort: a failing screenshot must not mask the original error.
      await page.screenshot({ path: '/tmp/iheartjane-error.png' }).catch((shotErr) => {
        console.error('[iHeartJane Test] Could not save error screenshot:', shotErr);
      });
    }
  } finally {
    await browser.close();
  }

  console.log('[iHeartJane Test] Done');
}
|
||||
|
||||
// Run the script. On an unhandled failure, log it and mark the process as
// failed so shells and CI see a non-zero exit code instead of a silent 0.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
|
||||
@@ -1,224 +0,0 @@
|
||||
/**
|
||||
* Explore Jane API to understand data structure
|
||||
* Usage: npx ts-node scripts/test-jane-api-explore.ts
|
||||
*/
|
||||
|
||||
import puppeteer from 'puppeteer-extra';
|
||||
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
|
||||
|
||||
// Register the stealth plugin at module load, before main() launches a browser.
puppeteer.use(StealthPlugin());
|
||||
|
||||
/**
 * Explore which Jane (iheartjane.com) endpoints expose store data.
 *
 * Loads the public stores page while recording JSON responses whose URL
 * mentions `/stores`, `/search` or `algolia`, prints the shape of each captured
 * response, checks the page for embedded store data (`__NEXT_DATA__`,
 * `window.stores`, `window.__stores`), and then probes several candidate
 * endpoints from inside the page, printing the keys of whatever answers.
 *
 * Probes are best-effort: a failed or non-OK request is skipped. The browser
 * is closed even if a step throws.
 */
async function main() {
  console.log('Exploring Jane API from browser context...\n');

  const browser = await puppeteer.launch({
    headless: 'new',
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  try {
    const page = await browser.newPage();

    // GETs `url` from inside the page; resolves to the parsed JSON body, or
    // null when the request fails or the status is not OK.
    const fetchJsonInPage = (url: string): Promise<any> =>
      page.evaluate(async (target) => {
        try {
          const res = await fetch(target);
          if (res.ok) return await res.json();
        } catch {
          // Best-effort probe: treat any failure as "endpoint not available".
        }
        return null;
      }, url);

    // Intercept network requests to find store data API calls
    const capturedResponses: Array<{ url: string; data: any }> = [];

    await page.setRequestInterception(true);
    page.on('request', (req) => req.continue());

    page.on('response', async (response) => {
      const url = response.url();
      if (url.includes('iheartjane.com') &&
          (url.includes('/stores') || url.includes('/search') || url.includes('algolia'))) {
        try {
          const text = await response.text();
          // Cheap JSON sniff before parsing.
          if (text.startsWith('{') || text.startsWith('[')) {
            const data = JSON.parse(text);
            capturedResponses.push({ url, data });
            console.log(`Captured: ${url.substring(0, 100)}...`);
          }
        } catch {
          // Not JSON
        }
      }
    });

    // Visit Jane to establish session
    console.log('Visiting Jane stores page to capture network requests...');
    await page.goto('https://www.iheartjane.com/stores', {
      waitUntil: 'networkidle2',
      timeout: 60000,
    });

    console.log(`\nCaptured ${capturedResponses.length} API responses`);

    for (const resp of capturedResponses) {
      console.log(`\n--- ${resp.url.substring(0, 80)} ---`);
      const keys = Object.keys(resp.data);
      console.log('Keys:', keys);

      // Check for stores array
      if (resp.data.stores && Array.isArray(resp.data.stores)) {
        console.log(`Stores count: ${resp.data.stores.length}`);
        const firstStore = resp.data.stores[0];
        if (firstStore) {
          console.log('First store keys:', Object.keys(firstStore));
          console.log('Sample:', JSON.stringify(firstStore, null, 2).substring(0, 500));
        }
      }

      // Check for hits (Algolia)
      if (resp.data.hits && Array.isArray(resp.data.hits)) {
        console.log(`Hits count: ${resp.data.hits.length}`);
        const firstHit = resp.data.hits[0];
        if (firstHit) {
          console.log('First hit keys:', Object.keys(firstHit));
        }
      }
    }

    // Look for __NEXT_DATA__ or similar embedded data
    console.log('\n--- Checking for embedded page data ---');
    type EmbeddedData = { source: string; storeCount?: number; firstStore?: any; data?: unknown };
    const pageData = await page.evaluate((): EmbeddedData | null => {
      // Check for Next.js data
      const nextData = (window as any).__NEXT_DATA__;
      if (nextData?.props?.pageProps?.stores) {
        return {
          source: '__NEXT_DATA__',
          storeCount: nextData.props.pageProps.stores.length,
          firstStore: nextData.props.pageProps.stores[0],
        };
      }

      // Check for any global store data
      const win = window as any;
      if (win.stores) return { source: 'window.stores', data: win.stores };
      if (win.__stores) return { source: 'window.__stores', data: win.__stores };

      return null;
    });

    if (pageData) {
      console.log('Found embedded data:', pageData.source);
      // Only the __NEXT_DATA__ source carries a count; avoid printing "undefined".
      if (pageData.storeCount !== undefined) {
        console.log('Store count:', pageData.storeCount);
      }
      if (pageData.firstStore) {
        console.log('First store keys:', Object.keys(pageData.firstStore));
        console.log('Sample:', JSON.stringify({
          id: pageData.firstStore.id,
          name: pageData.firstStore.name,
          city: pageData.firstStore.city,
          state: pageData.firstStore.state,
        }, null, 2));
      }
    } else {
      console.log('No embedded page data found');
    }

    // Try alternative API endpoints from browser context
    console.log('\n--- Testing alternative API endpoints ---');

    // Try the map endpoint
    const mapData = await fetchJsonInPage('https://api.iheartjane.com/v1/stores/map?per_page=100');
    if (mapData) {
      console.log('\n/v1/stores/map response:');
      console.log('Keys:', Object.keys(mapData));
      if (mapData.stores?.[0]) {
        console.log('First store keys:', Object.keys(mapData.stores[0]));
      }
    }

    // Try index endpoint
    const indexData = await fetchJsonInPage('https://api.iheartjane.com/v1/stores/index?per_page=10');
    if (indexData) {
      console.log('\n/v1/stores/index response:');
      console.log('Keys:', Object.keys(indexData));
      if (indexData.stores?.[0]) {
        console.log('First store keys:', Object.keys(indexData.stores[0]));
      }
    }

    // Try with state parameter
    const stateData = await fetchJsonInPage('https://api.iheartjane.com/v1/stores?state=AZ&per_page=10');
    if (stateData) {
      console.log('\n/v1/stores?state=AZ response:');
      console.log('Keys:', Object.keys(stateData));
      console.log('Stores count:', stateData.stores?.length);
      if (stateData.stores?.[0]) {
        console.log('First store keys:', Object.keys(stateData.stores[0]));
        console.log('Sample:', JSON.stringify(stateData.stores[0], null, 2).substring(0, 300));
      }
    }

    // Try Algolia directly for stores
    console.log('\n--- Testing Algolia for stores ---');
    const algoliaStores = await page.evaluate(async () => {
      try {
        // Common Algolia search pattern
        // NOTE(review): application id and API key are hard-coded here; confirm
        // this is a public search-only key before committing it to a repository.
        const res = await fetch('https://search.iheartjane.com/1/indexes/stores-production/query', {
          method: 'POST',
          headers: {
            'Content-Type': 'application/json',
            'X-Algolia-Application-Id': 'HKXSXRD7RA',
            'X-Algolia-API-Key': 'YjZhYjQxZjU4ZTNjMTRhYzExZTk2YjU2MzliMGE4ZTE5YjJkMmZkZTI2ODllYTY2MThlMzQ3Y2QxOTFkMjI5Y3RhZ0ZpbHRlcnM9',
          },
          body: JSON.stringify({
            query: 'Arizona',
            hitsPerPage: 20,
          }),
        });
        if (res.ok) return await res.json();
      } catch {
        // Best-effort probe: treat any failure as "endpoint not available".
      }
      return null;
    });

    if (algoliaStores) {
      console.log('Algolia stores-production response:');
      console.log('Keys:', Object.keys(algoliaStores));
      console.log('Hits count:', algoliaStores.hits?.length);
      if (algoliaStores.hits?.[0]) {
        console.log('First hit keys:', Object.keys(algoliaStores.hits[0]));
        console.log('Sample:', JSON.stringify(algoliaStores.hits[0], null, 2).substring(0, 500));
      }
    }

    // Check if there's a /v2 endpoint
    const v2Data = await fetchJsonInPage('https://api.iheartjane.com/v2/stores?per_page=10');
    if (v2Data) {
      console.log('\n/v2/stores response:');
      console.log('Keys:', Object.keys(v2Data));
      if (v2Data.stores?.[0]) {
        console.log('First store keys:', Object.keys(v2Data.stores[0]));
      }
    }
  } finally {
    await browser.close();
  }

  console.log('\nDone!');
}
|
||||
|
||||
// Run the script. On an unhandled failure, log it and mark the process as
// failed so shells and CI see a non-zero exit code instead of a silent 0.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
|
||||
@@ -1,126 +0,0 @@
|
||||
/**
|
||||
* Test script for Jane platform client
|
||||
* Tests the new Jane integration with The Flower Shop Mesa
|
||||
*
|
||||
* Usage: npx ts-node scripts/test-jane-client.ts
|
||||
*/
|
||||
|
||||
import {
|
||||
startSession,
|
||||
endSession,
|
||||
fetchProductsFromUrl,
|
||||
resolveStoreFromUrl,
|
||||
} from '../src/platforms/jane';
|
||||
import { JaneNormalizer } from '../src/hydration/normalizers/jane';
|
||||
|
||||
// Menu URL passed to fetchProductsFromUrl() by main().
const TEST_URL = 'https://theflowershopusa.com/mesa/menu/';
|
||||
|
||||
/**
 * Manual end-to-end check of the Jane platform client.
 *
 * Fetches products for TEST_URL, prints the captured store and the first five
 * products, then runs the captured raw hits through JaneNormalizer and prints
 * the normalized counts plus one sample product. Prints "TEST PASSED" when no
 * step throws; otherwise prints "TEST FAILED" with the error and exits with
 * status 1.
 */
async function main() {
  const divider = '='.repeat(60);

  console.log(divider);
  console.log('Jane Platform Client Test');
  console.log(divider);
  console.log(`Test URL: ${TEST_URL}`);
  console.log('');

  try {
    // Test 1: Fetch products from URL
    console.log('[Test 1] Fetching products from menu URL...');
    const fetched = await fetchProductsFromUrl(TEST_URL);
    const shop = fetched.store;

    console.log('');
    console.log('[Results]');
    console.log(` Store: ${shop?.name || 'Not captured'}`);
    console.log(` Store ID: ${shop?.id || 'N/A'}`);
    console.log(` Products captured: ${fetched.products.length}`);
    console.log(` API responses: ${fetched.responses.length}`);

    if (shop) {
      console.log('');
      console.log('[Store Info]');
      console.log(` Address: ${shop.address}, ${shop.city}, ${shop.state} ${shop.zip}`);
      console.log(` Phone: ${shop.phone}`);
      console.log(` Coordinates: ${shop.lat}, ${shop.long}`);
      console.log(` Medical: ${shop.medical}, Recreational: ${shop.recreational}`);
      console.log(` Rating: ${shop.rating} (${shop.reviews_count} reviews)`);
      console.log(` Product count (store): ${shop.product_count}`);
    }

    if (fetched.products.length > 0) {
      console.log('');
      console.log('[Sample Products (first 5)]');
      fetched.products.slice(0, 5).forEach((item) => {
        // Per-gram price wins; fall back to per-unit price, then to a placeholder.
        const shownPrice = item.price_gram || item.price_each || 'N/A';
        console.log(` - ${item.name} (${item.brand}) - $${shownPrice}`);
        console.log(` Kind: ${item.kind}, Category: ${item.category}, THC: ${item.percent_thc}%`);
      });

      // Test 2: Normalize products
      console.log('');
      console.log('[Test 2] Testing normalizer...');
      const janeNormalizer = new JaneNormalizer();

      // Build a fake payload structure around the raw hits captured above
      const rawHits = fetched.products.map(item => item.raw);
      const now = () => new Date();
      const stubPayload = {
        id: 'test-payload',
        dispensary_id: 9999,
        crawl_run_id: null,
        platform: 'jane',
        payload_version: 1,
        raw_json: { hits: rawHits },
        product_count: fetched.products.length,
        pricing_type: null,
        crawl_mode: null,
        fetched_at: now(),
        processed: false,
        normalized_at: null,
        hydration_error: null,
        hydration_attempts: 0,
        created_at: now(),
      };

      const output = janeNormalizer.normalize(stubPayload);

      console.log(` Products normalized: ${output.products.length}`);
      console.log(` Brands extracted: ${output.brands.length}`);
      console.log(` Categories extracted: ${output.categories.length}`);
      console.log(` Errors: ${output.errors.length}`);

      if (output.products.length > 0) {
        console.log('');
        console.log('[Sample Normalized Product]');
        const first = output.products[0];
        console.log(` External ID: ${first.externalProductId}`);
        console.log(` Name: ${first.name}`);
        console.log(` Brand: ${first.brandName}`);
        console.log(` Category: ${first.category}`);
        console.log(` Type: ${first.type}`);
        console.log(` Strain: ${first.strainType}`);
        console.log(` THC: ${first.thcPercent}%`);
        console.log(` CBD: ${first.cbdPercent}%`);
        console.log(` Image: ${first.primaryImageUrl?.slice(0, 60)}...`);

        // Pricing is looked up by the product's external id.
        const priceEntry = output.pricing.get(first.externalProductId);
        if (priceEntry) {
          console.log(` Price (cents): ${priceEntry.priceRec}`);
          console.log(` On Special: ${priceEntry.isOnSpecial}`);
        }
      }
    }

    console.log('');
    console.log(divider);
    console.log('TEST PASSED');
    console.log(divider);
  } catch (error: any) {
    console.error('');
    console.error(divider);
    console.error('TEST FAILED');
    console.error(divider);
    console.error(`Error: ${error.message}`);
    console.error(error.stack);
    process.exit(1);
  }
}
|
||||
|
||||
// Run the script. On an unhandled failure, log it and mark the process as
// failed so shells and CI see a non-zero exit code instead of a silent 0.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
|
||||
@@ -1,50 +0,0 @@
|
||||
/**
|
||||
* Smoke test: Discover Jane stores in Arizona
|
||||
* Usage: npx ts-node scripts/test-jane-discovery-az.ts
|
||||
*/
|
||||
|
||||
import { discoverStoresByState } from '../src/platforms/jane';
|
||||
|
||||
/**
 * Smoke test for Jane store discovery in Arizona.
 *
 * Calls discoverStoresByState('AZ'), prints how many stores came back and
 * details for the first ten, then prints "SMOKE TEST PASSED". If anything
 * throws, prints "SMOKE TEST FAILED" with the error and exits with status 1.
 */
async function main() {
  const rule = '='.repeat(60);

  console.log(rule);
  console.log('Jane Store Discovery - Arizona Smoke Test');
  console.log(rule);
  console.log('Using local IP (no proxy)\n');

  try {
    const found = await discoverStoresByState('AZ');

    console.log(`\n${rule}`);
    console.log(`RESULTS: Found ${found.length} Jane stores in Arizona`);
    console.log(rule);

    if (found.length > 0) {
      console.log('\nSample stores:');
      found.slice(0, 10).forEach((entry) => {
        console.log(` - ${entry.name}`);
        console.log(` ID: ${entry.storeId} | ${entry.city}, AZ`);
        console.log(` Types: ${entry.storeTypes?.join(', ') || 'unknown'}`);
        console.log(` Products: ${entry.productCount || 'N/A'}`);
        console.log('');
      });

      // Only the first ten are listed; summarize the remainder.
      const remaining = found.length - 10;
      if (found.length > 10) {
        console.log(` ... and ${remaining} more stores`);
      }
    }

    console.log('\n' + rule);
    console.log('SMOKE TEST PASSED');
    console.log(rule);
  } catch (error: any) {
    console.error('\n' + rule);
    console.error('SMOKE TEST FAILED');
    console.error(rule);
    console.error(`Error: ${error.message}`);
    console.error(error.stack);
    process.exit(1);
  }
}
|
||||
|
||||
// Run the script. main() reports its own failures, but attach a handler anyway
// so the returned promise is never left floating and any unexpected rejection
// still produces a log line and a non-zero exit code.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
|
||||
@@ -1,55 +0,0 @@
|
||||
/**
|
||||
* Compare MED vs REC product menus for same location
|
||||
*/
|
||||
import puppeteer from 'puppeteer-extra';
|
||||
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
|
||||
// Register the stealth plugin at module load, before main() launches a browser.
puppeteer.use(StealthPlugin());
|
||||
|
||||
/**
 * Compare the REC (store 3379) and MED (store 4540) menus of the same location.
 *
 * Opens the Jane stores page, then queries the `menu-products-production`
 * search index from inside the page for up to 100 products per store and
 * prints how many product ids are REC-only, MED-only and shared.
 *
 * A non-OK search response now raises an error instead of being reported as
 * an empty menu. The browser is closed even if a step throws.
 */
async function main() {
  const browser = await puppeteer.launch({ headless: 'new', args: ['--no-sandbox'] });

  try {
    const page = await browser.newPage();

    await page.goto('https://www.iheartjane.com/stores', { waitUntil: 'domcontentloaded' });
    await new Promise(r => setTimeout(r, 2000));

    // Queries the product index for one store from page context and returns
    // the product ids of the hits (empty when the response has no `hits`).
    const fetchProductIds = (storeId: number): Promise<number[]> =>
      page.evaluate(async (id) => {
        const res = await fetch('https://search.iheartjane.com/1/indexes/menu-products-production/query', {
          method: 'POST',
          headers: { 'Content-Type': 'application/json' },
          body: JSON.stringify({ query: '', hitsPerPage: 100, filters: `store_id=${id}` }),
        });
        // Without this check a rejected query would look like an empty menu.
        if (!res.ok) {
          throw new Error(`Product query for store ${id} failed with HTTP ${res.status}`);
        }
        const data = await res.json();
        return data.hits?.map((h: any) => h.product_id) || [];
      }, storeId);

    // Fetch REC products (store 3379)
    const recProducts = await fetchProductIds(3379);

    // Fetch MED products (store 4540)
    const medProducts = await fetchProductIds(4540);

    const recSet = new Set(recProducts);
    const medSet = new Set(medProducts);

    const recOnly = recProducts.filter(id => !medSet.has(id)).length;
    const medOnly = medProducts.filter(id => !recSet.has(id)).length;
    const shared = recProducts.filter(id => medSet.has(id)).length;

    // Menus are identical only when neither side has an id the other lacks.
    // Comparing `shared` with the REC count alone would also report
    // "IDENTICAL" when REC is a strict subset of MED.
    let verdict: string;
    if (recProducts.length === 0 && medProducts.length === 0) {
      verdict = 'NO DATA';
    } else if (shared === 0) {
      verdict = 'COMPLETELY DIFFERENT';
    } else if (recOnly === 0 && medOnly === 0) {
      verdict = 'IDENTICAL';
    } else {
      verdict = 'PARTIALLY OVERLAPPING';
    }

    console.log('\nHana Phoenix - MED vs REC comparison (100 products each):');
    console.log(' REC products fetched:', recProducts.length);
    console.log(' MED products fetched:', medProducts.length);
    console.log(' REC-only:', recOnly);
    console.log(' MED-only:', medOnly);
    console.log(' Shared:', shared);
    console.log(' Menus are:', verdict);
  } finally {
    await browser.close();
  }
}
|
||||
|
||||
main().catch(console.error);
|
||||
@@ -1,79 +0,0 @@
|
||||
/**
|
||||
* Find ALL differing fields between MED and REC product payloads
|
||||
*/
|
||||
import puppeteer from 'puppeteer-extra';
|
||||
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
|
||||
puppeteer.use(StealthPlugin());
|
||||
|
||||
/**
 * Takes the first product returned for the REC store (3379), looks the same
 * product ID up in the first 100 hits of the MED store (4540), and prints:
 *   - every key present in either payload,
 *   - every field whose JSON serialization differs between the two,
 *   - the subset of fields whose name suggests a purchase limit.
 *
 * Both search requests are issued with `fetch` from inside a loaded
 * iheartjane.com page via `page.evaluate`.
 */
async function main() {
  const browser = await puppeteer.launch({ headless: 'new', args: ['--no-sandbox'] });

  try {
    const page = await browser.newPage();

    await page.goto('https://www.iheartjane.com/stores', { waitUntil: 'domcontentloaded' });
    // Fixed 2s pause after navigation (presumably to let the page settle).
    await new Promise(r => setTimeout(r, 2000));

    // Get full product payload from REC store
    const recProduct = await page.evaluate(async () => {
      const res = await fetch('https://search.iheartjane.com/1/indexes/menu-products-production/query', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ query: '', hitsPerPage: 1, filters: 'store_id=3379' }),
      });
      const data = await res.json();
      return data.hits?.[0];
    });

    // Without a reference product there is nothing to diff; bail out instead
    // of printing a misleading "payloads are identical" report.
    if (!recProduct) {
      console.error('No product returned for REC store 3379 - nothing to compare');
      process.exitCode = 1;
      return;
    }

    const productId = recProduct.product_id;

    // Get same product from MED store (searched only within the first 100 hits)
    const medProduct = await page.evaluate(async (pid: number) => {
      const res = await fetch('https://search.iheartjane.com/1/indexes/menu-products-production/query', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ query: '', hitsPerPage: 100, filters: 'store_id=4540' }),
      });
      const data = await res.json();
      return data.hits?.find((h: any) => h.product_id === pid);
    }, productId);

    console.log('Product:', recProduct.name, '(ID:', productId, ')\n');

    if (!medProduct) {
      // Every MED value below will be undefined, so flag that the diff is not meaningful.
      console.log('WARNING: product not found in the first 100 MED hits; every field below will show as differing.\n');
    }

    // Get all keys
    const allKeys = new Set([...Object.keys(recProduct), ...Object.keys(medProduct || {})]);
    const sortedKeys = [...allKeys].sort();

    console.log('=== ALL KEYS IN PAYLOAD ===');
    console.log(sortedKeys.join(', '));

    console.log('\n=== FIELDS THAT DIFFER ===');
    let diffCount = 0;
    for (const key of sortedKeys) {
      // Compare by JSON text so nested objects and arrays are compared by value.
      const recVal = JSON.stringify(recProduct[key]);
      const medVal = JSON.stringify(medProduct?.[key]);
      if (recVal !== medVal) {
        diffCount++;
        console.log(`${key}:`);
        console.log(`  REC: ${recVal?.substring(0, 100)}`);
        console.log(`  MED: ${medVal?.substring(0, 100)}`);
      }
    }

    if (diffCount === 0) {
      console.log('(none - payloads are identical)');
    }

    // Check for limit/allowance related fields (simple substring match on the key name)
    console.log('\n=== LIMIT-RELATED FIELDS ===');
    const limitFields = sortedKeys.filter(k =>
      k.includes('limit') || k.includes('max') || k.includes('allow') ||
      k.includes('quantity') || k.includes('cart') || k.includes('medical') ||
      k.includes('rec') || k.includes('weight')
    );
    for (const key of limitFields) {
      console.log(`${key}: REC=${JSON.stringify(recProduct[key])} | MED=${JSON.stringify(medProduct?.[key])}`);
    }
  } finally {
    // Always release the browser, even when navigation or a request fails.
    await browser.close();
  }
}

// Report failures and make the process exit with a non-zero status.
main().catch((error: unknown) => {
  console.error(error);
  process.exitCode = 1;
});
|
||||
@@ -1,35 +0,0 @@
|
||||
/**
|
||||
* Test script to capture and save full Jane payload
|
||||
* Usage: npx ts-node scripts/test-jane-payload.ts
|
||||
*/
|
||||
|
||||
import * as fs from 'fs';
|
||||
import { fetchProductsFromUrl } from '../src/platforms/jane';
|
||||
|
||||
const TEST_URL = 'https://theflowershopusa.com/mesa/menu/';
|
||||
const OUTPUT_FILE = '/tmp/jane-test-payload.json';
|
||||
|
||||
/**
 * Fetches the products for TEST_URL through `fetchProductsFromUrl`, wraps the
 * raw hits and store record in a payload object, writes it to OUTPUT_FILE as
 * pretty-printed JSON, and prints the product count and file size.
 */
async function main() {
  console.log('Fetching Jane payload...');

  const { products, store, responses } = await fetchProductsFromUrl(TEST_URL);

  // Payload mirrors the structure that would be saved
  const rawHits = products.map(product => product.raw);
  const payload = {
    hits: rawHits,
    store: store?.raw || null,
    capturedAt: new Date().toISOString(),
    platform: 'jane',
    storeId: store?.id,
    productCount: products.length,
    responseCount: responses.length,
  };

  // Persist, then report where it went and how large it is
  const serialized = JSON.stringify(payload, null, 2);
  fs.writeFileSync(OUTPUT_FILE, serialized);

  console.log(`\nPayload saved to: ${OUTPUT_FILE}`);
  console.log(`Products: ${products.length}`);
  console.log(`Size: ${Math.round(fs.statSync(OUTPUT_FILE).size / 1024)}KB`);
}
|
||||
|
||||
main().catch(console.error);
|
||||
@@ -1,138 +0,0 @@
|
||||
import puppeteer from 'puppeteer';
|
||||
|
||||
/**
 * Pauses for the given number of milliseconds.
 *
 * @param ms - Delay handed to `setTimeout`.
 * @returns A promise that resolves (with no value) once the timer fires.
 */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
|
||||
|
||||
/**
 * Loads the shop page in a headless browser, records which treez.io endpoints
 * the page calls, logs a summary of every successful gapcommerceapi.com
 * response, and finally issues one product search from inside the page and
 * prints the available product fields plus one sample product.
 */
async function main() {
  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // Capture ALL requests to treez.io (scripts and stylesheets excluded)
    const treezRequests: { url: string; method: string }[] = [];

    page.on('request', (req) => {
      const url = req.url();
      if (url.includes('treez.io') && !url.includes('.js') && !url.includes('.css')) {
        treezRequests.push({
          url: url,
          method: req.method(),
        });
      }
    });

    // Also intercept and capture ES API responses
    page.on('response', async (res) => {
      const url = res.url();
      if (url.includes('gapcommerceapi.com') && res.status() === 200) {
        try {
          const json = await res.json();
          const total = json.hits?.total?.value;
          const count = json.hits?.hits?.length;
          if (total || count) {
            console.log('\nES Response: total=' + total + ', returned=' + count);
            if (json.hits?.hits?.[0]?._source) {
              const src = json.hits.hits[0]._source;
              console.log('First product fields: ' + Object.keys(src).slice(0, 20).join(', '));
            }
          }
        } catch {
          // Best effort: a response whose body cannot be read as JSON is
          // skipped on purpose, this listener only produces diagnostics.
        }
      }
    });

    console.log('Loading /shop page...\n');

    await page.goto('https://shop.bestdispensary.com/shop', {
      waitUntil: 'networkidle2',
      timeout: 60000
    });
    await sleep(3000);

    // Bypass age gate
    const ageGate = await page.$('[data-testid="age-gate-modal"]');
    if (ageGate) {
      const btn = await page.$('[data-testid="age-gate-submit-button"]');
      if (btn) await btn.click();
      await sleep(2000);
    }

    // Click load more several times (at most 5, stopping once the button is gone)
    console.log('\nClicking Load More...');
    for (let i = 0; i < 5; i++) {
      const btn = await page.$('button.collection__load-more');
      if (!btn) break;
      await btn.click();
      await sleep(2000);
    }

    console.log('\n=== TREEZ API ENDPOINTS CALLED ===\n');
    // Strip query strings so each endpoint is listed once.
    const uniqueUrls = [...new Set(treezRequests.map(r => r.url.split('?')[0]))];
    uniqueUrls.forEach(url => console.log(url));

    // Now intercept the ES response data by making a request from browser context
    console.log('\n=== FETCHING ALL PRODUCTS VIA BROWSER ===\n');

    const allProducts = await page.evaluate(async () => {
      // NOTE(review): API key is hard-coded in source; consider moving it out of the repository.
      const apiKey = 'V3jHL9dFzi3Gj4UISM4lr38Nm0GSxcps5OBz1PbS';
      const url = 'https://search-kyrok9udlk.gapcommerceapi.com/product/search';

      const query = {
        from: 0,
        size: 1000,
        query: {
          bool: {
            must: [
              { bool: { filter: { range: { customMinPrice: { gte: 0.01, lte: 500000 }}}}},
              { bool: { should: [{ match: { isAboveThreshold: true }}]}},
              { bool: { should: [{ match: { isHideFromMenu: false }}]}}
            ]
          }
        }
      };

      try {
        const response = await fetch(url, {
          method: 'POST',
          headers: {
            'Content-Type': 'application/json',
            'x-api-key': apiKey,
          },
          body: JSON.stringify(query),
        });

        // Surface HTTP failures instead of reporting "Total products: undefined".
        if (!response.ok) {
          return { error: 'HTTP ' + response.status };
        }

        const data = await response.json();
        return {
          total: data.hits?.total?.value,
          count: data.hits?.hits?.length,
          sample: data.hits?.hits?.[0]?._source,
          allProducts: data.hits?.hits?.map((h: any) => h._source),
        };
      } catch (err: any) {
        return { error: err.message };
      }
    });

    if (allProducts.error) {
      console.log('Error: ' + allProducts.error);
    } else {
      console.log('Total products: ' + allProducts.total);
      console.log('Returned: ' + allProducts.count);

      if (allProducts.sample) {
        console.log('\n=== PRODUCT FIELDS ===\n');
        console.log(Object.keys(allProducts.sample).sort().join('\n'));

        console.log('\n=== SAMPLE PRODUCT ===\n');
        console.log(JSON.stringify(allProducts.sample, null, 2));
      }
    }
  } finally {
    // Always release the browser, even when navigation or evaluation fails.
    await browser.close();
  }
}

// Report failures and make the process exit with a non-zero status.
main().catch((error: unknown) => {
  console.error(error);
  process.exitCode = 1;
});
|
||||
@@ -1,203 +0,0 @@
|
||||
/**
|
||||
* Extract ALL product elements and find unique products
|
||||
*/
|
||||
|
||||
import puppeteer, { Page } from 'puppeteer';
|
||||
|
||||
const STORE_ID = 'best';
|
||||
|
||||
/**
 * Waits for `ms` milliseconds before resolving.
 *
 * @param ms - Timer duration passed to `setTimeout`.
 */
async function sleep(ms: number): Promise<void> {
  const timer = new Promise<void>((wake) => {
    setTimeout(wake, ms);
  });
  return timer;
}
|
||||
|
||||
/**
 * Dismisses the age-gate modal when the page shows one.
 *
 * If the modal element is present, clicks its submit button (when found) and
 * then waits two seconds; does nothing when no modal is present.
 */
async function bypassAgeGate(page: Page): Promise<void> {
  const modal = await page.$('[data-testid="age-gate-modal"]');
  if (!modal) {
    return;
  }

  const submitButton = await page.$('[data-testid="age-gate-submit-button"]');
  if (submitButton) {
    await submitButton.click();
  }
  await sleep(2000);
}
|
||||
|
||||
/**
 * Opens the store's /onlinemenu/brands page and compares several ways of
 * locating product elements in the DOM:
 *   [1] counts every element whose class contains "product_product__",
 *   [2] tries four selector strategies and reports total vs. unique names,
 *   [3] extracts name / href / price from every product link, de-duplicated
 *       by image alt text.
 * Image, font and media requests are aborted while the page loads.
 */
async function main() {
  console.log('='.repeat(60));
  console.log('Extracting ALL product elements');
  console.log('='.repeat(60));

  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // Skip heavy assets; only the DOM is inspected.
    await page.setRequestInterception(true);
    page.on('request', (req) => {
      if (['image', 'font', 'media'].includes(req.resourceType())) {
        req.abort();
      } else {
        req.continue();
      }
    });

    const url = `https://${STORE_ID}.treez.io/onlinemenu/brands?customerType=ADULT`;
    await page.goto(url, { waitUntil: 'networkidle2', timeout: 60000 });
    await sleep(3000);
    await bypassAgeGate(page);
    await sleep(2000);

    // Get ALL elements with product_product__ class
    console.log('\n[1] Counting all product_product__ elements...');

    const elementAnalysis = await page.evaluate(() => {
      const all = document.querySelectorAll('[class*="product_product__"]');
      const byTag: Record<string, number> = {};
      const anchorHrefs: string[] = [];
      const imgAlts: string[] = [];

      all.forEach(el => {
        const tag = el.tagName;
        byTag[tag] = (byTag[tag] || 0) + 1;

        if (tag === 'A') {
          const href = el.getAttribute('href');
          if (href && href.includes('/product/')) {
            anchorHrefs.push(href);
          }
        }

        if (tag === 'IMG') {
          const alt = el.getAttribute('alt');
          if (alt) imgAlts.push(alt);
        }
      });

      return {
        total: all.length,
        byTag,
        anchorHrefs: anchorHrefs.slice(0, 20),
        uniqueAnchors: new Set(anchorHrefs).size,
        imgAlts: imgAlts.slice(0, 20),
        uniqueImgAlts: new Set(imgAlts).size,
      };
    });

    console.log(`Total elements: ${elementAnalysis.total}`);
    console.log(`By tag:`, elementAnalysis.byTag);
    console.log(`Unique anchor hrefs: ${elementAnalysis.uniqueAnchors}`);
    console.log(`Unique image alts: ${elementAnalysis.uniqueImgAlts}`);
    console.log(`\nSample anchor hrefs:`, elementAnalysis.anchorHrefs.slice(0, 5));
    console.log(`Sample image alts:`, elementAnalysis.imgAlts.slice(0, 5));

    // Try to extract using different approaches
    console.log('\n[2] Testing extraction approaches...');

    const approaches = await page.evaluate(() => {
      const results: Record<string, { count: number; unique: number; sample: string[] }> = {};

      // Approach 1: Anchor elements with product links
      const anchors = document.querySelectorAll('a[href*="/product/"]');
      const anchorNames = new Set<string>();
      anchors.forEach(a => {
        const img = a.querySelector('img');
        const name = img?.getAttribute('alt') || a.textContent?.trim().split('\n')[0] || '';
        if (name) anchorNames.add(name);
      });
      results['a[href*="/product/"]'] = {
        count: anchors.length,
        unique: anchorNames.size,
        sample: Array.from(anchorNames).slice(0, 5),
      };

      // Approach 2: Images with alt text inside product areas
      const productImgs = document.querySelectorAll('[class*="product_product__"] img[alt]');
      const imgNames = new Set<string>();
      productImgs.forEach(img => {
        const alt = img.getAttribute('alt');
        if (alt && alt.length > 2) imgNames.add(alt);
      });
      results['[class*="product_product__"] img[alt]'] = {
        count: productImgs.length,
        unique: imgNames.size,
        sample: Array.from(imgNames).slice(0, 5),
      };

      // Approach 3: H5 elements (product names)
      const h5s = document.querySelectorAll('h5.product_product__name__JcEk0, h5[class*="product__name"]');
      const h5Names = new Set<string>();
      h5s.forEach(h5 => {
        const text = h5.textContent?.trim();
        if (text) h5Names.add(text);
      });
      results['h5[class*="product__name"]'] = {
        count: h5s.length,
        unique: h5Names.size,
        sample: Array.from(h5Names).slice(0, 5),
      };

      // Approach 4: Link class with product_product__
      const links = document.querySelectorAll('a.product_product__ERWtJ, a[class*="product_product__"][class*="link"]');
      const linkNames = new Set<string>();
      links.forEach(link => {
        const h5 = link.querySelector('h5');
        const img = link.querySelector('img');
        const name = h5?.textContent?.trim() || img?.getAttribute('alt') || '';
        if (name) linkNames.add(name);
      });
      results['a.product_product__ERWtJ'] = {
        count: links.length,
        unique: linkNames.size,
        sample: Array.from(linkNames).slice(0, 5),
      };

      return results;
    });

    Object.entries(approaches).forEach(([sel, data]) => {
      console.log(`\n${sel}:`);
      console.log(`  Count: ${data.count}, Unique: ${data.unique}`);
      console.log(`  Sample: ${data.sample.join(', ')}`);
    });

    // The best approach: use images with alt as the source of truth
    console.log('\n[3] Full product extraction using img[alt] approach...');

    const products = await page.evaluate(() => {
      const seen = new Set<string>();
      const products: { name: string; href: string; price: string }[] = [];

      // Get all product links; the first link seen for a given name wins
      document.querySelectorAll('a[href*="/product/"]').forEach(a => {
        const img = a.querySelector('img');
        const name = img?.getAttribute('alt') || '';

        if (!name || seen.has(name)) return;
        seen.add(name);

        const href = a.getAttribute('href') || '';

        // Get price from within the link
        let price = '';
        const priceEl = a.querySelector('[class*="price"]');
        if (priceEl) {
          const priceMatch = priceEl.textContent?.match(/\$(\d+(?:\.\d{2})?)/);
          price = priceMatch ? priceMatch[1] : '';
        }

        products.push({ name, href, price });
      });

      return products;
    });

    console.log(`Extracted ${products.length} unique products`);
    console.log('\nSample products:');
    products.slice(0, 10).forEach(p => {
      console.log(`  - ${p.name} | ${p.price ? '$' + p.price : 'N/A'} | ${p.href.slice(0, 40)}...`);
    });
  } finally {
    // Always release the browser, even when navigation or evaluation fails.
    await browser.close();
  }
}

// Report failures and make the process exit with a non-zero status.
main().catch((error: unknown) => {
  console.error(error);
  process.exitCode = 1;
});
|
||||
@@ -1,52 +0,0 @@
|
||||
import axios from 'axios';
|
||||
|
||||
/**
 * Posts a product search (first 500 hits) to the gapcommerceapi.com
 * Elasticsearch endpoint, sending only a Content-Type header, and prints the
 * total hit count, the number of hits returned, the field names of the first
 * product and that product as JSON. Request failures are logged, not thrown.
 */
async function main() {
  const url = 'https://search-kyrok9udlk.gapcommerceapi.com/product/search';

  const query = {
    from: 0,
    size: 500,
    query: {
      bool: {
        must: [
          { bool: { filter: { range: { customMinPrice: { gte: 0.01, lte: 500000 }}}}},
          { bool: { should: [{ match: { isAboveThreshold: true }}]}},
          { bool: { should: [{ match: { isHideFromMenu: false }}]}}
        ]
      }
    }
  };

  console.log('Querying Treez Elasticsearch API...\n');

  try {
    const response = await axios.post(url, query, {
      headers: { 'Content-Type': 'application/json' },
      // Bound the wait so an unresponsive endpoint cannot hang the script.
      timeout: 30000,
    });

    const data = response.data;
    // `hits.total` is either an object carrying `value` or a plain number.
    // `??` (not `||`) keeps a legitimate count of 0 instead of falling through
    // to the object and printing "[object Object]".
    const total = data.hits?.total?.value ?? data.hits?.total;
    const products = data.hits?.hits || [];

    console.log('Total products: ' + total);
    console.log('Products returned: ' + products.length + '\n');

    if (products.length > 0) {
      const first = products[0]._source;
      console.log('=== PRODUCT FIELDS AVAILABLE ===\n');
      console.log(Object.keys(first).sort().join('\n'));

      console.log('\n=== SAMPLE PRODUCT ===\n');
      console.log(JSON.stringify(first, null, 2));
    }

  } catch (err: any) {
    console.log('Error: ' + err.message);
    if (err.response) {
      console.log('Status: ' + err.response.status);
      console.log('Data: ' + JSON.stringify(err.response.data));
    }
  }
}
|
||||
|
||||
main();
|
||||
@@ -1,97 +0,0 @@
|
||||
import axios from 'axios';
|
||||
|
||||
/**
 * Posts a product search (first 1000 hits) to the gapcommerceapi.com
 * Elasticsearch endpoint using an `x-api-key` header and prints the hit
 * counts, the first product's field names and the first product as JSON.
 * Request failures are logged, not thrown.
 */
async function testElasticsearchApi(): Promise<void> {
  console.log('=== ELASTICSEARCH API ===\n');

  const esUrl = 'https://search-kyrok9udlk.gapcommerceapi.com/product/search';
  // NOTE(review): API key is hard-coded in source; consider moving it out of the repository.
  const apiKey = 'V3jHL9dFzi3Gj4UISM4lr38Nm0GSxcps5OBz1PbS';

  const query = {
    from: 0,
    size: 1000,
    query: {
      bool: {
        must: [
          { bool: { filter: { range: { customMinPrice: { gte: 0.01, lte: 500000 }}}}},
          { bool: { should: [{ match: { isAboveThreshold: true }}]}},
          { bool: { should: [{ match: { isHideFromMenu: false }}]}}
        ]
      }
    }
  };

  try {
    const response = await axios.post(esUrl, query, {
      headers: {
        'Content-Type': 'application/json',
        'x-api-key': apiKey,
        'Origin': 'https://shop.bestdispensary.com',
        'Referer': 'https://shop.bestdispensary.com/',
      },
      timeout: 30000,
    });

    const data = response.data;
    // `hits.total` is either an object carrying `value` or a plain number.
    // `??` (not `||`) keeps a legitimate count of 0 instead of falling through
    // to the object and printing "[object Object]".
    const total = data.hits?.total?.value ?? data.hits?.total;
    const products = data.hits?.hits || [];

    console.log('Total products: ' + total);
    console.log('Products returned: ' + products.length);

    if (products.length > 0) {
      const first = products[0]._source;
      console.log('\n=== PRODUCT FIELDS ===\n');
      console.log(Object.keys(first).sort().join('\n'));

      console.log('\n=== SAMPLE PRODUCT ===\n');
      console.log(JSON.stringify(first, null, 2));
    }

  } catch (err: any) {
    console.log('Elasticsearch Error: ' + err.message);
    if (err.response) {
      console.log('Status: ' + err.response.status);
    }
  }
}

/**
 * Requests the discounts listing from the Treez headless API for dispensary
 * "best" and prints a truncated view of the response (first array item, or
 * the object's keys and body). Request failures are logged, not thrown.
 */
async function testTreezHeadlessApi(): Promise<void> {
  console.log('\n\n=== TREEZ HEADLESS API ===\n');

  const treezUrl = 'https://headless.treez.io/v2.0/dispensary/best/ecommerce/discounts?excludeInactive=true&hideUnset=true&includeProdInfo=true';

  try {
    const response = await axios.get(treezUrl, {
      headers: {
        // NOTE(review): client credentials are hard-coded in source; consider moving them out of the repository.
        'client_id': '29dce682258145c6b1cf71027282d083',
        'client_secret': 'A57bB49AfD7F4233B1750a0B501B4E16',
        'cache-control': 'max-age=0, no-cache, must-revalidate, proxy-revalidate',
        'Origin': 'https://shop.bestdispensary.com',
        'Referer': 'https://shop.bestdispensary.com/',
      },
      timeout: 30000,
    });

    const data = response.data;
    console.log('Response type: ' + typeof data);

    if (Array.isArray(data)) {
      console.log('Array length: ' + data.length);
      if (data.length > 0) {
        console.log('First item: ' + JSON.stringify(data[0], null, 2).slice(0, 1000));
      }
    } else if (data !== null && typeof data === 'object') {
      console.log('Keys: ' + Object.keys(data).join(', '));
      console.log('Data: ' + JSON.stringify(data, null, 2).slice(0, 2000));
    } else {
      // Strings, numbers, null or an empty body have no keys worth listing;
      // String() also covers JSON.stringify returning undefined.
      console.log('Data: ' + String(JSON.stringify(data)).slice(0, 2000));
    }

  } catch (err: any) {
    console.log('Treez Error: ' + err.message);
    if (err.response) {
      console.log('Status: ' + err.response.status);
      console.log('Data: ' + JSON.stringify(err.response.data).slice(0, 500));
    }
  }
}

/**
 * Runs the two API probes in order: the Elasticsearch product search first,
 * then the Treez headless discounts endpoint.
 */
async function main() {
  // Test Elasticsearch API with API key
  await testElasticsearchApi();

  // Test Treez Headless API
  await testTreezHeadlessApi();
}
|
||||
|
||||
main();
|
||||
@@ -1,243 +0,0 @@
|
||||
/**
|
||||
* Visit each brand page and extract products
|
||||
*/
|
||||
|
||||
import puppeteer, { Page } from 'puppeteer';
|
||||
|
||||
const STORE_ID = 'best';
|
||||
|
||||
/**
 * Returns a promise that settles after `ms` milliseconds.
 *
 * @param ms - Delay handed to `setTimeout`.
 */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((finish) => {
    setTimeout(finish, ms);
  });
}
|
||||
|
||||
/**
 * Clicks through the age-gate modal if the page is showing one.
 *
 * When the modal element exists, its submit button is clicked (if found) and
 * the function then waits two seconds; otherwise it returns immediately.
 */
async function bypassAgeGate(page: Page): Promise<void> {
  const gate = await page.$('[data-testid="age-gate-modal"]');
  if (gate === null) {
    return;
  }

  const confirm = await page.$('[data-testid="age-gate-submit-button"]');
  if (confirm !== null) {
    await confirm.click();
  }
  await sleep(2000);
}
|
||||
|
||||
/**
 * Repeatedly scrolls to the bottom of the page, waiting one second after each
 * scroll, until the document height has been unchanged for three consecutive
 * checks or 30 passes have been made.
 */
async function scrollToLoadAll(page: Page): Promise<void> {
  const maxPasses = 30;
  const stableChecksToStop = 3;

  let lastHeight = 0;
  let stableChecks = 0;
  let pass = 0;

  while (pass < maxPasses) {
    const height = await page.evaluate(() => document.body.scrollHeight);

    // Count consecutive checks with no growth; any growth resets the streak.
    stableChecks = height === lastHeight ? stableChecks + 1 : 0;
    if (stableChecks >= stableChecksToStop) {
      return;
    }

    await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
    await sleep(1000);

    lastHeight = height;
    pass += 1;
  }
}
|
||||
|
||||
/**
 * Collects one entry per distinct product name from every link on the page
 * whose href contains "/product/".
 *
 * The name comes from the link's image alt text, falling back to its <h5>
 * text; links without a name, or with a name already seen, are skipped. The
 * price is the first "$<number>" found in a descendant whose class contains
 * "price", or an empty string when there is none.
 */
async function extractProducts(page: Page): Promise<{ name: string; price: string; href: string }[]> {
  return page.evaluate(() => {
    const collected: { name: string; price: string; href: string }[] = [];
    const namesSeen = new Set<string>();
    const anchors = Array.from(document.querySelectorAll('a[href*="/product/"]'));

    for (const anchor of anchors) {
      const href = anchor.getAttribute('href') || '';
      const altText = anchor.querySelector('img')?.getAttribute('alt');
      const headingText = anchor.querySelector('h5')?.textContent?.trim();
      const name = altText || headingText || '';

      if (!name || namesSeen.has(name)) {
        continue;
      }
      namesSeen.add(name);

      const priceText = anchor.querySelector('[class*="price"]')?.textContent;
      const priceMatch = priceText?.match(/\$(\d+(?:\.\d{2})?)/);

      collected.push({ name, price: priceMatch ? priceMatch[1] : '', href });
    }

    return collected;
  });
}
|
||||
|
||||
/**
 * Explores how brands are exposed on the store's /onlinemenu/brands page:
 *   [1] derives candidate brand slugs from product URLs,
 *   [2] looks for direct links to /brand/ pages,
 *   [3] reads brand names from section headers,
 *   [4] tries several brand-page URL patterns for the first brand found,
 *   [5] checks whether the section headers are wrapped in (or contain) links.
 * Image, font and media requests are aborted while pages load.
 */
async function main() {
  console.log('='.repeat(60));
  console.log('Extracting Products from All Brands');
  console.log('='.repeat(60));

  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // Skip heavy assets; only the DOM is inspected.
    await page.setRequestInterception(true);
    page.on('request', (req) => {
      if (['image', 'font', 'media'].includes(req.resourceType())) {
        req.abort();
      } else {
        req.continue();
      }
    });

    // Go to brands page and get all brand links
    const brandsUrl = `https://${STORE_ID}.treez.io/onlinemenu/brands?customerType=ADULT`;
    console.log(`\n[1] Getting brand list from ${brandsUrl}`);

    await page.goto(brandsUrl, { waitUntil: 'networkidle2', timeout: 60000 });
    await sleep(3000);
    await bypassAgeGate(page);
    await sleep(2000);

    // The 142 items on /brands ARE brands (shown as product cards with brand info)
    // Get the brand names from the product hrefs (they contain brand name in URL)
    const brandInfo = await page.evaluate(() => {
      const brands: { name: string; slug: string }[] = [];
      const seen = new Set<string>();

      // Extract brand info from product URLs
      // URL pattern: /product/{brand}-{product}-{details}
      document.querySelectorAll('a[href*="/product/"]').forEach(a => {
        const href = a.getAttribute('href') || '';
        // Try to extract brand from URL - first one or two dash-separated segments
        const match = href.match(/\/product\/([^-]+(?:-[^-]+)?)-/);
        if (match) {
          const slug = match[1];
          if (!seen.has(slug)) {
            seen.add(slug);
            // Also look for brand text in the card
            const brandEl = a.querySelector('[class*="brand"], [class*="Brand"]');
            const name = brandEl?.textContent?.trim() || slug;
            brands.push({ name, slug });
          }
        }
      });

      return brands;
    });

    console.log(`Found ${brandInfo.length} potential brands from product URLs`);
    console.log('Sample:', brandInfo.slice(0, 5));

    // Actually, let's look for brand page links directly
    console.log('\n[2] Looking for brand page links...');

    const brandLinks = await page.evaluate(() => {
      const links: { name: string; href: string }[] = [];

      // Look for links to /brand/ pages
      document.querySelectorAll('a[href*="/brand/"]').forEach(a => {
        const href = a.getAttribute('href') || '';
        const text = a.textContent?.trim() || '';
        if (href && !links.some(l => l.href === href)) {
          links.push({ name: text, href });
        }
      });

      return links;
    });

    console.log(`Found ${brandLinks.length} brand page links`);
    if (brandLinks.length > 0) {
      console.log('Sample:', brandLinks.slice(0, 10));
    }

    // If no brand links, try to find them in section headers
    console.log('\n[3] Looking for brand sections...');

    const brandSections = await page.evaluate(() => {
      const sections: { brandName: string; sampleProduct: string }[] = [];

      document.querySelectorAll('[class*="products_product__section"]').forEach(section => {
        const header = section.querySelector('h2, h3, [class*="heading"]');
        const brandName = header?.textContent?.trim() || '';
        const firstProduct = section.querySelector('a[href*="/product/"]');
        const productName = firstProduct?.querySelector('h5')?.textContent?.trim() ||
                           firstProduct?.querySelector('img')?.getAttribute('alt') || '';

        if (brandName) {
          sections.push({ brandName, sampleProduct: productName });
        }
      });

      return sections;
    });

    console.log(`Found ${brandSections.length} brand sections`);
    brandSections.slice(0, 10).forEach(s => {
      console.log(`  - Brand: "${s.brandName}" | Sample: "${s.sampleProduct}"`);
    });

    // Try visiting a brand page directly using the section name
    if (brandSections.length > 0) {
      console.log('\n[4] Testing brand page URLs...');

      // Try different URL patterns for first brand
      const testBrand = brandSections[0].brandName;
      const testSlug = testBrand.toLowerCase().replace(/[^a-z0-9]+/g, '-');

      const urlPatterns = [
        `/onlinemenu/brand/${encodeURIComponent(testBrand)}`,
        `/onlinemenu/brand/${testSlug}`,
        `/brand/${encodeURIComponent(testBrand)}`,
        `/brand/${testSlug}`,
      ];

      for (const path of urlPatterns) {
        const testUrl = `https://${STORE_ID}.treez.io${path}?customerType=ADULT`;
        try {
          console.log(`  Trying: ${testUrl}`);
          await page.goto(testUrl, { waitUntil: 'networkidle2', timeout: 15000 });
          await sleep(2000);

          const products = await extractProducts(page);
          console.log(`    Products found: ${products.length}`);

          if (products.length > 0) {
            console.log(`    ✓ Working URL pattern: ${path}`);
            break;
          }
        } catch (e: unknown) {
          // A failed pattern is expected; log it briefly and try the next one.
          const message = e instanceof Error ? e.message : String(e);
          console.log(`    Error: ${message.slice(0, 50)}`);
        }
      }
    }

    // Check if clicking on a brand section leads to a brand page
    console.log('\n[5] Checking if brand sections have clickable headers...');

    await page.goto(brandsUrl, { waitUntil: 'networkidle2', timeout: 60000 });
    await sleep(3000);

    const clickableHeaders = await page.evaluate(() => {
      const results: { text: string; tag: string; href: string; clickable: boolean }[] = [];

      document.querySelectorAll('[class*="products_product__section"] h2, [class*="products_product__section"] h3').forEach(header => {
        // A header counts as clickable when it sits inside a link or contains one.
        const link = header.closest('a') || header.querySelector('a');
        const text = header.textContent?.trim() || '';
        const href = link?.getAttribute('href') || '';

        results.push({
          text,
          tag: header.tagName,
          href,
          clickable: !!link,
        });
      });

      return results;
    });

    console.log('Section headers:');
    clickableHeaders.slice(0, 10).forEach(h => {
      console.log(`  [${h.tag}] "${h.text}" - ${h.clickable ? `Link: ${h.href}` : 'Not clickable'}`);
    });
  } finally {
    // Always release the browser, even when navigation or evaluation fails.
    await browser.close();
  }
}

// Report failures and make the process exit with a non-zero status.
main().catch((error: unknown) => {
  console.error(error);
  process.exitCode = 1;
});
|
||||
@@ -1,183 +0,0 @@
|
||||
/**
|
||||
* Detailed brand section analysis
|
||||
*/
|
||||
|
||||
import puppeteer, { Page } from 'puppeteer';
|
||||
|
||||
const STORE_ID = 'best';
|
||||
|
||||
/**
 * Delays the caller for `ms` milliseconds.
 *
 * @param ms - Timer duration passed to `setTimeout`.
 */
async function sleep(ms: number): Promise<void> {
  const delay = new Promise<void>((release) => {
    setTimeout(release, ms);
  });
  return delay;
}
|
||||
|
||||
/**
 * Dismisses the age-gate modal when one is present.
 *
 * If the modal element exists, logs that it was detected, clicks its submit
 * button (when found) and waits two seconds; otherwise returns immediately.
 */
async function bypassAgeGate(page: Page): Promise<void> {
  const modal = await page.$('[data-testid="age-gate-modal"]');
  if (!modal) {
    return;
  }

  console.log('  Age gate detected, bypassing...');
  const submitButton = await page.$('[data-testid="age-gate-submit-button"]');
  if (submitButton) {
    await submitButton.click();
  }
  await sleep(2000);
}
|
||||
|
||||
/**
 * Explore how the Treez "brands" menu page groups products; everything is
 * reported through console output only.
 *
 * Steps:
 *   1. Open the brands page for `STORE_ID`, dismiss the age gate and scroll
 *      (at most 30 times) until the page height is unchanged three checks in a row.
 *   2. List every h2/h3 heading on the page.
 *   3. Count product links inside each `products_product__section` container.
 *   4. Guess a brand for each uniquely named product card and tally per brand.
 *
 * The browser is closed in a `finally` block so a failed navigation or
 * evaluation does not leave the launched browser running.
 */
async function main(): Promise<void> {
  console.log('='.repeat(60));
  console.log('Detailed Brand Section Analysis');
  console.log('='.repeat(60));

  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // Skip images, fonts and media; only the DOM structure is inspected.
    await page.setRequestInterception(true);
    page.on('request', (req) => {
      if (['image', 'font', 'media'].includes(req.resourceType())) {
        req.abort();
      } else {
        req.continue();
      }
    });

    const url = `https://${STORE_ID}.treez.io/onlinemenu/brands?customerType=ADULT`;
    console.log(`\nNavigating to ${url}`);

    await page.goto(url, { waitUntil: 'networkidle2', timeout: 60000 });
    await sleep(3000);
    await bypassAgeGate(page);
    await sleep(2000);

    // Scroll multiple times to load all content
    console.log('\n[1] Scrolling to load all content...');
    let previousHeight = 0;
    let unchangedChecks = 0;

    for (let i = 0; i < 30; i++) {
      await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
      await sleep(1500);

      const currentHeight = await page.evaluate(() => document.body.scrollHeight);
      const productCount = await page.evaluate(
        () => document.querySelectorAll('a[href*="/product/"]').length
      );

      console.log(` Scroll ${i + 1}: height=${currentHeight}, products=${productCount}`);

      // Stop once the height has not changed for three consecutive checks.
      if (currentHeight === previousHeight) {
        unchangedChecks++;
        if (unchangedChecks >= 3) break;
      } else {
        unchangedChecks = 0;
      }
      previousHeight = currentHeight;
    }

    // Look at ALL h2/h3 headers on page
    console.log('\n[2] Finding ALL h2/h3 headers on page...');

    const headers = await page.evaluate(() => {
      const results: { tag: string; text: string; parentClass: string }[] = [];

      document.querySelectorAll('h2, h3').forEach((el: Element) => {
        results.push({
          tag: el.tagName,
          text: el.textContent?.trim().slice(0, 80) || '',
          parentClass: el.parentElement?.className?.slice(0, 50) || '',
        });
      });

      return results;
    });

    console.log(`Found ${headers.length} headers:`);
    headers.forEach((h: { tag: string; text: string }) =>
      console.log(` [${h.tag}] "${h.text}"`)
    );

    // Get products grouped by their section heading
    console.log('\n[3] Getting products per section...');

    const sectionProducts = await page.evaluate(() => {
      const results: { heading: string; products: number }[] = [];

      // Find all sections that contain products
      document
        .querySelectorAll('[class*="products_product__section"]')
        .forEach((section: Element) => {
          const heading = section.querySelector('h2, h3');
          const headingText = heading?.textContent?.trim() || 'Unknown';
          const products = section.querySelectorAll('a[href*="/product/"]');

          results.push({
            heading: headingText,
            products: products.length,
          });
        });

      return results;
    });

    console.log(`Found ${sectionProducts.length} brand sections:`);
    let totalProducts = 0;
    sectionProducts.forEach((s: { heading: string; products: number }) => {
      console.log(` ${s.heading}: ${s.products} products`);
      totalProducts += s.products;
    });
    console.log(`\nTotal products across all sections: ${totalProducts}`);

    // Also extract brand from each product's URL/card
    console.log('\n[4] Extracting brand from product URLs/cards...');

    const brandCounts = await page.evaluate(() => {
      const byBrand: Record<string, number> = {};
      const seen = new Set<string>();

      document.querySelectorAll('a[href*="/product/"]').forEach((a: Element) => {
        const img = a.querySelector('img');
        const name = img?.getAttribute('alt') || '';

        // Cards are de-duplicated by the image alt text, used as the product name.
        if (!name || seen.has(name)) return;
        seen.add(name);

        // Heuristic: the first short span/p text that is neither the product
        // name nor a price is taken to be the brand.
        let brand = '';
        a.querySelectorAll('span, p').forEach((span: Element) => {
          const text = span.textContent?.trim() || '';
          if (text && text.length < 50 && text !== name && !text.includes('$')) {
            if (!brand) brand = text;
          }
        });

        // Fallback: get brand from parent section heading
        if (!brand) {
          const section = a.closest('[class*="products_product__section"]');
          const heading = section?.querySelector('h2, h3');
          brand = heading?.textContent?.trim() || 'Unknown';
        }

        byBrand[brand] = (byBrand[brand] || 0) + 1;
      });

      return byBrand;
    });

    console.log('Products by brand:');
    Object.entries(brandCounts)
      .sort((a, b) => b[1] - a[1])
      .forEach(([brand, count]) => {
        console.log(` ${brand}: ${count}`);
      });

    const uniqueTotal = Object.values(brandCounts).reduce((sum, c) => sum + c, 0);
    console.log(`\nTotal unique products: ${uniqueTotal}`);
  } finally {
    // Always release the browser, including when a step above throws.
    await browser.close();
  }
}
|
||||
|
||||
// Script entry point: run main() and print any rejection via console.error.
main().catch(console.error);
|
||||
@@ -1,257 +0,0 @@
|
||||
/**
|
||||
* Test Treez brand-based product extraction
|
||||
* 1. Load /brands page
|
||||
* 2. Click "load more brands" to get all brands
|
||||
* 3. Extract brand URLs
|
||||
* 4. Visit each brand and extract products
|
||||
*/
|
||||
|
||||
import puppeteer, { Page } from 'puppeteer';
|
||||
|
||||
// Treez store identifier; interpolated as the subdomain of the `<id>.treez.io` URLs this script opens.
const STORE_ID = 'best';
|
||||
|
||||
/**
 * Wait for approximately `ms` milliseconds before continuing.
 *
 * @param ms - Timer duration in milliseconds.
 * @returns A promise resolved (without a value) when the timer expires.
 */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((wake) => {
    setTimeout(wake, ms);
  });
}
|
||||
|
||||
/**
 * Click through the age-verification modal when the page displays it.
 *
 * Returns straight away if the modal is not in the DOM. When it is, the submit
 * button is clicked if present and the function then waits two seconds.
 *
 * @param page - Puppeteer page to inspect.
 */
async function bypassAgeGate(page: Page): Promise<void> {
  const modal = await page.$('[data-testid="age-gate-modal"]');
  if (!modal) {
    return;
  }

  console.log('[AgeGate] Detected, bypassing...');
  const submitButton = await page.$('[data-testid="age-gate-submit-button"]');
  if (submitButton) {
    await submitButton.click();
  }
  await sleep(2000);
}
|
||||
|
||||
/**
 * Log which controls on the current page might reveal more brands.
 *
 * Despite its name this function does not click or select anything; it only
 * inspects the DOM and prints three reports:
 *   - every `<select>` element with the text of its options,
 *   - buttons/links whose text contains "load more", "show all" or "view all",
 *   - up to ten elements whose class name contains "brand" or "select".
 *
 * @param page - Puppeteer page already navigated to the brands listing.
 */
async function loadAllBrands(page: Page): Promise<void> {
  console.log('[Brands] Looking for "load more" option...');

  // Dropdowns and the labels of their options.
  const selectInfo = await page.evaluate(() =>
    Array.from(document.querySelectorAll('select')).map((dropdown, index) => ({
      selector: `select:nth-of-type(${index + 1})`,
      options: Array.from(dropdown.options).map((option) => option.text),
    }))
  );
  console.log('[Brands] Found selects:', JSON.stringify(selectInfo, null, 2));

  // Clickable elements whose text hints at expanding the list.
  const loadMoreButtons = await page.evaluate(() => {
    const phrases = ['load more', 'show all', 'view all'];
    return Array.from(document.querySelectorAll('button, a, [role="button"]'))
      .filter((candidate) => {
        const lowered = candidate.textContent?.toLowerCase() || '';
        return phrases.some((phrase) => lowered.includes(phrase));
      })
      .map((candidate) => ({
        text: candidate.textContent?.trim() || '',
        tag: candidate.tagName,
      }));
  });
  console.log('[Brands] Found load more buttons:', loadMoreButtons);

  // Any element whose class name mentions "brand" or "select" (first 20 kept).
  const brandElements = await page.evaluate(() => {
    const hits: { tag: string; class: string; text: string }[] = [];

    Array.from(document.querySelectorAll('*')).forEach((node) => {
      const classText = node.className?.toString?.() || '';
      const lowered = classText.toLowerCase();
      if (!lowered.includes('brand') && !lowered.includes('select')) {
        return;
      }
      const snippet = node.textContent?.trim().slice(0, 100) || '';
      hits.push({
        tag: node.tagName,
        class: classText.slice(0, 100),
        text: snippet.slice(0, 50),
      });
    });

    return hits.slice(0, 20);
  });

  const preview = brandElements.slice(0, 10);
  console.log('[Brands] Brand-related elements:', JSON.stringify(preview, null, 2));
}
|
||||
|
||||
/**
 * Collect brand links from the current page.
 *
 * Four selectors are tried in order (`/brand/` hrefs, `/brands/` hrefs, and
 * anchors inside elements whose class contains "brand" or "Brand"). A link is
 * kept when it has both an href and non-empty text and its href has not been
 * seen before.
 *
 * @param page - Puppeteer page to scan.
 * @returns Link text and raw `href` for each distinct brand link, in discovery order.
 */
async function extractBrandLinks(page: Page): Promise<{ name: string; url: string }[]> {
  const discovered = await page.evaluate(() => {
    const found: { name: string; url: string }[] = [];
    const seenUrls = new Set<string>();

    // Look for brand cards/links
    const candidateSelectors = [
      'a[href*="/brand/"]',
      'a[href*="/brands/"]',
      '[class*="brand"] a',
      '[class*="Brand"] a',
    ];

    for (const selector of candidateSelectors) {
      for (const anchor of Array.from(document.querySelectorAll(selector))) {
        const url = anchor.getAttribute('href');
        const name = anchor.textContent?.trim() || '';
        if (!url || !name || seenUrls.has(url)) {
          continue;
        }
        seenUrls.add(url);
        found.push({ name, url });
      }
    }

    return found;
  });

  return discovered;
}
|
||||
|
||||
/**
 * Scroll the current page to its end and scrape the product cards on it.
 *
 * Scrolling stops after 20 passes or once the page height has been unchanged
 * for three consecutive measurements. Cards are matched by a class containing
 * `product_product__` and de-duplicated by product name.
 *
 * @param page - Puppeteer page showing a product listing.
 * @returns One `{ productId, name, price }` object per unique product name;
 *   `price` is `null` when no `$` amount is found, and `productId` falls back
 *   to a slug derived from the name when the card has no `/product/` link.
 */
async function extractProductsFromBrandPage(page: Page): Promise<any[]> {
  // Scroll to load all products
  let lastHeight = 0;
  let unchangedPasses = 0;

  for (let pass = 0; pass < 20; pass += 1) {
    const height = await page.evaluate(() => document.body.scrollHeight);

    unchangedPasses = height === lastHeight ? unchangedPasses + 1 : 0;
    if (unchangedPasses >= 3) {
      break;
    }

    await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
    await sleep(1000);
    lastHeight = height;
  }

  // Extract products
  const scraped = await page.evaluate(() => {
    const collected: any[] = [];
    const knownNames = new Set<string>();

    for (const card of Array.from(document.querySelectorAll('[class*="product_product__"]'))) {
      const name =
        card.querySelector('[class*="product__name"], [class*="name__"]')?.textContent?.trim() || '';
      if (!name || knownNames.has(name)) {
        continue;
      }
      knownNames.add(name);

      // First "$<digits>[.<2 digits>]" occurrence in the price element.
      const priceText = card.querySelector('[class*="price"]')?.textContent || '';
      const priceMatch = /\$(\d+(?:\.\d{2})?)/.exec(priceText);
      const price = priceMatch ? parseFloat(priceMatch[1]) : null;

      // Path segment following "/product/" in the card's link, if any.
      const href = card.querySelector('a[href*="/product/"]')?.getAttribute('href') || '';
      const idMatch = /\/product\/([^\/?]+)/.exec(href);
      const productId = idMatch ? idMatch[1] : '';

      collected.push({
        productId: productId || `treez_${name.replace(/\s+/g, '_').toLowerCase().slice(0, 30)}`,
        name,
        price,
      });
    }

    return collected;
  });

  return scraped;
}
|
||||
|
||||
/**
 * Probe brand-based product extraction on the Treez brands page.
 *
 * Opens the brands page for `STORE_ID`, saves a screenshot, logs candidate
 * "load more" controls, collects brand links, then visits up to three of them
 * and scrapes their products to extrapolate a rough catalogue size.
 *
 * The estimate divides by the number of brands actually sampled rather than a
 * fixed 3, so it stays correct when fewer than three brand links exist. All
 * page work runs inside the `try`, so the browser is closed even when page
 * setup itself fails.
 */
async function main(): Promise<void> {
  console.log('='.repeat(60));
  console.log('Testing Treez Brand-Based Extraction');
  console.log('='.repeat(60));

  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // Block images (plus fonts and media) to speed up page loads.
    await page.setRequestInterception(true);
    page.on('request', (req) => {
      if (['image', 'font', 'media'].includes(req.resourceType())) {
        req.abort();
      } else {
        req.continue();
      }
    });

    // Navigate to brands page
    const brandsUrl = `https://${STORE_ID}.treez.io/onlinemenu/brands?customerType=ADULT`;
    console.log(`\n[1] Navigating to ${brandsUrl}`);
    await page.goto(brandsUrl, { waitUntil: 'networkidle2', timeout: 60000 });
    await sleep(2000);
    await bypassAgeGate(page);
    await sleep(1000);

    // Screenshot to see what we're working with
    await page.screenshot({ path: '/tmp/treez-brands-page.png', fullPage: false });
    console.log('[1] Screenshot saved to /tmp/treez-brands-page.png');

    // Try to load all brands
    console.log('\n[2] Exploring brand selection options...');
    await loadAllBrands(page);

    // Extract brand links
    console.log('\n[3] Extracting brand links...');
    const brandLinks = await extractBrandLinks(page);
    console.log(`Found ${brandLinks.length} brand links:`);
    brandLinks.slice(0, 10).forEach((b) => console.log(` - ${b.name}: ${b.url}`));

    // If we found brand links, visit a couple to test
    if (brandLinks.length > 0) {
      const sampleBrands = brandLinks.slice(0, 3);
      console.log(
        `\n[4] Testing product extraction from first ${sampleBrands.length} brands...`
      );

      let totalProducts = 0;

      for (const brand of sampleBrands) {
        // Brand hrefs may be relative; resolve them against the store origin.
        const brandUrl = brand.url.startsWith('http')
          ? brand.url
          : `https://${STORE_ID}.treez.io${brand.url}`;

        console.log(`\n Visiting brand: ${brand.name}`);
        console.log(` URL: ${brandUrl}`);

        await page.goto(brandUrl, { waitUntil: 'networkidle2', timeout: 30000 });
        await sleep(2000);

        const products = await extractProductsFromBrandPage(page);
        console.log(` Products found: ${products.length}`);

        totalProducts += products.length;
      }

      console.log(`\n[5] Summary from ${sampleBrands.length} brands: ${totalProducts} products`);
      // Average per sampled brand, scaled to the full number of brand links.
      const estimatedTotal = Math.round(
        (totalProducts / sampleBrands.length) * brandLinks.length
      );
      console.log(`Estimated total (${brandLinks.length} brands): ~${estimatedTotal} products`);
    }
  } catch (error: unknown) {
    console.error('Error:', error instanceof Error ? error.message : error);
  } finally {
    await browser.close();
  }
}
|
||||
|
||||
// Script entry point: run main() and print any rejection via console.error.
main().catch(console.error);
|
||||
@@ -1,113 +0,0 @@
|
||||
import puppeteer from 'puppeteer';
|
||||
|
||||
/**
 * Delay execution for about `ms` milliseconds.
 *
 * @param ms - Number of milliseconds to wait.
 * @returns A promise that resolves with no value after the delay.
 */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((finish) => {
    setTimeout(finish, ms);
  });
}
|
||||
|
||||
/**
 * Load the shop page and report which request headers and page globals look
 * like API credentials.
 *
 * Every request whose URL contains `treez.io` or `gapcommerce` is recorded.
 * After the page has loaded (and the age gate, if shown, is dismissed) the
 * script prints:
 *   - per request: headers whose name contains "auth", "token", "key" or "api",
 *     plus the full header list for `headless.treez.io` requests;
 *   - the Next.js `pageProps` keys and `runtimeConfig`, when present;
 *   - any other globals collected from `window` (previously gathered but never shown).
 *
 * The browser is closed in a `finally` block so it is released on failure too.
 */
async function main(): Promise<void> {
  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // Capture request headers for API calls
    const apiRequests: {
      url: string;
      method: string;
      headers: Record<string, string>;
      postData: string | undefined;
    }[] = [];

    page.on('request', (req) => {
      const url = req.url();
      if (url.includes('treez.io') || url.includes('gapcommerce')) {
        apiRequests.push({
          url,
          method: req.method(),
          headers: req.headers(),
          postData: req.postData(),
        });
      }
    });

    console.log('Loading page to capture API auth headers...\n');

    await page.goto('https://shop.bestdispensary.com/shop', {
      waitUntil: 'networkidle2',
      timeout: 60000,
    });
    await sleep(3000);

    // Bypass age gate
    const ageGate = await page.$('[data-testid="age-gate-modal"]');
    if (ageGate) {
      const btn = await page.$('[data-testid="age-gate-submit-button"]');
      if (btn) await btn.click();
      await sleep(2000);
    }

    console.log('=== API REQUESTS WITH HEADERS ===\n');

    // "authorization" and "x-api-key" are already matched by these substrings.
    const credentialHints = ['auth', 'token', 'key', 'api'];

    apiRequests.forEach((req, i) => {
      console.log(i + 1 + '. ' + req.method + ' ' + req.url.slice(0, 100));
      console.log(' Headers:');
      for (const [headerName, headerValue] of Object.entries(req.headers)) {
        const lowered = headerName.toLowerCase();
        if (credentialHints.some((hint) => lowered.includes(hint))) {
          console.log(' >>> ' + headerName + ': ' + headerValue);
        }
      }
      // Show all headers for treez.io requests
      if (req.url.includes('headless.treez.io')) {
        console.log(' ALL HEADERS:');
        for (const [headerName, headerValue] of Object.entries(req.headers)) {
          console.log(' ' + headerName + ': ' + String(headerValue).slice(0, 80));
        }
      }
      console.log('');
    });

    // Also check for API keys in page scripts
    console.log('=== CHECKING FOR API KEYS IN PAGE ===\n');

    const pageData = await page.evaluate(() => {
      const data: any = {};

      // Check window object for API keys
      const win = window as any;
      if (win.__NEXT_DATA__) {
        data.nextData = win.__NEXT_DATA__;
      }

      // Check for any global config
      if (win.config || win.CONFIG) {
        data.config = win.config || win.CONFIG;
      }

      // Look for treez-related globals
      Object.keys(win).forEach((key) => {
        if (
          key.toLowerCase().includes('treez') ||
          key.toLowerCase().includes('api') ||
          key.toLowerCase().includes('config')
        ) {
          try {
            data[key] = JSON.stringify(win[key]).slice(0, 500);
          } catch {
            // Best effort: values that cannot be serialised (cycles, functions) are skipped.
          }
        }
      });

      return data;
    });

    if (pageData.nextData?.props?.pageProps) {
      console.log(
        'Next.js pageProps keys: ' + Object.keys(pageData.nextData.props.pageProps).join(', ')
      );
    }
    if (pageData.nextData?.runtimeConfig) {
      console.log('Runtime config: ' + JSON.stringify(pageData.nextData.runtimeConfig).slice(0, 500));
    }

    // Report the remaining collected globals, which were otherwise discarded.
    for (const [key, value] of Object.entries(pageData)) {
      if (key === 'nextData') continue;
      const rendered = typeof value === 'string' ? value : JSON.stringify(value) ?? String(value);
      console.log('Global ' + key + ': ' + rendered.slice(0, 500));
    }
  } finally {
    await browser.close();
  }
}
|
||||
|
||||
// Script entry point: report a rejected main() instead of leaving the promise unhandled,
// and mark the process as failed.
main().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});
|
||||
@@ -1,100 +0,0 @@
|
||||
import puppeteer from 'puppeteer';
|
||||
import fs from 'fs';
|
||||
|
||||
/**
 * Suspend the caller for roughly `ms` milliseconds.
 *
 * @param ms - Delay in milliseconds.
 * @returns A promise resolved with no value when the delay has elapsed.
 */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((release) => {
    setTimeout(release, ms);
  });
}
|
||||
|
||||
/**
 * Capture product documents from the shop page's product-search responses.
 *
 * Listens for HTTP 200 responses whose URL contains
 * `gapcommerceapi.com/product/search`, collects each hit's `_source`, then
 * clicks the "Load More" button (up to 50 times) to trigger further searches.
 * Finally prints the field names and one sample product and writes the
 * de-duplicated list to `/tmp/treez-products.json`.
 *
 * Changes over the previous version: the browser is closed in `finally`,
 * response parse failures are logged rather than swallowed, and products
 * lacking every identifier are kept instead of collapsing into one entry.
 */
async function main(): Promise<void> {
  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // Capture ES API responses
    let allProductData: any[] = [];

    page.on('response', async (res) => {
      const url = res.url();
      if (url.includes('gapcommerceapi.com/product/search') && res.status() === 200) {
        try {
          const json = await res.json();
          const products = json.hits?.hits?.map((h: any) => h._source) || [];
          allProductData = allProductData.concat(products);
          console.log(
            'Captured ' + products.length + ' products (total: ' + allProductData.length + ')'
          );
        } catch (err: unknown) {
          // A body that cannot be read or parsed is reported, not silently dropped.
          console.log('Parse error: ' + (err instanceof Error ? err.message : String(err)));
        }
      }
    });

    console.log('Loading /shop page to capture product data...\n');

    await page.goto('https://shop.bestdispensary.com/shop', {
      waitUntil: 'networkidle2',
      timeout: 60000,
    });
    await sleep(3000);

    // Bypass age gate
    const ageGate = await page.$('[data-testid="age-gate-modal"]');
    if (ageGate) {
      const btn = await page.$('[data-testid="age-gate-submit-button"]');
      if (btn) await btn.click();
      await sleep(2000);
    }

    // Click load more many times to get all products
    console.log('\nClicking Load More to capture all products...');
    for (let i = 0; i < 50; i++) {
      const btn = await page.$('button.collection__load-more');
      if (!btn) {
        console.log('No more Load More button');
        break;
      }

      // A zero-sized bounding box is treated as "not visible".
      const isVisible = await page.evaluate((b) => {
        const rect = b.getBoundingClientRect();
        return rect.width > 0 && rect.height > 0;
      }, btn);

      if (!isVisible) {
        console.log('Load More not visible');
        break;
      }

      await btn.click();
      await sleep(1500);
      console.log('Click ' + (i + 1) + ': ' + allProductData.length + ' total products');
    }

    console.log('\n=== RESULTS ===\n');
    console.log('Total products captured: ' + allProductData.length);

    if (allProductData.length > 0) {
      // Dedupe by the first identifier available: id, then productId, then name.
      const seen = new Set<unknown>();
      const unique = allProductData.filter((p) => {
        const id = p.id ?? p.productId ?? p.name;
        // Without any identifier there is nothing to compare on; keep the product.
        if (id == null) return true;
        if (seen.has(id)) return false;
        seen.add(id);
        return true;
      });

      console.log('Unique products: ' + unique.length);
      console.log('\n=== PRODUCT FIELDS ===\n');
      console.log(Object.keys(unique[0]).sort().join('\n'));

      console.log('\n=== SAMPLE PRODUCT ===\n');
      console.log(JSON.stringify(unique[0], null, 2));

      // Save to file
      fs.writeFileSync('/tmp/treez-products.json', JSON.stringify(unique, null, 2));
      console.log('\nSaved to /tmp/treez-products.json');
    }
  } finally {
    await browser.close();
  }
}
|
||||
|
||||
// Script entry point: report a rejected main() instead of leaving the promise unhandled,
// and mark the process as failed.
main().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});
|
||||
@@ -1,88 +0,0 @@
|
||||
import puppeteer from 'puppeteer';
|
||||
import fs from 'fs';
|
||||
|
||||
/**
 * Hold for about `ms` milliseconds.
 *
 * @param ms - Timer length in milliseconds.
 * @returns A promise that resolves (with no value) when the timer fires.
 */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((resume) => {
    setTimeout(resume, ms);
  });
}
|
||||
|
||||
/**
 * Capture product documents from product-search responses while scrolling the
 * shop page.
 *
 * Every response whose URL contains `gapcommerceapi.com/product/search` has
 * its status logged; HTTP 200 bodies are parsed as JSON and each hit's
 * `_source` is collected. The page is then scrolled 20 times, clicking the
 * "load more" button whenever it can be clicked. Results are printed and
 * written to `/tmp/treez-products.json`.
 *
 * The browser is closed in a `finally` block so it is released on failure too.
 */
async function main(): Promise<void> {
  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // Capture ES API responses as text
    let allProducts: any[] = [];

    page.on('response', async (res) => {
      const url = res.url();
      if (!url.includes('gapcommerceapi.com/product/search')) {
        return;
      }
      console.log('ES Response: status=' + res.status());
      if (res.status() !== 200) {
        return;
      }
      try {
        const text = await res.text();
        console.log('Response length: ' + text.length);
        const json = JSON.parse(text);
        const products = json.hits?.hits?.map((h: any) => h._source) || [];
        allProducts = allProducts.concat(products);
        console.log('Got ' + products.length + ' products (total: ' + allProducts.length + ')');
      } catch (err: unknown) {
        console.log('Parse error: ' + (err instanceof Error ? err.message : String(err)));
      }
    });

    console.log('Loading page...\n');

    await page.goto('https://shop.bestdispensary.com/shop', {
      waitUntil: 'networkidle2',
      timeout: 60000,
    });
    await sleep(5000);

    // Bypass age gate
    const ageGate = await page.$('[data-testid="age-gate-modal"]');
    if (ageGate) {
      console.log('Bypassing age gate...');
      const btn = await page.$('[data-testid="age-gate-submit-button"]');
      if (btn) await btn.click();
      await sleep(3000);
    }

    // Wait for initial products to load
    await sleep(3000);

    console.log('\nInitial products captured: ' + allProducts.length);

    // Try scrolling to trigger more loads
    console.log('\nScrolling...');
    for (let i = 0; i < 20; i++) {
      await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
      await sleep(1500);

      // Also click load more if present
      try {
        await page.click('button.collection__load-more');
        console.log('Clicked load more');
      } catch {
        // Deliberate best effort: a failed click (e.g. no button on the page) is ignored.
      }
    }

    console.log('\n=== FINAL RESULTS ===\n');
    console.log('Total products: ' + allProducts.length);

    if (allProducts.length > 0) {
      console.log('\nFields: ' + Object.keys(allProducts[0]).sort().join(', '));
      console.log('\nSample:\n' + JSON.stringify(allProducts[0], null, 2));

      fs.writeFileSync('/tmp/treez-products.json', JSON.stringify(allProducts, null, 2));
      console.log('\nSaved to /tmp/treez-products.json');
    }
  } finally {
    await browser.close();
  }
}
|
||||
|
||||
// Script entry point: report a rejected main() instead of leaving the promise unhandled,
// and mark the process as failed.
main().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});
|
||||
@@ -1,192 +0,0 @@
|
||||
/**
|
||||
* Navigate to each category page and count products
|
||||
*/
|
||||
|
||||
import puppeteer, { Page } from 'puppeteer';
|
||||
|
||||
// Treez store identifier; interpolated as the subdomain of the `<id>.treez.io` URLs this script opens.
const STORE_ID = 'best';
|
||||
|
||||
/**
 * Wait roughly `ms` milliseconds.
 *
 * @param ms - Delay in milliseconds, passed to `setTimeout`.
 * @returns A promise resolved with no value once the delay is over.
 */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((proceed) => {
    setTimeout(proceed, ms);
  });
}
|
||||
|
||||
/**
 * Dismiss the age-verification modal when it is present on the page.
 *
 * Does nothing if the modal is absent. Otherwise clicks the submit button
 * (if found) and waits two seconds before returning.
 *
 * @param page - Puppeteer page to inspect.
 */
async function bypassAgeGate(page: Page): Promise<void> {
  const modal = await page.$('[data-testid="age-gate-modal"]');
  if (!modal) {
    return;
  }

  const submitButton = await page.$('[data-testid="age-gate-submit-button"]');
  if (submitButton) {
    await submitButton.click();
  }
  await sleep(2000);
}
|
||||
|
||||
/**
 * Repeatedly scroll to the bottom of the page so more content can load.
 *
 * Before each scroll the document height is measured; scrolling ends once the
 * height has matched the previous measurement three times in a row, or after
 * 50 scrolls. Each scroll is followed by a 1.5 second pause.
 *
 * @param page - Puppeteer page to scroll.
 */
async function scrollToLoadAll(page: Page): Promise<void> {
  const maxPasses = 50;
  const stablePassesToStop = 3;
  let lastHeight = 0;
  let stablePasses = 0;

  for (let pass = 0; pass < maxPasses; pass += 1) {
    const height = await page.evaluate(() => document.body.scrollHeight);

    stablePasses = height === lastHeight ? stablePasses + 1 : 0;
    if (stablePasses >= stablePassesToStop) {
      return;
    }

    await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
    await sleep(1500);
    lastHeight = height;
  }
}
|
||||
|
||||
/**
 * Count distinct products currently rendered on the page.
 *
 * Each `/product/` link is labelled by its image `alt` text or, failing that,
 * by the trimmed text of an `h5` inside it; links without a label are ignored
 * and identical labels are counted once.
 *
 * @param page - Puppeteer page to inspect.
 * @returns The number of distinct product labels found.
 */
async function countProducts(page: Page): Promise<number> {
  const distinctCount = await page.evaluate(() => {
    const labels = Array.from(document.querySelectorAll('a[href*="/product/"]'))
      .map(
        (link) =>
          link.querySelector('img')?.getAttribute('alt') ||
          link.querySelector('h5')?.textContent?.trim() ||
          ''
      )
      .filter((label) => label !== '');
    return new Set<string>(labels).size;
  });
  return distinctCount;
}
|
||||
|
||||
/**
 * Count products on each Treez category page for `STORE_ID`.
 *
 * For every category three URL patterns are tried in order; the first one
 * showing any product is scrolled to the end and its final count recorded.
 * The main shop page is counted the same way, the navigation links on the home
 * page are listed, and a per-category summary with a grand total is printed.
 *
 * The browser is closed in a `finally` block: the home-page navigation near
 * the end is not wrapped in its own try/catch, so a timeout there would
 * otherwise leave the browser running.
 */
async function main(): Promise<void> {
  console.log('='.repeat(60));
  console.log('Testing Treez Category Pages');
  console.log('='.repeat(60));

  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // Skip images, fonts and media; only the DOM is needed for counting.
    await page.setRequestInterception(true);
    page.on('request', (req) => {
      if (['image', 'font', 'media'].includes(req.resourceType())) {
        req.abort();
      } else {
        req.continue();
      }
    });

    // Categories from the nav menu
    const categories = [
      'cartridges',
      'flower',
      'pre-rolls',
      'edibles',
      'extracts',
      'tinctures',
      'capsules',
      'topicals',
      'accessories',
      'drink',
    ];

    const results: { category: string; products: number }[] = [];
    let ageGateBypassed = false;

    for (const category of categories) {
      // Try different URL patterns
      const urls = [
        `https://${STORE_ID}.treez.io/onlinemenu/${category}?customerType=ADULT`,
        `https://${STORE_ID}.treez.io/onlinemenu/category/${category}?customerType=ADULT`,
        `https://${STORE_ID}.treez.io/${category}?customerType=ADULT`,
      ];

      for (const url of urls) {
        try {
          console.log(`\nTrying: ${url}`);
          await page.goto(url, { waitUntil: 'networkidle2', timeout: 30000 });
          await sleep(2000);

          // The age gate is only handled on the first page that loads successfully.
          if (!ageGateBypassed) {
            await bypassAgeGate(page);
            ageGateBypassed = true;
            await sleep(1000);
          }

          const initialCount = await countProducts(page);
          if (initialCount > 0) {
            console.log(` Initial: ${initialCount} products`);

            await scrollToLoadAll(page);
            const finalCount = await countProducts(page);
            console.log(` After scroll: ${finalCount} products`);

            results.push({ category, products: finalCount });
            break; // Found working URL, move to next category
          } else {
            console.log(` No products found`);
          }
        } catch (error: unknown) {
          console.log(` Error: ${error instanceof Error ? error.message : String(error)}`);
        }
      }
    }

    // Also try the main shop page
    console.log('\nTrying main shop page...');
    try {
      const shopUrl = `https://${STORE_ID}.treez.io/onlinemenu/shop?customerType=ADULT`;
      await page.goto(shopUrl, { waitUntil: 'networkidle2', timeout: 30000 });
      await sleep(2000);

      const initialCount = await countProducts(page);
      console.log(`Shop page initial: ${initialCount} products`);

      if (initialCount > 0) {
        await scrollToLoadAll(page);
        const finalCount = await countProducts(page);
        console.log(`Shop page after scroll: ${finalCount} products`);
        results.push({ category: 'shop', products: finalCount });
      }
    } catch (error: unknown) {
      console.log(`Shop page error: ${error instanceof Error ? error.message : String(error)}`);
    }

    // Try to find and click on category links from the nav
    console.log('\n[Alternative] Trying to find nav category links...');

    const homeUrl = `https://${STORE_ID}.treez.io/onlinemenu/?customerType=ADULT`;
    await page.goto(homeUrl, { waitUntil: 'networkidle2', timeout: 30000 });
    await sleep(3000);
    await bypassAgeGate(page);
    await sleep(1000);

    const navLinks = await page.evaluate(() => {
      const links: { text: string; href: string }[] = [];
      document.querySelectorAll('nav a, [class*="nav"] a').forEach((a) => {
        const text = a.textContent?.trim() || '';
        const href = a.getAttribute('href') || '';
        if (href && text && !links.some((l) => l.href === href)) {
          links.push({ text, href });
        }
      });
      return links;
    });

    console.log('Nav links found:');
    navLinks.forEach((l) => console.log(` - "${l.text}" → ${l.href}`));

    // Summary
    console.log('\n' + '='.repeat(60));
    console.log('Summary');
    console.log('='.repeat(60));

    let total = 0;
    results.forEach((r) => {
      console.log(`${r.category}: ${r.products} products`);
      total += r.products;
    });
    console.log(`\nTotal across categories: ${total} products`);
  } finally {
    await browser.close();
  }
}
|
||||
|
||||
// Script entry point: run main() and print any rejection via console.error.
main().catch(console.error);
|
||||
@@ -1,178 +0,0 @@
|
||||
/**
|
||||
* ============================================================
|
||||
* TREEZ CLIENT TEST SCRIPT
|
||||
* ============================================================
|
||||
*
|
||||
* Tests the Treez CDP interception client using Best Dispensary.
|
||||
*
|
||||
* This verifies:
|
||||
* - Stealth plugin bypasses headless detection
|
||||
* - CDP intercepts Elasticsearch API responses
|
||||
* - Products are captured and normalized correctly
|
||||
* - Inventory data is available
|
||||
*
|
||||
* Usage: npx ts-node scripts/test-treez-client.ts
|
||||
*
|
||||
* ============================================================
|
||||
*/
|
||||
|
||||
import { fetchProductsFromUrl } from '../src/platforms/treez';
|
||||
|
||||
// Shop page passed to fetchProductsFromUrl() by this test script.
const TEST_URL = 'https://shop.bestdispensary.com/shop';
|
||||
|
||||
/**
 * Prints a three-line banner: a rule of 60 `=` characters, the title, and a
 * second rule.
 *
 * @param title - Text printed between the two rules.
 * @param leadingBlankLine - When true the first rule is prefixed with `\n`.
 * @param log - Output function (defaults to `console.log`).
 */
function printBanner(
  title: string,
  leadingBlankLine = true,
  log: (message: string) => void = console.log,
): void {
  const rule = '='.repeat(60);
  log((leadingBlankLine ? '\n' : '') + rule);
  log(title);
  log(rule);
}

/**
 * Counts items per key and returns `[key, count]` pairs sorted by count,
 * highest first.
 *
 * @param items - Items to tally.
 * @param keyOf - Maps an item to the key it is counted under.
 */
function countBy<T>(items: readonly T[], keyOf: (item: T) => string): [string, number][] {
  const counts = new Map<string, number>();
  for (const item of items) {
    const key = keyOf(item);
    counts.set(key, (counts.get(key) ?? 0) + 1);
  }
  return [...counts.entries()].sort((a, b) => b[1] - a[1]);
}

/**
 * Returns `count / total` as a rounded percentage, or 0 when `total` is not
 * positive (avoids printing `NaN%` / `Infinity%`).
 */
function percentOf(count: number, total: number): number {
  return total > 0 ? Math.round((count / total) * 100) : 0;
}

/**
 * Runs the Treez client against TEST_URL and prints a report: totals, one
 * sample raw product, one sample normalized product, brand and category
 * breakdowns, inventory statistics, and THC/CBD/image coverage.
 *
 * Exits the process with code 1 when no products are captured or when any
 * step throws.
 */
async function main(): Promise<void> {
  printBanner('TREEZ CLIENT TEST - CDP INTERCEPTION', false);
  console.log(`URL: ${TEST_URL}`);
  console.log('Method: Puppeteer + Stealth + CDP response capture');
  console.log('');

  try {
    console.log('[Starting] Launching browser with Stealth plugin...\n');

    const result = await fetchProductsFromUrl(TEST_URL);

    printBanner('RESULTS');
    console.log(`Total products: ${result.totalCaptured}`);
    console.log(`Store ID: ${result.storeId || 'N/A (custom domain)'}`);
    console.log(`Source URL: ${result.sourceUrl}`);
    console.log(`Fetched at: ${result.fetchedAt.toISOString()}`);

    if (result.products.length === 0) {
      console.log('\n[WARNING] No products captured!');
      console.log('This could mean:');
      console.log(' - Stealth plugin is not bypassing detection');
      console.log(' - CDP is not intercepting the correct URLs');
      console.log(' - Page structure has changed');
      process.exit(1);
    }

    // Show sample raw product
    printBanner('SAMPLE RAW PRODUCT (from Elasticsearch)');
    const raw = result.products[0];
    console.log(JSON.stringify({
      id: raw.id,
      name: raw.name,
      menuTitle: raw.menuTitle,
      brand: raw.brand,
      category: raw.category,
      subtype: raw.subtype,
      status: raw.status,
      availableUnits: raw.availableUnits,
      customMinPrice: raw.customMinPrice,
      customMaxPrice: raw.customMaxPrice,
      isActive: raw.isActive,
      isAboveThreshold: raw.isAboveThreshold,
    }, null, 2));

    // Show sample normalized product
    printBanner('SAMPLE NORMALIZED PRODUCT');
    const normalized = result.normalized[0];
    if (!normalized) {
      // Raw products exist but none were normalized: fail with a clear
      // message instead of a TypeError on the property reads below.
      throw new Error('Raw products were captured but no normalized products were returned');
    }
    console.log(JSON.stringify({
      id: normalized.id,
      name: normalized.name,
      brand: normalized.brand,
      category: normalized.category,
      subtype: normalized.subtype,
      price: normalized.price,
      priceMin: normalized.priceMin,
      priceMax: normalized.priceMax,
      discountedPrice: normalized.discountedPrice,
      discountPercent: normalized.discountPercent,
      availableUnits: normalized.availableUnits,
      inStock: normalized.inStock,
      thcPercent: normalized.thcPercent,
      cbdPercent: normalized.cbdPercent,
      strainType: normalized.strainType,
      effects: normalized.effects,
      flavors: normalized.flavors,
      imageUrl: normalized.imageUrl,
      images: normalized.images?.slice(0, 2),
    }, null, 2));

    // Brand breakdown
    printBanner('BRANDS (top 15)');
    const sorted = countBy(result.normalized, p => p.brand || 'Unknown');
    console.log(`Total unique brands: ${sorted.length}\n`);
    for (const [brand, count] of sorted.slice(0, 15)) {
      console.log(` ${brand}: ${count} products`);
    }

    // Category breakdown
    printBanner('CATEGORIES');
    const catSorted = countBy(result.normalized, p => p.category || 'Unknown');
    for (const [cat, count] of catSorted) {
      console.log(` ${cat}: ${count} products`);
    }

    // Inventory stats
    printBanner('INVENTORY STATS');
    const inStock = result.normalized.filter(p => p.inStock).length;
    const outOfStock = result.normalized.filter(p => !p.inStock).length;
    // Filtered once and reused for both the count and the samples below.
    const withUnits = result.normalized.filter(p => p.availableUnits > 0);
    const hasInventoryData = withUnits.length;

    console.log(`In stock: ${inStock}`);
    console.log(`Out of stock: ${outOfStock}`);
    console.log(`With inventory levels: ${hasInventoryData}`);

    // Show inventory examples
    if (hasInventoryData > 0) {
      console.log('\nSample inventory levels:');
      for (const p of withUnits.slice(0, 5)) {
        console.log(` ${p.name}: ${p.availableUnits} units`);
      }
    }

    // Check for THC/CBD data
    const hasThc = result.normalized.filter(p => p.thcPercent !== null).length;
    const hasCbd = result.normalized.filter(p => p.cbdPercent !== null).length;
    console.log(`\nWith THC data: ${hasThc} (${percentOf(hasThc, result.totalCaptured)}%)`);
    console.log(`With CBD data: ${hasCbd} (${percentOf(hasCbd, result.totalCaptured)}%)`);

    // Check for images
    const hasImages = result.normalized.filter(p => p.imageUrl).length;
    console.log(`With images: ${hasImages} (${percentOf(hasImages, result.totalCaptured)}%)`);

    printBanner('TEST PASSED');
  } catch (error: unknown) {
    printBanner('TEST FAILED', true, console.error);
    if (error instanceof Error) {
      console.error(`Error: ${error.message}`);
      console.error(error.stack);
    } else {
      // Non-Error throwables have no message/stack; print them as-is.
      console.error(`Error: ${String(error)}`);
    }
    process.exit(1);
  }
}
|
||||
|
||||
// Script entry point. main() wraps its work in try/catch and exits with code 1
// on failure, so this catch only logs a rejection that escapes that handler.
main().catch(console.error);
|
||||
@@ -1,160 +0,0 @@
|
||||
/**
|
||||
* Find the correct product card container selector
|
||||
*/
|
||||
|
||||
import puppeteer, { Page } from 'puppeteer';
|
||||
|
||||
// Subdomain inserted into the `https://<STORE_ID>.treez.io/onlinemenu/...` URL that main() loads.
const STORE_ID = 'best';
|
||||
|
||||
/**
 * Pauses the caller: the returned promise resolves once a `setTimeout` of
 * `ms` milliseconds fires.
 *
 * @param ms - Delay in milliseconds.
 */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
|
||||
|
||||
/**
 * Dismisses the age-gate modal when one is present on the page.
 *
 * Does nothing if no `[data-testid="age-gate-modal"]` element exists.
 * Otherwise clicks the submit button (when found) and then waits 2 seconds.
 *
 * @param page - Puppeteer page to inspect.
 */
async function bypassAgeGate(page: Page): Promise<void> {
  const modal = await page.$('[data-testid="age-gate-modal"]');
  if (!modal) {
    return;
  }

  const submit = await page.$('[data-testid="age-gate-submit-button"]');
  if (submit) {
    await submit.click();
  }

  // The wait happens whether or not the button was found, as long as the modal exists.
  await sleep(2000);
}
|
||||
|
||||
/**
 * Loads the Treez brands menu for STORE_ID in headless Chromium and prints
 * three diagnostics for choosing a product-card selector:
 *
 *  1. ancestor elements of product-name headings whose class contains
 *     `ProductCard`, with counts and an HTML sample;
 *  2. for a list of candidate selectors, how many matches contain a name
 *     and how many contain a price;
 *  3. every distinct (truncated) class of elements that contain both a name
 *     heading and a price element.
 *
 * The browser is always closed, even when navigation or evaluation throws.
 */
async function main(): Promise<void> {
  console.log('Finding Treez product card containers...\n');

  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // Abort image, font and media requests; let everything else through.
    await page.setRequestInterception(true);
    page.on('request', (req) => {
      if (['image', 'font', 'media'].includes(req.resourceType())) {
        req.abort();
      } else {
        req.continue();
      }
    });

    const url = `https://${STORE_ID}.treez.io/onlinemenu/brands?customerType=ADULT`;
    await page.goto(url, { waitUntil: 'networkidle2', timeout: 60000 });
    await sleep(3000);
    await bypassAgeGate(page);
    await sleep(2000);

    // Find product card containers by walking up from each product-name heading.
    // NOTE: callbacks passed to page.evaluate run in the browser page, so they
    // must be self-contained and cannot reference helpers from this file.
    const analysis = await page.evaluate(() => {
      const nameElements = document.querySelectorAll('h5.product_product__name__JcEk0');
      const containers: Map<string, { count: number; sample: string }> = new Map();

      nameElements.forEach(nameEl => {
        // Walk up at most 10 ancestors looking for the product card container.
        let current = nameEl.parentElement;
        let depth = 0;

        while (current && depth < 10) {
          const className = current.className?.toString?.() || '';

          // The first ancestor whose class mentions ProductCard wins.
          if (className.includes('ProductCard')) {
            const key = className.slice(0, 100);
            const existing = containers.get(key) || { count: 0, sample: '' };
            existing.count++;
            if (!existing.sample) {
              existing.sample = current.outerHTML.slice(0, 300);
            }
            containers.set(key, existing);
            break;
          }

          current = current.parentElement;
          depth++;
        }
      });

      return Array.from(containers.entries()).map(([cls, data]) => ({
        class: cls,
        count: data.count,
        sample: data.sample,
      }));
    });

    console.log('Product card containers found:');
    analysis.forEach(({ class: cls, count, sample }) => {
      console.log(`\n[${count}x] ${cls}`);
      console.log(`Sample: ${sample.slice(0, 200)}...`);
    });

    // Now test various container selectors
    console.log('\n\n--- Testing container selectors ---');

    const selectorTests = await page.evaluate(() => {
      const tests: Record<string, { total: number; withName: number; withPrice: number }> = {};

      const selectors = [
        '[class*="ProductCardWithBtn"]',
        '[class*="ProductCard_product"]',
        '[class*="ProductCard__"]',
        'article[class*="product"]',
        'div[class*="ProductCard"]',
        'a[class*="ProductCard"]',
        '[class*="product_product__"][class*="link"]',
        'article',
      ];

      selectors.forEach(sel => {
        const elements = document.querySelectorAll(sel);
        let withName = 0;
        let withPrice = 0;

        elements.forEach(el => {
          if (el.querySelector('h5, [class*="product__name"]')) withName++;
          if (el.querySelector('[class*="price"]')) withPrice++;
        });

        tests[sel] = { total: elements.length, withName, withPrice };
      });

      return tests;
    });

    Object.entries(selectorTests).forEach(([sel, { total, withName, withPrice }]) => {
      console.log(`${sel}: ${total} total, ${withName} with name, ${withPrice} with price`);
    });

    // Get the actual product card class pattern
    console.log('\n\n--- Finding exact product card class ---');

    const exactClasses = await page.evaluate(() => {
      // Collect elements that have both an h5 name AND a price descendant.
      const allElements = document.querySelectorAll('*');
      const matches: { tag: string; class: string }[] = [];

      allElements.forEach(el => {
        const hasName = el.querySelector('h5.product_product__name__JcEk0');
        const hasPrice = el.querySelector('[class*="price__ins"], [class*="price__"]');

        if (hasName && hasPrice) {
          const className = el.className?.toString?.() || '';
          // Dedupe on the same truncated string that is stored; comparing the
          // full class name against stored truncations never matched for
          // class names longer than 150 characters.
          const shownClass = className.slice(0, 150);
          if (shownClass && !matches.some(m => m.class === shownClass)) {
            matches.push({ tag: el.tagName, class: shownClass });
          }
        }
      });

      return matches;
    });

    console.log('Elements containing both name and price:');
    exactClasses.forEach(({ tag, class: cls }) => {
      console.log(` [${tag}] ${cls}`);
    });
  } finally {
    // Always release the Chromium process, including on failure paths.
    await browser.close();
  }
}
|
||||
|
||||
// Script entry point: log any failure and make the process exit non-zero so
// callers (shell, CI) can detect that the run did not complete.
main().catch((error: unknown) => {
  console.error(error);
  process.exitCode = 1;
});
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user