Compare commits

..

2 Commits

Author SHA1 Message Date
Kelly
2513e22171 fix(security): Add auth middleware to unprotected API endpoints
Security audit identified 8 endpoint groups that were publicly accessible
without authentication. Added authMiddleware and requireRole where appropriate.

Protected endpoints:
- /api/payloads/* - authMiddleware (trusted origins or API token)
- /api/job-queue/* - authMiddleware + requireRole('admin')
- /api/workers/* - authMiddleware
- /api/worker-registry/* - authMiddleware (pods access via trusted IPs)
- /api/k8s/* - authMiddleware + requireRole('admin')
- /api/pipeline/* - authMiddleware + requireRole('admin')
- /api/tasks/* - authMiddleware + requireRole('admin')
- /api/admin/orchestrator/* - authMiddleware + requireRole('admin')

Also:
- Added API_SECURITY.md documentation
- Filter AI settings from /settings page (managed in /ai-settings)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-12-12 00:30:08 -07:00
Kelly
e17b3b225a feat(k8s): Add StatefulSet for persistent workers
- Add scraper-worker-statefulset.yaml with 8 persistent pods
- updateStrategy: OnDelete prevents automatic restarts
- Workers maintain stable identity across restarts
- Document worker architecture in CLAUDE.md
- Add worker registry API endpoint documentation

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-12 00:30:08 -07:00
283 changed files with 2610 additions and 63026 deletions

7
.gitignore vendored
View File

@@ -51,10 +51,3 @@ coverage/
*.tmp
*.temp
llm-scraper/
# Claude Code
.claude/
# Test/debug scripts
backend/scripts/test-*.ts
backend/scripts/run-*.ts

View File

@@ -3,7 +3,7 @@ steps:
# PR VALIDATION: Parallel type checks (PRs only)
# ===========================================
typecheck-backend:
image: node:22
image: code.cannabrands.app/creationshop/node:20
commands:
- cd backend
- npm ci --prefer-offline
@@ -13,7 +13,7 @@ steps:
event: pull_request
typecheck-cannaiq:
image: node:22
image: code.cannabrands.app/creationshop/node:20
commands:
- cd cannaiq
- npm ci --prefer-offline
@@ -23,7 +23,7 @@ steps:
event: pull_request
typecheck-findadispo:
image: node:22
image: code.cannabrands.app/creationshop/node:20
commands:
- cd findadispo/frontend
- npm ci --prefer-offline
@@ -33,7 +33,7 @@ steps:
event: pull_request
typecheck-findagram:
image: node:22
image: code.cannabrands.app/creationshop/node:20
commands:
- cd findagram/frontend
- npm ci --prefer-offline
@@ -58,7 +58,7 @@ steps:
-H "Authorization: token $GITEA_TOKEN" \
-H "Content-Type: application/json" \
-d '{"Do":"merge"}' \
"https://git.spdy.io/api/v1/repos/Creationshop/cannaiq/pulls/${CI_COMMIT_PULL_REQUEST}/merge"
"https://code.cannabrands.app/api/v1/repos/Creationshop/dispensary-scraper/pulls/${CI_COMMIT_PULL_REQUEST}/merge"
depends_on:
- typecheck-backend
- typecheck-cannaiq
@@ -68,122 +68,127 @@ steps:
event: pull_request
# ===========================================
# DOCKER: Multi-stage builds with layer caching
# MASTER DEPLOY: Parallel Docker builds
# NOTE: cache_from/cache_to removed due to plugin bug splitting on commas
# ===========================================
docker-backend:
image: gcr.io/kaniko-project/executor:debug
commands:
- /kaniko/executor
--context=/woodpecker/src/git.spdy.io/Creationshop/cannaiq/backend
--dockerfile=/woodpecker/src/git.spdy.io/Creationshop/cannaiq/backend/Dockerfile
--destination=registry.spdy.io/cannaiq/backend:latest
--destination=registry.spdy.io/cannaiq/backend:sha-${CI_COMMIT_SHA:0:8}
--build-arg=APP_BUILD_VERSION=sha-${CI_COMMIT_SHA:0:8}
--build-arg=APP_GIT_SHA=${CI_COMMIT_SHA}
--build-arg=APP_BUILD_TIME=${CI_PIPELINE_CREATED}
--cache=true
--cache-repo=registry.spdy.io/cannaiq/cache-backend
--cache-ttl=168h
image: woodpeckerci/plugin-docker-buildx
settings:
registry: code.cannabrands.app
repo: code.cannabrands.app/creationshop/dispensary-scraper
tags:
- latest
- ${CI_COMMIT_SHA:0:8}
dockerfile: backend/Dockerfile
context: backend
username:
from_secret: registry_username
password:
from_secret: registry_password
platforms: linux/amd64
provenance: false
build_args:
APP_BUILD_VERSION: ${CI_COMMIT_SHA:0:8}
APP_GIT_SHA: ${CI_COMMIT_SHA}
APP_BUILD_TIME: ${CI_PIPELINE_CREATED}
CONTAINER_IMAGE_TAG: ${CI_COMMIT_SHA:0:8}
depends_on: []
when:
branch: [master, develop]
branch: master
event: push
docker-cannaiq:
image: gcr.io/kaniko-project/executor:debug
commands:
- /kaniko/executor
--context=/woodpecker/src/git.spdy.io/Creationshop/cannaiq/cannaiq
--dockerfile=/woodpecker/src/git.spdy.io/Creationshop/cannaiq/cannaiq/Dockerfile
--destination=registry.spdy.io/cannaiq/frontend:latest
--destination=registry.spdy.io/cannaiq/frontend:sha-${CI_COMMIT_SHA:0:8}
--cache=true
--cache-repo=registry.spdy.io/cannaiq/cache-cannaiq
--cache-ttl=168h
image: woodpeckerci/plugin-docker-buildx
settings:
registry: code.cannabrands.app
repo: code.cannabrands.app/creationshop/cannaiq-frontend
tags:
- latest
- ${CI_COMMIT_SHA:0:8}
dockerfile: cannaiq/Dockerfile
context: cannaiq
username:
from_secret: registry_username
password:
from_secret: registry_password
platforms: linux/amd64
provenance: false
depends_on: []
when:
branch: [master, develop]
branch: master
event: push
docker-findadispo:
image: gcr.io/kaniko-project/executor:debug
commands:
- /kaniko/executor
--context=/woodpecker/src/git.spdy.io/Creationshop/cannaiq/findadispo/frontend
--dockerfile=/woodpecker/src/git.spdy.io/Creationshop/cannaiq/findadispo/frontend/Dockerfile
--destination=registry.spdy.io/cannaiq/findadispo:latest
--destination=registry.spdy.io/cannaiq/findadispo:sha-${CI_COMMIT_SHA:0:8}
--cache=true
--cache-repo=registry.spdy.io/cannaiq/cache-findadispo
--cache-ttl=168h
image: woodpeckerci/plugin-docker-buildx
settings:
registry: code.cannabrands.app
repo: code.cannabrands.app/creationshop/findadispo-frontend
tags:
- latest
- ${CI_COMMIT_SHA:0:8}
dockerfile: findadispo/frontend/Dockerfile
context: findadispo/frontend
username:
from_secret: registry_username
password:
from_secret: registry_password
platforms: linux/amd64
provenance: false
depends_on: []
when:
branch: [master, develop]
branch: master
event: push
docker-findagram:
image: gcr.io/kaniko-project/executor:debug
commands:
- /kaniko/executor
--context=/woodpecker/src/git.spdy.io/Creationshop/cannaiq/findagram/frontend
--dockerfile=/woodpecker/src/git.spdy.io/Creationshop/cannaiq/findagram/frontend/Dockerfile
--destination=registry.spdy.io/cannaiq/findagram:latest
--destination=registry.spdy.io/cannaiq/findagram:sha-${CI_COMMIT_SHA:0:8}
--cache=true
--cache-repo=registry.spdy.io/cannaiq/cache-findagram
--cache-ttl=168h
image: woodpeckerci/plugin-docker-buildx
settings:
registry: code.cannabrands.app
repo: code.cannabrands.app/creationshop/findagram-frontend
tags:
- latest
- ${CI_COMMIT_SHA:0:8}
dockerfile: findagram/frontend/Dockerfile
context: findagram/frontend
username:
from_secret: registry_username
password:
from_secret: registry_password
platforms: linux/amd64
provenance: false
depends_on: []
when:
branch: [master, develop]
branch: master
event: push
# ===========================================
# DEPLOY: Pull from local registry
# STAGE 3: Deploy and Run Migrations
# ===========================================
deploy:
image: bitnami/kubectl:latest
environment:
K8S_TOKEN:
from_secret: k8s_token
KUBECONFIG_CONTENT:
from_secret: kubeconfig_data
commands:
- mkdir -p ~/.kube
- |
cat > ~/.kube/config << KUBEEOF
apiVersion: v1
kind: Config
clusters:
- cluster:
certificate-authority-data: LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUJkakNDQVIyZ0F3SUJBZ0lCQURBS0JnZ3Foa2pPUFFRREFqQWpNU0V3SHdZRFZRUUREQmhyTTNNdGMyVnkKZG1WeUxXTmhRREUzTmpVM05UUTNPRE13SGhjTk1qVXhNakUwTWpNeU5qSXpXaGNOTXpVeE1qRXlNak15TmpJegpXakFqTVNFd0h3WURWUVFEREJock0zTXRjMlZ5ZG1WeUxXTmhRREUzTmpVM05UUTNPRE13V1RBVEJnY3Foa2pPClBRSUJCZ2dxaGtqT1BRTUJCd05DQUFRWDRNdFJRTW5lWVJVV0s2cjZ3VEV2WjAxNnV4T3NUR3JJZ013TXVnNGwKajQ1bHZ6ZkM1WE1NY1pESnUxZ0t1dVJhVGxlb0xVOVJnSERIUUI4TUwzNTJvMEl3UURBT0JnTlZIUThCQWY4RQpCQU1DQXFRd0R3WURWUjBUQVFIL0JBVXdBd0VCL3pBZEJnTlZIUTRFRmdRVXIzNDZpNE42TFhzaEZsREhvSlU0CjJ1RjZseGN3Q2dZSUtvWkl6ajBFQXdJRFJ3QXdSQUlnVUtqdWRFQWJyS1JDVHROVXZTc1Rmb3FEaHFSeDM5MkYKTFFSVWlKK0hCVElDSUJqOFIxbG1zSnFSRkRHMEpwMGN4OG5ZZnFCaElRQzh6WWdRdTdBZmR4L3IKLS0tLS1FTkQgQ0VSVElGSUNBVEUtLS0tLQo=
server: https://10.100.6.10:6443
name: spdy-k3s
contexts:
- context:
cluster: spdy-k3s
namespace: cannaiq
user: cannaiq-admin
name: cannaiq
current-context: cannaiq
users:
- name: cannaiq-admin
user:
token: $K8S_TOKEN
KUBEEOF
- echo "$KUBECONFIG_CONTENT" | tr -d '[:space:]' | base64 -d > ~/.kube/config
- chmod 600 ~/.kube/config
# Apply manifests to ensure probes and resource limits are set
- kubectl apply -f /woodpecker/src/git.spdy.io/Creationshop/cannaiq/k8s/scraper.yaml
- kubectl apply -f /woodpecker/src/git.spdy.io/Creationshop/cannaiq/k8s/scraper-worker.yaml
- kubectl set image deployment/scraper scraper=registry.spdy.io/cannaiq/backend:sha-${CI_COMMIT_SHA:0:8} -n cannaiq
- kubectl rollout status deployment/scraper -n cannaiq --timeout=300s
- kubectl set image deployment/scraper-worker worker=registry.spdy.io/cannaiq/backend:sha-${CI_COMMIT_SHA:0:8} -n cannaiq
- kubectl set image deployment/cannaiq-frontend cannaiq-frontend=registry.spdy.io/cannaiq/frontend:sha-${CI_COMMIT_SHA:0:8} -n cannaiq
- kubectl set image deployment/findadispo-frontend findadispo-frontend=registry.spdy.io/cannaiq/findadispo:sha-${CI_COMMIT_SHA:0:8} -n cannaiq
- kubectl set image deployment/findagram-frontend findagram-frontend=registry.spdy.io/cannaiq/findagram:sha-${CI_COMMIT_SHA:0:8} -n cannaiq
- kubectl rollout status deployment/cannaiq-frontend -n cannaiq --timeout=300s
# Deploy backend first
- kubectl set image deployment/scraper scraper=code.cannabrands.app/creationshop/dispensary-scraper:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
- kubectl rollout status deployment/scraper -n dispensary-scraper --timeout=300s
# Note: Migrations run automatically at startup via auto-migrate
# Deploy remaining services
# Resilience: ensure workers are scaled up if at 0
- REPLICAS=$(kubectl get deployment scraper-worker -n dispensary-scraper -o jsonpath='{.spec.replicas}'); if [ "$REPLICAS" = "0" ]; then echo "Scaling workers from 0 to 5"; kubectl scale deployment/scraper-worker --replicas=5 -n dispensary-scraper; fi
- kubectl set image deployment/scraper-worker worker=code.cannabrands.app/creationshop/dispensary-scraper:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
- kubectl set image deployment/cannaiq-frontend cannaiq-frontend=code.cannabrands.app/creationshop/cannaiq-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
- kubectl set image deployment/findadispo-frontend findadispo-frontend=code.cannabrands.app/creationshop/findadispo-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
- kubectl set image deployment/findagram-frontend findagram-frontend=code.cannabrands.app/creationshop/findagram-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
- kubectl rollout status deployment/cannaiq-frontend -n dispensary-scraper --timeout=120s
depends_on:
- docker-backend
- docker-cannaiq
- docker-findadispo
- docker-findagram
when:
branch: [master, develop]
branch: master
event: push

191
.woodpecker/ci.yml Normal file
View File

@@ -0,0 +1,191 @@
steps:
# ===========================================
# PR VALIDATION: Only typecheck changed projects
# ===========================================
typecheck-backend:
image: code.cannabrands.app/creationshop/node:20
commands:
- npm config set cache /npm-cache/backend --global
- cd backend
- npm ci --prefer-offline
- npx tsc --noEmit
volumes:
- npm-cache:/npm-cache
depends_on: []
when:
event: pull_request
path:
include: ['backend/**']
typecheck-cannaiq:
image: code.cannabrands.app/creationshop/node:20
commands:
- npm config set cache /npm-cache/cannaiq --global
- cd cannaiq
- npm ci --prefer-offline
- npx tsc --noEmit
volumes:
- npm-cache:/npm-cache
depends_on: []
when:
event: pull_request
path:
include: ['cannaiq/**']
# findadispo/findagram typechecks skipped - they have || true anyway
# ===========================================
# AUTO-MERGE: Merge PR after all checks pass
# ===========================================
auto-merge:
image: alpine:latest
environment:
GITEA_TOKEN:
from_secret: gitea_token
commands:
- apk add --no-cache curl
- |
echo "Merging PR #${CI_COMMIT_PULL_REQUEST}..."
curl -s -X POST \
-H "Authorization: token $GITEA_TOKEN" \
-H "Content-Type: application/json" \
-d '{"Do":"merge"}' \
"https://code.cannabrands.app/api/v1/repos/Creationshop/dispensary-scraper/pulls/${CI_COMMIT_PULL_REQUEST}/merge"
depends_on:
- typecheck-backend
- typecheck-cannaiq
when:
event: pull_request
# ===========================================
# MASTER DEPLOY: Parallel Docker builds
# ===========================================
docker-backend:
image: woodpeckerci/plugin-docker-buildx
settings:
registry: code.cannabrands.app
repo: code.cannabrands.app/creationshop/dispensary-scraper
tags:
- latest
- ${CI_COMMIT_SHA:0:8}
dockerfile: backend/Dockerfile
context: backend
username:
from_secret: registry_username
password:
from_secret: registry_password
platforms: linux/amd64
provenance: false
cache_from: type=registry,ref=code.cannabrands.app/creationshop/dispensary-scraper:cache
cache_to: type=registry,ref=code.cannabrands.app/creationshop/dispensary-scraper:cache,mode=max
build_args:
APP_BUILD_VERSION: ${CI_COMMIT_SHA:0:8}
APP_GIT_SHA: ${CI_COMMIT_SHA}
APP_BUILD_TIME: ${CI_PIPELINE_CREATED}
CONTAINER_IMAGE_TAG: ${CI_COMMIT_SHA:0:8}
depends_on: []
when:
branch: master
event: push
docker-cannaiq:
image: woodpeckerci/plugin-docker-buildx
settings:
registry: code.cannabrands.app
repo: code.cannabrands.app/creationshop/cannaiq-frontend
tags:
- latest
- ${CI_COMMIT_SHA:0:8}
dockerfile: cannaiq/Dockerfile
context: cannaiq
username:
from_secret: registry_username
password:
from_secret: registry_password
platforms: linux/amd64
provenance: false
cache_from: type=registry,ref=code.cannabrands.app/creationshop/cannaiq-frontend:cache
cache_to: type=registry,ref=code.cannabrands.app/creationshop/cannaiq-frontend:cache,mode=max
depends_on: []
when:
branch: master
event: push
docker-findadispo:
image: woodpeckerci/plugin-docker-buildx
settings:
registry: code.cannabrands.app
repo: code.cannabrands.app/creationshop/findadispo-frontend
tags:
- latest
- ${CI_COMMIT_SHA:0:8}
dockerfile: findadispo/frontend/Dockerfile
context: findadispo/frontend
username:
from_secret: registry_username
password:
from_secret: registry_password
platforms: linux/amd64
provenance: false
cache_from: type=registry,ref=code.cannabrands.app/creationshop/findadispo-frontend:cache
cache_to: type=registry,ref=code.cannabrands.app/creationshop/findadispo-frontend:cache,mode=max
depends_on: []
when:
branch: master
event: push
docker-findagram:
image: woodpeckerci/plugin-docker-buildx
settings:
registry: code.cannabrands.app
repo: code.cannabrands.app/creationshop/findagram-frontend
tags:
- latest
- ${CI_COMMIT_SHA:0:8}
dockerfile: findagram/frontend/Dockerfile
context: findagram/frontend
username:
from_secret: registry_username
password:
from_secret: registry_password
platforms: linux/amd64
provenance: false
cache_from: type=registry,ref=code.cannabrands.app/creationshop/findagram-frontend:cache
cache_to: type=registry,ref=code.cannabrands.app/creationshop/findagram-frontend:cache,mode=max
depends_on: []
when:
branch: master
event: push
# ===========================================
# STAGE 3: Deploy and Run Migrations
# ===========================================
deploy:
image: bitnami/kubectl:latest
environment:
KUBECONFIG_CONTENT:
from_secret: kubeconfig_data
commands:
- mkdir -p ~/.kube
- echo "$KUBECONFIG_CONTENT" | tr -d '[:space:]' | base64 -d > ~/.kube/config
- chmod 600 ~/.kube/config
# Deploy backend first
- kubectl set image deployment/scraper scraper=code.cannabrands.app/creationshop/dispensary-scraper:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
- kubectl rollout status deployment/scraper -n dispensary-scraper --timeout=300s
# Note: Migrations run automatically at startup via auto-migrate
# Deploy remaining services
# Resilience: ensure workers are scaled up if at 0
- REPLICAS=$(kubectl get deployment scraper-worker -n dispensary-scraper -o jsonpath='{.spec.replicas}'); if [ "$REPLICAS" = "0" ]; then echo "Scaling workers from 0 to 5"; kubectl scale deployment/scraper-worker --replicas=5 -n dispensary-scraper; fi
- kubectl set image deployment/scraper-worker worker=code.cannabrands.app/creationshop/dispensary-scraper:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
- kubectl set image deployment/cannaiq-frontend cannaiq-frontend=code.cannabrands.app/creationshop/cannaiq-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
- kubectl set image deployment/findadispo-frontend findadispo-frontend=code.cannabrands.app/creationshop/findadispo-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
- kubectl set image deployment/findagram-frontend findagram-frontend=code.cannabrands.app/creationshop/findagram-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
- kubectl rollout status deployment/cannaiq-frontend -n dispensary-scraper --timeout=120s
depends_on:
- docker-backend
- docker-cannaiq
- docker-findadispo
- docker-findagram
when:
branch: master
event: push

300
CLAUDE.md
View File

@@ -1,8 +1,5 @@
# Claude Guidelines for CannaiQ
## CURRENT ENVIRONMENT: PRODUCTION
**We are working in PRODUCTION only.** All database queries and API calls should target the remote production environment, not localhost. Use kubectl port-forward or remote DB connections as needed.
## PERMANENT RULES (NEVER VIOLATE)
### 1. NO DELETE
@@ -20,72 +17,6 @@ Never deploy unless user explicitly says: "CLAUDE — DEPLOYMENT IS NOW AUTHORIZ
### 5. DB POOL ONLY
Never import `src/db/migrate.ts` at runtime. Use `src/db/pool.ts` for DB access.
### 6. CI/CD DEPLOYMENT — BATCH CHANGES, PUSH ONCE
**Never manually deploy or check deployment status.** The project uses Woodpecker CI.
**CRITICAL: Each CI build takes 30 minutes. NEVER push incrementally.**
**Workflow:**
1. Make ALL related code changes first
2. Test locally if possible (./setup-local.sh)
3. ONE commit with all changes
4. ONE push to master
5. **STOP** - CI handles the rest
6. Wait for user to confirm deployment worked
**DO NOT:**
- Push multiple small commits (each triggers 30-min build)
- Run `kubectl rollout status` to check deployment
- Run `kubectl logs` to verify new code is running
- Manually restart pods
- Check CI pipeline status
Batch everything, push once, wait for user feedback.
### 7. K8S — DEPLOY AND FORGET
**DO NOT run kubectl commands.** The system is self-managing.
**Operational Model:**
```
┌─────────────────────────────────────────────────────────┐
│ DEPLOY ONCE → WORKERS RUN FOREVER → CREATE TASKS ONLY │
└─────────────────────────────────────────────────────────┘
1. CI deploys code changes (automatic on push)
2. K8s maintains 8 pods (self-healing)
3. Workers poll DB for tasks (autonomous)
4. Create tasks via API or DB → workers pick them up
5. Never touch K8s directly
```
**Fixed Configuration (NEVER CHANGE):**
- **8 replicas** — locked in `k8s/scraper-worker.yaml`
- **MAX_CONCURRENT_TASKS=3** — 3 browsers per pod (memory safe)
- **Total capacity:** 8 pods × 3 = 24 concurrent tasks
**DO NOT:**
- Run `kubectl` commands (scale, rollout, logs, get pods, etc.)
- Manually restart pods
- Change replica count
- Check deployment status
**To interact with the system:**
- Create tasks in DB → workers pick them up automatically
- Check task status via DB queries or API
- View worker status via dashboard (cannaiq.co)
**Why no kubectl?**
- K8s auto-restarts crashed pods
- Workers self-heal (reconnect to DB, retry failed tasks)
- No manual intervention needed in steady state
- Only CI touches K8s (on code deployments)
**Scaling Decision:**
- Monitor pool drain rate via dashboard/DB queries
- If pool drains too slowly, manually increase replicas in `k8s/scraper-worker.yaml`
- Commit + push → CI deploys new replica count
- No runtime kubectl scaling — all changes via code
---
## Quick Reference
@@ -272,216 +203,57 @@ All other browsers are filtered out. Uses `intoli/user-agents` library for reali
These binaries mimic real browser TLS fingerprints to avoid detection.
### Evomi Residential Proxy API
Workers use Evomi's residential proxy API for geo-targeted proxies on-demand.
**Priority Order**:
1. Evomi API (if EVOMI_USER/EVOMI_PASS configured)
2. DB proxies (fallback if Evomi not configured)
**Environment Variables**:
| Variable | Description | Default |
|----------|-------------|---------|
| `EVOMI_USER` | API username | - |
| `EVOMI_PASS` | API key | - |
| `EVOMI_HOST` | Proxy host | `rpc.evomi.com` |
| `EVOMI_PORT` | Proxy port | `1000` |
**K8s Secret**: Credentials stored in `scraper-secrets`:
```bash
kubectl get secret scraper-secrets -n cannaiq -o jsonpath='{.data.EVOMI_PASS}' | base64 -d
```
**Proxy URL Format**: `http://{user}_{session}_{geo}:{pass}@{host}:{port}`
- `session`: Worker ID for sticky sessions
- `geo`: State code (e.g., `arizona`, `california`)
**Files**:
- `src/services/crawl-rotator.ts` - `getEvomiConfig()`, `buildEvomiProxyUrl()`
- `src/tasks/task-worker.ts` - Proxy initialization order
---
## Bulk Task Workflow (Updated 2025-12-13)
## Worker Architecture (Kubernetes)
### Overview
Tasks are created with `scheduled_for = NOW()` by default. Worker-level controls handle pacing - no task-level staggering needed.
### Persistent Workers (StatefulSet)
### How It Works
```
1. Task created with scheduled_for = NOW()
2. Worker claims task only when scheduled_for <= NOW()
3. Worker runs preflight on EVERY task claim (proxy health check)
4. If preflight passes, worker executes task
5. If preflight fails, task released back to pending for another worker
6. Worker finishes task, polls for next available task
7. Repeat - preflight runs on each new task claim
```
Workers run as a **StatefulSet** with 8 persistent pods. They maintain identity across restarts.
### Worker-Level Throttling
These controls pace task execution - no staggering at task creation time:
**Pod Names**: `scraper-worker-0` through `scraper-worker-7`
| Control | Purpose |
|---------|---------|
| `MAX_CONCURRENT_TASKS` | Limits concurrent tasks per pod (default: 3) |
| Working hours | Restricts when tasks run (configurable per schedule) |
| Preflight checks | Ensures proxy health before each task |
| Per-store locking | Only one active task per dispensary |
**Key Properties**:
- `updateStrategy: OnDelete` - Pods only update when manually deleted (no automatic restarts)
- `podManagementPolicy: Parallel` - All pods start simultaneously
- Workers register with their pod name as identity
### Key Points
- **Preflight is per-task, not per-startup**: Each task claim triggers a new preflight check
- **Worker controls pacing**: Tasks scheduled for NOW() but claimed based on worker capacity
- **Optional staggering**: Pass `stagger_seconds > 0` if you need explicit delays
**K8s Manifest**: `backend/k8s/scraper-worker-statefulset.yaml`
### API Endpoints
```bash
# Create bulk tasks for specific dispensary IDs
POST /api/tasks/batch/staggered
{
"dispensary_ids": [1, 2, 3, 4],
"role": "product_refresh", # or "product_discovery"
"stagger_seconds": 0, # default: 0 (all NOW)
"platform": "dutchie", # default: "dutchie"
"method": null # "curl" | "http" | null
}
### Worker Lifecycle
# Create bulk tasks for all stores in a state
POST /api/tasks/crawl-state/:stateCode
{
"stagger_seconds": 0, # default: 0 (all NOW)
"method": "http" # default: "http"
}
```
1. **Startup**: Worker registers in `worker_registry` table with pod name
2. **Preflight**: Runs dual-transport preflights (curl + http), reports IPs and fingerprint
3. **Task Loop**: Polls for tasks, executes them, reports status
4. **Shutdown**: Graceful 60-second termination period
### Example: Tasks for AZ Stores
```bash
curl -X POST http://localhost:3010/api/tasks/crawl-state/AZ \
-H "Content-Type: application/json"
```
### NEVER Restart Workers Unnecessarily
### Related Files
| File | Purpose |
|------|---------|
| `src/tasks/task-service.ts` | `createStaggeredTasks()` method |
| `src/routes/tasks.ts` | API endpoints for batch task creation |
| `src/tasks/task-worker.ts` | Worker task claiming and preflight logic |
**Claude must NOT**:
- Restart workers unless explicitly requested
- Use `kubectl rollout restart` on workers
- Use `kubectl set image` on workers (this triggers restart)
---
**To update worker code** (only when user authorizes):
1. Build and push new image with version tag
2. Update StatefulSet image reference
3. Manually delete pods one at a time when ready: `kubectl delete pod scraper-worker-0 -n dispensary-scraper`
## Wasabi S3 Storage (Payload Archive)
### Worker Registry API
Raw crawl payloads are archived to Wasabi S3 for long-term storage and potential reprocessing.
**Endpoint**: `GET /api/worker-registry/workers`
### Configuration
| Variable | Description | Default |
|----------|-------------|---------|
| `WASABI_ACCESS_KEY` | Wasabi access key ID | - |
| `WASABI_SECRET_KEY` | Wasabi secret access key | - |
| `WASABI_BUCKET` | Bucket name | `cannaiq` |
| `WASABI_REGION` | Wasabi region | `us-west-2` |
| `WASABI_ENDPOINT` | S3 endpoint URL | `https://s3.us-west-2.wasabisys.com` |
### Storage Path Format
```
payloads/{state}/{YYYY-MM-DD}/{dispensary_id}/{platform}_{timestamp}.json.gz
```
Example: `payloads/AZ/2025-12-16/123/dutchie_2025-12-16T10-30-00-000Z.json.gz`
### Features
- **Gzip compression**: ~70% size reduction on JSON payloads
- **Automatic archival**: Every crawl is archived (not just daily baselines)
- **Metadata**: taskId, productCount, platform stored with each object
- **Graceful fallback**: If Wasabi not configured, archival is skipped (no task failure)
### Files
| File | Purpose |
|------|---------|
| `src/services/wasabi-storage.ts` | S3 client and storage functions |
| `src/tasks/handlers/product-discovery-dutchie.ts` | Archives Dutchie payloads |
| `src/tasks/handlers/product-discovery-jane.ts` | Archives Jane payloads |
| `src/tasks/handlers/product-discovery-treez.ts` | Archives Treez payloads |
### K8s Secret Setup
```bash
kubectl patch secret scraper-secrets -n cannaiq -p '{"stringData":{
"WASABI_ACCESS_KEY": "<access-key>",
"WASABI_SECRET_KEY": "<secret-key>"
}}'
```
### Usage in Code
```typescript
import { storePayload, getPayload, listPayloads } from '../services/wasabi-storage';
// Store a payload
const result = await storePayload(dispensaryId, 'AZ', 'dutchie', rawPayload);
console.log(result.path); // payloads/AZ/2025-12-16/123/dutchie_...
console.log(result.compressedBytes); // Size after gzip
// Retrieve a payload
const payload = await getPayload(result.path);
// List payloads for a store on a date
const paths = await listPayloads(123, 'AZ', '2025-12-16');
```
### Estimated Storage
- ~100KB per crawl (compressed)
- ~200 stores × 12 crawls/day = 240MB/day
- ~7.2GB/month
- 5TB capacity = ~58 years of storage at this rate (5 TB ÷ 7.2 GB/month ≈ 694 months)
---
## Real-Time Inventory Tracking
High-frequency crawling for sales velocity and inventory analytics.
### Crawl Intervals
| State | Interval | Jitter | Effective Range |
|-------|----------|--------|-----------------|
| AZ | 5 min | ±3 min | 2-8 min |
| Others | 60 min | ±3 min | 57-63 min |
### Delta-Only Snapshots
Only store inventory changes, not full state. Reduces storage by ~95%.
**Change Types**:
- `sale`: quantity decreased (qty_delta < 0)
- `restock`: quantity increased (qty_delta > 0)
- `price_change`: price changed, quantity same
- `oos`: went out of stock (qty → 0)
- `back_in_stock`: returned to stock (0 → qty)
- `new_product`: first time seeing product
### Revenue Calculation
```
revenue = ABS(qty_delta) × effective_price
effective_price = sale_price if on_special else regular_price
```
### Key Views
| View | Purpose |
|------|---------|
| `v_hourly_sales` | Sales aggregated by hour |
| `v_daily_store_sales` | Daily revenue by store |
| `v_daily_brand_sales` | Daily brand performance |
| `v_product_velocity` | Hot/steady/slow/stale rankings |
| `v_stock_out_prediction` | Days until OOS based on velocity |
| `v_brand_variants` | SKU counts per brand |
### Files
| File | Purpose |
|------|---------|
| `src/services/inventory-snapshots.ts` | Delta calculation and storage |
| `src/services/task-scheduler.ts` | High-frequency scheduling with jitter |
| `migrations/125_delta_only_snapshots.sql` | Delta columns and views |
| `migrations/126_az_high_frequency.sql` | AZ 5-min intervals |
**Response Fields**:
| Field | Description |
|-------|-------------|
| `pod_name` | Kubernetes pod name |
| `worker_id` | Internal worker UUID |
| `status` | active, idle, offline |
| `curl_ip` | IP from curl preflight |
| `http_ip` | IP from Puppeteer preflight |
| `preflight_status` | pending, passed, failed |
| `preflight_at` | Timestamp of last preflight |
| `fingerprint_data` | Browser fingerprint JSON |
---

View File

@@ -1,33 +1,17 @@
# Build stage
# Image: git.spdy.io/creationshop/dispensary-scraper
FROM node:22-slim AS builder
# Install build tools for native modules (bcrypt, sharp)
RUN apt-get update && apt-get install -y \
python3 \
build-essential \
--no-install-recommends \
&& rm -rf /var/lib/apt/lists/*
# Image: code.cannabrands.app/creationshop/dispensary-scraper
FROM code.cannabrands.app/creationshop/node:20-slim AS builder
WORKDIR /app
COPY package*.json ./
# Install dependencies with retry and fallback registry
RUN npm config set fetch-retries 3 && \
npm config set fetch-retry-mintimeout 20000 && \
npm config set fetch-retry-maxtimeout 120000 && \
npm install || \
(npm config set registry https://registry.npmmirror.com && npm install)
RUN npm install
COPY . .
RUN npm run build
# Prune dev dependencies for smaller production image
RUN npm prune --production
# Production stage
FROM node:22-slim
FROM code.cannabrands.app/creationshop/node:20-slim
# Build arguments for version info
ARG APP_BUILD_VERSION=dev
@@ -60,7 +44,8 @@ ENV PUPPETEER_EXECUTABLE_PATH=/usr/bin/chromium
WORKDIR /app
COPY package*.json ./
COPY --from=builder /app/node_modules ./node_modules
RUN npm install --omit=dev
COPY --from=builder /app/dist ./dist
# Copy migrations for auto-migrate on startup

View File

@@ -0,0 +1,175 @@
# API Security Documentation
This document describes the authentication and authorization configuration for all CannaiQ API endpoints.
## Authentication Methods
### 1. Trusted Origins (No Token Required)
Requests from trusted sources are automatically authenticated with the `internal` role:
**Trusted IPs:**
- `127.0.0.1` (localhost IPv4)
- `::1` (localhost IPv6)
- `::ffff:127.0.0.1` (IPv4-mapped IPv6)
**Trusted Domains:**
- `https://cannaiq.co`
- `https://www.cannaiq.co`
- `https://findadispo.com`
- `https://www.findadispo.com`
- `https://findagram.co`
- `https://www.findagram.co`
- `http://localhost:3010`
- `http://localhost:8080`
- `http://localhost:5173`
**Trusted Patterns:**
- `*.cannabrands.app`
- `*.cannaiq.co`
**Internal Header:**
- `X-Internal-Request` header matching `INTERNAL_REQUEST_SECRET` env var
### 2. Bearer Token Authentication
Requests that do not come from a trusted origin must include a valid bearer token in the `Authorization` header:
```
Authorization: Bearer <token>
```
**Token Types:**
- **JWT Token**: User session tokens (7-day expiry)
- **API Token**: Long-lived tokens for integrations (stored in `api_tokens` table)
## Authorization Levels
### Public (No Auth)
Routes accessible without authentication:
- `GET /health` - Health check
- `GET /api/health/*` - Comprehensive health endpoints
- `GET /outbound-ip` - Server's outbound IP
- `GET /api/v1/deals` - Public deals endpoint
### Authenticated (Trusted Origin or Token)
Routes requiring authentication but no specific role:
| Route | Description |
|-------|-------------|
| `/api/payloads/*` | Raw crawl payload access |
| `/api/workers/*` | Worker monitoring |
| `/api/worker-registry/*` | Worker registration and heartbeats |
| `/api/stores/*` | Store CRUD |
| `/api/products/*` | Product listing |
| `/api/dispensaries/*` | Dispensary data |
### Admin Only (Requires `admin` or `superadmin` role)
Routes restricted to administrators:
| Route | Description |
|-------|-------------|
| `/api/job-queue/*` | Job queue management |
| `/api/k8s/*` | Kubernetes control (scaling) |
| `/api/pipeline/*` | Pipeline stage transitions |
| `/api/tasks/*` | Task queue management |
| `/api/admin/orchestrator/*` | Orchestrator dashboard |
| `/api/admin/trusted-origins/*` | Manage trusted origins |
| `/api/admin/debug/*` | Debug endpoints |
**Note:** The `internal` role (localhost/trusted origins) bypasses role checks, granting automatic admin access for local development and internal services.
## Endpoint Security Matrix
| Endpoint Group | Auth Required | Role Required | Notes |
|----------------|---------------|---------------|-------|
| `/api/payloads/*` | Yes | None | Query API for raw crawl data |
| `/api/job-queue/*` | Yes | admin | Legacy job queue (deprecated) |
| `/api/workers/*` | Yes | None | Worker status monitoring |
| `/api/worker-registry/*` | Yes | None | Workers register via trusted IPs |
| `/api/k8s/*` | Yes | admin | K8s scaling controls |
| `/api/pipeline/*` | Yes | admin | Store pipeline transitions |
| `/api/tasks/*` | Yes | admin | Task queue CRUD |
| `/api/admin/orchestrator/*` | Yes | admin | Orchestrator metrics/alerts |
| `/api/admin/trusted-origins/*` | Yes | admin | Auth bypass management |
| `/api/v1/*` | Varies | Varies | Public API (per-endpoint) |
| `/api/consumer/*` | Varies | Varies | Consumer features |
## Implementation Details
### Middleware Stack
```typescript
// Authentication middleware - validates token or trusted origin
import { authMiddleware } from '../auth/middleware';
// Role requirement middleware - checks user role
import { requireRole } from '../auth/middleware';
// Usage in route files:
router.use(authMiddleware); // All routes need auth
router.use(requireRole('admin', 'superadmin')); // Admin-only routes
```
### Auth Middleware Flow
```
Request → Check Bearer Token
├─ Valid JWT → Set user from token → Continue
├─ Valid API Token → Set user as api_token role → Continue
└─ No Token → Check Trusted Origin
├─ Trusted → Set user as internal role → Continue
└─ Not Trusted → 401 Unauthorized
```
### Role Check Flow
```
Request → authMiddleware → requireRole('admin')
├─ role === 'internal' → Continue (bypass)
├─ role in ['admin', 'superadmin'] → Continue
└─ else → 403 Forbidden
```
## Worker Pod Authentication
Worker pods (in Kubernetes) authenticate via:
1. **Internal IP**: Pods communicate via cluster IPs, which are trusted
2. **Internal Header**: Optional `X-Internal-Request` header for explicit trust
Endpoints used by workers:
- `POST /api/worker-registry/register` - Report for duty
- `POST /api/worker-registry/heartbeat` - Stay alive
- `POST /api/worker-registry/deregister` - Graceful shutdown
- `POST /api/worker-registry/task-completed` - Report task completion
## API Token Management
API tokens are managed via:
- `GET /api/api-tokens` - List tokens
- `POST /api/api-tokens` - Create token
- `DELETE /api/api-tokens/:id` - Revoke token
Token properties:
- `token`: The bearer token value
- `name`: Human-readable identifier
- `rate_limit`: Requests per minute
- `expires_at`: Optional expiration
- `active`: Enable/disable toggle
- `allowed_endpoints`: Optional endpoint restrictions
## Security Best Practices
1. **Never expose tokens in URLs** - Use Authorization header
2. **Use HTTPS in production** - All traffic encrypted
3. **Rotate API tokens periodically** - Set expiration dates
4. **Monitor rate limits** - Prevent abuse
5. **Audit access logs** - Track API usage via `api_usage_logs` table
## Related Files
- `src/auth/middleware.ts` - Auth middleware implementation
- `src/routes/api-tokens.ts` - Token management endpoints
- `src/middleware/apiTokenTracker.ts` - Usage tracking
- `src/middleware/trustedDomains.ts` - Domain trust markers

View File

@@ -99,60 +99,10 @@ src/scraper-v2/*.ts # Entire directory deprecated
|------|---------|--------|
| `src/tasks/handlers/payload-fetch.ts` | Fetch products from Dutchie | **PRIMARY** |
| `src/tasks/handlers/product-refresh.ts` | Process payload into DB | **PRIMARY** |
| `src/tasks/handlers/entry-point-discovery.ts` | Resolve platform IDs (auto-healing) | **PRIMARY** |
| `src/tasks/handlers/menu-detection.ts` | Detect menu type | ACTIVE |
| `src/tasks/handlers/id-resolution.ts` | Resolve platform IDs (legacy) | LEGACY |
| `src/tasks/handlers/id-resolution.ts` | Resolve platform IDs | ACTIVE |
| `src/tasks/handlers/image-download.ts` | Download product images | ACTIVE |
---
## Transport Rules (CRITICAL)
**Browser-based (Puppeteer) is the DEFAULT transport. curl is ONLY allowed when explicitly specified.**
### Transport Selection
| `task.method` | Transport Used | Notes |
|---------------|----------------|-------|
| `null` | Browser (Puppeteer) | DEFAULT - use this for most tasks |
| `'http'` | Browser (Puppeteer) | Explicit browser request |
| `'curl'` | curl-impersonate | ONLY when explicitly needed |
### Why Browser-First?
1. **Anti-detection**: Puppeteer with StealthPlugin evades bot detection
2. **Session cookies**: Browser maintains session state automatically
3. **Fingerprinting**: Real browser fingerprint (TLS, headers, etc.)
4. **Age gates**: Browser can click through age verification
### Entry Point Discovery Auto-Healing
The `entry_point_discovery` handler uses a healing strategy:
```
1. FIRST: Check dutchie_discovery_locations for existing platform_location_id
- By linked dutchie_discovery_id
- By slug match in discovery data
→ If found, NO network call needed
2. SECOND: Browser-based GraphQL (Puppeteer)
- 5x retries for network/proxy failures
- On HTTP 403: rotate proxy and retry
- On HTTP 404 after 2 attempts: mark as 'removed'
3. HARD FAILURE: After exhausting options → 'needs_investigation'
```
### DO NOT Use curl Unless:
- Task explicitly has `method = 'curl'`
- You're testing curl-impersonate binaries
- The API explicitly requires curl fingerprinting
### Files
| File | Transport | Purpose |
|------|-----------|---------|
| `src/services/puppeteer-preflight.ts` | Browser | Preflight check |
| `src/services/curl-preflight.ts` | curl | Preflight check |
| `src/tasks/handlers/entry-point-discovery.ts` | Browser | Platform ID resolution |
| `src/tasks/handlers/payload-fetch.ts` | Both | Product fetching |
### Database
| File | Purpose | Status |
|------|---------|--------|

View File

@@ -1,343 +0,0 @@
# CannaiQ Query API
Query raw crawl payload data with flexible filters, sorting, and aggregation.
## Base URL
```
https://cannaiq.co/api/payloads
```
## Authentication
Include your API key in the header:
```
X-API-Key: your-api-key
```
---
## Endpoints
### 1. Query Products
Filter and search products from a store's latest crawl data.
```
GET /api/payloads/store/{dispensaryId}/query
```
#### Query Parameters
| Parameter | Type | Description |
|-----------|------|-------------|
| `brand` | string | Filter by brand name (partial match) |
| `category` | string | Filter by category (flower, vape, edible, etc.) |
| `subcategory` | string | Filter by subcategory |
| `strain_type` | string | Filter by strain (indica, sativa, hybrid, cbd) |
| `in_stock` | boolean | Filter by stock status (true/false) |
| `price_min` | number | Minimum price |
| `price_max` | number | Maximum price |
| `thc_min` | number | Minimum THC percentage |
| `thc_max` | number | Maximum THC percentage |
| `search` | string | Search product name (partial match) |
| `fields` | string | Comma-separated fields to return |
| `limit` | number | Max results (default 100, max 1000) |
| `offset` | number | Skip results for pagination |
| `sort` | string | Sort by: name, price, thc, brand |
| `order` | string | Sort order: asc, desc |
#### Available Fields
When using `fields` parameter, you can request:
- `id` - Product ID
- `name` - Product name
- `brand` - Brand name
- `category` - Product category
- `subcategory` - Product subcategory
- `strain_type` - Indica/Sativa/Hybrid/CBD
- `price` - Current price
- `price_med` - Medical price
- `price_rec` - Recreational price
- `thc` - THC percentage
- `cbd` - CBD percentage
- `weight` - Product weight/size
- `status` - Stock status
- `in_stock` - Boolean in-stock flag
- `image_url` - Product image
- `description` - Product description
#### Examples
**Get all flower products under $40:**
```
GET /api/payloads/store/112/query?category=flower&price_max=40
```
**Search for "Blue Dream" with high THC:**
```
GET /api/payloads/store/112/query?search=blue+dream&thc_min=20
```
**Get only name, price, and THC for Alien Labs products:**
```
GET /api/payloads/store/112/query?brand=Alien+Labs&fields=name,price,thc
```
**Get top 10 highest THC products:**
```
GET /api/payloads/store/112/query?sort=thc&order=desc&limit=10
```
**Paginate through in-stock products:**
```
GET /api/payloads/store/112/query?in_stock=true&limit=50&offset=0
GET /api/payloads/store/112/query?in_stock=true&limit=50&offset=50
```
#### Response
```json
{
"success": true,
"dispensaryId": 112,
"payloadId": 45,
"fetchedAt": "2025-12-11T10:30:00Z",
"query": {
"filters": {
"brand": "Alien Labs",
"category": null,
"price_max": null
},
"sort": "price",
"order": "asc",
"limit": 100,
"offset": 0
},
"pagination": {
"total": 15,
"returned": 15,
"limit": 100,
"offset": 0,
"has_more": false
},
"products": [
{
"id": "507f1f77bcf86cd799439011",
"name": "Alien Labs - Baklava 3.5g",
"brand": "Alien Labs",
"category": "flower",
"strain_type": "hybrid",
"price": 55,
"thc": "28.5",
"in_stock": true
}
]
}
```
---
### 2. Aggregate Data
Group products and calculate metrics.
```
GET /api/payloads/store/{dispensaryId}/aggregate
```
#### Query Parameters
| Parameter | Type | Description |
|-----------|------|-------------|
| `group_by` | string | **Required.** Field to group by: brand, category, subcategory, strain_type |
| `metrics` | string | Comma-separated metrics (default: count) |
#### Available Metrics
- `count` - Number of products
- `avg_price` - Average price
- `min_price` - Lowest price
- `max_price` - Highest price
- `avg_thc` - Average THC percentage
- `in_stock_count` - Number of in-stock products
#### Examples
**Count products by brand:**
```
GET /api/payloads/store/112/aggregate?group_by=brand
```
**Get price stats by category:**
```
GET /api/payloads/store/112/aggregate?group_by=category&metrics=count,avg_price,min_price,max_price
```
**Get THC averages by strain type:**
```
GET /api/payloads/store/112/aggregate?group_by=strain_type&metrics=count,avg_thc
```
**Brand analysis with stock info:**
```
GET /api/payloads/store/112/aggregate?group_by=brand&metrics=count,avg_price,in_stock_count
```
#### Response
```json
{
"success": true,
"dispensaryId": 112,
"payloadId": 45,
"fetchedAt": "2025-12-11T10:30:00Z",
"groupBy": "brand",
"metrics": ["count", "avg_price"],
"totalProducts": 450,
"groupCount": 85,
"aggregations": [
{
"brand": "Alien Labs",
"count": 15,
"avg_price": 52.33
},
{
"brand": "Connected",
"count": 12,
"avg_price": 48.50
}
]
}
```
---
### 3. Compare Stores (Price Comparison)
Query the same data from multiple stores and compare in your app:
```javascript
// Get flower prices from Store A
const storeA = await fetch('/api/payloads/store/112/query?category=flower&fields=name,brand,price');
// Get flower prices from Store B
const storeB = await fetch('/api/payloads/store/115/query?category=flower&fields=name,brand,price');
// Compare in your app
const dataA = await storeA.json();
const dataB = await storeB.json();
// Find matching products and compare prices
```
---
### 4. Price History
For historical price data, use the snapshots endpoint:
```
GET /api/v1/products/{productId}/history?days=30
```
Or compare payloads over time:
```
GET /api/payloads/store/{dispensaryId}/diff?from={payloadId1}&to={payloadId2}
```
The diff endpoint shows:
- Products added
- Products removed
- Price changes
- Stock changes
---
### 5. List Stores
Get available dispensaries to query:
```
GET /api/stores
```
Returns all stores with their IDs, names, and locations.
---
## Use Cases
### Price Comparison App
```javascript
// 1. Get stores in Arizona
const stores = await fetch('/api/stores?state=AZ').then(r => r.json());
// 2. Query flower prices from each store
const prices = await Promise.all(
stores.map(store =>
fetch(`/api/payloads/store/${store.id}/query?category=flower&fields=name,brand,price`)
.then(r => r.json())
)
);
// 3. Build comparison matrix in your app
```
### Brand Analytics Dashboard
```javascript
// Get brand presence across stores
const brandData = await Promise.all(
storeIds.map(id =>
fetch(`/api/payloads/store/${id}/aggregate?group_by=brand&metrics=count,avg_price`)
.then(r => r.json())
)
);
// Aggregate brand presence across all stores
```
### Deal Finder
```javascript
// Find high-THC flower under $30
const deals = await fetch(
'/api/payloads/store/112/query?category=flower&price_max=30&thc_min=20&in_stock=true&sort=thc&order=desc'
).then(r => r.json());
```
### Inventory Tracker
```javascript
// Get products that went out of stock
const diff = await fetch('/api/payloads/store/112/diff').then(r => r.json());
const outOfStock = diff.details.stockChanges.filter(
p => p.newStatus !== 'Active'
);
```
---
## Rate Limits
- Default: 100 requests/minute per API key
- Contact support for higher limits
## Error Responses
```json
{
"success": false,
"error": "Error message here"
}
```
Common errors:
- `404` - Store or payload not found
- `400` - Missing required parameter
- `401` - Invalid or missing API key
- `429` - Rate limit exceeded

View File

@@ -504,103 +504,6 @@ The Workers Dashboard shows:
| `src/routes/worker-registry.ts:148-195` | Heartbeat endpoint handling |
| `cannaiq/src/pages/WorkersDashboard.tsx:233-305` | UI components for resources |
## Browser Task Memory Limits (Updated 2025-12)
Browser-based tasks (Puppeteer/Chrome) have strict memory constraints that limit concurrency.
### Why Browser Tasks Are Different
Each browser task launches a Chrome process. Unlike I/O-bound API calls, browsers consume significant RAM:
| Component | RAM Usage |
|-----------|-----------|
| Node.js runtime | ~150 MB |
| Chrome browser (base) | ~200-250 MB |
| Dutchie menu page (loaded) | ~100-150 MB |
| **Per browser total (Chrome + loaded page, excluding the shared Node.js runtime)** | **~350-450 MB** |
### Memory Math for Pod Limits
```
Pod memory limit: 2 GB (2000 MB)
Node.js runtime: -150 MB
Safety buffer: -100 MB
────────────────────────────────
Available for browsers: 1750 MB
Per browser + page: ~400 MB
Max browsers: 1750 ÷ 400 = ~4 browsers
Recommended: 3 browsers (leaves headroom for spikes)
```
### MAX_CONCURRENT_TASKS for Browser Tasks
| Browsers per Pod | RAM Used | Risk Level |
|------------------|----------|------------|
| 1 | ~500 MB | Very safe |
| 2 | ~900 MB | Safe |
| **3** | **~1.3 GB** | **Recommended** |
| 4 | ~1.7 GB | Tight (may OOM) |
| 5+ | >2 GB | Will OOM crash |
**CRITICAL**: `MAX_CONCURRENT_TASKS=3` is the maximum safe value for browser tasks with current pod limits.
### Scaling Strategy
Scale **horizontally** (more pods) rather than vertically (more concurrency per pod):
```
┌─────────────────────────────────────────────────────────────────────────┐
│ Cluster: 8 pods × 3 browsers = 24 concurrent tasks │
│ │
│ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │
│ │ Pod 0 │ │ Pod 1 │ │ Pod 2 │ │ Pod 3 │ │
│ │ 3 browsers │ │ 3 browsers │ │ 3 browsers │ │ 3 browsers │ │
│ └─────────────┘ └─────────────┘ └─────────────┘ └─────────────┘ │
│ │
│ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │
│ │ Pod 4 │ │ Pod 5 │ │ Pod 6 │ │ Pod 7 │ │
│ │ 3 browsers │ │ 3 browsers │ │ 3 browsers │ │ 3 browsers │ │
│ └─────────────┘ └─────────────┘ └─────────────┘ └─────────────┘ │
└─────────────────────────────────────────────────────────────────────────┘
```
### Browser Lifecycle Per Task
Each task gets a fresh browser with fresh IP/identity:
```
1. Claim task from queue
2. Get fresh proxy from pool
3. Launch browser with proxy
4. Run preflight (verify IP)
5. Execute scrape
6. Close browser
7. Repeat
```
This ensures:
- Fresh IP per task (proxy rotation)
- Fresh fingerprint per task (UA rotation)
- No cookie/session bleed between tasks
- Predictable memory usage
### Increasing Capacity
To handle more concurrent tasks:
1. **Add more pods** (up to 8 per CLAUDE.md limit)
2. **Increase pod memory** (allows 4 browsers per pod):
```yaml
resources:
limits:
memory: "2.5Gi" # from 2Gi
```
**DO NOT** simply increase `MAX_CONCURRENT_TASKS` without also increasing pod memory limits.
## Monitoring
### Logs

View File

@@ -2,7 +2,7 @@ apiVersion: v1
kind: Service
metadata:
name: scraper-worker
namespace: cannaiq
namespace: dispensary-scraper
labels:
app: scraper-worker
spec:
@@ -17,7 +17,7 @@ apiVersion: apps/v1
kind: StatefulSet
metadata:
name: scraper-worker
namespace: cannaiq
namespace: dispensary-scraper
spec:
serviceName: scraper-worker
replicas: 8
@@ -37,7 +37,7 @@ spec:
- name: regcred
containers:
- name: worker
image: git.spdy.io/creationshop/cannaiq:latest
image: code.cannabrands.app/creationshop/dispensary-scraper:latest
imagePullPolicy: Always
command: ["node"]
args: ["dist/tasks/task-worker.js"]

View File

@@ -1,59 +0,0 @@
-- Migration 085: Trusted Origins Management
-- Allows admin to manage trusted IPs and domains via UI instead of hardcoded values.
--
-- Every row in this table is an allow-list entry that bypasses API key
-- authentication, so seeded values must be as narrow as possible.

-- Trusted origins table (IPs and domains that bypass API key auth)
CREATE TABLE IF NOT EXISTS trusted_origins (
    id SERIAL PRIMARY KEY,

    -- Origin type: 'ip', 'domain', 'pattern'
    origin_type VARCHAR(20) NOT NULL CHECK (origin_type IN ('ip', 'domain', 'pattern')),

    -- The actual value
    -- For ip: '127.0.0.1', '::1', '192.168.1.0/24'
    -- For domain: 'cannaiq.co', 'findadispo.com'
    -- For pattern: '^https://([a-z0-9-]+\.)+cannabrands\.app$' (regex)
    origin_value VARCHAR(255) NOT NULL,

    -- Description for admin reference
    description TEXT,

    -- Active flag
    active BOOLEAN DEFAULT true,

    -- Audit
    created_at TIMESTAMPTZ DEFAULT NOW(),
    created_by INTEGER REFERENCES users(id),
    updated_at TIMESTAMPTZ DEFAULT NOW(),

    -- One row per (type, value); also the conflict target for the seed below
    UNIQUE(origin_type, origin_value)
);

-- Index for quick lookups
CREATE INDEX IF NOT EXISTS idx_trusted_origins_active ON trusted_origins(active) WHERE active = true;
CREATE INDEX IF NOT EXISTS idx_trusted_origins_type ON trusted_origins(origin_type, active);

-- Seed with current hardcoded values
INSERT INTO trusted_origins (origin_type, origin_value, description) VALUES
    -- Trusted IPs (localhost)
    ('ip', '127.0.0.1', 'Localhost IPv4'),
    ('ip', '::1', 'Localhost IPv6'),
    ('ip', '::ffff:127.0.0.1', 'Localhost IPv4-mapped IPv6'),
    -- Trusted domains
    ('domain', 'cannaiq.co', 'CannaiQ production'),
    ('domain', 'www.cannaiq.co', 'CannaiQ production (www)'),
    ('domain', 'findadispo.com', 'FindADispo production'),
    ('domain', 'www.findadispo.com', 'FindADispo production (www)'),
    ('domain', 'findagram.co', 'Findagram production'),
    ('domain', 'www.findagram.co', 'Findagram production (www)'),
    ('domain', 'localhost:3010', 'Local backend dev'),
    ('domain', 'localhost:8080', 'Local admin dev'),
    ('domain', 'localhost:5173', 'Local Vite dev'),
    -- Pattern-based (regex).
    -- The subdomain part is restricted to DNS label characters. A bare '.*'
    -- also matches '/', '?' and '#', so a string such as
    -- 'https://evil.example/.cannabrands.app' would satisfy the pattern and
    -- be treated as trusted.
    -- NOTE(review): if the looser '.*' variants were already seeded by an
    -- earlier run of this migration they are separate rows (the conflict key
    -- includes origin_value) and must be deactivated separately.
    ('pattern', '^https://([a-z0-9-]+\.)+cannabrands\.app$', 'All cannabrands.app subdomains'),
    ('pattern', '^https://([a-z0-9-]+\.)+cannaiq\.co$', 'All cannaiq.co subdomains')
ON CONFLICT (origin_type, origin_value) DO NOTHING;

-- Add comment
COMMENT ON TABLE trusted_origins IS 'IPs and domains that bypass API key authentication. Managed via /admin.';

View File

@@ -1,10 +0,0 @@
-- Migration 086: raw proxy URL column
--
-- Some proxy providers hand out connection strings in non-standard formats
-- (e.g. host:port:user:pass). proxies.proxy_url stores such a string verbatim.

-- Nullable, no default: rows without a raw URL are left untouched.
ALTER TABLE proxies
    ADD COLUMN IF NOT EXISTS proxy_url TEXT;

-- Column documentation
COMMENT ON COLUMN proxies.proxy_url IS
    'Raw proxy URL (if provider uses non-standard format). Takes precedence over constructed URL from host/port/user/pass.';

View File

@@ -1,30 +0,0 @@
-- Migration 088: Extend raw_crawl_payloads for discovery payloads
--
-- Store discovery crawls return raw dispensary objects. This migration lets
-- raw_crawl_payloads hold those alongside the existing product payloads.

ALTER TABLE raw_crawl_payloads
    -- Distinguishes product crawls from discovery crawls; existing rows become 'product'
    ADD COLUMN IF NOT EXISTS payload_type VARCHAR(32) NOT NULL DEFAULT 'product',
    -- State covered by a discovery payload (null for product payloads)
    ADD COLUMN IF NOT EXISTS state_code VARCHAR(10),
    -- Store tally for discovery payloads (alternative to product_count)
    ADD COLUMN IF NOT EXISTS store_count INTEGER,
    -- Discovery payloads are not tied to a single dispensary
    ALTER COLUMN dispensary_id DROP NOT NULL;

-- Partial index: only discovery rows are indexed, keyed by type and state
CREATE INDEX IF NOT EXISTS idx_raw_crawl_payloads_type_state
    ON raw_crawl_payloads (payload_type, state_code)
    WHERE payload_type = 'store_discovery';

-- Column documentation
COMMENT ON COLUMN raw_crawl_payloads.payload_type IS 'Type: product (default), store_discovery';
COMMENT ON COLUMN raw_crawl_payloads.state_code IS 'State code for discovery payloads (e.g., AZ, MI)';
COMMENT ON COLUMN raw_crawl_payloads.store_count IS 'Number of stores in discovery payload';

View File

@@ -1,105 +0,0 @@
-- Migration 089: Immutable Schedules with Per-State Product Discovery
--
-- Key changes:
--   1. Add is_immutable column - schedules can be edited but not deleted
--   2. Add method column - all tasks use 'http' (Puppeteer transport)
--   3. Store discovery weekly (168h)
--   4. Per-state product_discovery schedules (4h default)
--   5. Remove old payload_fetch schedules

-- =====================================================
-- 1) Add new columns to task_schedules
-- =====================================================
ALTER TABLE task_schedules
    ADD COLUMN IF NOT EXISTS is_immutable BOOLEAN DEFAULT FALSE;

ALTER TABLE task_schedules
    ADD COLUMN IF NOT EXISTS method VARCHAR(10) DEFAULT 'http';

-- =====================================================
-- 2) Update store_discovery to weekly and immutable
-- =====================================================
-- NOTE(review): a legacy row named 'store_discovery_dutchie' is updated here,
-- and the INSERT below still adds a separate 'Store Discovery' row. Confirm
-- that two store-discovery schedules are intended when the legacy row exists.
UPDATE task_schedules
SET interval_hours = 168, -- 7 days
    is_immutable = TRUE,
    method = 'http',
    description = 'Discover new Dutchie stores weekly (HTTP transport)'
WHERE name IN ('store_discovery_dutchie', 'Store Discovery');

-- Insert if doesn't exist; otherwise force the same settings as the UPDATE above
INSERT INTO task_schedules (name, role, interval_hours, priority, description, is_immutable, method, platform, next_run_at)
VALUES ('Store Discovery', 'store_discovery', 168, 5, 'Discover new Dutchie stores weekly (HTTP transport)', TRUE, 'http', 'dutchie', NOW())
ON CONFLICT (name) DO UPDATE SET
    interval_hours = 168,
    is_immutable = TRUE,
    method = 'http',
    description = 'Discover new Dutchie stores weekly (HTTP transport)';

-- =====================================================
-- 3) Remove old payload_fetch and product_refresh_all schedules
-- =====================================================
DELETE FROM task_schedules WHERE name IN ('payload_fetch_all', 'product_refresh_all');

-- =====================================================
-- 4) Create per-state product_discovery schedules
-- =====================================================
-- One schedule per state that has at least one crawl-enabled dispensary.
-- On conflict the existing interval, priority and enabled flag are kept;
-- only immutability, method and description are refreshed.
INSERT INTO task_schedules (name, role, state_code, interval_hours, priority, description, is_immutable, method, enabled, next_run_at)
SELECT
    'product_discovery_' || lower(s.code) AS name,
    'product_discovery' AS role,
    s.code AS state_code,
    4 AS interval_hours, -- 4 hours default, editable
    10 AS priority,
    'Product discovery for ' || s.name || ' dispensaries (HTTP transport)' AS description,
    TRUE AS is_immutable, -- Can edit but not delete
    'http' AS method,
    COALESCE(s.is_active, FALSE) AS enabled, -- NULL is_active counts as disabled
    -- Stagger start times: each state starts 5 minutes after the previous
    NOW() + (ROW_NUMBER() OVER (ORDER BY s.code) * INTERVAL '5 minutes') AS next_run_at
FROM states s
WHERE EXISTS (
    SELECT 1 FROM dispensaries d
    WHERE d.state_id = s.id AND d.crawl_enabled = true
)
ON CONFLICT (name) DO UPDATE SET
    is_immutable = TRUE,
    method = 'http',
    description = EXCLUDED.description;

-- Also create schedules for states that might have stores discovered later.
-- Only states without a schedule row yet are inserted.
INSERT INTO task_schedules (name, role, state_code, interval_hours, priority, description, is_immutable, method, enabled, next_run_at)
SELECT
    'product_discovery_' || lower(s.code) AS name,
    'product_discovery' AS role,
    s.code AS state_code,
    4 AS interval_hours,
    10 AS priority,
    'Product discovery for ' || s.name || ' dispensaries (HTTP transport)' AS description,
    TRUE AS is_immutable,
    'http' AS method,
    FALSE AS enabled, -- Disabled until stores exist
    NOW() + INTERVAL '1 hour' AS next_run_at
FROM states s
WHERE NOT EXISTS (
    SELECT 1 FROM task_schedules ts WHERE ts.name = 'product_discovery_' || lower(s.code)
)
ON CONFLICT (name) DO NOTHING;

-- =====================================================
-- 5) Make analytics_refresh immutable
-- =====================================================
UPDATE task_schedules
SET is_immutable = TRUE, method = 'http'
WHERE name = 'analytics_refresh';

-- =====================================================
-- 6) Add index for schedule lookups
-- =====================================================
CREATE INDEX IF NOT EXISTS idx_task_schedules_state_code
    ON task_schedules(state_code)
    WHERE state_code IS NOT NULL;

-- Comments
COMMENT ON COLUMN task_schedules.is_immutable IS 'If TRUE, schedule cannot be deleted (only edited)';
-- The 'curl' method is documented as the curl-impersonate transport, not axios.
COMMENT ON COLUMN task_schedules.method IS 'Transport method: http (Puppeteer/browser) or curl (curl-impersonate)';

View File

@@ -1,66 +0,0 @@
-- Migration 090: Add modification tracking columns
--
-- Records when a row was last touched by a task and by which one, for
-- debugging, auditing and judging data freshness.
--
-- Columns added to both dispensaries and store_products:
--   last_modified_at      - when a task last modified the record
--   last_modified_by_task - role of that task (e.g. 'product_refresh')
--   last_modified_task_id - id of the specific task

-- ============================================================
-- dispensaries table
-- ============================================================
ALTER TABLE dispensaries
    ADD COLUMN IF NOT EXISTS last_modified_at TIMESTAMPTZ,
    ADD COLUMN IF NOT EXISTS last_modified_by_task VARCHAR(50),
    ADD COLUMN IF NOT EXISTS last_modified_task_id INTEGER;

-- Recently modified dispensaries, newest first (rows never modified are skipped)
CREATE INDEX IF NOT EXISTS idx_dispensaries_last_modified
    ON dispensaries (last_modified_at DESC)
    WHERE last_modified_at IS NOT NULL;

-- Lookup by the task role that made the last change
CREATE INDEX IF NOT EXISTS idx_dispensaries_modified_by_task
    ON dispensaries (last_modified_by_task)
    WHERE last_modified_by_task IS NOT NULL;

COMMENT ON COLUMN dispensaries.last_modified_at IS 'Timestamp when this record was last modified by a task';
COMMENT ON COLUMN dispensaries.last_modified_by_task IS 'Task role that last modified this record (e.g., store_discovery_state, entry_point_discovery)';
COMMENT ON COLUMN dispensaries.last_modified_task_id IS 'ID of the worker_tasks record that last modified this';

-- ============================================================
-- store_products table
-- ============================================================
ALTER TABLE store_products
    ADD COLUMN IF NOT EXISTS last_modified_at TIMESTAMPTZ,
    ADD COLUMN IF NOT EXISTS last_modified_by_task VARCHAR(50),
    ADD COLUMN IF NOT EXISTS last_modified_task_id INTEGER;

-- Recently modified products, newest first
CREATE INDEX IF NOT EXISTS idx_store_products_last_modified
    ON store_products (last_modified_at DESC)
    WHERE last_modified_at IS NOT NULL;

-- Lookup by the task role that made the last change
CREATE INDEX IF NOT EXISTS idx_store_products_modified_by_task
    ON store_products (last_modified_by_task)
    WHERE last_modified_by_task IS NOT NULL;

-- Composite index keyed by dispensary, then modification time (newest first):
-- serves "recently modified products of one dispensary" lookups.
CREATE INDEX IF NOT EXISTS idx_store_products_task_modified
    ON store_products (dispensary_id, last_modified_at DESC)
    WHERE last_modified_at IS NOT NULL;

COMMENT ON COLUMN store_products.last_modified_at IS 'Timestamp when this record was last modified by a task';
COMMENT ON COLUMN store_products.last_modified_by_task IS 'Task role that last modified this record (e.g., product_refresh, product_discovery)';
COMMENT ON COLUMN store_products.last_modified_task_id IS 'ID of the worker_tasks record that last modified this';

View File

@@ -1,26 +0,0 @@
-- Migration 091: Add store discovery tracking columns
-- Per auto-healing scheme (2025-12-12), dispensaries record:
--   * when store_discovery last updated the row
--   * when the last product payload was saved

ALTER TABLE dispensaries
    -- Set when store_discovery updates this record
    ADD COLUMN IF NOT EXISTS last_store_discovery_at TIMESTAMPTZ,
    -- Set when a product payload is saved
    -- (complements last_fetch_at, which tracks API fetch time)
    ADD COLUMN IF NOT EXISTS last_payload_at TIMESTAMPTZ;

-- Stale-discovery lookups: crawl-enabled rows only, never-discovered rows sort last
CREATE INDEX IF NOT EXISTS idx_dispensaries_store_discovery_at
    ON dispensaries (last_store_discovery_at DESC NULLS LAST)
    WHERE crawl_enabled = true;

-- Missing/old payload lookups: crawl-enabled rows only, rows with no payload sort last
CREATE INDEX IF NOT EXISTS idx_dispensaries_payload_at
    ON dispensaries (last_payload_at DESC NULLS LAST)
    WHERE crawl_enabled = true;

COMMENT ON COLUMN dispensaries.last_store_discovery_at IS 'When store_discovery task last updated this record';
COMMENT ON COLUMN dispensaries.last_payload_at IS 'When last product payload was saved for this dispensary';

View File

@@ -1,30 +0,0 @@
-- Correct the menu URLs of three dispensaries (ids 224, 76, 403).
-- These records have NULL or mismatched platform_dispensary_id, so the
-- store_discovery ON CONFLICT path can't update them automatically.
UPDATE dispensaries AS d
SET menu_url = fix.menu_url,
    updated_at = NOW()
FROM (
    VALUES
        (224, 'https://dutchie.com/dispensary/svaccha-llc-nirvana-center-apache-junction'),
        (76,  'https://dutchie.com/dispensary/trulieve-of-phoenix-tatum'),
        (403, 'https://dutchie.com/dispensary/harvest-of-havasu')
) AS fix (id, menu_url)
WHERE d.id = fix.id;

-- Queue entry_point_discovery tasks so platform_dispensary_id gets resolved
-- for the corrected rows. method='http' ensures only workers that passed the
-- http preflight can claim these.
INSERT INTO worker_tasks (role, dispensary_id, priority, scheduled_for, method)
VALUES
    ('entry_point_discovery', 224, 5, NOW(), 'http'),
    ('entry_point_discovery', 76, 5, NOW(), 'http'),
    ('entry_point_discovery', 403, 5, NOW(), 'http')
ON CONFLICT DO NOTHING;

View File

@@ -1,35 +0,0 @@
-- Migration 092: Store Intelligence Cache
--
-- One row of pre-computed metrics per dispensary, refreshed by the
-- analytics_refresh task, so the /intelligence/stores endpoint does not have
-- to run costly aggregation queries.
CREATE TABLE IF NOT EXISTS store_intelligence_cache (
    -- One cache row per dispensary; removed together with the dispensary
    dispensary_id   INTEGER PRIMARY KEY REFERENCES dispensaries(id) ON DELETE CASCADE,

    -- Basic counts
    sku_count       INTEGER NOT NULL DEFAULT 0,
    brand_count     INTEGER NOT NULL DEFAULT 0,
    snapshot_count  INTEGER NOT NULL DEFAULT 0,

    -- Pricing
    avg_price_rec   NUMERIC(10,2),
    avg_price_med   NUMERIC(10,2),
    min_price       NUMERIC(10,2),
    max_price       NUMERIC(10,2),

    -- Category breakdown (JSONB for flexibility)
    category_counts JSONB DEFAULT '{}',

    -- Timestamps
    last_crawl_at   TIMESTAMPTZ,
    last_refresh_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),

    -- Metadata
    created_at      TIMESTAMPTZ NOT NULL DEFAULT NOW()
);

-- Orders cache rows by refresh time, most recent first
CREATE INDEX IF NOT EXISTS idx_store_intelligence_cache_refresh
    ON store_intelligence_cache (last_refresh_at DESC);

COMMENT ON TABLE store_intelligence_cache IS 'Pre-computed store intelligence metrics, refreshed by analytics_refresh task';
COMMENT ON COLUMN store_intelligence_cache.category_counts IS 'JSON object mapping category_raw to product count';

View File

@@ -1,43 +0,0 @@
-- Migration: 093_fix_mv_state_metrics.sql
-- Purpose: Rebuild mv_state_metrics so brand counts and store counts are correct.
-- Issues fixed:
--   1. unique_brands used brand_id (often NULL), now uses brand_name_raw
--   2. Added out_of_stock_products column
--   3. dispensary_count now correctly named

-- The view definition changes, so it is dropped and recreated
-- (dropping the view also drops its indexes).
DROP MATERIALIZED VIEW IF EXISTS mv_state_metrics;

CREATE MATERIALIZED VIEW mv_state_metrics AS
SELECT
    d.state,
    s.name AS state_name,

    -- Store counts. DISTINCT is required because the product join repeats
    -- each dispensary once per product.
    COUNT(DISTINCT d.id) AS dispensary_count,
    COUNT(DISTINCT d.id) FILTER (WHERE d.menu_type = 'dutchie') AS dutchie_stores,
    COUNT(DISTINCT d.id) FILTER (WHERE d.crawl_enabled = true) AS active_stores,

    -- Product counts. A NULL is_in_stock counts as in stock.
    COUNT(sp.id) AS total_products,
    COUNT(sp.id) FILTER (WHERE COALESCE(sp.is_in_stock, true)) AS in_stock_products,
    COUNT(sp.id) FILTER (WHERE sp.is_in_stock = false) AS out_of_stock_products,
    COUNT(sp.id) FILTER (WHERE sp.is_on_special) AS on_special_products,

    -- Distinct raw brand / category names; empty brand names are ignored
    COUNT(DISTINCT sp.brand_name_raw) FILTER (WHERE sp.brand_name_raw IS NOT NULL AND sp.brand_name_raw <> '') AS unique_brands,
    COUNT(DISTINCT sp.category_raw) FILTER (WHERE sp.category_raw IS NOT NULL) AS unique_categories,

    -- Recreational price stats over strictly positive prices only
    ROUND(AVG(sp.price_rec) FILTER (WHERE sp.price_rec > 0)::NUMERIC, 2) AS avg_price_rec,
    MIN(sp.price_rec) FILTER (WHERE sp.price_rec > 0) AS min_price_rec,
    MAX(sp.price_rec) FILTER (WHERE sp.price_rec > 0) AS max_price_rec,

    NOW() AS refreshed_at
FROM dispensaries d
LEFT JOIN states s ON d.state = s.code
LEFT JOIN store_products sp ON d.id = sp.dispensary_id
WHERE d.state IS NOT NULL
GROUP BY d.state, s.name;

-- REFRESH ... CONCURRENTLY needs a unique index on the materialized view
CREATE UNIQUE INDEX idx_mv_state_metrics_state ON mv_state_metrics(state);

-- Refresh helper: rebuilds the view without blocking readers
CREATE OR REPLACE FUNCTION refresh_state_metrics()
RETURNS void AS $$
BEGIN
    REFRESH MATERIALIZED VIEW CONCURRENTLY mv_state_metrics;
END;
$$ LANGUAGE plpgsql;

View File

@@ -1,516 +0,0 @@
-- Migration: Import 500 Evomi residential proxies
-- These are sticky-session rotating proxies where password contains session ID
-- Active is set to false - run Test All to verify and activate
-- Widen the uniqueness rule on proxies from (host, port, protocol) to
-- (host, port, protocol, username, password), so that several entries may share
-- the same host:port as long as their credentials (sessions) differ.
--
-- Everything happens in ONE ALTER TABLE so the swap is atomic: the table is
-- never left without a uniqueness constraint if a step fails.
-- PostgreSQL has no "ADD CONSTRAINT IF NOT EXISTS", so any earlier copy of the
-- new constraint is dropped first; this keeps the statement safe to re-run.
--
-- NOTE(review): a plain UNIQUE constraint treats NULLs as distinct, so rows with
-- a NULL username or password are no longer deduplicated once the old
-- constraint is gone - confirm these columns are NOT NULL, or that duplicate
-- unauthenticated proxies are acceptable.
ALTER TABLE proxies
    DROP CONSTRAINT IF EXISTS proxies_host_port_protocol_key,
    DROP CONSTRAINT IF EXISTS proxies_host_port_protocol_username_password_key,
    ADD CONSTRAINT proxies_host_port_protocol_username_password_key
        UNIQUE (host, port, protocol, username, password);
-- Now insert all 500 proxies
INSERT INTO proxies (host, port, protocol, username, password, active, max_connections)
VALUES
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-4XRRPF1UQ', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-5UNGX7N7K', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-9PSKYP1GU', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-GZBKKYL2S', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-YHJHM0XZU', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ESDYQ34CJ', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-GAXUMFKQI', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-2FF66K4CI', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-SUYM0R49B', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-A8VHZMEFP', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-WNRLH6NXR', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-SPSB3IUX6', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-85N76UU5Q', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-189P3LH2F', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-47DQOAGWY', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-IBT0QO7M2', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-UPXOUOH8X', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-BFQ1PH75D', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-KNTFKRY1J', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-5L8IG6DZX', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-9YE13X0BA', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-6KBHCHF0I', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-CETHHFHZ6', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-A06J8ST3I', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-YFS93P1YR', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-RB74B3R6C', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-2JW27O3EU', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-KCUX84BL0', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-1A2KSG6HO', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-4QW8ILV0E', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-0Q09GH2VL', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-16BRXBCYC', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-9W02B3R4L', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-CVAEH76YT', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-CATOG0Q5I', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-F81625L74', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-DO4AVTPK4', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-SBZPXORD5', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-JA1AWOX03', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-0FUJTRSYT', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-CM1R2RSTB', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-EHPJZCK1S', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ZYLKORNAF', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-05A8BUD25', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-RHM1Q6O4M', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ES5VPCE6Z', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-P0JEGLP4O', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-OC4AX88D0', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-3BN54IEBV', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ABSC7S550', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-LNIJU6R2V', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-OYGQPPCOV', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-32YBOHQWR', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-7KGEMK4SL', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-FAW8T2EBW', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-GPV69KI9T', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-JPBHSN8M2', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-VZ1JQOF15', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-7DJXXPK1E', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-JXKQ7JVZ1', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-88Q5UQX3B', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-HAI5K0JFO', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-65SUKG0QH', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-1XFJETX1F', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-7ZNUCVCBW', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-O1DCK15LA', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-WLTEA65WB', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-KCHAFNK2P', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-6ODSZ6CUT', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-SZ8R2EFH4', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-9EPPYQREC', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-MPCBES7UI', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-FCCPL0XWZ', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-GJ23UYEGI', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-RQT80689I', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-TDQO2AP5E', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-D5Q5SEUEO', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-DZN4ZTENM', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-4HVQ33VK9', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-F1HJ7GPHA', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-RM708QD2Y', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-K36N27GM5', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-O73TS0DAE', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-54QXRWEA8', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-1P6LP0365', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-WMZ2ST34E', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-175UYF58T', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-W0HTK6F28', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-D5275CTIM', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-IH2IWVZOH', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-C4VFW7GSA', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-O9XGULSNA', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-PJ1W1P5L9', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-MQQU30KPC', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-BNPIBZTYV', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-7BNRCH922', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-5AZLU117B', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-3PPJ49VJC', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-FMC8CQO74', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-VCHW23CXJ', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-1S4749PCB', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-0T9DJFZPK', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-L0RMV65W3', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-FZ1ZZUQNA', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-6IFJD23DI', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ZKUEP5XM0', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-Z8KU62CLT', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-LO77J78X1', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-27FBKYRJ4', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-0TDQTESGW', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-IMKI89WQ1', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ANS65MIJS', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-O3T2OTT0Y', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-MWW6Z1QVM', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-TT47MX0BB', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-59CFKTM14', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-DOD61TVZN', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-RH9Y298WS', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-X98AATJ7B', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-C3UMES1W8', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-8O3J7G3PT', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-3K4OH78OJ', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-N4A3JMVL1', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-HK1SRLAC9', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-Y9VLJJXVU', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-KTTH7R0EC', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-JKVX01E8T', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-HW2VPAHJO', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-7WZ9UHBH8', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-JTKFK0CP7', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-G3F27NXG5', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-K7I2JWYSP', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-CTUU8UQ0T', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ISHMAP6RQ', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-LVWNZ1LHP', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-N5CQ1YG2Z', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-XL2XY2SLZ', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-UCRZVFIV1', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-VLGQFYNEL', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-YPCDM9O5Y', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-R6VA2S25E', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-4W8X8BBUL', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-5INDC8M80', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-Q8RKKOF29', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-B5ED3EFBC', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-8IC5ZXAX1', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-KCGM25D75', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-1MO06IRID', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-4QWGUGN6W', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-5T9M5KEHT', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-9KG7W7NZF', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-NYGN5R2CL', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-H61OXFCJ2', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-30WSQ4EFH', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-J36NG6MY2', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-TZU34ZA7A', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-LPWNYL74G', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-DDJTXOS4Z', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-HFOS4S185', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-2MLGIFL1M', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-CI5AHX0TC', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-WSXVCH1WN', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-8F0C3D06T', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-3YZR0664F', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-1L2VMWTM0', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-KPMCB57O7', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-N6QXQDZV3', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-35FAYFWDP', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-TVZWE2JR8', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-0WK86IKLF', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-8WBU6ESHJ', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-XGU6UNM01', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-86CXNEQZC', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-NZ4LFCHE3', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ZKB6D72RF', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-BKXNG77NS', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-3MJ332POD', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-SL9VEYNJ0', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-LY8KO43Z8', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-8KGF1XR1L', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-WT6FB54HW', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-7UQ9JMG5E', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-KX3L2040U', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-HL809F9WU', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-T9GU40ERH', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-I5O2NX3G9', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-RVOUYU3NO', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-2T3ETNUKS', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-SW0B93DZZ', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-PQ55UF3K6', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-VNRWWHHJB', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-8Q26FZ7EP', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ZWD9FA90J', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-QSGMQX3RZ', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-83NZ9MEAC', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-Q9QQ4AL37', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-QBE9KD60Z', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-NRNUXUO44', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-8F0XKQ9P8', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-095JV1CJN', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-WRRSIRUTZ', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-DTUD7IDQI', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ASCEAI9LD', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-YOUM7BJZH', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-PEG2ZH9J3', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-WAUW31F78', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-GIBZ6U7AQ', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-63TD9LFBG', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-0MH1N9MJB', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-YFP9RNQIK', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-SW4N5162D', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-53MWFB2MP', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-QWLUKBMIN', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-JHS6QIX9G', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-6R04HZ5UD', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-OUJLT31VN', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-6BMKW933S', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-R4GG84E4Q', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-00XAP630X', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-AK97MC2A0', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-NBS2GKGO5', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-NVFEWK4S5', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-MTV3WSYS1', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-JS8RM4JGW', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-6NL4QR1XN', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-4BUUQVSN6', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-56WEAAU3M', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-WCA56PFTF', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-TK1QAZP0B', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-SYZ5ADFXP', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-S3VLOUW6G', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-V2K1V1JWJ', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-MZ6VHV5PQ', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-DRZDQDPN3', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-231VVRYYA', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-06G3MC88G', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-WS52I2ZVD', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-3QTNQD55U', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-EX7ALECU3', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-DQN8TVQY6', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-FJT54OQFI', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-BLTYUF7QR', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-8DL2JXDSO', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-KBAOXIJ4Y', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ZYL28R5UW', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-NCRDA8LYB', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-BQYKXQLXU', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-PSHCS65MR', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-90Y1WFVYZ', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-4GG33NUPW', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-5Y0A79GED', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-RMZHTAD6J', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-XBSOJ5I36', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-AAJW53VNE', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-9NYSPSEL6', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-94WMY337S', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-35Y3BJQFW', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-R7WY3TMRC', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-RXAQVH0F3', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-EFQ2AVFSB', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-XPOUJSAVD', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-RSHPF5NTT', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-Z9402336V', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-OI36C5WOJ', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-XEOGV1LVS', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-QIQDXG9NC', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-9IY242GGT', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-PQTEUT52E', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-18NKI3WPS', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-34U3QAA49', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-S05TYKBBF', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-B4J8WCWDD', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-HR377WC28', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-PNRR7S1T2', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-UNR0N0KJ9', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-NARQQANBE', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-8PUL1MYUU', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-KJPCT1FP3', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-XGC80N0AM', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-8Y1JN8DH3', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-Y56M31T07', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-NHYHXQSV1', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-V30RZVG7L', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-CR6V2GSOU', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-VSAF5O0LJ', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-4F4BF2LFH', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ERSMQHXNX', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-Q0TFLZQWS', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ZXCS6SMHD', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-JHXYAUGRA', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-IT2XYWES2', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-22UCD94OG', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-VGDLQ3K35', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-O8AFL8RGX', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-9RBIZ8G9X', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-9JIU0SVBV', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-PWRBG0GWU', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ZME1MX12T', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-A7LWRKSJP', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-5XISX0HD4', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-5T6EXKD3Z', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-10ILV351B', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-FDULBZDIY', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-SFVR6I980', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-FKV8DCZGT', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ECRK3M3IZ', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-WMKSLOF39', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-HGE60O6AL', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-RGCWDJOT8', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-DESWK5KVN', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-RD593HJ92', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-XWNCAO39B', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-AQ4XGDLX8', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-2ZOVEA1PL', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-JF4FUX83X', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-CQ228GK3B', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-XCTMU9I7U', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-M3F37T22W', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ASZUXM9M9', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-CJVHX24WW', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-KZT4T898V', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-RI128R5TE', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-HCAG6X9MJ', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-XOQENWBP7', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-1LTQGM497', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ZLVZT4O1G', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-FTIXTXCIA', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-O2YE6QNHY', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-0JPDDBF47', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-H1FP1IFJI', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-FYBPBMY5B', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-F7BWDVC97', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-MLENB1LQ4', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-FT9YNU8UP', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-5W21Q2O5L', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-YM61QWPR3', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-XXFQJJHZM', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-H52YKCM9X', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-NT56ZNZ54', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-DRJY7BMB5', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-P6886RPXX', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-PBXW2EY5K', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-5VQCJTM36', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-NMM3GGM1J', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-1JQQ0CDSA', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-R89YI91K4', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-7L7L9MXOT', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-50Z7MXKZS', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-EGADRZTIB', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-1DR7H46H6', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-O28QZL994', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-EYTRWVERM', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-HAJZAUWJV', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-AGYO3AB89', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-V224329ZM', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-4YTMSFWYK', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-QP40RL1N1', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-CB1BVAMAH', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-9VGXUY02O', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-BCPVVKCZ3', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-VDC3CWZX7', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-7HWLI21FA', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-5QWIUJEFM', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-4C3PBMAIZ', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-3QC7DM7PH', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-A6R5G3FWV', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-3A6WDE12Z', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-0F2LZA9RU', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-XGBJXMXRX', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-5YOGR8PQ1', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-LPBFBUF3N', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-TUSPGR2AY', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-G05I8M2FQ', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-H5NDXJIAQ', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-X8FJL8WQZ', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-KIB2FQRUP', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-VNV0OYWR7', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-GKBPM3PB2', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-XVPI30KE7', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-Y3PRMJP51', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-KEPP5SBML', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-0PDUZ6QEQ', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-1GHWWFLLE', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-149S2TO8O', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-1ZB6FSIGE', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-VCRQTXDZL', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-645JVC3XL', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-2HJ00JBSR', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-7FZDG2W65', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-HD6ANE3LN', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-2HS1B1J8V', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-IHOHYMDF5', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ZYZMAFEKF', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-JO85WX5JE', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-RURJDCURW', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-FZC3BLXPJ', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-B0YR2LOZ1', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-6ZFP58ZRK', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-UMZDLHQ78', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-8A2IHDXY3', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-EDYEPWUMT', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-X3TM99R12', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-DLV0UTQ72', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-SFU0ZYIM0', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-YAJ6A66NH', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-X8CFU41AU', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-CJ3Z4WP32', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-UJBLRQKXA', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-T78R8EBGH', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-DDIH55GNZ', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-F1SSD4NWF', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-4BE55FKRD', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-BG2DFBL46', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-MKVMNR7W4', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-C3Z4JUGU5', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-NVP8EEEGQ', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-MQFWP2LU7', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-BH873JG6H', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-3D76651SM', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-KZ7V6KWMP', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-CD8NEJFJN', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-PWXE9L30H', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-1RT95F5LR', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-Q7CEEROE5', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-Q08APOAEG', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-NNKREGLXE', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-YQEG33MKX', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-VRD9G7H5K', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-68R86GQ1G', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-BXZUKQL2M', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-QM13UD73C', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-I7OOGJLNS', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-GXDBO1IQJ', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-JJZPRFMWN', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-DBTDFITGW', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-VYHL6ASIJ', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-F61NNU332', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-6Z9H72KMC', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-WVOONDMA9', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-CXTSTBXN3', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-CSMZLC921', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-3FTBSARZJ', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ESHGKBXLY', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-E0YLXW5H4', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-3QFI6UMWE', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-23VOWHO88', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-02Q9U5QCH', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-3POMNSMB0', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-NTT8OWUFQ', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-MT5XEHJWX', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ILDOY0PCQ', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-MN9HU4DGO', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-1YOPU7GLL', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ZC5BM5MYB', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-UD3FXK3I9', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-LMDJOV52Y', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-N45X16BSL', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-1CBY3Z7QC', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-F0D3AO9E6', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-YQA8GUOD1', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-2EE999233', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-D6GD5WT2Y', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-7DFBMLTMY', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-J6TJKC6VJ', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-2AWQ3ZRF4', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-4KOVIF5W3', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-3489SXI1U', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-F37VKUHVE', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-GHBMAVCE4', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-W64U46547', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-1GUJV1MGQ', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-M13IOZVI9', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-TX7EVZN1Z', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-2PTS2ML8J', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-VTG83RVX7', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-2IOE6BR66', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-I68XZMR23', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-Q940UN6MU', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-8Y9NFR0N0', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-MYP341DZ8', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-WJ68VGKAZ', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-819MSDR9H', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-27CGND4VG', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-YYDOD47BF', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-YU7F6J8G5', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-HMY16WTCA', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-FPWEBRLG2', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-FGE79X0DE', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-551LMZ84R', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-UWMBDCTX4', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-BNHQXW9HY', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-WB0P5LCN6', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-Z4P9E1SVG', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-UVW2G9IRN', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-OO93WVLB0', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-NTRIK82TG', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-8TXV42S74', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-Z74LKL50G', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-QQEXNIPTR', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-WGK2VD34L', false, 1)
ON CONFLICT DO NOTHING;

View File

@@ -1,81 +0,0 @@
-- Migration: Auto-retry failed proxies after cooldown period
-- Proxies that fail will be retried after a configurable interval
-- Track the moment each proxy most recently failed. The cooldown / retry
-- logic compares this timestamp against the configured retry interval.
ALTER TABLE proxies
    ADD COLUMN IF NOT EXISTS last_failed_at TIMESTAMP;
-- Seed the tunables that drive proxy retry behaviour.
-- ON CONFLICT ... DO NOTHING leaves any existing row for the same key
-- untouched, so re-running the migration does not overwrite current values.
INSERT INTO settings (key, value, description)
VALUES
    ('proxy_retry_interval_hours', '4', 'Hours to wait before retrying a failed proxy'),
    ('proxy_max_failures_before_permanent', '10', 'Max failures before proxy is permanently disabled')
ON CONFLICT (key) DO NOTHING;
-- get_eligible_proxy_ids()
--
-- Returns the ids of proxies that may be used right now:
--   * every proxy with active = true, plus
--   * inactive proxies whose last failure is older than the retry interval
--     and whose failure_count is still below the permanent-disable limit.
--
-- Settings read (table "settings"):
--   proxy_retry_interval_hours           default 4  when missing or NULL
--   proxy_max_failures_before_permanent  default 10 when missing or NULL
--
-- Result order: active proxies first, then fewest failures, ties broken
-- randomly so callers that pick the first row spread load across proxies.
CREATE OR REPLACE FUNCTION get_eligible_proxy_ids()
RETURNS TABLE(proxy_id INT) AS $$
DECLARE
    v_retry_hours  INT;
    v_max_failures INT;
BEGIN
    -- A plain "SELECT COALESCE(...) INTO var" assigns NULL when the settings
    -- row does not exist (COALESCE is never evaluated on zero rows), which
    -- would turn the cooldown predicate into NULL and hide every failed
    -- proxy. Wrapping a scalar subquery in COALESCE applies the default in
    -- both the "row missing" and "value is NULL" cases.
    v_retry_hours := COALESCE(
        (SELECT s.value::int FROM settings s WHERE s.key = 'proxy_retry_interval_hours'),
        4
    );

    -- Use the configured failure limit instead of a hard-coded 10 so this
    -- function agrees with auto_reenable_proxies().
    v_max_failures := COALESCE(
        (SELECT s.value::int FROM settings s WHERE s.key = 'proxy_max_failures_before_permanent'),
        10
    );

    RETURN QUERY
    SELECT p.id
    FROM proxies p
    WHERE p.active = true
       OR (
            p.active = false
            AND p.last_failed_at IS NOT NULL
            AND p.last_failed_at < NOW() - (v_retry_hours * INTERVAL '1 hour')
            AND p.failure_count < v_max_failures  -- Don't retry if too many failures
          )
    ORDER BY
        p.active DESC,        -- Prefer active proxies
        p.failure_count ASC,  -- Then prefer proxies with fewer failures
        RANDOM();
END;
$$ LANGUAGE plpgsql;
-- auto_reenable_proxies()
--
-- Marks inactive proxies as active again once their cooldown has elapsed.
-- This migration only defines the function; nothing here schedules it, so
-- it must be invoked periodically by an external caller.
--
-- Settings read (table "settings"):
--   proxy_retry_interval_hours           default 4  when missing or NULL
--   proxy_max_failures_before_permanent  default 10 when missing or NULL
--
-- Returns: the number of proxies that were re-enabled by this call.
CREATE OR REPLACE FUNCTION auto_reenable_proxies()
RETURNS INT AS $$
DECLARE
    v_retry_hours     INT;
    v_max_failures    INT;
    v_reenabled_count INT;
BEGIN
    -- "SELECT COALESCE(...) INTO var" leaves var NULL when the settings row
    -- is absent, which would make the UPDATE below match nothing. A scalar
    -- subquery inside COALESCE applies the default for a missing row as well
    -- as for a NULL value.
    v_retry_hours := COALESCE(
        (SELECT s.value::int FROM settings s WHERE s.key = 'proxy_retry_interval_hours'),
        4
    );
    v_max_failures := COALESCE(
        (SELECT s.value::int FROM settings s WHERE s.key = 'proxy_max_failures_before_permanent'),
        10
    );

    -- Re-enable proxies that have cooled down and are still under the
    -- permanent-disable failure limit.
    UPDATE proxies
    SET active = true,
        updated_at = NOW()
    WHERE active = false
      AND last_failed_at IS NOT NULL
      AND last_failed_at < NOW() - (v_retry_hours * INTERVAL '1 hour')
      AND failure_count < v_max_failures;

    GET DIAGNOSTICS v_reenabled_count = ROW_COUNT;

    -- Only emit a notice when something actually changed.
    IF v_reenabled_count > 0 THEN
        RAISE NOTICE 'Auto-reenabled % proxies after % hour cooldown', v_reenabled_count, v_retry_hours;
    END IF;

    RETURN v_reenabled_count;
END;
$$ LANGUAGE plpgsql;
-- Composite index over the columns the retry predicates filter on
-- (active, last_failed_at, failure_count).
CREATE INDEX IF NOT EXISTS idx_proxies_retry
    ON proxies (active, last_failed_at, failure_count);

-- Catalog documentation for the new column and the re-enable function.
COMMENT ON COLUMN proxies.last_failed_at
    IS 'Timestamp of last failure - used for auto-retry logic';
COMMENT ON FUNCTION auto_reenable_proxies()
    IS 'Call periodically to re-enable failed proxies that have cooled down';

View File

@@ -1,20 +0,0 @@
-- Migration: Add trigram indexes for fast ILIKE product searches
-- Enables fast searches on name_raw, brand_name_raw, and description
-- pg_trgm supplies the gin_trgm_ops operator class used by the indexes below.
CREATE EXTENSION IF NOT EXISTS pg_trgm;

-- GIN trigram indexes so ILIKE searches on these text columns can use an index.
-- NOTE(review): CREATE INDEX CONCURRENTLY cannot run inside a transaction
-- block - confirm the migration runner executes this file outside one.
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_store_products_name_trgm
    ON store_products
    USING gin (name_raw gin_trgm_ops);

CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_store_products_brand_name_trgm
    ON store_products
    USING gin (brand_name_raw gin_trgm_ops);

CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_store_products_description_trgm
    ON store_products
    USING gin (description gin_trgm_ops);

-- Catalog comments describing each index.
COMMENT ON INDEX idx_store_products_name_trgm
    IS 'Trigram index for fast ILIKE searches on product name';
COMMENT ON INDEX idx_store_products_brand_name_trgm
    IS 'Trigram index for fast ILIKE searches on brand name';
COMMENT ON INDEX idx_store_products_description_trgm
    IS 'Trigram index for fast ILIKE searches on description';

View File

@@ -1,11 +0,0 @@
-- Migration: Add indexes for dashboard performance
-- Speeds up the tasks listing query with ORDER BY and JOIN
-- Partial index supporting the JOIN against worker_registry; rows with a
-- NULL worker_id are excluded from the index.
-- NOTE(review): CREATE INDEX CONCURRENTLY cannot run inside a transaction
-- block - confirm the migration runner executes this file outside one.
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_worker_tasks_worker_id
    ON worker_tasks (worker_id)
    WHERE worker_id IS NOT NULL;

-- Descending index matching the dashboard listing's ORDER BY created_at DESC.
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_worker_tasks_created_at_desc
    ON worker_tasks (created_at DESC);

View File

@@ -1,13 +0,0 @@
-- Migration: Add stage tracking columns to dispensaries table
-- Required for stage checkpoint feature in task handlers
-- Stage-tracking columns: two counters that start at 0 and the timestamp
-- of the most recent successful crawl.
ALTER TABLE dispensaries
    ADD COLUMN IF NOT EXISTS consecutive_successes INTEGER DEFAULT 0,
    ADD COLUMN IF NOT EXISTS consecutive_failures INTEGER DEFAULT 0,
    ADD COLUMN IF NOT EXISTS last_successful_crawl_at TIMESTAMPTZ;

-- Partial indexes for finding stores by status; each covers only rows
-- whose counter is above zero.
CREATE INDEX IF NOT EXISTS idx_dispensaries_consecutive_successes
    ON dispensaries (consecutive_successes)
    WHERE consecutive_successes > 0;

CREATE INDEX IF NOT EXISTS idx_dispensaries_consecutive_failures
    ON dispensaries (consecutive_failures)
    WHERE consecutive_failures > 0;

View File

@@ -1,68 +0,0 @@
-- Migration: 099_working_hours.sql
-- Description: Working hours profiles for natural traffic pattern simulation
-- Created: 2024-12-13
-- Working hours table: defines hourly activity weights to mimic natural traffic.
-- One row per named profile.
CREATE TABLE IF NOT EXISTS working_hours (
id SERIAL PRIMARY KEY,
name VARCHAR(50) UNIQUE NOT NULL,
description TEXT,
-- Hour weights: {"0": 15, "1": 5, ..., "18": 100, ...}
-- Value = percent chance to trigger activity that hour (0-100)
hour_weights JSONB NOT NULL,
-- Day-of-week multipliers (0=Sunday, 6=Saturday)
-- Optional adjustment for weekend vs weekday patterns.
-- Values are percentages: 100 leaves the hour weight unchanged, and the
-- default uses 110 for Friday, so a combined weight can exceed 100.
dow_weights JSONB DEFAULT '{"0": 90, "1": 100, "2": 100, "3": 100, "4": 100, "5": 110, "6": 95}',
timezone VARCHAR(50) DEFAULT 'America/Phoenix',
enabled BOOLEAN DEFAULT true,
created_at TIMESTAMPTZ DEFAULT NOW(),
updated_at TIMESTAMPTZ DEFAULT NOW()
);
-- Seed: Natural traffic pattern based on internet usage research
-- Optimized for cannabis dispensary browsing (lunch + after-work peaks)
-- Re-running this migration overwrites hour_weights and description of an
-- existing 'natural_traffic' row; timezone, dow_weights and enabled are not touched.
INSERT INTO working_hours (name, description, timezone, hour_weights) VALUES (
'natural_traffic',
'Mimics natural user browsing patterns - peaks at lunch and 5-7 PM',
'America/Phoenix',
'{
"0": 15,
"1": 5,
"2": 5,
"3": 5,
"4": 5,
"5": 10,
"6": 20,
"7": 30,
"8": 35,
"9": 45,
"10": 50,
"11": 60,
"12": 75,
"13": 65,
"14": 60,
"15": 70,
"16": 80,
"17": 95,
"18": 100,
"19": 100,
"20": 90,
"21": 70,
"22": 45,
"23": 25
}'::jsonb
) ON CONFLICT (name) DO UPDATE SET
hour_weights = EXCLUDED.hour_weights,
description = EXCLUDED.description,
updated_at = NOW();
-- Indexes for quick lookups
-- NOTE(review): the UNIQUE constraint on name already creates an index on that
-- column, so idx_working_hours_name looks redundant - confirm before keeping it.
CREATE INDEX IF NOT EXISTS idx_working_hours_name ON working_hours(name);
CREATE INDEX IF NOT EXISTS idx_working_hours_enabled ON working_hours(enabled);
COMMENT ON TABLE working_hours IS 'Activity profiles for natural traffic simulation. Hour weights are percent chance (0-100) to trigger activity.';
COMMENT ON COLUMN working_hours.hour_weights IS 'JSON object mapping hour (0-23) to percent chance (0-100). 100 = always run, 0 = never run.';
COMMENT ON COLUMN working_hours.dow_weights IS 'Optional day-of-week multipliers. 0=Sunday. Applied as (hour_weight * dow_weight / 100).';

View File

@@ -1,19 +0,0 @@
-- Migration: 100_worker_timezone.sql
-- Description: Add timezone column to worker_registry for working hours support
-- Created: 2024-12-13
-- Add timezone column to worker_registry
-- Populated from preflight IP geolocation (e.g., 'America/New_York')
-- The column is nullable with no default, so existing rows keep NULL.
ALTER TABLE worker_registry
ADD COLUMN IF NOT EXISTS timezone VARCHAR(50);
-- Add working_hours_id to link worker to a specific working hours profile
-- NULL means use default 'natural_traffic' profile
-- The foreign key requires the working_hours table to exist before this runs.
ALTER TABLE worker_registry
ADD COLUMN IF NOT EXISTS working_hours_id INTEGER REFERENCES working_hours(id);
-- Index for workers by timezone (useful for capacity planning)
CREATE INDEX IF NOT EXISTS idx_worker_registry_timezone ON worker_registry(timezone);
COMMENT ON COLUMN worker_registry.timezone IS 'IANA timezone from preflight IP geolocation (e.g., America/New_York)';
COMMENT ON COLUMN worker_registry.working_hours_id IS 'Reference to working_hours profile. NULL uses default natural_traffic.';

View File

@@ -1,78 +0,0 @@
-- Migration: 101_worker_preflight_timezone.sql
-- Description: Update update_worker_preflight to extract timezone from fingerprint
-- Created: 2024-12-13
-- Records the outcome of one preflight probe for a worker and refreshes the
-- worker's aggregate preflight status.
--
--   p_worker_id   worker_registry.worker_id of the row to update
--   p_transport   'curl' or 'http'; any other value skips the per-transport write
--   p_status      probe outcome stored in the per-transport status column
--   p_ip          IP observed through that transport
--   p_response_ms probe latency in milliseconds
--   p_error       error text, if any
--   p_fingerprint fingerprint JSON; only read for the 'http' transport
--
-- Aggregate status: 'passed' when both transports passed, 'partial' when
-- exactly one passed, 'failed' when none passed and at least one failed,
-- otherwise 'pending'.
CREATE OR REPLACE FUNCTION public.update_worker_preflight(
    p_worker_id character varying,
    p_transport character varying,
    p_status character varying,
    p_ip character varying DEFAULT NULL,
    p_response_ms integer DEFAULT NULL,
    p_error text DEFAULT NULL,
    p_fingerprint jsonb DEFAULT NULL
)
RETURNS void
LANGUAGE plpgsql
AS $function$
DECLARE
    curl_result    VARCHAR(20);
    http_result    VARCHAR(20);
    overall_result VARCHAR(20);
    detected_tz    VARCHAR(50);
BEGIN
    -- Step 1: store the probe result in the columns of the matching transport.
    CASE p_transport
        WHEN 'curl' THEN
            UPDATE worker_registry
               SET preflight_curl_status = p_status,
                   preflight_curl_at     = NOW(),
                   preflight_curl_ms     = p_response_ms,
                   preflight_curl_error  = p_error,
                   curl_ip               = p_ip,
                   updated_at            = NOW()
             WHERE worker_id = p_worker_id;
        WHEN 'http' THEN
            -- The fingerprint may carry the timezone detected during preflight.
            detected_tz := p_fingerprint->>'detectedTimezone';
            UPDATE worker_registry
               SET preflight_http_status = p_status,
                   preflight_http_at     = NOW(),
                   preflight_http_ms     = p_response_ms,
                   preflight_http_error  = p_error,
                   http_ip               = p_ip,
                   -- Keep the previous fingerprint/timezone when none was supplied.
                   fingerprint_data      = COALESCE(p_fingerprint, fingerprint_data),
                   timezone              = COALESCE(detected_tz, timezone),
                   updated_at            = NOW()
             WHERE worker_id = p_worker_id;
        ELSE
            -- Unknown (or NULL) transport: nothing to store, only recompute below.
            NULL;
    END CASE;

    -- Step 2: read both per-transport statuses back and derive the aggregate.
    SELECT wr.preflight_curl_status, wr.preflight_http_status
      INTO curl_result, http_result
      FROM worker_registry wr
     WHERE wr.worker_id = p_worker_id;

    overall_result := CASE
        WHEN curl_result = 'passed' AND http_result = 'passed' THEN 'passed'
        WHEN 'passed' IN (curl_result, http_result) THEN 'partial'
        WHEN 'failed' IN (curl_result, http_result) THEN 'failed'
        ELSE 'pending'
    END;

    UPDATE worker_registry
       SET preflight_status = overall_result,
           preflight_at     = NOW()
     WHERE worker_id = p_worker_id;
END;
$function$;
COMMENT ON FUNCTION update_worker_preflight(varchar, varchar, varchar, varchar, integer, text, jsonb)
IS 'Updates worker preflight status and extracts timezone from fingerprint for working hours';

View File

@@ -1,114 +0,0 @@
-- Migration: 102_check_working_hours.sql
-- Description: Function to check if worker should be available based on working hours
-- Created: 2024-12-13
-- Function to check if a worker should be available for work.
-- Rolls a random number against the weight configured for the current hour
-- (in the worker's own timezone) and day of week.
--
--   p_worker_id     worker_registry.worker_id; an unknown worker is treated as
--                   having no timezone and gets the default
--   p_profile_name  working_hours.name of the profile to apply
--
-- Returns exactly one row:
--   is_available    TRUE if the worker passes the probability check
--   current_hour    hour of day (0-23) in the timezone actually used
--   hour_weight     final weight = hour weight * day-of-week weight / 100
--                   (100 when the profile is missing or disabled)
--   worker_timezone timezone actually used for the calculation
--   roll            random integer 0-99 (0 when the profile is missing)
--   reason          human-readable explanation
CREATE OR REPLACE FUNCTION check_working_hours(
    p_worker_id VARCHAR,
    p_profile_name VARCHAR DEFAULT 'natural_traffic'
)
RETURNS TABLE (
    is_available BOOLEAN,
    current_hour INTEGER,
    hour_weight INTEGER,
    worker_timezone VARCHAR,
    roll INTEGER,
    reason TEXT
)
LANGUAGE plpgsql
AS $function$
DECLARE
    c_default_timezone CONSTANT VARCHAR(50) := 'America/Phoenix';
    v_timezone VARCHAR(50);
    v_local_now TIMESTAMP;
    v_hour INTEGER;
    v_weight INTEGER;
    v_dow INTEGER;
    v_dow_weight INTEGER;
    v_final_weight INTEGER;
    v_roll INTEGER;
    v_hour_weights JSONB;
    v_dow_weights JSONB;
BEGIN
    -- Get worker's timezone (from preflight)
    SELECT wr.timezone INTO v_timezone
    FROM worker_registry wr
    WHERE wr.worker_id = p_worker_id;

    -- Default to America/Phoenix if no timezone set
    v_timezone := COALESCE(v_timezone, c_default_timezone);

    -- Resolve the worker's local time. The stored timezone is written without
    -- validation, so an unrecognised name would otherwise make every call for
    -- this worker raise; fall back to the default zone instead.
    BEGIN
        v_local_now := NOW() AT TIME ZONE v_timezone;
    EXCEPTION
        WHEN invalid_parameter_value THEN
            v_timezone := c_default_timezone;
            v_local_now := NOW() AT TIME ZONE v_timezone;
    END;

    -- Current hour and day of week (0=Sunday) in the worker's timezone
    v_hour := EXTRACT(HOUR FROM v_local_now)::INTEGER;
    v_dow := EXTRACT(DOW FROM v_local_now)::INTEGER;

    -- Get working hours profile (only enabled profiles are considered)
    SELECT wh.hour_weights, wh.dow_weights
    INTO v_hour_weights, v_dow_weights
    FROM working_hours wh
    WHERE wh.name = p_profile_name AND wh.enabled = true;

    -- If profile not found or disabled, always available
    IF v_hour_weights IS NULL THEN
        RETURN QUERY SELECT
            TRUE::BOOLEAN,
            v_hour,
            100::INTEGER,
            v_timezone,
            0::INTEGER,
            'Profile not found or disabled - defaulting to available'::TEXT;
        RETURN;
    END IF;

    -- Get hour weight (default to 50 if hour not specified)
    v_weight := COALESCE((v_hour_weights->>v_hour::TEXT)::INTEGER, 50);

    -- Get day-of-week weight (default to 100)
    v_dow_weight := COALESCE((v_dow_weights->>v_dow::TEXT)::INTEGER, 100);

    -- Calculate final weight (hour_weight * dow_weight / 100, integer division)
    v_final_weight := (v_weight * v_dow_weight / 100);

    -- Roll the dice (0-99)
    v_roll := floor(random() * 100)::INTEGER;

    -- Return result
    RETURN QUERY SELECT
        (v_roll < v_final_weight)::BOOLEAN AS is_available,
        v_hour AS current_hour,
        v_final_weight AS hour_weight,
        v_timezone AS worker_timezone,
        v_roll AS roll,
        CASE
            WHEN v_roll < v_final_weight THEN
                format('Available: rolled %s < %s%% threshold', v_roll, v_final_weight)
            ELSE
                format('Sleeping: rolled %s >= %s%% threshold', v_roll, v_final_weight)
        END AS reason;
END;
$function$;
-- Boolean-only wrapper around check_working_hours().
-- Returns the is_available flag of the detailed check, or TRUE when the
-- detailed check yields no value.
CREATE OR REPLACE FUNCTION is_worker_available(
    p_worker_id VARCHAR,
    p_profile_name VARCHAR DEFAULT 'natural_traffic'
)
RETURNS BOOLEAN
LANGUAGE plpgsql
AS $function$
BEGIN
    RETURN COALESCE(
        (
            SELECT cwh.is_available
            FROM check_working_hours(p_worker_id, p_profile_name) AS cwh
            LIMIT 1
        ),
        TRUE
    );
END;
$function$;
COMMENT ON FUNCTION check_working_hours(VARCHAR, VARCHAR) IS
'Check if worker should be available based on working hours profile. Returns detailed info.';
COMMENT ON FUNCTION is_worker_available(VARCHAR, VARCHAR) IS
'Simple boolean check if worker passes working hours probability roll.';

View File

@@ -1,12 +0,0 @@
-- Migration: 103_schedule_dispensary_id.sql
-- Description: Add dispensary_id to task_schedules for per-store schedules
-- Created: 2025-12-13
-- Add dispensary_id column for single-store schedules
-- Nullable foreign key to dispensaries: existing schedules keep NULL.
ALTER TABLE task_schedules
ADD COLUMN IF NOT EXISTS dispensary_id INTEGER REFERENCES dispensaries(id);
-- Index for quick lookups of the schedules attached to one store
CREATE INDEX IF NOT EXISTS idx_task_schedules_dispensary_id ON task_schedules(dispensary_id);
COMMENT ON COLUMN task_schedules.dispensary_id IS 'For single-store schedules. If set, only this store is refreshed. If NULL, uses state_code for all stores in state.';

View File

@@ -1,25 +0,0 @@
-- Migration 104: Add source tracking to worker_tasks
-- Purpose: Track WHERE tasks are created from (schedule vs API endpoint)
--
-- All automated task creation should be visible in task_schedules.
-- This column helps identify "phantom" tasks created outside the schedule system.
-- Add source column to worker_tasks (free-form label; expected values are
-- listed in the column comment at the end of this migration)
ALTER TABLE worker_tasks
ADD COLUMN IF NOT EXISTS source VARCHAR(100);
-- Add source_schedule_id column (references task_schedules.id if the task came from a schedule)
ALTER TABLE worker_tasks
ADD COLUMN IF NOT EXISTS source_schedule_id INTEGER REFERENCES task_schedules(id);
-- Add request metadata (IP, user agent) for debugging
ALTER TABLE worker_tasks
ADD COLUMN IF NOT EXISTS source_metadata JSONB;
-- Create index for querying by source
CREATE INDEX IF NOT EXISTS idx_worker_tasks_source ON worker_tasks(source);
-- Comments explaining the source columns
COMMENT ON COLUMN worker_tasks.source IS 'Task creation source: schedule, api_run_now, api_crawl_state, api_batch_staggered, api_batch_az_stores, task_chain, manual';
COMMENT ON COLUMN worker_tasks.source_schedule_id IS 'ID of the schedule that created this task (if source=schedule or source=api_run_now)';
COMMENT ON COLUMN worker_tasks.source_metadata IS 'Request metadata: {ip, user_agent, endpoint, timestamp}';

View File

@@ -1,25 +0,0 @@
-- Migration 105: Add indexes for dashboard performance
-- Purpose: Speed up the /dashboard and /national/summary endpoints
--
-- These queries were identified as slow:
-- 1. COUNT(*) FROM store_product_snapshots WHERE captured_at >= NOW() - INTERVAL '24 hours'
-- 2. National summary aggregate queries
--
-- NOTE(review): CREATE INDEX CONCURRENTLY cannot run inside a transaction block;
-- confirm the migration runner does not wrap this file in one.
-- Index for snapshot counts by time (used in dashboard)
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_store_product_snapshots_captured_at
ON store_product_snapshots(captured_at DESC);
-- Index for crawl traces by time and success (used in dashboard)
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_crawl_traces_started_success
ON crawl_orchestration_traces(started_at DESC, success);
-- Partial index over failed crawls only (faster for dashboard alerts).
-- Despite the name, the predicate is not time-bounded: every row with
-- success = false is indexed, ordered newest first.
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_crawl_traces_recent_failures
ON crawl_orchestration_traces(started_at DESC)
WHERE success = false;
-- Composite index for store_products aggregations by dispensary
-- Helps with national summary state metrics query
-- Rows without a brand name are excluded from the index.
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_store_products_dispensary_brand
ON store_products(dispensary_id, brand_name_raw)
WHERE brand_name_raw IS NOT NULL;

View File

@@ -1,10 +0,0 @@
-- Migration: 106_rename_store_discovery_schedule.sql
-- Description: Rename store_discovery_dutchie to 'Store Discovery'
-- Created: 2025-12-13
-- Update the schedule name for better display
-- The platform='dutchie' field is preserved for badge display in UI
-- Safe to re-run: once renamed, no row matches the WHERE clause any more.
UPDATE task_schedules
SET name = 'Store Discovery',
updated_at = NOW()
WHERE name = 'store_discovery_dutchie';

View File

@@ -1,23 +0,0 @@
-- Migration: 107_proxy_tracking.sql
-- Description: Add proxy tracking columns to worker_tasks for geo-targeting visibility
-- Created: 2025-12-13
-- Add proxy tracking columns to worker_tasks
-- All three are nullable with no default, so existing tasks keep NULL.
ALTER TABLE worker_tasks
ADD COLUMN IF NOT EXISTS proxy_ip VARCHAR(45);
ALTER TABLE worker_tasks
ADD COLUMN IF NOT EXISTS proxy_geo VARCHAR(100);
ALTER TABLE worker_tasks
ADD COLUMN IF NOT EXISTS proxy_source VARCHAR(10);
-- Column comments
COMMENT ON COLUMN worker_tasks.proxy_ip IS 'IP address of proxy used for this task';
COMMENT ON COLUMN worker_tasks.proxy_geo IS 'Geo target used (e.g., "arizona", "phoenix, arizona")';
COMMENT ON COLUMN worker_tasks.proxy_source IS 'Source of proxy: "api" (Evomi dynamic) or "static" (fallback table)';
-- Partial index for proxy analysis; tasks without a recorded proxy IP are not indexed
CREATE INDEX IF NOT EXISTS idx_worker_tasks_proxy_ip
ON worker_tasks(proxy_ip)
WHERE proxy_ip IS NOT NULL;

View File

@@ -1,231 +0,0 @@
-- Migration: 108_worker_geo_sessions.sql
-- Description: Add geo session tracking to worker_registry for state-based task assignment
-- Created: 2025-12-13
-- Worker geo session columns
-- Worker qualifies with a geo (state/city), then only claims tasks matching that geo
-- current_state / current_city / geo_session_started_at are nullable; a NULL
-- current_state means the worker has no geo assignment.
ALTER TABLE worker_registry
ADD COLUMN IF NOT EXISTS current_state VARCHAR(2);
ALTER TABLE worker_registry
ADD COLUMN IF NOT EXISTS current_city VARCHAR(100);
ALTER TABLE worker_registry
ADD COLUMN IF NOT EXISTS geo_session_started_at TIMESTAMPTZ;
-- Per-session task budget: count starts at 0, limit defaults to 7
ALTER TABLE worker_registry
ADD COLUMN IF NOT EXISTS session_task_count INT DEFAULT 0;
ALTER TABLE worker_registry
ADD COLUMN IF NOT EXISTS session_max_tasks INT DEFAULT 7;
ALTER TABLE worker_registry
ADD COLUMN IF NOT EXISTS proxy_geo VARCHAR(100);
-- Column comments
COMMENT ON COLUMN worker_registry.current_state IS 'Worker''s current geo assignment (US state code, e.g., AZ)';
COMMENT ON COLUMN worker_registry.current_city IS 'Worker''s current city assignment (optional, e.g., phoenix)';
COMMENT ON COLUMN worker_registry.geo_session_started_at IS 'When worker''s current geo session started';
COMMENT ON COLUMN worker_registry.session_task_count IS 'Number of tasks completed in current geo session';
COMMENT ON COLUMN worker_registry.session_max_tasks IS 'Max tasks per geo session before re-qualification (default 7)';
COMMENT ON COLUMN worker_registry.proxy_geo IS 'Geo target string used for proxy (e.g., "arizona" or "phoenix, arizona")';
-- Partial index for finding workers by state; workers without a geo assignment are not indexed
CREATE INDEX IF NOT EXISTS idx_worker_registry_current_state
ON worker_registry(current_state)
WHERE current_state IS NOT NULL;
-- ============================================================
-- UPDATED claim_task FUNCTION
-- Now filters by worker's geo session state
--
-- Atomically claims the best pending task of role p_role for worker p_worker_id.
--   p_role        task role to claim
--   p_worker_id   worker_registry.worker_id of the claiming worker
--   p_curl_passed whether the worker may take tasks with method = 'curl'
--   p_http_passed whether the worker may take tasks with method = 'http'
-- Returns the claimed worker_tasks row. Returns NULL when the worker has no
-- valid geo session; when no task matches, the returned row has all fields NULL.
-- Side effects: marks the task 'claimed' and increments the worker's
-- session_task_count.
-- ============================================================
CREATE OR REPLACE FUNCTION claim_task(
p_role VARCHAR(50),
p_worker_id VARCHAR(100),
p_curl_passed BOOLEAN DEFAULT TRUE,
p_http_passed BOOLEAN DEFAULT FALSE
) RETURNS worker_tasks AS $$
DECLARE
claimed_task worker_tasks;
worker_state VARCHAR(2);
session_valid BOOLEAN;
session_tasks INT;
max_tasks INT;
BEGIN
-- Get worker's current geo session info
-- A session counts as valid for 60 minutes after geo_session_started_at.
-- If the worker is not registered, every variable stays NULL.
SELECT
current_state,
session_task_count,
session_max_tasks,
(geo_session_started_at IS NOT NULL AND geo_session_started_at > NOW() - INTERVAL '60 minutes')
INTO worker_state, session_tasks, max_tasks, session_valid
FROM worker_registry
WHERE worker_id = p_worker_id;
-- If no valid geo session, or session exhausted, worker can't claim tasks
-- Worker must re-qualify first
-- (worker_state IS NULL also covers the unregistered-worker case)
IF worker_state IS NULL OR NOT session_valid OR session_tasks >= COALESCE(max_tasks, 7) THEN
RETURN NULL;
END IF;
-- Claim task matching worker's state
UPDATE worker_tasks
SET
status = 'claimed',
worker_id = p_worker_id,
claimed_at = NOW(),
updated_at = NOW()
WHERE id = (
SELECT wt.id FROM worker_tasks wt
-- Inner join: tasks with a NULL dispensary_id never match and so can
-- never be claimed through this function.
JOIN dispensaries d ON wt.dispensary_id = d.id
WHERE wt.role = p_role
AND wt.status = 'pending'
AND (wt.scheduled_for IS NULL OR wt.scheduled_for <= NOW())
-- GEO FILTER: Task's dispensary must match worker's state
AND d.state = worker_state
-- Method compatibility: worker must have passed the required preflight
AND (
wt.method IS NULL -- No preference, any worker can claim
OR (wt.method = 'curl' AND p_curl_passed = TRUE)
OR (wt.method = 'http' AND p_http_passed = TRUE)
)
-- Exclude stores that already have an active task
-- NOTE(review): the "wt.dispensary_id IS NULL" branch is unreachable because
-- of the inner join above - confirm whether dispensary-less tasks should be
-- claimable here.
AND (wt.dispensary_id IS NULL OR wt.dispensary_id NOT IN (
SELECT dispensary_id FROM worker_tasks
WHERE status IN ('claimed', 'running')
AND dispensary_id IS NOT NULL
))
-- Highest priority first, oldest first within a priority
ORDER BY wt.priority DESC, wt.created_at ASC
LIMIT 1
-- NOTE(review): without an OF clause this locks the selected rows of both
-- worker_tasks and dispensaries; the dispensary lock presumably serializes
-- concurrent claims for the same store - confirm before narrowing it.
FOR UPDATE SKIP LOCKED
)
RETURNING * INTO claimed_task;
-- If task claimed, increment session task count
-- Note: Use claimed_task.id IS NOT NULL (not claimed_task IS NOT NULL)
-- PostgreSQL composite type NULL check quirk
IF claimed_task.id IS NOT NULL THEN
UPDATE worker_registry
SET session_task_count = session_task_count + 1
WHERE worker_id = p_worker_id;
END IF;
RETURN claimed_task;
END;
$$ LANGUAGE plpgsql;
-- ============================================================
-- FUNCTION: assign_worker_geo
-- Assigns a geo session to a worker based on demand.
--
--   p_worker_id  worker_registry.worker_id of the worker to assign
--
-- Picks the state with the most pending tasks (ties broken by the fewest
-- active workers with a live session, then by state code), stores it on the
-- worker and restarts the worker's geo session.
-- Returns the assigned state, or NULL if no tasks are pending anywhere.
-- ============================================================
CREATE OR REPLACE FUNCTION assign_worker_geo(
    p_worker_id VARCHAR(100)
) RETURNS VARCHAR(2) AS $$
DECLARE
    assigned_state VARCHAR(2);
BEGIN
    -- Find state with highest demand (pending tasks) and lowest coverage (workers).
    -- Joining tasks and workers to the same dispensary rows multiplies them
    -- together, so both counts must be DISTINCT; a plain COUNT(wt.id) would be
    -- inflated by the number of workers already covering the state.
    SELECT d.state INTO assigned_state
    FROM dispensaries d
    JOIN worker_tasks wt ON wt.dispensary_id = d.id
    LEFT JOIN worker_registry wr ON wr.current_state = d.state
        AND wr.status = 'active'
        AND wr.geo_session_started_at > NOW() - INTERVAL '60 minutes'
    WHERE wt.status = 'pending'
      AND d.platform_dispensary_id IS NOT NULL
      -- A NULL state cannot be assigned, and a winning NULL group would be
      -- indistinguishable from "no pending tasks".
      AND d.state IS NOT NULL
    GROUP BY d.state
    ORDER BY
        COUNT(DISTINCT wt.id) DESC,          -- Most pending tasks first
        COUNT(DISTINCT wr.worker_id) ASC,    -- Fewest workers second
        d.state ASC                          -- Deterministic tie-break
    LIMIT 1;

    -- If no pending tasks anywhere, return NULL
    IF assigned_state IS NULL THEN
        RETURN NULL;
    END IF;

    -- Assign the state to this worker and start a fresh session
    UPDATE worker_registry
    SET
        current_state = assigned_state,
        current_city = NULL, -- City assigned later if available
        geo_session_started_at = NOW(),
        session_task_count = 0
    WHERE worker_id = p_worker_id;

    RETURN assigned_state;
END;
$$ LANGUAGE plpgsql;
-- ============================================================
-- FUNCTION: check_worker_geo_session
-- Reports the state of a worker's current geo session.
--
--   p_worker_id  worker_registry.worker_id to inspect
--
-- Returns one row per matching worker (none if the worker is unknown):
--   current_state / current_city   the worker's geo assignment
--   session_valid                  TRUE while the session is under 60 minutes old
--   session_tasks_remaining        session_max_tasks - session_task_count, floored at 0
--   session_minutes_remaining      minutes until the 60-minute window ends, floored at 0
-- ============================================================
CREATE OR REPLACE FUNCTION check_worker_geo_session(
    p_worker_id VARCHAR(100)
) RETURNS TABLE (
    current_state VARCHAR(2),
    current_city VARCHAR(100),
    session_valid BOOLEAN,
    session_tasks_remaining INT,
    session_minutes_remaining INT
) AS $$
BEGIN
    RETURN QUERY
    SELECT
        reg.current_state,
        reg.current_city,
        -- A missing start time makes the comparison NULL, which counts as invalid.
        COALESCE(reg.geo_session_started_at > NOW() - INTERVAL '60 minutes', FALSE),
        GREATEST(0, reg.session_max_tasks - reg.session_task_count),
        CAST(
            GREATEST(
                0,
                EXTRACT(EPOCH FROM (reg.geo_session_started_at + INTERVAL '60 minutes' - NOW())) / 60
            ) AS INT
        )
    FROM worker_registry AS reg
    WHERE reg.worker_id = p_worker_id;
END;
$$ LANGUAGE plpgsql;
-- View: worker_state_capacity
-- Per-state comparison of pending work against available worker capacity.
-- States come from the dispensaries table (no separate states lookup table),
-- so state_name simply repeats the state code.
--   status = 'no_coverage' : pending tasks but zero remaining capacity
--   status = 'thin'        : remaining capacity is smaller than the backlog
--   status = 'ok'          : otherwise
CREATE OR REPLACE VIEW worker_state_capacity AS
WITH states AS (
    -- Distinct states that have at least one dispensary with a platform ID
    SELECT DISTINCT disp.state AS code
    FROM dispensaries AS disp
    WHERE disp.state IS NOT NULL
      AND disp.platform_dispensary_id IS NOT NULL
),
backlog AS (
    -- Pending tasks per state
    SELECT disp.state AS state_code, COUNT(*) AS pending
    FROM worker_tasks AS task
    JOIN dispensaries AS disp ON task.dispensary_id = disp.id
    WHERE task.status = 'pending'
      AND disp.state IS NOT NULL
    GROUP BY disp.state
),
coverage AS (
    -- Workers (active or idle, HTTP preflight passed, session under 60 minutes
    -- old) per state, with the tasks they may still take this session
    SELECT
        reg.current_state AS state_code,
        COUNT(*) AS workers,
        SUM(GREATEST(0, reg.session_max_tasks - reg.session_task_count)) AS capacity
    FROM worker_registry AS reg
    WHERE reg.status IN ('active', 'idle')
      AND reg.preflight_http_status = 'passed'
      AND reg.current_state IS NOT NULL
      AND reg.geo_session_started_at > NOW() - INTERVAL '60 minutes'
    GROUP BY reg.current_state
),
combined AS (
    -- One row per state with missing figures defaulted to zero
    SELECT
        st.code,
        COALESCE(bl.pending, 0) AS pending_tasks,
        COALESCE(cov.workers, 0) AS workers_on_state,
        COALESCE(cov.capacity, 0) AS remaining_capacity
    FROM states AS st
    LEFT JOIN backlog AS bl ON bl.state_code = st.code
    LEFT JOIN coverage AS cov ON cov.state_code = st.code
)
SELECT
    c.code AS state,
    c.code AS state_name,
    c.pending_tasks,
    c.workers_on_state,
    c.remaining_capacity,
    CASE
        WHEN c.remaining_capacity = 0 AND c.pending_tasks > 0 THEN 'no_coverage'
        WHEN c.remaining_capacity < c.pending_tasks THEN 'thin'
        ELSE 'ok'
    END AS status
FROM combined AS c
ORDER BY c.pending_tasks DESC;

View File

@@ -1,354 +0,0 @@
-- Migration: 109_worker_identity_pool.sql
-- Description: Identity pool for diverse IP/fingerprint rotation
-- Created: 2025-12-14
--
-- Workers claim identities (IP + fingerprint) from pool.
-- Each identity used for 3-5 tasks, then cools down 2-3 hours.
-- This creates natural browsing patterns - same person doesn't hit 20 stores.
-- ============================================================
-- IDENTITY POOL TABLE
-- One row per identity; an identity is claimable when it is not active,
-- is healthy, and its cooldown (if any) has passed.
-- ============================================================
CREATE TABLE IF NOT EXISTS worker_identities (
id SERIAL PRIMARY KEY,
-- Evomi session controls the IP
session_id VARCHAR(100) UNIQUE NOT NULL,
-- Detected IP from this session
ip_address INET,
-- Geo targeting
state_code VARCHAR(2) NOT NULL,
city VARCHAR(100), -- City-level targeting for diversity
-- Fingerprint data (UA, timezone, locale, device, etc.)
fingerprint JSONB NOT NULL,
-- Timestamps
created_at TIMESTAMPTZ DEFAULT NOW(),
last_used_at TIMESTAMPTZ,
cooldown_until TIMESTAMPTZ, -- Can't reuse until this time
-- Usage stats
total_tasks_completed INT DEFAULT 0,
total_sessions INT DEFAULT 1, -- How many times this identity has been used
-- Current state
is_active BOOLEAN DEFAULT FALSE, -- Currently claimed by a worker
active_worker_id VARCHAR(100), -- Which worker has it
-- Health tracking
consecutive_failures INT DEFAULT 0,
is_healthy BOOLEAN DEFAULT TRUE -- Set false if IP gets blocked
);
-- Indexes for efficient lookups
-- Geo lookup by state and city
CREATE INDEX IF NOT EXISTS idx_worker_identities_state_city
ON worker_identities(state_code, city);
-- Availability lookup within a state; unhealthy identities are not indexed
CREATE INDEX IF NOT EXISTS idx_worker_identities_available
ON worker_identities(state_code, is_active, cooldown_until)
WHERE is_healthy = TRUE;
-- Cooldown lookup over healthy identities that are not currently claimed
CREATE INDEX IF NOT EXISTS idx_worker_identities_cooldown
ON worker_identities(cooldown_until)
WHERE is_healthy = TRUE AND is_active = FALSE;
-- ============================================================
-- METRO AREA MAPPING
-- For fallback when exact city not available
-- Each (state_code, city) pair belongs to at most one metro area.
-- ============================================================
CREATE TABLE IF NOT EXISTS metro_areas (
id SERIAL PRIMARY KEY,
metro_name VARCHAR(100) NOT NULL,
state_code VARCHAR(2) NOT NULL,
city VARCHAR(100) NOT NULL,
is_primary BOOLEAN DEFAULT FALSE, -- Primary city of the metro
UNIQUE(state_code, city)
);
-- Seed data: Arizona metro areas. Every INSERT skips rows that already exist
-- (ON CONFLICT DO NOTHING), so the migration can be re-run safely.
-- Phoenix Metro Area
INSERT INTO metro_areas (metro_name, state_code, city, is_primary) VALUES
('Phoenix Metro', 'AZ', 'Phoenix', TRUE),
('Phoenix Metro', 'AZ', 'Mesa', FALSE),
('Phoenix Metro', 'AZ', 'Glendale', FALSE),
('Phoenix Metro', 'AZ', 'Tempe', FALSE),
('Phoenix Metro', 'AZ', 'Scottsdale', FALSE),
('Phoenix Metro', 'AZ', 'Chandler', FALSE),
('Phoenix Metro', 'AZ', 'Peoria', FALSE),
('Phoenix Metro', 'AZ', 'El Mirage', FALSE),
('Phoenix Metro', 'AZ', 'Tolleson', FALSE),
('Phoenix Metro', 'AZ', 'Sun City', FALSE),
('Phoenix Metro', 'AZ', 'Apache Junction', FALSE),
('Phoenix Metro', 'AZ', 'Cave Creek', FALSE),
('Phoenix Metro', 'AZ', 'Gilbert', FALSE),
('Phoenix Metro', 'AZ', 'Surprise', FALSE),
('Phoenix Metro', 'AZ', 'Avondale', FALSE),
('Phoenix Metro', 'AZ', 'Goodyear', FALSE),
('Phoenix Metro', 'AZ', 'Buckeye', FALSE),
('Phoenix Metro', 'AZ', 'Queen Creek', FALSE)
ON CONFLICT (state_code, city) DO NOTHING;
-- Tucson Metro Area
INSERT INTO metro_areas (metro_name, state_code, city, is_primary) VALUES
('Tucson Metro', 'AZ', 'Tucson', TRUE),
('Tucson Metro', 'AZ', 'Oro Valley', FALSE),
('Tucson Metro', 'AZ', 'Marana', FALSE),
('Tucson Metro', 'AZ', 'Sahuarita', FALSE),
('Tucson Metro', 'AZ', 'South Tucson', FALSE)
ON CONFLICT (state_code, city) DO NOTHING;
-- Flagstaff Area
INSERT INTO metro_areas (metro_name, state_code, city, is_primary) VALUES
('Flagstaff Area', 'AZ', 'Flagstaff', TRUE),
('Flagstaff Area', 'AZ', 'Sedona', FALSE)
ON CONFLICT (state_code, city) DO NOTHING;
-- Prescott Area
INSERT INTO metro_areas (metro_name, state_code, city, is_primary) VALUES
('Prescott Area', 'AZ', 'Prescott', TRUE),
('Prescott Area', 'AZ', 'Prescott Valley', FALSE)
ON CONFLICT (state_code, city) DO NOTHING;
-- ============================================================
-- FUNCTION: claim_identity
-- Claims an available identity for a worker.
--
--   p_worker_id   worker taking the identity
--   p_state_code  state the identity must belong to
--   p_city        optional preferred city
--
-- Tries, in order: exact city -> any city in the same metro area -> any
-- identity in the state. An identity is available when it is inactive,
-- healthy and past its cooldown; the least recently used one wins.
-- Returns the claimed row (now marked active for p_worker_id). When nothing
-- is available the returned row has all fields NULL and the caller is
-- expected to create a new identity.
-- ============================================================
CREATE OR REPLACE FUNCTION claim_identity(
    p_worker_id VARCHAR(100),
    p_state_code VARCHAR(2),
    p_city VARCHAR(100) DEFAULT NULL
) RETURNS worker_identities AS $$
DECLARE
    claimed_identity worker_identities;
    metro_name_val VARCHAR(100);
BEGIN
    IF p_city IS NOT NULL THEN
        -- 1. Try exact city match
        UPDATE worker_identities
        SET is_active = TRUE,
            active_worker_id = p_worker_id,
            last_used_at = NOW()
        WHERE id = (
            SELECT id FROM worker_identities
            WHERE state_code = p_state_code
              AND city = p_city
              AND is_active = FALSE
              AND is_healthy = TRUE
              AND (cooldown_until IS NULL OR cooldown_until < NOW())
            ORDER BY last_used_at ASC NULLS FIRST
            LIMIT 1
            FOR UPDATE SKIP LOCKED
        )
        RETURNING * INTO claimed_identity;

        -- Composite-typed variable: test a field, not the whole row
        IF claimed_identity.id IS NOT NULL THEN
            RETURN claimed_identity;
        END IF;

        -- 2. Try metro area fallback: find the metro area for this city
        SELECT ma.metro_name INTO metro_name_val
        FROM metro_areas ma
        WHERE ma.state_code = p_state_code AND ma.city = p_city;

        IF metro_name_val IS NOT NULL THEN
            -- Try any city in same metro
            UPDATE worker_identities wi
            SET is_active = TRUE,
                active_worker_id = p_worker_id,
                last_used_at = NOW()
            WHERE wi.id = (
                SELECT wi2.id FROM worker_identities wi2
                JOIN metro_areas ma ON wi2.city = ma.city AND wi2.state_code = ma.state_code
                WHERE ma.metro_name = metro_name_val
                  -- Stay inside the requested state, like the other two steps
                  AND wi2.state_code = p_state_code
                  AND wi2.is_active = FALSE
                  AND wi2.is_healthy = TRUE
                  AND (wi2.cooldown_until IS NULL OR wi2.cooldown_until < NOW())
                ORDER BY wi2.last_used_at ASC NULLS FIRST
                LIMIT 1
                -- Lock only the identity row. Locking the joined metro_areas
                -- row as well would make concurrent claimers skip every free
                -- identity in that city.
                FOR UPDATE OF wi2 SKIP LOCKED
            )
            RETURNING * INTO claimed_identity;

            IF claimed_identity.id IS NOT NULL THEN
                RETURN claimed_identity;
            END IF;
        END IF;
    END IF;

    -- 3. Try any identity in state
    UPDATE worker_identities
    SET is_active = TRUE,
        active_worker_id = p_worker_id,
        last_used_at = NOW()
    WHERE id = (
        SELECT id FROM worker_identities
        WHERE state_code = p_state_code
          AND is_active = FALSE
          AND is_healthy = TRUE
          AND (cooldown_until IS NULL OR cooldown_until < NOW())
        ORDER BY last_used_at ASC NULLS FIRST
        LIMIT 1
        FOR UPDATE SKIP LOCKED
    )
    RETURNING * INTO claimed_identity;

    -- Return whatever we got (NULL if nothing available - caller should create new)
    RETURN claimed_identity;
END;
$$ LANGUAGE plpgsql;
-- ============================================================
-- FUNCTION: release_identity
-- Releases an identity back to pool with cooldown.
--
--   p_identity_id      worker_identities.id to release
--   p_tasks_completed  tasks finished during this session (NULL counts as 0)
--   p_failed           TRUE if the session ended in failure
--
-- Clears the active flag and worker, updates the usage counters, starts a
-- random 2-3 hour cooldown and maintains the failure streak. The identity is
-- marked unhealthy once the streak, including this release, reaches 3.
-- ============================================================
CREATE OR REPLACE FUNCTION release_identity(
    p_identity_id INT,
    p_tasks_completed INT DEFAULT 0,
    p_failed BOOLEAN DEFAULT FALSE
) RETURNS VOID AS $$
DECLARE
    cooldown_hours FLOAT;
BEGIN
    -- Random cooldown between 2-3 hours for diversity
    cooldown_hours := 2 + random(); -- 2.0 to 3.0 hours

    UPDATE worker_identities
    SET is_active = FALSE,
        active_worker_id = NULL,
        total_tasks_completed = total_tasks_completed + COALESCE(p_tasks_completed, 0),
        total_sessions = total_sessions + 1,
        cooldown_until = NOW() + cooldown_hours * INTERVAL '1 hour',
        consecutive_failures = CASE WHEN p_failed THEN consecutive_failures + 1 ELSE 0 END,
        -- Expressions in SET see the row as it was BEFORE this update, so the
        -- health check must use the new streak (old value + 1), not the stored
        -- column. A successful release resets the streak and is always healthy.
        is_healthy = CASE
            WHEN p_failed AND consecutive_failures + 1 >= 3 THEN FALSE
            ELSE TRUE
        END
    WHERE id = p_identity_id;
END;
$$ LANGUAGE plpgsql;
-- ============================================================
-- FUNCTION: get_pending_tasks_by_geo
-- Summarises the pending-task backlog by dispensary state and city, for
-- deciding where identities are needed.
--
--   p_limit  maximum number of (state, city) groups returned
--
-- Returns, largest backlog first:
--   state_code / city      geo of the dispensaries
--   pending_count          pending tasks in that geo
--   available_identities   inactive, healthy, off-cooldown identities in the
--                          same state whose city matches or is unset
-- ============================================================
CREATE OR REPLACE FUNCTION get_pending_tasks_by_geo(
    p_limit INT DEFAULT 10
) RETURNS TABLE (
    state_code VARCHAR(2),
    city VARCHAR(100),
    pending_count BIGINT,
    available_identities BIGINT
) AS $$
BEGIN
    RETURN QUERY
    WITH backlog AS (
        -- Pending tasks grouped by the geo of their dispensary
        SELECT
            disp.state AS geo_state,
            disp.city AS geo_city,
            COUNT(task.id) AS n_pending
        FROM worker_tasks AS task
        JOIN dispensaries AS disp ON task.dispensary_id = disp.id
        WHERE task.status = 'pending'
          AND disp.state IS NOT NULL
        GROUP BY disp.state, disp.city
    )
    SELECT
        b.geo_state,
        b.geo_city,
        b.n_pending,
        (
            -- Identities that could be claimed right now for this geo
            SELECT COUNT(*)
            FROM worker_identities AS ident
            WHERE ident.state_code = b.geo_state
              AND (ident.city = b.geo_city OR ident.city IS NULL)
              AND ident.is_active = FALSE
              AND ident.is_healthy = TRUE
              AND (ident.cooldown_until IS NULL OR ident.cooldown_until < NOW())
        )
    FROM backlog AS b
    ORDER BY b.n_pending DESC
    LIMIT p_limit;
END;
$$ LANGUAGE plpgsql;
-- ============================================================
-- FUNCTION: get_tasks_for_identity
-- Lists pending tasks whose dispensary matches an identity's geo.
--
--   p_state_code  state of the identity
--   p_city        city of the identity
--   p_limit       maximum number of tasks returned
--
-- Matching inside the state: the exact city or any city of the same metro
-- area; when the city belongs to no known metro area, every dispensary in
-- the state matches. Exact-city tasks come first, then higher priority,
-- then older tasks.
-- ============================================================
CREATE OR REPLACE FUNCTION get_tasks_for_identity(
    p_state_code VARCHAR(2),
    p_city VARCHAR(100),
    p_limit INT DEFAULT 5
) RETURNS TABLE (
    task_id INT,
    dispensary_id INT,
    dispensary_name VARCHAR(255),
    dispensary_city VARCHAR(100),
    role VARCHAR(50)
) AS $$
DECLARE
    identity_metro VARCHAR(100);
BEGIN
    -- Resolve the metro area of the identity's city (NULL if none)
    SELECT area.metro_name INTO identity_metro
    FROM metro_areas AS area
    WHERE area.state_code = p_state_code
      AND area.city = p_city;

    RETURN QUERY
    SELECT
        task.id,
        disp.id,
        disp.name,
        disp.city,
        task.role
    FROM worker_tasks AS task
    JOIN dispensaries AS disp ON task.dispensary_id = disp.id
    WHERE task.status = 'pending'
      AND disp.state = p_state_code
      AND (
          -- No metro known: fall back to the whole state
          identity_metro IS NULL
          -- Exact city match
          OR disp.city = p_city
          -- Same metro area
          OR EXISTS (
              SELECT 1
              FROM metro_areas AS area
              WHERE area.metro_name = identity_metro
                AND area.city = disp.city
          )
      )
    ORDER BY
        (disp.city = p_city) IS NOT TRUE, -- exact city first (FALSE sorts before TRUE)
        task.priority DESC,
        task.created_at ASC
    LIMIT p_limit;
END;
$$ LANGUAGE plpgsql;
-- ============================================================
-- VIEW: identity_pool_status
-- Per state/city summary of the identity pool:
--   total_identities        all identities in the group
--   active                  currently claimed by a worker
--   available               inactive, healthy and past any cooldown
--   cooling_down            inactive with a cooldown still in the future
--   unhealthy               flagged is_healthy = FALSE
--   total_tasks             sum of total_tasks_completed
--   avg_tasks_per_identity  average of total_tasks_completed, as an integer
-- ============================================================
CREATE OR REPLACE VIEW identity_pool_status AS
SELECT
    ident.state_code,
    ident.city,
    COUNT(*) AS total_identities,
    COUNT(*) FILTER (WHERE ident.is_active) AS active,
    COUNT(*) FILTER (
        WHERE NOT ident.is_active
          AND ident.is_healthy
          AND COALESCE(ident.cooldown_until < NOW(), TRUE)
    ) AS available,
    COUNT(*) FILTER (
        WHERE NOT ident.is_active
          AND ident.cooldown_until > NOW()
    ) AS cooling_down,
    COUNT(*) FILTER (WHERE NOT ident.is_healthy) AS unhealthy,
    SUM(ident.total_tasks_completed) AS total_tasks,
    CAST(AVG(ident.total_tasks_completed) AS INT) AS avg_tasks_per_identity
FROM worker_identities AS ident
GROUP BY ident.state_code, ident.city
ORDER BY ident.state_code, ident.city;
-- ============================================================
-- Catalog comments for the identity pool objects
-- NOTE(review): the COMMENT ON FUNCTION statements omit the argument lists,
-- which only resolves while each function name is not overloaded.
-- ============================================================
COMMENT ON TABLE worker_identities IS 'Pool of IP/fingerprint identities for worker rotation';
COMMENT ON TABLE metro_areas IS 'City groupings for geographic fallback matching';
COMMENT ON FUNCTION claim_identity IS 'Claim an available identity: exact city -> metro -> state -> NULL (create new)';
COMMENT ON FUNCTION release_identity IS 'Release identity with 2-3 hour random cooldown';
COMMENT ON FUNCTION get_pending_tasks_by_geo IS 'Get pending task counts by state/city';
COMMENT ON FUNCTION get_tasks_for_identity IS 'Get tasks matching identity geo (city or metro area)';

View File

@@ -1,92 +0,0 @@
-- Migration: 110_trusted_origins.sql
-- Description: Trusted origins for API access without token
-- Created: 2024-12-14
--
-- Manages which domains, IPs, and patterns can access the API without a Bearer token.
-- Used by auth middleware to grant 'internal' role to trusted requests.
-- ============================================================
-- TRUSTED ORIGINS TABLE
-- ============================================================
-- One row per origin rule; origin_type decides how origin_value is interpreted.
CREATE TABLE IF NOT EXISTS trusted_origins (
id SERIAL PRIMARY KEY,
-- Origin identification
name VARCHAR(100) NOT NULL, -- Friendly name (e.g., "CannaIQ Production")
origin_type VARCHAR(20) NOT NULL, -- 'domain', 'ip', or 'pattern'
origin_value VARCHAR(255) NOT NULL, -- The actual value to match
-- Metadata
description TEXT, -- Optional notes
active BOOLEAN DEFAULT TRUE,
-- Tracking
created_at TIMESTAMPTZ DEFAULT NOW(),
updated_at TIMESTAMPTZ DEFAULT NOW(), -- maintained by the trusted_origins_updated_at trigger below
created_by INTEGER REFERENCES users(id), -- nullable: the seed rows in this migration do not set it
-- Constraints
CONSTRAINT valid_origin_type CHECK (origin_type IN ('domain', 'ip', 'pattern')),
-- Conflict target used by the seed INSERTs' ON CONFLICT (origin_type, origin_value)
UNIQUE(origin_type, origin_value)
);
-- Index for active lookups (used by auth middleware)
CREATE INDEX IF NOT EXISTS idx_trusted_origins_active
ON trusted_origins(active) WHERE active = TRUE;
-- Trigger function: stamps updated_at on the row being written.
-- Attached below as a BEFORE UPDATE ... FOR EACH ROW trigger.
CREATE OR REPLACE FUNCTION update_trusted_origins_updated_at()
RETURNS TRIGGER
LANGUAGE plpgsql
AS $$
BEGIN
    -- Overwrite whatever the UPDATE supplied with the current transaction time
    NEW.updated_at := NOW();
    RETURN NEW;
END;
$$;
DROP TRIGGER IF EXISTS trusted_origins_updated_at ON trusted_origins;
CREATE TRIGGER trusted_origins_updated_at
BEFORE UPDATE ON trusted_origins
FOR EACH ROW
EXECUTE FUNCTION update_trusted_origins_updated_at();
-- ============================================================
-- SEED DEFAULT TRUSTED ORIGINS
-- These match the hardcoded fallbacks in middleware.ts
-- ============================================================
-- Production domains
INSERT INTO trusted_origins (name, origin_type, origin_value, description) VALUES
('CannaIQ Production', 'domain', 'https://cannaiq.co', 'Main CannaIQ dashboard'),
('CannaIQ Production (www)', 'domain', 'https://www.cannaiq.co', 'Main CannaIQ dashboard with www'),
('FindADispo Production', 'domain', 'https://findadispo.com', 'Consumer dispensary finder'),
('FindADispo Production (www)', 'domain', 'https://www.findadispo.com', 'Consumer dispensary finder with www'),
('Findagram Production', 'domain', 'https://findagram.co', 'Instagram-style cannabis discovery'),
('Findagram Production (www)', 'domain', 'https://www.findagram.co', 'Instagram-style cannabis discovery with www')
ON CONFLICT (origin_type, origin_value) DO NOTHING;
-- Wildcard patterns
-- NOTE: the single backslashes are intentional. With standard_conforming_strings
-- on (the PostgreSQL default), a backslash inside '...' is stored literally, so
-- writing '\\.' would store TWO backslashes; as a regex that means "a literal
-- backslash followed by any character" and would never match a real origin.
-- '\.' stores the intended "literal dot".
INSERT INTO trusted_origins (name, origin_type, origin_value, description) VALUES
('CannaBrands Subdomains', 'pattern', '^https://.*\.cannabrands\.app$', 'All *.cannabrands.app subdomains'),
('CannaIQ Subdomains', 'pattern', '^https://.*\.cannaiq\.co$', 'All *.cannaiq.co subdomains')
ON CONFLICT (origin_type, origin_value) DO NOTHING;
-- Local development
INSERT INTO trusted_origins (name, origin_type, origin_value, description) VALUES
('Local API', 'domain', 'http://localhost:3010', 'Local backend API'),
('Local Admin', 'domain', 'http://localhost:8080', 'Local admin dashboard'),
('Local Vite Dev', 'domain', 'http://localhost:5173', 'Vite dev server')
ON CONFLICT (origin_type, origin_value) DO NOTHING;
-- Trusted IPs (localhost)
INSERT INTO trusted_origins (name, origin_type, origin_value, description) VALUES
('Localhost IPv4', 'ip', '127.0.0.1', 'Local machine'),
('Localhost IPv6', 'ip', '::1', 'Local machine IPv6'),
('Localhost IPv6 Mapped', 'ip', '::ffff:127.0.0.1', 'IPv6-mapped IPv4 localhost')
ON CONFLICT (origin_type, origin_value) DO NOTHING;
-- ============================================================
-- COMMENTS
-- ============================================================
COMMENT ON TABLE trusted_origins IS 'Domains, IPs, and patterns that can access API without token';
COMMENT ON COLUMN trusted_origins.origin_type IS 'domain = exact URL match, ip = IP address, pattern = regex pattern';
COMMENT ON COLUMN trusted_origins.origin_value IS 'For domain: full URL. For ip: IP address. For pattern: regex string';

View File

@@ -1,35 +0,0 @@
-- Migration: 111_system_settings.sql
-- Description: System settings table for runtime configuration
-- Created: 2024-12-14
-- Key/value store for runtime settings; one row per setting.
CREATE TABLE IF NOT EXISTS system_settings (
key VARCHAR(100) PRIMARY KEY,
-- Always stored as text (e.g. 'true' for task_pool_open); readers must parse it
value TEXT NOT NULL,
description TEXT,
-- Maintained by the system_settings_updated_at trigger defined below
updated_at TIMESTAMPTZ DEFAULT NOW(),
-- Nullable: the seed row in this migration does not set it
updated_by INTEGER REFERENCES users(id)
);
-- Task pool gate - controls whether workers can claim tasks
INSERT INTO system_settings (key, value, description) VALUES
('task_pool_open', 'true', 'When false, workers cannot claim new tasks from the pool')
ON CONFLICT (key) DO NOTHING;
-- Trigger function: stamps updated_at on the row being written.
-- Attached below as a BEFORE UPDATE ... FOR EACH ROW trigger.
CREATE OR REPLACE FUNCTION update_system_settings_updated_at()
RETURNS TRIGGER
LANGUAGE plpgsql
AS $$
BEGIN
    -- Overwrite whatever the UPDATE supplied with the current transaction time
    NEW.updated_at := NOW();
    RETURN NEW;
END;
$$;
DROP TRIGGER IF EXISTS system_settings_updated_at ON system_settings;
CREATE TRIGGER system_settings_updated_at
BEFORE UPDATE ON system_settings
FOR EACH ROW
EXECUTE FUNCTION update_system_settings_updated_at();
COMMENT ON TABLE system_settings IS 'Runtime configuration settings';
COMMENT ON COLUMN system_settings.key IS 'Setting name (e.g., task_pool_open)';
COMMENT ON COLUMN system_settings.value IS 'Setting value as string';

View File

@@ -1,390 +0,0 @@
-- Migration 112: Worker Session Pool
-- Tracks IP/fingerprint sessions with exclusive locks and cooldowns
-- Each worker claims up to 6 tasks, uses one IP/fingerprint for those tasks,
-- then retires the session (8hr cooldown before IP can be reused)
-- Drop old identity pool tables if they exist (replacing with simpler session model)
DROP TABLE IF EXISTS worker_identity_claims CASCADE;
DROP TABLE IF EXISTS worker_identities CASCADE;
-- Worker sessions: tracks active and cooling down IP/fingerprint pairs
-- Lifecycle as implemented by the functions in this migration:
--   lock_worker_session()      -> status 'active'   (worker_id set, counters reset)
--   retire_worker_session()    -> status 'cooldown' (worker_id cleared, cooldown_until = now + 8h)
--   release_expired_sessions() -> status 'available' once cooldown_until has passed
CREATE TABLE IF NOT EXISTS worker_sessions (
id SERIAL PRIMARY KEY,
-- IP and fingerprint for this session
ip_address VARCHAR(45) NOT NULL,
fingerprint_hash VARCHAR(64) NOT NULL,
fingerprint_data JSONB,
-- Geo this session is locked to
state_code VARCHAR(2) NOT NULL,
city VARCHAR(100),
-- Ownership
worker_id VARCHAR(255), -- NULL if in cooldown
-- Status: 'active' (locked to worker), 'cooldown' (8hr wait), 'available'
status VARCHAR(20) NOT NULL DEFAULT 'available',
-- Task tracking
tasks_claimed INTEGER NOT NULL DEFAULT 0,
tasks_completed INTEGER NOT NULL DEFAULT 0,
tasks_failed INTEGER NOT NULL DEFAULT 0,
-- NOTE(review): no function in this migration reads max_tasks; the batch size is
-- passed separately to claim_tasks_batch(p_max_tasks) -- confirm where it is enforced
max_tasks INTEGER NOT NULL DEFAULT 6,
-- Timestamps
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
locked_at TIMESTAMPTZ, -- When worker locked this session
retired_at TIMESTAMPTZ, -- When session was retired (cooldown starts)
cooldown_until TIMESTAMPTZ, -- When session becomes available again
-- Constraints
CONSTRAINT valid_status CHECK (status IN ('active', 'cooldown', 'available'))
);
-- Indexes for fast lookups
CREATE INDEX IF NOT EXISTS idx_worker_sessions_ip ON worker_sessions(ip_address);
CREATE INDEX IF NOT EXISTS idx_worker_sessions_status ON worker_sessions(status);
CREATE INDEX IF NOT EXISTS idx_worker_sessions_worker ON worker_sessions(worker_id) WHERE worker_id IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_worker_sessions_geo ON worker_sessions(state_code, city);
CREATE INDEX IF NOT EXISTS idx_worker_sessions_cooldown ON worker_sessions(cooldown_until) WHERE status = 'cooldown';
-- Unique constraint: only one active session per IP
CREATE UNIQUE INDEX IF NOT EXISTS idx_worker_sessions_active_ip
ON worker_sessions(ip_address)
WHERE status = 'active';
-- Function: Check if IP is available (not active, not in cooldown)
-- Returns TRUE when no session on check_ip is active or still cooling down.
CREATE OR REPLACE FUNCTION is_ip_available(check_ip VARCHAR(45))
RETURNS BOOLEAN
LANGUAGE plpgsql
AS $$
BEGIN
    -- Probe for a blocking session; PERFORM sets FOUND when at least one row matches
    PERFORM 1
    FROM worker_sessions ws
    WHERE ws.ip_address = check_ip
      AND (
          ws.status = 'active'
          OR (ws.status = 'cooldown' AND ws.cooldown_until > NOW())
      );

    RETURN NOT FOUND;
END;
$$;
-- Function: Lock a session to a worker
-- Returns the session if successful, NULL if IP not available
--
-- Parameters:
--   p_worker_id         worker taking the lock
--   p_ip_address        IP to lock
--   p_state_code/p_city geo recorded on the session
--   p_fingerprint_hash  optional; when NULL a reused session keeps its hash and a
--                       new session gets md5(random())
--   p_fingerprint_data  optional; when NULL a reused session keeps its data
--
-- Concurrency: the availability check and the write are separate statements, so
-- two callers can both pass the check for the same IP. The partial unique index
-- idx_worker_sessions_active_ip rejects the loser; that unique_violation is
-- caught here and reported as NULL ("IP not available") instead of an error.
CREATE OR REPLACE FUNCTION lock_worker_session(
    p_worker_id VARCHAR(255),
    p_ip_address VARCHAR(45),
    p_state_code VARCHAR(2),
    p_city VARCHAR(100) DEFAULT NULL,
    p_fingerprint_hash VARCHAR(64) DEFAULT NULL,
    p_fingerprint_data JSONB DEFAULT NULL
) RETURNS worker_sessions AS $$
DECLARE
    v_session worker_sessions;
BEGIN
    -- First check if IP is available
    IF NOT is_ip_available(p_ip_address) THEN
        RETURN NULL;
    END IF;

    -- Try to find an existing available session for this IP.
    -- SKIP LOCKED: a row another transaction is already locking is ignored.
    SELECT * INTO v_session
    FROM worker_sessions
    WHERE ip_address = p_ip_address
      AND status = 'available'
    FOR UPDATE SKIP LOCKED
    LIMIT 1;

    IF v_session.id IS NOT NULL THEN
        -- Reuse existing session: reset counters and timestamps for the new owner
        UPDATE worker_sessions SET
            worker_id = p_worker_id,
            status = 'active',
            state_code = p_state_code,
            city = p_city,
            fingerprint_hash = COALESCE(p_fingerprint_hash, fingerprint_hash),
            fingerprint_data = COALESCE(p_fingerprint_data, fingerprint_data),
            tasks_claimed = 0,
            tasks_completed = 0,
            tasks_failed = 0,
            locked_at = NOW(),
            retired_at = NULL,
            cooldown_until = NULL
        WHERE id = v_session.id
        RETURNING * INTO v_session;
    ELSE
        -- Create new session
        INSERT INTO worker_sessions (
            ip_address, fingerprint_hash, fingerprint_data,
            state_code, city, worker_id, status, locked_at
        ) VALUES (
            p_ip_address, COALESCE(p_fingerprint_hash, md5(random()::text)),
            p_fingerprint_data, p_state_code, p_city, p_worker_id, 'active', NOW()
        )
        RETURNING * INTO v_session;
    END IF;

    RETURN v_session;
EXCEPTION
    -- Another transaction activated this IP between our check and our write
    WHEN unique_violation THEN
        RETURN NULL;
END;
$$ LANGUAGE plpgsql;
-- Function: Retire a session (start 8hr cooldown)
-- Moves the worker's active session(s) to 'cooldown' and detaches the worker.
-- Returns TRUE when at least one session was retired.
CREATE OR REPLACE FUNCTION retire_worker_session(p_worker_id VARCHAR(255))
RETURNS BOOLEAN
LANGUAGE plpgsql
AS $$
BEGIN
    UPDATE worker_sessions ws
    SET status         = 'cooldown',
        worker_id      = NULL,
        retired_at     = NOW(),
        cooldown_until = NOW() + INTERVAL '8 hours'
    WHERE ws.status = 'active'
      AND ws.worker_id = p_worker_id;

    -- FOUND is TRUE exactly when the UPDATE touched one or more rows
    RETURN FOUND;
END;
$$;
-- Function: Release expired cooldowns
-- Flips every 'cooldown' session whose cooldown_until has passed back to
-- 'available' and returns how many sessions were released.
CREATE OR REPLACE FUNCTION release_expired_sessions()
RETURNS INTEGER
LANGUAGE plpgsql
AS $$
DECLARE
    v_count INTEGER;
BEGIN
    WITH expired AS (
        UPDATE worker_sessions
        SET status = 'available'
        WHERE cooldown_until <= NOW()
          AND status = 'cooldown'
        RETURNING 1
    )
    SELECT COUNT(*) INTO v_count FROM expired;

    RETURN v_count;
END;
$$;
-- Function: Get session for worker
-- Returns one active session row owned by p_worker_id (NULL row when none).
CREATE OR REPLACE FUNCTION get_worker_session(p_worker_id VARCHAR(255))
RETURNS worker_sessions
LANGUAGE sql
AS $$
    SELECT ws.*
    FROM worker_sessions ws
    WHERE ws.status = 'active'
      AND ws.worker_id = p_worker_id
    LIMIT 1;
$$;
-- Function: Increment task counters
-- Adds one to tasks_completed on the worker's active session.
-- Returns TRUE when an active session was found and updated.
CREATE OR REPLACE FUNCTION session_task_completed(p_worker_id VARCHAR(255))
RETURNS BOOLEAN
LANGUAGE plpgsql
AS $$
BEGIN
    UPDATE worker_sessions ws
    SET tasks_completed = ws.tasks_completed + 1
    WHERE ws.status = 'active'
      AND ws.worker_id = p_worker_id;

    RETURN FOUND;
END;
$$;
CREATE OR REPLACE FUNCTION session_task_failed(p_worker_id VARCHAR(255))
RETURNS BOOLEAN
LANGUAGE plpgsql
AS $$
BEGIN
    -- Adds one to tasks_failed on the worker's active session;
    -- the return value is TRUE when such a session was updated.
    UPDATE worker_sessions ws
    SET tasks_failed = ws.tasks_failed + 1
    WHERE ws.status = 'active'
      AND ws.worker_id = p_worker_id;

    RETURN FOUND;
END;
$$;
CREATE OR REPLACE FUNCTION session_task_claimed(p_worker_id VARCHAR(255), p_count INTEGER DEFAULT 1)
RETURNS BOOLEAN
LANGUAGE plpgsql
AS $$
BEGIN
    -- Adds p_count (default 1) to tasks_claimed on the worker's active session;
    -- the return value is TRUE when such a session was updated.
    UPDATE worker_sessions ws
    SET tasks_claimed = ws.tasks_claimed + p_count
    WHERE ws.status = 'active'
      AND ws.worker_id = p_worker_id;

    RETURN FOUND;
END;
$$;
-- Scheduled job hint: Run release_expired_sessions() every 5 minutes
COMMENT ON FUNCTION release_expired_sessions() IS
'Run periodically to release sessions from cooldown. Suggest: every 5 minutes.';
-- =============================================================================
-- ATOMIC TASK CLAIMING
-- Worker claims up to 6 tasks for same geo in one transaction
-- =============================================================================
-- Function: Claim up to N tasks for same geo
-- Returns claimed tasks with dispensary geo info
--
-- Parameters:
--   p_worker_id  worker that will own the claimed tasks
--   p_max_tasks  upper bound on tasks claimed in this call (default 6)
--   p_role       optional role filter; NULL = any role
--
-- Behaviour:
--   1. Picks the (state, city) with the most pending tasks (matching p_role).
--   2. Claims up to p_max_tasks pending tasks there, highest priority first,
--      then oldest first, and returns them. Returns no rows when nothing is pending.
--
-- Locking: only worker_tasks rows are locked (FOR UPDATE OF t2). Without the
-- OF clause the joined dispensaries rows would be locked too, which makes
-- concurrent claimers SKIP otherwise-free tasks for the same store and blocks
-- unrelated updates to dispensaries for the duration of the transaction.
CREATE OR REPLACE FUNCTION claim_tasks_batch(
    p_worker_id VARCHAR(255),
    p_max_tasks INTEGER DEFAULT 6,
    p_role VARCHAR(50) DEFAULT NULL -- Optional role filter
) RETURNS TABLE (
    task_id INTEGER,
    role VARCHAR(50),
    dispensary_id INTEGER,
    dispensary_name VARCHAR(255),
    city VARCHAR(100),
    state_code VARCHAR(2),
    platform VARCHAR(50),
    method VARCHAR(20)
) AS $$
DECLARE
    v_target_state VARCHAR(2);
    v_target_city VARCHAR(100);
BEGIN
    -- First, find the geo with most pending tasks to target
    SELECT d.state, d.city INTO v_target_state, v_target_city
    FROM worker_tasks t
    JOIN dispensaries d ON t.dispensary_id = d.id
    WHERE t.status = 'pending'
      AND (p_role IS NULL OR t.role = p_role)
    GROUP BY d.state, d.city
    ORDER BY COUNT(*) DESC
    LIMIT 1;

    -- No pending tasks
    IF v_target_state IS NULL THEN
        RETURN;
    END IF;

    -- Claim up to p_max_tasks for this geo.
    -- All column references are table-qualified because the OUT column names
    -- (role, city, dispensary_id, ...) are also PL/pgSQL variables here.
    RETURN QUERY
    WITH claimed AS (
        UPDATE worker_tasks t SET
            status = 'claimed',
            worker_id = p_worker_id,
            claimed_at = NOW()
        FROM (
            SELECT t2.id
            FROM worker_tasks t2
            JOIN dispensaries d ON t2.dispensary_id = d.id
            WHERE t2.status = 'pending'
              AND d.state = v_target_state
              -- A NULL target city means the winning group had no city: take any city in the state
              AND (v_target_city IS NULL OR d.city = v_target_city)
              AND (p_role IS NULL OR t2.role = p_role)
            ORDER BY t2.priority DESC, t2.created_at ASC
            FOR UPDATE OF t2 SKIP LOCKED
            LIMIT p_max_tasks
        ) sub
        WHERE t.id = sub.id
        RETURNING t.id, t.role, t.dispensary_id, t.method
    )
    SELECT
        c.id as task_id,
        c.role,
        c.dispensary_id,
        d.name as dispensary_name,
        d.city,
        d.state as state_code,
        d.platform,
        c.method
    FROM claimed c
    JOIN dispensaries d ON c.dispensary_id = d.id;
END;
$$ LANGUAGE plpgsql;
-- Function: Release claimed tasks back to pending (for failed worker or cleanup)
-- Resets every 'claimed' or 'running' task owned by p_worker_id to 'pending'
-- and returns the number of tasks released.
CREATE OR REPLACE FUNCTION release_claimed_tasks(p_worker_id VARCHAR(255))
RETURNS INTEGER AS $$
DECLARE
    v_released INTEGER;
BEGIN
    UPDATE worker_tasks SET
        status = 'pending',
        worker_id = NULL,
        claimed_at = NULL,
        -- 'running' tasks carry a started_at from start_task(); clear it so a
        -- pending task never shows a stale start time (same as fail_task's retry path)
        started_at = NULL
    WHERE worker_id = p_worker_id
      AND status IN ('claimed', 'running');

    GET DIAGNOSTICS v_released = ROW_COUNT;
    RETURN v_released;
END;
$$ LANGUAGE plpgsql;
-- Function: Mark task as running
-- Transitions a task from 'claimed' to 'running' and stamps started_at.
-- Only succeeds for the worker that holds the claim; returns TRUE on success.
CREATE OR REPLACE FUNCTION start_task(p_task_id INTEGER, p_worker_id VARCHAR(255))
RETURNS BOOLEAN
LANGUAGE plpgsql
AS $$
BEGIN
    UPDATE worker_tasks wt
    SET status     = 'running',
        started_at = NOW()
    WHERE wt.status = 'claimed'
      AND wt.worker_id = p_worker_id
      AND wt.id = p_task_id;

    RETURN FOUND;
END;
$$;
-- Function: Mark task as completed (leaves pool)
-- Transitions a task from 'running' to 'completed', stamps completed_at and
-- stores p_result (may be NULL). Only succeeds for the owning worker;
-- returns TRUE on success.
CREATE OR REPLACE FUNCTION complete_task(
    p_task_id INTEGER,
    p_worker_id VARCHAR(255),
    p_result JSONB DEFAULT NULL
) RETURNS BOOLEAN
LANGUAGE plpgsql
AS $$
BEGIN
    UPDATE worker_tasks wt
    SET status       = 'completed',
        completed_at = NOW(),
        result       = p_result
    WHERE wt.status = 'running'
      AND wt.worker_id = p_worker_id
      AND wt.id = p_task_id;

    RETURN FOUND;
END;
$$;
-- Function: Mark task as failed (returns to pending for retry)
--
-- Parameters:
--   p_task_id      task being failed
--   p_worker_id    worker reporting the failure; must own the task
--   p_error        optional error text stored in error_message
--   p_max_retries  once retry_count has reached this value the task is marked
--                  permanently 'failed' instead of returning to 'pending'
--
-- Returns TRUE when the task was updated, FALSE when p_worker_id does not own it
-- (or it does not exist).
CREATE OR REPLACE FUNCTION fail_task(
    p_task_id INTEGER,
    p_worker_id VARCHAR(255),
    p_error TEXT DEFAULT NULL,
    p_max_retries INTEGER DEFAULT 3
) RETURNS BOOLEAN AS $$
DECLARE
    v_retry_count INTEGER;
BEGIN
    -- Read the current retry count from the row this worker owns and lock it,
    -- so the read-then-write below cannot interleave with another writer.
    SELECT COALESCE(retry_count, 0) INTO v_retry_count
    FROM worker_tasks
    WHERE id = p_task_id
      AND worker_id = p_worker_id
    FOR UPDATE;

    -- Not this worker's task: nothing to do
    IF NOT FOUND THEN
        RETURN FALSE;
    END IF;

    IF v_retry_count >= p_max_retries THEN
        -- Max retries exceeded - mark as permanently failed
        UPDATE worker_tasks SET
            status = 'failed',
            completed_at = NOW(),
            error_message = p_error,
            retry_count = v_retry_count + 1
        WHERE id = p_task_id
          AND worker_id = p_worker_id;
    ELSE
        -- Return to pending for retry
        UPDATE worker_tasks SET
            status = 'pending',
            worker_id = NULL,
            claimed_at = NULL,
            started_at = NULL,
            error_message = p_error,
            retry_count = v_retry_count + 1
        WHERE id = p_task_id
          AND worker_id = p_worker_id;
    END IF;

    RETURN FOUND;
END;
$$ LANGUAGE plpgsql;
-- Add retry_count / claimed_at columns if not exists.
-- ADD COLUMN IF NOT EXISTS checks the actual target table, whereas a lookup in
-- information_schema.columns by table_name alone can be satisfied by a
-- same-named table in a different schema and wrongly skip the ALTER.
ALTER TABLE worker_tasks ADD COLUMN IF NOT EXISTS retry_count INTEGER NOT NULL DEFAULT 0;
ALTER TABLE worker_tasks ADD COLUMN IF NOT EXISTS claimed_at TIMESTAMPTZ;

View File

@@ -1,381 +0,0 @@
-- Task Pools: Group tasks by geo area for worker assignment
-- Workers claim a pool, get proxy for that geo, then pull tasks from pool
-- ============================================================================
-- TASK POOLS TABLE
-- ============================================================================
-- Each pool represents a metro area (e.g., Phoenix AZ = 100mi radius)
-- Dispensaries are assigned to pools based on location
-- Workers claim a pool, not individual tasks
-- One row per geo pool; (latitude, longitude, radius_miles) define the circle
-- that assign_dispensary_to_pool() uses to decide membership.
CREATE TABLE IF NOT EXISTS task_pools (
id SERIAL PRIMARY KEY,
name VARCHAR(100) NOT NULL UNIQUE, -- e.g., 'phoenix_az'
display_name VARCHAR(100) NOT NULL, -- e.g., 'Phoenix, AZ'
state_code VARCHAR(2) NOT NULL, -- e.g., 'AZ'
city VARCHAR(100) NOT NULL, -- e.g., 'Phoenix'
latitude DECIMAL(10, 6) NOT NULL, -- pool center lat
longitude DECIMAL(10, 6) NOT NULL, -- pool center lng
radius_miles INTEGER DEFAULT 100, -- pool radius (100mi default)
timezone VARCHAR(50) NOT NULL, -- e.g., 'America/Phoenix'
is_active BOOLEAN DEFAULT true, -- inactive pools are ignored by the pool functions below
created_at TIMESTAMPTZ DEFAULT NOW(),
-- NOTE(review): no updated_at trigger is defined for this table in this migration
updated_at TIMESTAMPTZ DEFAULT NOW()
);
-- Index for active pools
CREATE INDEX IF NOT EXISTS idx_task_pools_active ON task_pools(is_active) WHERE is_active = true;
-- ============================================================================
-- LINK DISPENSARIES TO POOLS
-- ============================================================================
-- Add pool_id to dispensaries table
ALTER TABLE dispensaries
ADD COLUMN IF NOT EXISTS pool_id INTEGER REFERENCES task_pools(id);
-- Index for pool membership
CREATE INDEX IF NOT EXISTS idx_dispensaries_pool ON dispensaries(pool_id) WHERE pool_id IS NOT NULL;
-- ============================================================================
-- WORKER POOL ASSIGNMENT
-- ============================================================================
-- Track which pool a worker is currently assigned to
ALTER TABLE worker_registry
ADD COLUMN IF NOT EXISTS current_pool_id INTEGER REFERENCES task_pools(id),
ADD COLUMN IF NOT EXISTS pool_claimed_at TIMESTAMPTZ,
ADD COLUMN IF NOT EXISTS pool_stores_visited INTEGER DEFAULT 0,
ADD COLUMN IF NOT EXISTS pool_max_stores INTEGER DEFAULT 6;
-- ============================================================================
-- SEED INITIAL POOLS
-- ============================================================================
-- Major cannabis markets with approximate center coordinates
INSERT INTO task_pools (name, display_name, state_code, city, latitude, longitude, timezone, radius_miles) VALUES
-- Arizona
('phoenix_az', 'Phoenix, AZ', 'AZ', 'Phoenix', 33.4484, -112.0740, 'America/Phoenix', 100),
('tucson_az', 'Tucson, AZ', 'AZ', 'Tucson', 32.2226, -110.9747, 'America/Phoenix', 75),
-- California
('los_angeles_ca', 'Los Angeles, CA', 'CA', 'Los Angeles', 34.0522, -118.2437, 'America/Los_Angeles', 100),
('san_francisco_ca', 'San Francisco, CA', 'CA', 'San Francisco', 37.7749, -122.4194, 'America/Los_Angeles', 75),
('san_diego_ca', 'San Diego, CA', 'CA', 'San Diego', 32.7157, -117.1611, 'America/Los_Angeles', 75),
('sacramento_ca', 'Sacramento, CA', 'CA', 'Sacramento', 38.5816, -121.4944, 'America/Los_Angeles', 75),
-- Colorado
('denver_co', 'Denver, CO', 'CO', 'Denver', 39.7392, -104.9903, 'America/Denver', 100),
-- Illinois
('chicago_il', 'Chicago, IL', 'IL', 'Chicago', 41.8781, -87.6298, 'America/Chicago', 100),
-- Massachusetts
('boston_ma', 'Boston, MA', 'MA', 'Boston', 42.3601, -71.0589, 'America/New_York', 75),
-- Michigan
('detroit_mi', 'Detroit, MI', 'MI', 'Detroit', 42.3314, -83.0458, 'America/Detroit', 100),
-- Nevada
('las_vegas_nv', 'Las Vegas, NV', 'NV', 'Las Vegas', 36.1699, -115.1398, 'America/Los_Angeles', 75),
('reno_nv', 'Reno, NV', 'NV', 'Reno', 39.5296, -119.8138, 'America/Los_Angeles', 50),
-- New Jersey
('newark_nj', 'Newark, NJ', 'NJ', 'Newark', 40.7357, -74.1724, 'America/New_York', 75),
-- New York
('new_york_ny', 'New York, NY', 'NY', 'New York', 40.7128, -74.0060, 'America/New_York', 75),
-- Oklahoma
('oklahoma_city_ok', 'Oklahoma City, OK', 'OK', 'Oklahoma City', 35.4676, -97.5164, 'America/Chicago', 100),
('tulsa_ok', 'Tulsa, OK', 'OK', 'Tulsa', 36.1540, -95.9928, 'America/Chicago', 75),
-- Oregon
('portland_or', 'Portland, OR', 'OR', 'Portland', 45.5152, -122.6784, 'America/Los_Angeles', 75),
-- Washington
('seattle_wa', 'Seattle, WA', 'WA', 'Seattle', 47.6062, -122.3321, 'America/Los_Angeles', 100)
ON CONFLICT (name) DO NOTHING;
-- ============================================================================
-- FUNCTION: Assign dispensary to nearest pool
-- ============================================================================
CREATE OR REPLACE FUNCTION assign_dispensary_to_pool(disp_id INTEGER)
RETURNS INTEGER AS $$
-- Assigns dispensary disp_id to the nearest active pool whose radius contains it.
-- Returns the chosen pool id, or NULL when the dispensary has no coordinates
-- (or does not exist) or no active pool is within range. The dispensary row is
-- only updated when a pool is found.
DECLARE
    disp_lat DECIMAL(10,6);
    disp_lng DECIMAL(10,6);
    nearest_pool_id INTEGER;
BEGIN
    -- Get dispensary coordinates
    SELECT d.latitude, d.longitude INTO disp_lat, disp_lng
    FROM dispensaries d
    WHERE d.id = disp_id;

    IF disp_lat IS NULL OR disp_lng IS NULL THEN
        RETURN NULL;
    END IF;

    -- Find nearest active pool within radius.
    -- Great-circle distance via the spherical law of cosines (3959 = Earth
    -- radius in miles). The acos() argument is clamped to [-1, 1]: floating-point
    -- rounding can push it marginally past 1 when the two points (nearly)
    -- coincide, and acos() raises "input is out of range" in that case.
    -- The distance is computed once in the subquery and reused for both the
    -- radius filter and the ordering.
    SELECT p.id INTO nearest_pool_id
    FROM (
        SELECT
            tp.id,
            tp.radius_miles,
            3959 * acos(
                LEAST(1.0::double precision, GREATEST(-1.0::double precision,
                    cos(radians(tp.latitude)) * cos(radians(disp_lat)) *
                    cos(radians(disp_lng) - radians(tp.longitude)) +
                    sin(radians(tp.latitude)) * sin(radians(disp_lat))
                ))
            ) AS distance_miles
        FROM task_pools tp
        WHERE tp.is_active = true
    ) p
    WHERE p.distance_miles <= p.radius_miles
    ORDER BY p.distance_miles
    LIMIT 1;

    -- Update dispensary
    IF nearest_pool_id IS NOT NULL THEN
        UPDATE dispensaries SET pool_id = nearest_pool_id WHERE id = disp_id;
    END IF;

    RETURN nearest_pool_id;
END;
$$ LANGUAGE plpgsql;
-- ============================================================================
-- FUNCTION: Assign all dispensaries to pools (batch)
-- ============================================================================
CREATE OR REPLACE FUNCTION assign_all_dispensaries_to_pools()
RETURNS TABLE(assigned INTEGER, unassigned INTEGER) AS $$
-- Runs assign_dispensary_to_pool() for every dispensary that has a latitude but
-- no pool yet. Returns a single row: how many were assigned and how many were not.
DECLARE
    assigned_count INTEGER := 0;
    unassigned_count INTEGER := 0;
    disp RECORD;
    -- Deliberately NOT named pool_id: a variable with the same name as the
    -- dispensaries.pool_id column makes the unqualified reference in the loop
    -- query ambiguous, which PL/pgSQL rejects at runtime.
    v_pool_id INTEGER;
BEGIN
    FOR disp IN
        SELECT d.id
        FROM dispensaries d
        WHERE d.pool_id IS NULL
          AND d.latitude IS NOT NULL
    LOOP
        v_pool_id := assign_dispensary_to_pool(disp.id);
        IF v_pool_id IS NOT NULL THEN
            assigned_count := assigned_count + 1;
        ELSE
            unassigned_count := unassigned_count + 1;
        END IF;
    END LOOP;

    RETURN QUERY SELECT assigned_count, unassigned_count;
END;
$$ LANGUAGE plpgsql;
-- ============================================================================
-- FUNCTION: Get pools with pending tasks
-- ============================================================================
CREATE OR REPLACE FUNCTION get_pools_with_pending_tasks()
RETURNS TABLE(
    pool_id INTEGER,
    pool_name VARCHAR(100),
    display_name VARCHAR(100),
    state_code VARCHAR(2),
    city VARCHAR(100),
    timezone VARCHAR(50),
    pending_count BIGINT,
    store_count BIGINT
)
LANGUAGE plpgsql
AS $$
-- Lists active pools that have at least one pending task, busiest first.
-- pending_count = distinct pending tasks in the pool;
-- store_count   = distinct dispensaries in the pool that have a pending task.
BEGIN
    RETURN QUERY
    SELECT
        tp.id   AS pool_id,
        tp.name AS pool_name,
        tp.display_name,
        tp.state_code,
        tp.city,
        tp.timezone,
        COUNT(DISTINCT t.id) AS pending_count,
        COUNT(DISTINCT d.id) AS store_count
    FROM task_pools tp
    INNER JOIN dispensaries d ON d.pool_id = tp.id
    INNER JOIN tasks t ON t.dispensary_id = d.id
    WHERE t.status = 'pending'
      AND tp.is_active = true
    GROUP BY
        tp.id,
        tp.name,
        tp.display_name,
        tp.state_code,
        tp.city,
        tp.timezone
    HAVING COUNT(DISTINCT t.id) > 0
    ORDER BY COUNT(DISTINCT t.id) DESC;
END;
$$;
-- ============================================================================
-- FUNCTION: Worker claims a pool
-- ============================================================================
CREATE OR REPLACE FUNCTION worker_claim_pool(
    p_worker_id VARCHAR(100),
    p_pool_id INTEGER DEFAULT NULL
)
RETURNS TABLE(
    pool_id INTEGER,
    pool_name VARCHAR(100),
    display_name VARCHAR(100),
    state_code VARCHAR(2),
    city VARCHAR(100),
    latitude DECIMAL(10,6),
    longitude DECIMAL(10,6),
    timezone VARCHAR(50)
)
LANGUAGE plpgsql
AS $$
-- Assigns a pool to a worker in worker_registry and returns that pool's details.
-- With p_pool_id given, that pool is used as-is; otherwise the active pool with
-- the most pending tasks is chosen. Returns no rows when no pool could be chosen.
DECLARE
    v_pool_id INTEGER := p_pool_id;
BEGIN
    -- No explicit pool requested: pick the busiest active one
    IF v_pool_id IS NULL THEN
        SELECT tp.id INTO v_pool_id
        FROM task_pools tp
        INNER JOIN dispensaries d ON d.pool_id = tp.id
        INNER JOIN tasks t ON t.dispensary_id = d.id AND t.status = 'pending'
        WHERE tp.is_active = true
        GROUP BY tp.id
        ORDER BY COUNT(DISTINCT t.id) DESC
        LIMIT 1;
    END IF;

    -- Still nothing to claim
    IF v_pool_id IS NULL THEN
        RETURN;
    END IF;

    -- Record the assignment and reset the worker's per-pool store counters
    UPDATE worker_registry
    SET current_pool_id     = v_pool_id,
        pool_claimed_at     = NOW(),
        pool_stores_visited = 0,
        pool_max_stores     = 6,
        updated_at          = NOW()
    WHERE worker_id = p_worker_id;

    -- Hand back the pool's details
    RETURN QUERY
    SELECT
        tp.id,
        tp.name,
        tp.display_name,
        tp.state_code,
        tp.city,
        tp.latitude,
        tp.longitude,
        tp.timezone
    FROM task_pools tp
    WHERE tp.id = v_pool_id;
END;
$$;
-- ============================================================================
-- FUNCTION: Pull tasks from worker's pool (up to 6 stores)
-- ============================================================================
CREATE OR REPLACE FUNCTION pull_tasks_from_pool(
    p_worker_id VARCHAR(100),
    p_max_stores INTEGER DEFAULT 6
)
RETURNS TABLE(
    task_id INTEGER,
    dispensary_id INTEGER,
    dispensary_name VARCHAR(255),
    role VARCHAR(50),
    platform VARCHAR(50),
    method VARCHAR(20)
) AS $$
-- Claims due pending tasks from the worker's current pool, one task per store,
-- and returns the claimed tasks.
--
-- Parameters:
--   p_worker_id   worker pulling tasks; must already have a pool assigned
--   p_max_stores  cap on stores claimed by this call; the effective limit is
--                 the smaller of this and the worker's remaining pool capacity
--                 (pool_max_stores - pool_stores_visited)
--
-- Raises an exception when the worker has no pool assigned.
-- Returns no rows when the worker's capacity is exhausted.
--
-- Every column reference is table-qualified: the OUT column names (task_id,
-- dispensary_id, role, ...) are PL/pgSQL variables inside this function, and an
-- unqualified column of the same name is rejected as ambiguous at runtime.
DECLARE
    v_pool_id INTEGER;
    v_stores_visited INTEGER;
    v_max_stores INTEGER;
    v_stores_remaining INTEGER;
    v_batch_limit INTEGER;
    v_claimed INTEGER;
BEGIN
    -- Get worker's current pool and store count
    SELECT wr.current_pool_id, wr.pool_stores_visited, wr.pool_max_stores
    INTO v_pool_id, v_stores_visited, v_max_stores
    FROM worker_registry wr
    WHERE wr.worker_id = p_worker_id;

    IF v_pool_id IS NULL THEN
        RAISE EXCEPTION 'Worker % has no pool assigned', p_worker_id;
    END IF;

    v_stores_remaining := v_max_stores - v_stores_visited;
    IF v_stores_remaining <= 0 THEN
        RETURN; -- Worker exhausted
    END IF;

    -- LEAST ignores NULL arguments, so a NULL on either side leaves the other in force
    v_batch_limit := LEAST(v_stores_remaining, p_max_stores);

    -- Claim tasks from pool (one task per store, up to remaining capacity)
    RETURN QUERY
    WITH available_stores AS (
        -- DISTINCT ON keeps, per store, the highest-priority then oldest due task
        SELECT DISTINCT ON (d.id)
            t.id   AS task_id,
            d.id   AS dispensary_id,
            d.name AS dispensary_name,
            t.role,
            t.platform,
            t.method
        FROM tasks t
        JOIN dispensaries d ON d.id = t.dispensary_id
        WHERE d.pool_id = v_pool_id
          AND t.status = 'pending'
          AND t.scheduled_for <= NOW()
        ORDER BY d.id, t.priority DESC, t.created_at ASC
        LIMIT v_batch_limit
    ),
    claimed AS (
        UPDATE tasks t
        SET
            status = 'claimed',
            claimed_by = p_worker_id,
            claimed_at = NOW()
        FROM available_stores av
        WHERE t.id = av.task_id
          -- Re-check status on the row actually being written: if a concurrent
          -- worker claimed it first, this row is skipped instead of overwritten
          AND t.status = 'pending'
        RETURNING t.id
    )
    SELECT
        av.task_id,
        av.dispensary_id,
        av.dispensary_name,
        av.role,
        av.platform,
        av.method
    FROM available_stores av
    JOIN claimed c ON c.id = av.task_id;

    -- Rows returned above = stores claimed by THIS call (one task per store).
    -- Counting all of the worker's currently claimed tasks instead would re-add
    -- earlier batches on every call.
    GET DIAGNOSTICS v_claimed = ROW_COUNT;

    -- Update worker store count
    UPDATE worker_registry wr
    SET
        pool_stores_visited = wr.pool_stores_visited + v_claimed,
        updated_at = NOW()
    WHERE wr.worker_id = p_worker_id;
END;
$$ LANGUAGE plpgsql;
-- ============================================================================
-- FUNCTION: Worker releases pool (exhausted or done)
-- ============================================================================
CREATE OR REPLACE FUNCTION worker_release_pool(p_worker_id VARCHAR(100))
RETURNS BOOLEAN
LANGUAGE plpgsql
AS $$
-- Detaches the worker from its pool: clears the pool assignment and geo fields
-- and zeroes the visited-store counter.
-- Always returns true, including when no worker_registry row matched.
BEGIN
    UPDATE worker_registry wr
    SET current_pool_id     = NULL,
        pool_claimed_at     = NULL,
        pool_stores_visited = 0,
        current_state       = NULL,
        current_city        = NULL,
        updated_at          = NOW()
    WHERE wr.worker_id = p_worker_id;

    RETURN true;
END;
$$;
-- ============================================================================
-- RUN: Assign existing dispensaries to pools
-- ============================================================================
SELECT * FROM assign_all_dispensaries_to_pools();

View File

@@ -1,10 +0,0 @@
-- Migration 114: Add pool_id to task_schedules
-- Allows schedules to target specific geo pools
ALTER TABLE task_schedules
ADD COLUMN IF NOT EXISTS pool_id INTEGER REFERENCES task_pools(id);
-- Index for pool-based schedule queries
CREATE INDEX IF NOT EXISTS idx_task_schedules_pool ON task_schedules(pool_id) WHERE pool_id IS NOT NULL;
COMMENT ON COLUMN task_schedules.pool_id IS 'Optional geo pool filter. NULL = all pools/dispensaries matching state_code';

View File

@@ -1,17 +0,0 @@
-- Migration: Add proxy_ip tracking to worker_tasks
-- Purpose: Prevent same IP from hitting multiple stores on same platform simultaneously
--
-- Anti-detection measure: Dutchie/Jane may flag if same IP makes requests
-- for multiple different stores. This column lets us track and prevent that.
-- Add proxy_ip column to track which proxy IP is being used for each task
ALTER TABLE worker_tasks ADD COLUMN IF NOT EXISTS proxy_ip VARCHAR(45);
-- Index for quick lookup of active tasks by proxy IP
-- Used to check: "Is this IP already hitting another store?"
CREATE INDEX IF NOT EXISTS idx_worker_tasks_proxy_ip_active
ON worker_tasks (proxy_ip, platform)
WHERE status IN ('claimed', 'running') AND proxy_ip IS NOT NULL;
-- Comment
COMMENT ON COLUMN worker_tasks.proxy_ip IS 'Proxy IP assigned to this task. Used to prevent same IP hitting multiple stores on same platform.';

View File

@@ -1,16 +0,0 @@
-- Migration: Add source tracking columns to worker_tasks
-- Purpose: Track where tasks originated from (schedule, API, manual)
-- Add source tracking columns
ALTER TABLE worker_tasks ADD COLUMN IF NOT EXISTS source VARCHAR(50);
ALTER TABLE worker_tasks ADD COLUMN IF NOT EXISTS source_schedule_id INTEGER REFERENCES task_schedules(id);
ALTER TABLE worker_tasks ADD COLUMN IF NOT EXISTS source_metadata JSONB;
-- Index for tracking tasks by schedule
CREATE INDEX IF NOT EXISTS idx_worker_tasks_source_schedule
ON worker_tasks (source_schedule_id) WHERE source_schedule_id IS NOT NULL;
-- Comments
COMMENT ON COLUMN worker_tasks.source IS 'Origin of task: schedule, api, manual, chain';
COMMENT ON COLUMN worker_tasks.source_schedule_id IS 'ID of schedule that created this task';
COMMENT ON COLUMN worker_tasks.source_metadata IS 'Additional metadata about task origin';

View File

@@ -1,32 +0,0 @@
-- Migration 117: Per-store crawl interval scheduling
-- Adds columns for configurable per-store crawl intervals
-- Part of Real-Time Inventory Tracking feature
-- Per-store crawl interval (NULL = use state schedule default 4h)
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS crawl_interval_minutes INT DEFAULT NULL;
-- When this store should next be crawled (used by high-frequency scheduler)
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS next_crawl_at TIMESTAMPTZ DEFAULT NULL;
-- Track last request time to enforce minimum spacing
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS last_crawl_started_at TIMESTAMPTZ DEFAULT NULL;
-- Change tracking for optimization
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS last_inventory_hash TEXT DEFAULT NULL;
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS last_price_hash TEXT DEFAULT NULL;
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS inventory_changes_24h INT DEFAULT 0;
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS price_changes_24h INT DEFAULT 0;
-- Index for scheduler query: find stores due for high-frequency crawl
CREATE INDEX IF NOT EXISTS idx_dispensaries_next_crawl
ON dispensaries(next_crawl_at)
WHERE crawl_interval_minutes IS NOT NULL AND crawl_enabled = TRUE;
-- Comment for documentation
COMMENT ON COLUMN dispensaries.crawl_interval_minutes IS 'Custom crawl interval in minutes. NULL = use state schedule (4h default). Set to 15/30/60 for high-frequency tracking.';
COMMENT ON COLUMN dispensaries.next_crawl_at IS 'When this store should next be crawled. Updated after each crawl with interval + jitter.';
COMMENT ON COLUMN dispensaries.last_crawl_started_at IS 'When the last crawl task was created. Used to enforce minimum spacing.';
COMMENT ON COLUMN dispensaries.last_inventory_hash IS 'Hash of inventory state from last crawl. Used to detect changes and skip unchanged payloads.';
COMMENT ON COLUMN dispensaries.last_price_hash IS 'Hash of price state from last crawl. Used to detect price changes.';
COMMENT ON COLUMN dispensaries.inventory_changes_24h IS 'Number of inventory changes detected in last 24h. Indicates store volatility.';
COMMENT ON COLUMN dispensaries.price_changes_24h IS 'Number of price changes detected in last 24h.';

View File

@@ -1,48 +0,0 @@
-- Migration 118: Inventory snapshots table
-- Lightweight per-product tracking for sales velocity estimation
-- Part of Real-Time Inventory Tracking feature
--
-- Every statement in this migration is idempotent (IF NOT EXISTS), so the
-- migration can be re-applied safely against a database that already has it.
CREATE TABLE IF NOT EXISTS inventory_snapshots (
    id BIGSERIAL PRIMARY KEY,
    dispensary_id INT NOT NULL REFERENCES dispensaries(id) ON DELETE CASCADE,
    product_id TEXT NOT NULL, -- provider_product_id (normalized across platforms)
    captured_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    -- Platform (for debugging/filtering)
    platform TEXT NOT NULL, -- 'dutchie' | 'jane' | 'treez'
    -- Inventory fields (normalized from all platforms)
    quantity_available INT, -- Dutchie: quantityAvailable, Jane: quantity, Treez: quantityAvailable
    is_below_threshold BOOLEAN, -- Dutchie: isBelowThreshold, Jane: computed, Treez: lowInventory
    status TEXT, -- Active/Inactive/available
    -- Price fields (normalized)
    price_rec NUMERIC(10,2), -- recreational price
    price_med NUMERIC(10,2), -- medical price (if different)
    -- Denormalized for fast queries
    brand_name TEXT,
    category TEXT,
    product_name TEXT
);

-- Primary query: get snapshots for a store over time
CREATE INDEX IF NOT EXISTS idx_inv_snap_store_time
    ON inventory_snapshots(dispensary_id, captured_at DESC);
-- Delta calculation: get consecutive snapshots for a product
CREATE INDEX IF NOT EXISTS idx_inv_snap_product_time
    ON inventory_snapshots(dispensary_id, product_id, captured_at DESC);
-- Brand-level analytics (rows without a brand are not indexed)
CREATE INDEX IF NOT EXISTS idx_inv_snap_brand_time
    ON inventory_snapshots(brand_name, captured_at DESC)
    WHERE brand_name IS NOT NULL;
-- Platform filtering
CREATE INDEX IF NOT EXISTS idx_inv_snap_platform
    ON inventory_snapshots(platform, captured_at DESC);
-- Retention cleanup (30 days) - simple index, cleanup job handles the WHERE
CREATE INDEX IF NOT EXISTS idx_inv_snap_cleanup
    ON inventory_snapshots(captured_at);

-- Comments
COMMENT ON TABLE inventory_snapshots IS 'Lightweight inventory snapshots for sales velocity tracking. Retained 30 days.';
COMMENT ON COLUMN inventory_snapshots.product_id IS 'Provider product ID, normalized across platforms';
COMMENT ON COLUMN inventory_snapshots.platform IS 'Menu platform: dutchie, jane, or treez';
COMMENT ON COLUMN inventory_snapshots.quantity_available IS 'Current quantity in stock (Dutchie: quantityAvailable, Jane: quantity)';

View File

@@ -1,53 +0,0 @@
-- Migration 119: Product visibility events table
-- Tracks OOS, brand drops, and other notable events for alerts
-- Part of Real-Time Inventory Tracking feature
--
-- Every statement in this migration is idempotent (IF NOT EXISTS), so the
-- migration can be re-applied safely against a database that already has it.
CREATE TABLE IF NOT EXISTS product_visibility_events (
    id SERIAL PRIMARY KEY,
    dispensary_id INT NOT NULL REFERENCES dispensaries(id) ON DELETE CASCADE,
    -- Product identification (null for brand-level events)
    product_id TEXT, -- provider_product_id
    product_name TEXT, -- For display in alerts
    -- Brand. The column is nullable and the brand index below skips NULLs,
    -- so callers must not assume it is always populated.
    brand_name TEXT,
    -- Event details
    event_type TEXT NOT NULL, -- 'oos', 'back_in_stock', 'brand_dropped', 'brand_added', 'price_change'
    detected_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    -- Context
    previous_quantity INT, -- For OOS events: what quantity was before
    previous_price NUMERIC(10,2), -- For price change events
    new_price NUMERIC(10,2), -- For price change events
    -- Percentage change (e.g., -15.5 for 15.5% decrease).
    -- NUMERIC(5,2) tops out at +/-999.99; writers must clamp larger swings.
    price_change_pct NUMERIC(5,2),
    -- Platform
    platform TEXT, -- 'dutchie' | 'jane' | 'treez'
    -- Alert status
    notified BOOLEAN DEFAULT FALSE, -- Has external system been notified?
    acknowledged_at TIMESTAMPTZ, -- When user acknowledged the alert
    acknowledged_by TEXT -- User who acknowledged
);

-- Primary query: recent events by store
CREATE INDEX IF NOT EXISTS idx_vis_events_store_time
    ON product_visibility_events(dispensary_id, detected_at DESC);
-- Alert queries: unnotified events
CREATE INDEX IF NOT EXISTS idx_vis_events_unnotified
    ON product_visibility_events(notified, detected_at DESC)
    WHERE notified = FALSE;
-- Event type filtering
CREATE INDEX IF NOT EXISTS idx_vis_events_type
    ON product_visibility_events(event_type, detected_at DESC);
-- Brand-level queries
CREATE INDEX IF NOT EXISTS idx_vis_events_brand
    ON product_visibility_events(brand_name, event_type, detected_at DESC)
    WHERE brand_name IS NOT NULL;
-- Cleanup (90 days retention) - simple index, cleanup job handles the WHERE
CREATE INDEX IF NOT EXISTS idx_vis_events_cleanup
    ON product_visibility_events(detected_at);

-- Comments
COMMENT ON TABLE product_visibility_events IS 'Notable inventory events for alerting. OOS, brand drops, significant price changes. Retained 90 days.';
COMMENT ON COLUMN product_visibility_events.event_type IS 'Event type: oos (out of stock), back_in_stock, brand_dropped, brand_added, price_change';
COMMENT ON COLUMN product_visibility_events.notified IS 'Whether external systems (other apps) have been notified of this event';

View File

@@ -1,13 +0,0 @@
-- Migration 120: Daily baseline tracking
-- Records, per store, when the daily baseline payload was most recently saved.
-- Part of Real-Time Inventory Tracking feature

-- Timestamp of the most recent baseline save; defaults to NULL.
ALTER TABLE dispensaries
    ADD COLUMN IF NOT EXISTS last_baseline_at TIMESTAMPTZ DEFAULT NULL;

-- Partial index (crawl-enabled stores only) for locating stores that still need a baseline.
CREATE INDEX IF NOT EXISTS idx_dispensaries_baseline
    ON dispensaries (last_baseline_at)
    WHERE crawl_enabled = TRUE;

-- Catalog documentation
COMMENT ON COLUMN dispensaries.last_baseline_at
    IS 'Timestamp of last daily baseline payload save. Baselines saved once per day between 12:01 AM - 3:00 AM.';

View File

@@ -1,383 +0,0 @@
-- Migration 121: Sales Analytics Materialized Views
-- Pre-computed views for sales velocity, brand market share, and store performance
-- ============================================================
-- VIEW 1: Daily Sales Estimates (per product/store)
-- Calculates delta between consecutive snapshots
--
-- One row per (dispensary_id, product_id, sale_date). That triple is the key of
-- the unique index below, which REFRESH ... CONCURRENTLY relies on, so the
-- GROUP BY must use exactly those columns. brand_name/category are taken from
-- the most recent snapshot of the day rather than being grouped on; grouping on
-- them would emit duplicate keys whenever a product is relabelled mid-day.
-- ============================================================
CREATE MATERIALIZED VIEW IF NOT EXISTS mv_daily_sales_estimates AS
WITH qty_deltas AS (
    SELECT
        dispensary_id,
        product_id,
        brand_name,
        category,
        captured_at,
        DATE(captured_at) AS sale_date,
        price_rec,
        quantity_available,
        -- Quantity at the previous snapshot of the same product in the same store
        LAG(quantity_available) OVER (
            PARTITION BY dispensary_id, product_id
            ORDER BY captured_at
        ) AS prev_quantity
    FROM inventory_snapshots
    WHERE quantity_available IS NOT NULL
      AND captured_at >= NOW() - INTERVAL '30 days'
)
SELECT
    dispensary_id,
    product_id,
    (ARRAY_AGG(brand_name ORDER BY captured_at DESC))[1] AS brand_name,
    (ARRAY_AGG(category ORDER BY captured_at DESC))[1] AS category,
    sale_date,
    AVG(price_rec) AS avg_price,
    -- A drop in quantity is counted as sales, a rise as a restock.
    SUM(GREATEST(0, prev_quantity - quantity_available)) AS units_sold,
    SUM(GREATEST(0, quantity_available - prev_quantity)) AS units_restocked,
    SUM(GREATEST(0, prev_quantity - quantity_available) * COALESCE(price_rec, 0)) AS revenue_estimate,
    COUNT(*) AS snapshot_count
FROM qty_deltas
-- The first snapshot of each product has nothing to diff against.
WHERE prev_quantity IS NOT NULL
GROUP BY dispensary_id, product_id, sale_date;

CREATE UNIQUE INDEX IF NOT EXISTS idx_mv_daily_sales_pk
    ON mv_daily_sales_estimates(dispensary_id, product_id, sale_date);
CREATE INDEX IF NOT EXISTS idx_mv_daily_sales_brand
    ON mv_daily_sales_estimates(brand_name, sale_date);
CREATE INDEX IF NOT EXISTS idx_mv_daily_sales_category
    ON mv_daily_sales_estimates(category, sale_date);
CREATE INDEX IF NOT EXISTS idx_mv_daily_sales_date
    ON mv_daily_sales_estimates(sale_date DESC);
-- ============================================================
-- VIEW 2: Brand Market Share by State
-- Weighted distribution across stores
--
-- penetration_pct = stores_carrying / total_stores. Both sides count only
-- crawl-enabled stores; counting every store in the numerator while the
-- denominator is restricted to crawl-enabled ones lets the ratio exceed 100%.
-- sku_count / in_stock_skus / avg_price still cover every store with products.
--
-- NOTE(review): this view reads store_products.brand, while
-- mv_product_intelligence in this migration reads brand_name_raw. Confirm both
-- columns exist on store_products.
-- ============================================================
CREATE MATERIALIZED VIEW IF NOT EXISTS mv_brand_market_share AS
WITH brand_presence AS (
    SELECT
        sp.brand AS brand_name,
        d.state AS state_code,
        COUNT(DISTINCT sp.dispensary_id) FILTER (WHERE d.crawl_enabled) AS stores_carrying,
        COUNT(*) AS sku_count,
        SUM(CASE WHEN sp.is_in_stock THEN 1 ELSE 0 END) AS in_stock_skus,
        AVG(sp.price_rec) AS avg_price
    FROM store_products sp
    JOIN dispensaries d ON d.id = sp.dispensary_id
    WHERE sp.brand IS NOT NULL
      AND d.state IS NOT NULL
    GROUP BY sp.brand, d.state
),
state_totals AS (
    SELECT
        d.state AS state_code,
        COUNT(DISTINCT d.id) FILTER (WHERE d.crawl_enabled) AS total_stores
    FROM dispensaries d
    WHERE d.state IS NOT NULL
    GROUP BY d.state
)
SELECT
    bp.brand_name,
    bp.state_code,
    bp.stores_carrying,
    st.total_stores,
    -- NULLIF guards against states that have no crawl-enabled stores
    ROUND(bp.stores_carrying::NUMERIC * 100 / NULLIF(st.total_stores, 0), 2) AS penetration_pct,
    bp.sku_count,
    bp.in_stock_skus,
    bp.avg_price,
    NOW() AS calculated_at
FROM brand_presence bp
JOIN state_totals st ON st.state_code = bp.state_code;

CREATE UNIQUE INDEX IF NOT EXISTS idx_mv_brand_market_pk
    ON mv_brand_market_share(brand_name, state_code);
CREATE INDEX IF NOT EXISTS idx_mv_brand_market_state
    ON mv_brand_market_share(state_code);
CREATE INDEX IF NOT EXISTS idx_mv_brand_market_penetration
    ON mv_brand_market_share(penetration_pct DESC);
-- ============================================================
-- VIEW 3: SKU Velocity (30-day rolling)
-- Average daily units sold per SKU
--
-- One row per (dispensary_id, product_id), matching the unique index below.
-- brand_name/category come from the most recent day in the window instead of
-- being grouped on; grouping on them would emit duplicate keys as soon as a
-- product's brand or category label changes within the 30 days.
--
-- days_with_sales counts distinct days that have a row in
-- mv_daily_sales_estimates, and avg_daily_units divides by that count.
-- ============================================================
CREATE MATERIALIZED VIEW IF NOT EXISTS mv_sku_velocity AS
WITH per_sku AS (
    SELECT
        dse.product_id,
        (ARRAY_AGG(dse.brand_name ORDER BY dse.sale_date DESC))[1] AS brand_name,
        (ARRAY_AGG(dse.category ORDER BY dse.sale_date DESC))[1] AS category,
        dse.dispensary_id,
        d.name AS dispensary_name,
        d.state AS state_code,
        SUM(dse.units_sold) AS total_units_30d,
        SUM(dse.revenue_estimate) AS total_revenue_30d,
        COUNT(DISTINCT dse.sale_date) AS days_with_sales,
        -- Unrounded rate; the tier thresholds below are applied to this value
        SUM(dse.units_sold)::NUMERIC / NULLIF(COUNT(DISTINCT dse.sale_date), 0) AS daily_units_raw,
        AVG(dse.avg_price) AS avg_price
    FROM mv_daily_sales_estimates dse
    JOIN dispensaries d ON d.id = dse.dispensary_id
    WHERE dse.sale_date >= CURRENT_DATE - INTERVAL '30 days'
    GROUP BY dse.dispensary_id, dse.product_id, d.name, d.state
)
SELECT
    product_id,
    brand_name,
    category,
    dispensary_id,
    dispensary_name,
    state_code,
    total_units_30d,
    total_revenue_30d,
    days_with_sales,
    ROUND(daily_units_raw, 2) AS avg_daily_units,
    avg_price,
    CASE
        WHEN daily_units_raw >= 5 THEN 'hot'
        WHEN daily_units_raw >= 1 THEN 'steady'
        WHEN daily_units_raw >= 0.1 THEN 'slow'
        ELSE 'stale'
    END AS velocity_tier,
    NOW() AS calculated_at
FROM per_sku;

CREATE UNIQUE INDEX IF NOT EXISTS idx_mv_sku_velocity_pk
    ON mv_sku_velocity(dispensary_id, product_id);
CREATE INDEX IF NOT EXISTS idx_mv_sku_velocity_brand
    ON mv_sku_velocity(brand_name);
CREATE INDEX IF NOT EXISTS idx_mv_sku_velocity_tier
    ON mv_sku_velocity(velocity_tier);
CREATE INDEX IF NOT EXISTS idx_mv_sku_velocity_state
    ON mv_sku_velocity(state_code);
CREATE INDEX IF NOT EXISTS idx_mv_sku_velocity_units
    ON mv_sku_velocity(total_units_30d DESC);
-- ============================================================
-- VIEW 4: Store Performance Rankings
-- One row per crawl-enabled store: 30-day revenue/unit estimates joined with
-- catalogue size, brand/category diversity and average price.
--
-- NOTE(review): reads store_products.brand / .category, while
-- mv_product_intelligence in this migration reads brand_name_raw /
-- category_raw. Confirm both sets of columns exist on store_products.
-- ============================================================
CREATE MATERIALIZED VIEW IF NOT EXISTS mv_store_performance AS
WITH sales_30d AS (
    -- Per-store totals taken from the daily sales estimates
    SELECT
        dispensary_id,
        SUM(revenue_estimate) AS total_revenue_30d,
        SUM(units_sold) AS total_units_30d
    FROM mv_daily_sales_estimates
    WHERE sale_date >= CURRENT_DATE - INTERVAL '30 days'
    GROUP BY dispensary_id
)
SELECT
    d.id AS dispensary_id,
    d.name AS dispensary_name,
    d.city,
    d.state AS state_code,
    -- Revenue metrics (0 when the store has no sales estimates)
    COALESCE(s.total_revenue_30d, 0) AS total_revenue_30d,
    COALESCE(s.total_units_30d, 0) AS total_units_30d,
    -- Inventory metrics
    COUNT(DISTINCT sp.id) AS total_skus,
    COUNT(DISTINCT sp.id) FILTER (WHERE sp.is_in_stock) AS in_stock_skus,
    -- Brand diversity
    COUNT(DISTINCT sp.brand) AS unique_brands,
    COUNT(DISTINCT sp.category) AS unique_categories,
    -- Pricing
    AVG(sp.price_rec) AS avg_price,
    -- Activity
    MAX(sp.updated_at) AS last_updated,
    NOW() AS calculated_at
FROM dispensaries d
LEFT JOIN store_products sp
       ON sp.dispensary_id = d.id
LEFT JOIN sales_30d s
       ON s.dispensary_id = d.id
WHERE d.crawl_enabled = TRUE
GROUP BY
    d.id,
    d.name,
    d.city,
    d.state,
    s.total_revenue_30d,
    s.total_units_30d;

CREATE UNIQUE INDEX IF NOT EXISTS idx_mv_store_perf_pk
    ON mv_store_performance (dispensary_id);
CREATE INDEX IF NOT EXISTS idx_mv_store_perf_state
    ON mv_store_performance (state_code);
CREATE INDEX IF NOT EXISTS idx_mv_store_perf_revenue
    ON mv_store_performance (total_revenue_30d DESC);
-- ============================================================
-- VIEW 5: Weekly Category Trends
-- Per category, state and week: SKU/store counts, units, revenue, average price.
--
-- NOTE(review): the filter below asks for 90 days, but the source view
-- mv_daily_sales_estimates is built from only the last 30 days of snapshots,
-- so at most ~30 days of weeks can appear here.
-- ============================================================
CREATE MATERIALIZED VIEW IF NOT EXISTS mv_category_weekly_trends AS
SELECT
    sales.category,
    store.state AS state_code,
    DATE_TRUNC('week', sales.sale_date)::DATE AS week_start,
    COUNT(DISTINCT sales.product_id) AS sku_count,
    COUNT(DISTINCT sales.dispensary_id) AS store_count,
    SUM(sales.units_sold) AS total_units,
    SUM(sales.revenue_estimate) AS total_revenue,
    AVG(sales.avg_price) AS avg_price,
    NOW() AS calculated_at
FROM mv_daily_sales_estimates sales
JOIN dispensaries store
  ON store.id = sales.dispensary_id
WHERE sales.sale_date >= CURRENT_DATE - INTERVAL '90 days'
  AND sales.category IS NOT NULL
GROUP BY
    sales.category,
    store.state,
    DATE_TRUNC('week', sales.sale_date);

CREATE UNIQUE INDEX IF NOT EXISTS idx_mv_cat_weekly_pk
    ON mv_category_weekly_trends (category, state_code, week_start);
CREATE INDEX IF NOT EXISTS idx_mv_cat_weekly_state
    ON mv_category_weekly_trends (state_code, week_start);
CREATE INDEX IF NOT EXISTS idx_mv_cat_weekly_date
    ON mv_category_weekly_trends (week_start DESC);
-- ============================================================
-- VIEW 6: Product Intelligence (Hoodie-style per-product metrics)
-- One row per store product of a crawl-enabled store, enriched with:
--   stock_diff_120       newest minus oldest snapshot quantity in the last 120 days
--   days_since_oos       whole days since the latest 'oos' visibility event
--   days_until_stock_out current stock divided by the average daily units sold
-- ============================================================
CREATE MATERIALIZED VIEW IF NOT EXISTS mv_product_intelligence AS
WITH
-- Oldest and newest snapshot quantity per product within the 120-day window.
-- The frame spans the whole partition, so every row of a product carries the
-- same pair of values.
snapshot_bounds AS (
    SELECT
        dispensary_id,
        product_id,
        FIRST_VALUE(quantity_available) OVER per_product AS qty_oldest,
        LAST_VALUE(quantity_available) OVER per_product AS qty_newest
    FROM inventory_snapshots
    WHERE captured_at >= NOW() - INTERVAL '120 days'
    WINDOW per_product AS (
        PARTITION BY dispensary_id, product_id
        ORDER BY captured_at ASC
        ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
    )
),
-- Collapse the repeated per-row values to a single row per product
stock_change AS (
    SELECT DISTINCT
        dispensary_id,
        product_id,
        qty_newest - COALESCE(qty_oldest, qty_newest) AS stock_diff_120
    FROM snapshot_bounds
),
-- Most recent out-of-stock event per product
latest_oos AS (
    SELECT
        dispensary_id,
        product_id,
        MAX(detected_at) AS last_oos_date
    FROM product_visibility_events
    WHERE event_type = 'oos'
    GROUP BY dispensary_id, product_id
)
SELECT
    sp.dispensary_id,
    d.name AS dispensary_name,
    d.state AS state_code,
    d.city,
    sp.provider_product_id AS sku,
    sp.name_raw AS product_name,
    sp.brand_name_raw AS brand,
    sp.category_raw AS category,
    sp.is_in_stock,
    sp.stock_status,
    sp.stock_quantity,
    sp.price_rec AS price,
    sp.first_seen_at AS first_seen,
    sp.last_seen_at AS last_seen,
    -- Calculated fields
    COALESCE(sc.stock_diff_120, 0) AS stock_diff_120,
    -- Evaluates to NULL when the product has no 'oos' event
    EXTRACT(DAY FROM NOW() - oos.last_oos_date)::INT AS days_since_oos,
    -- Current stock / daily burn rate; NULL unless both are positive
    CASE
        WHEN vel.avg_daily_units > 0 AND sp.stock_quantity > 0
        THEN ROUND(sp.stock_quantity::NUMERIC / vel.avg_daily_units)::INT
    END AS days_until_stock_out,
    vel.avg_daily_units,
    NOW() AS calculated_at
FROM store_products sp
JOIN dispensaries d
  ON d.id = sp.dispensary_id
LEFT JOIN stock_change sc
       ON sc.dispensary_id = sp.dispensary_id
      AND sc.product_id = sp.provider_product_id
LEFT JOIN latest_oos oos
       ON oos.dispensary_id = sp.dispensary_id
      AND oos.product_id = sp.provider_product_id
-- Average daily units sold comes straight from the velocity view
LEFT JOIN mv_sku_velocity vel
       ON vel.dispensary_id = sp.dispensary_id
      AND vel.product_id = sp.provider_product_id
WHERE d.crawl_enabled = TRUE;

CREATE UNIQUE INDEX IF NOT EXISTS idx_mv_prod_intel_pk
    ON mv_product_intelligence (dispensary_id, sku);
CREATE INDEX IF NOT EXISTS idx_mv_prod_intel_brand
    ON mv_product_intelligence (brand);
CREATE INDEX IF NOT EXISTS idx_mv_prod_intel_state
    ON mv_product_intelligence (state_code);
CREATE INDEX IF NOT EXISTS idx_mv_prod_intel_stock_out
    ON mv_product_intelligence (days_until_stock_out ASC NULLS LAST);
CREATE INDEX IF NOT EXISTS idx_mv_prod_intel_oos
    ON mv_product_intelligence (days_since_oos DESC NULLS LAST);
-- ============================================================
-- REFRESH FUNCTION
-- ============================================================
-- Refreshes every sales analytics materialized view concurrently and returns
-- one (view_name, rows_affected) row per view, where rows_affected is the
-- view's row count after its refresh.
--
-- The list order is the refresh order and respects the dependencies:
--   mv_daily_sales_estimates is the base view,
--   mv_sku_velocity, mv_store_performance and mv_category_weekly_trends read it,
--   mv_product_intelligence reads mv_sku_velocity and is therefore last.
CREATE OR REPLACE FUNCTION refresh_sales_analytics_views()
RETURNS TABLE(view_name TEXT, rows_affected BIGINT) AS $$
DECLARE
    mv TEXT;
BEGIN
    FOREACH mv IN ARRAY ARRAY[
        'mv_daily_sales_estimates',
        'mv_brand_market_share',
        'mv_sku_velocity',
        'mv_store_performance',
        'mv_category_weekly_trends',
        'mv_product_intelligence'
    ]
    LOOP
        EXECUTE format('REFRESH MATERIALIZED VIEW CONCURRENTLY %I', mv);
        EXECUTE format('SELECT COUNT(*) FROM %I', mv) INTO rows_affected;
        view_name := mv;
        RETURN NEXT;
    END LOOP;
END;
$$ LANGUAGE plpgsql;

COMMENT ON FUNCTION refresh_sales_analytics_views IS
'Refresh all sales analytics materialized views. Call hourly via scheduler.';
-- ============================================================
-- INITIAL REFRESH (populate views)
-- ============================================================
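-- Note: CREATE MATERIALIZED VIEW (without WITH NO DATA) populates each view when it is created.
-- REFRESH ... CONCURRENTLY only works on a view that is already populated, so if a view is ever
-- left unpopulated, refresh it non-concurrently first. Run these manually, in this order, if needed:
-- REFRESH MATERIALIZED VIEW mv_daily_sales_estimates;
-- REFRESH MATERIALIZED VIEW mv_brand_market_share;
-- REFRESH MATERIALIZED VIEW mv_sku_velocity;
-- REFRESH MATERIALIZED VIEW mv_store_performance;
-- REFRESH MATERIALIZED VIEW mv_category_weekly_trends;
-- REFRESH MATERIALIZED VIEW mv_product_intelligence;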

View File

@@ -1,359 +0,0 @@
-- Migration 122: Market Intelligence Schema
-- Dedicated schema that holds product, brand and dispensary data ingested
-- from third-party sources, kept apart from the crawler's own tables.
CREATE SCHEMA IF NOT EXISTS market_intel;

-- ============================================================
-- BRANDS: Brand/Company Intelligence
-- ============================================================
CREATE TABLE IF NOT EXISTS market_intel.brands (
    id                SERIAL PRIMARY KEY,

    -- Identity
    brand_name        VARCHAR(255) NOT NULL,
    parent_brand      VARCHAR(255),
    parent_company    VARCHAR(255),
    slug              VARCHAR(255),
    external_id       VARCHAR(255) UNIQUE,        -- objectID from source

    -- Details
    brand_description TEXT,
    brand_logo_url    TEXT,
    brand_url         TEXT,
    linkedin_url      TEXT,

    -- Presence
    states            JSONB DEFAULT '[]',         -- Array of state names
    active_variants   INTEGER DEFAULT 0,
    all_variants      INTEGER DEFAULT 0,

    -- Metadata
    source            VARCHAR(50) DEFAULT 'external',
    fetched_at        TIMESTAMPTZ DEFAULT NOW(),
    created_at        TIMESTAMPTZ DEFAULT NOW(),
    updated_at        TIMESTAMPTZ DEFAULT NOW()
);

CREATE INDEX IF NOT EXISTS idx_brands_name
    ON market_intel.brands (brand_name);
CREATE INDEX IF NOT EXISTS idx_brands_parent
    ON market_intel.brands (parent_brand);
-- NOTE(review): the UNIQUE constraint on external_id already creates an index
-- on this column, so this one looks redundant; confirm before removing.
CREATE INDEX IF NOT EXISTS idx_brands_external
    ON market_intel.brands (external_id);
CREATE INDEX IF NOT EXISTS idx_brands_states
    ON market_intel.brands USING GIN (states);
-- ============================================================
-- DISPENSARIES: Dispensary/Store Intelligence
-- One row per externally sourced store: location, contact, licence,
-- sales estimates, customer demographics and menu statistics.
-- ============================================================
CREATE TABLE IF NOT EXISTS market_intel.dispensaries (
    id                      SERIAL PRIMARY KEY,

    -- Identity
    dispensary_name         VARCHAR(255) NOT NULL,
    dispensary_company_name VARCHAR(255),
    dispensary_company_id   VARCHAR(255),
    slug                    VARCHAR(255),
    external_id             VARCHAR(255) UNIQUE,   -- objectID from source

    -- Location
    street_address          VARCHAR(255),
    city                    VARCHAR(100),
    state                   VARCHAR(100),
    postal_code             VARCHAR(20),
    county_name             VARCHAR(100),
    country_code            VARCHAR(10) DEFAULT 'USA',
    full_address            TEXT,
    latitude                DECIMAL(10, 7),
    longitude               DECIMAL(10, 7),
    timezone                VARCHAR(50),
    urbanicity              VARCHAR(50),           -- Urban, Suburban, Rural

    -- Contact
    phone                   VARCHAR(50),
    email                   VARCHAR(255),
    website                 TEXT,
    linkedin_url            TEXT,

    -- License
    license_number          VARCHAR(100),
    license_type            VARCHAR(100),

    -- Store Type
    is_medical              BOOLEAN DEFAULT FALSE,
    is_recreational         BOOLEAN DEFAULT FALSE,
    delivery_enabled        BOOLEAN DEFAULT FALSE,
    curbside_pickup         BOOLEAN DEFAULT FALSE,
    instore_pickup          BOOLEAN DEFAULT FALSE,
    location_type           VARCHAR(50),           -- RETAIL, DELIVERY, etc.

    -- Sales Estimates
    estimated_daily_sales   DECIMAL(12, 2),
    estimated_sales         DECIMAL(12, 2),
    avg_daily_sales         DECIMAL(12, 2),
    state_sales_bucket      INTEGER,

    -- Customer Demographics
    affluency               JSONB DEFAULT '[]',    -- Array of affluency segments
    age_skew                JSONB DEFAULT '[]',    -- Array of age brackets
    customer_segments       JSONB DEFAULT '[]',    -- Array of segment names

    -- Inventory Stats
    menus_count             INTEGER DEFAULT 0,
    menus_count_med         INTEGER DEFAULT 0,
    menus_count_rec         INTEGER DEFAULT 0,
    parent_brands           JSONB DEFAULT '[]',
    brand_company_names     JSONB DEFAULT '[]',

    -- Business Info
    banner                  VARCHAR(255),          -- Chain/banner name
    business_type           VARCHAR(50),           -- MSO, Independent, etc.
    pos_system              VARCHAR(100),
    atm_presence            BOOLEAN DEFAULT FALSE,
    tax_included            BOOLEAN DEFAULT FALSE,

    -- Ratings
    rating                  DECIMAL(3, 2),
    reviews_count           INTEGER DEFAULT 0,

    -- Status
    is_closed               BOOLEAN DEFAULT FALSE,
    open_date               TIMESTAMPTZ,
    last_updated_at         TIMESTAMPTZ,

    -- Media
    logo_url                TEXT,
    cover_url               TEXT,

    -- Metadata
    source                  VARCHAR(50) DEFAULT 'external',
    fetched_at              TIMESTAMPTZ DEFAULT NOW(),
    created_at              TIMESTAMPTZ DEFAULT NOW(),
    updated_at              TIMESTAMPTZ DEFAULT NOW()
);

CREATE INDEX IF NOT EXISTS idx_dispensaries_name
    ON market_intel.dispensaries (dispensary_name);
CREATE INDEX IF NOT EXISTS idx_dispensaries_state
    ON market_intel.dispensaries (state);
CREATE INDEX IF NOT EXISTS idx_dispensaries_city
    ON market_intel.dispensaries (city);
-- NOTE(review): external_id is already indexed by its UNIQUE constraint.
CREATE INDEX IF NOT EXISTS idx_dispensaries_external
    ON market_intel.dispensaries (external_id);
CREATE INDEX IF NOT EXISTS idx_dispensaries_banner
    ON market_intel.dispensaries (banner);
CREATE INDEX IF NOT EXISTS idx_dispensaries_business_type
    ON market_intel.dispensaries (business_type);
CREATE INDEX IF NOT EXISTS idx_dispensaries_geo
    ON market_intel.dispensaries (latitude, longitude);
CREATE INDEX IF NOT EXISTS idx_dispensaries_segments
    ON market_intel.dispensaries USING GIN (customer_segments);
-- ============================================================
-- PRODUCTS: Product/SKU Intelligence
-- One row per externally sourced product, with category hierarchy,
-- cannabinoid content and denormalized context of the listing dispensary.
-- ============================================================
CREATE TABLE IF NOT EXISTS market_intel.products (
    id                     SERIAL PRIMARY KEY,

    -- Identity
    name                   VARCHAR(500) NOT NULL,
    brand                  VARCHAR(255),
    brand_id               VARCHAR(255),
    brand_company_name     VARCHAR(255),
    parent_brand           VARCHAR(255),
    external_id            VARCHAR(255) UNIQUE,    -- objectID from source
    cm_id                  VARCHAR(100),           -- Canonical menu ID

    -- Category Hierarchy
    category_0             VARCHAR(100),           -- Top level: Flower, Edibles, Vapes
    category_1             VARCHAR(255),           -- Mid level: Flower > Pre-Rolls
    category_2             VARCHAR(500),           -- Detailed: Flower > Pre-Rolls > Singles

    -- Cannabis Classification
    cannabis_type          VARCHAR(50),            -- SATIVA, INDICA, HYBRID
    strain                 VARCHAR(255),
    flavor                 VARCHAR(255),
    pack_size              VARCHAR(100),
    description            TEXT,

    -- Cannabinoids
    thc_mg                 DECIMAL(10, 2),
    cbd_mg                 DECIMAL(10, 2),
    percent_thc            DECIMAL(5, 2),
    percent_cbd            DECIMAL(5, 2),

    -- Dispensary Context (denormalized for query performance)
    master_dispensary_name VARCHAR(255),
    master_dispensary_id   VARCHAR(255),
    dispensary_count       INTEGER DEFAULT 0,      -- How many stores carry this
    d_state                VARCHAR(100),
    d_city                 VARCHAR(100),
    d_banner               VARCHAR(255),
    d_business_type        VARCHAR(50),
    d_medical              BOOLEAN,
    d_recreational         BOOLEAN,

    -- Customer Demographics (from dispensary)
    d_customer_segments    JSONB DEFAULT '[]',
    d_age_skew             JSONB DEFAULT '[]',
    d_affluency            JSONB DEFAULT '[]',
    d_urbanicity           VARCHAR(50),

    -- Stock Status
    in_stock               BOOLEAN DEFAULT TRUE,
    last_seen_at           DATE,
    last_seen_at_ts        BIGINT,

    -- Media
    img_url                TEXT,
    product_url            TEXT,
    menu_slug              VARCHAR(500),

    -- Geo
    latitude               DECIMAL(10, 7),
    longitude              DECIMAL(10, 7),

    -- Metadata
    source                 VARCHAR(50) DEFAULT 'external',
    fetched_at             TIMESTAMPTZ DEFAULT NOW(),
    created_at             TIMESTAMPTZ DEFAULT NOW(),
    updated_at             TIMESTAMPTZ DEFAULT NOW()
);

CREATE INDEX IF NOT EXISTS idx_products_name
    ON market_intel.products (name);
CREATE INDEX IF NOT EXISTS idx_products_brand
    ON market_intel.products (brand);
-- NOTE(review): external_id is already indexed by its UNIQUE constraint.
CREATE INDEX IF NOT EXISTS idx_products_external
    ON market_intel.products (external_id);
CREATE INDEX IF NOT EXISTS idx_products_category
    ON market_intel.products (category_0, category_1);
CREATE INDEX IF NOT EXISTS idx_products_cannabis_type
    ON market_intel.products (cannabis_type);
CREATE INDEX IF NOT EXISTS idx_products_strain
    ON market_intel.products (strain);
CREATE INDEX IF NOT EXISTS idx_products_state
    ON market_intel.products (d_state);
CREATE INDEX IF NOT EXISTS idx_products_in_stock
    ON market_intel.products (in_stock);
CREATE INDEX IF NOT EXISTS idx_products_dispensary_count
    ON market_intel.products (dispensary_count DESC);
CREATE INDEX IF NOT EXISTS idx_products_segments
    ON market_intel.products USING GIN (d_customer_segments);
-- ============================================================
-- PRODUCT_VARIANTS: Variant-Level Data (Pricing, Stock)
-- Child rows of market_intel.products; deleted together with their product.
-- ============================================================
CREATE TABLE IF NOT EXISTS market_intel.product_variants (
    id                   SERIAL PRIMARY KEY,
    -- NOTE(review): product_id is nullable, and UNIQUE(product_id, variant_id)
    -- does not deduplicate rows whose product_id is NULL; confirm this is intended.
    product_id           INTEGER REFERENCES market_intel.products(id) ON DELETE CASCADE,

    -- Identity
    variant_id           VARCHAR(255) NOT NULL,
    pos_sku              VARCHAR(255),
    pos_product_id       VARCHAR(255),
    pos_system           VARCHAR(100),

    -- Pricing
    actual_price         DECIMAL(10, 2),
    original_price       DECIMAL(10, 2),
    discounted_price     DECIMAL(10, 2),

    -- Presentation
    product_presentation VARCHAR(255),             -- "100.00 mg", "3.5g", etc.
    quantity             DECIMAL(10, 2),
    unit                 VARCHAR(50),              -- mg, g, oz, each

    -- Availability
    is_medical           BOOLEAN DEFAULT FALSE,
    is_recreational      BOOLEAN DEFAULT FALSE,
    is_active            BOOLEAN DEFAULT TRUE,

    -- Stock Intelligence
    stock_status         VARCHAR(50),              -- In Stock, Low Stock, Out of Stock
    stock_diff_120       DECIMAL(10, 2),           -- 120-day stock change
    days_since_oos       INTEGER,
    days_until_stock_out INTEGER,

    -- Timestamps
    first_seen_at_ts     BIGINT,
    first_seen_at        TIMESTAMPTZ,
    last_seen_at         DATE,

    -- Metadata
    fetched_at           TIMESTAMPTZ DEFAULT NOW(),
    created_at           TIMESTAMPTZ DEFAULT NOW(),
    updated_at           TIMESTAMPTZ DEFAULT NOW(),

    UNIQUE (product_id, variant_id)
);

CREATE INDEX IF NOT EXISTS idx_variants_product
    ON market_intel.product_variants (product_id);
CREATE INDEX IF NOT EXISTS idx_variants_sku
    ON market_intel.product_variants (pos_sku);
CREATE INDEX IF NOT EXISTS idx_variants_stock_status
    ON market_intel.product_variants (stock_status);
CREATE INDEX IF NOT EXISTS idx_variants_price
    ON market_intel.product_variants (actual_price);
CREATE INDEX IF NOT EXISTS idx_variants_days_out
    ON market_intel.product_variants (days_until_stock_out);
-- ============================================================
-- FETCH_LOG: Track data fetches
-- One row per fetch run: what was requested, how many records were
-- fetched/inserted/updated, how long it took and any error message.
-- ============================================================
CREATE TABLE IF NOT EXISTS market_intel.fetch_log (
    id               SERIAL PRIMARY KEY,
    fetch_type       VARCHAR(50) NOT NULL,         -- brands, dispensaries, products
    state_code       VARCHAR(10),
    query_params     JSONB,
    records_fetched  INTEGER DEFAULT 0,
    records_inserted INTEGER DEFAULT 0,
    records_updated  INTEGER DEFAULT 0,
    duration_ms      INTEGER,
    error_message    TEXT,
    started_at       TIMESTAMPTZ DEFAULT NOW(),
    completed_at     TIMESTAMPTZ
);

CREATE INDEX IF NOT EXISTS idx_fetch_log_type
    ON market_intel.fetch_log (fetch_type);
CREATE INDEX IF NOT EXISTS idx_fetch_log_state
    ON market_intel.fetch_log (state_code);
CREATE INDEX IF NOT EXISTS idx_fetch_log_started
    ON market_intel.fetch_log (started_at DESC);
-- ============================================================
-- HELPER VIEWS
-- ============================================================
-- Brand market presence summary.
-- state_count is the number of entries in the states JSON array. The column is
-- free-form JSONB, and jsonb_array_length() raises an error for any non-array
-- value (object, scalar, JSON null), which would make the whole view
-- unreadable. The length is therefore only taken when the value is an array;
-- otherwise state_count is NULL (as it already was for a SQL NULL).
CREATE OR REPLACE VIEW market_intel.v_brand_presence AS
SELECT
    b.brand_name,
    b.parent_company,
    b.active_variants,
    b.all_variants,
    CASE
        WHEN jsonb_typeof(b.states) = 'array' THEN jsonb_array_length(b.states)
    END AS state_count,
    b.states,
    b.fetched_at
FROM market_intel.brands b
ORDER BY b.active_variants DESC;
-- Dispensary sales rankings by state.
-- Ranks stores within each state by estimated daily sales (stores without an
-- estimate rank last). is_closed is nullable, so the filter uses IS NOT TRUE:
-- a store whose closed flag is unknown (NULL) stays in the ranking instead of
-- being silently dropped, which is what "= FALSE" would do.
CREATE OR REPLACE VIEW market_intel.v_dispensary_rankings AS
SELECT
    d.dispensary_name,
    d.city,
    d.state,
    d.banner,
    d.business_type,
    d.estimated_daily_sales,
    d.menus_count,
    d.is_medical,
    d.is_recreational,
    d.customer_segments,
    RANK() OVER (
        PARTITION BY d.state
        ORDER BY d.estimated_daily_sales DESC NULLS LAST
    ) AS state_rank
FROM market_intel.dispensaries d
WHERE d.is_closed IS NOT TRUE;
-- Product distribution by brand and state.
-- Aggregates products per (brand, state, top-level category): total and
-- in-stock counts, average dispensary_count, and distinct listing stores.
CREATE OR REPLACE VIEW market_intel.v_product_distribution AS
SELECT
    prod.brand,
    prod.d_state    AS state,
    prod.category_0 AS category,
    COUNT(*)                                   AS product_count,
    COUNT(*) FILTER (WHERE prod.in_stock)      AS in_stock_count,
    AVG(prod.dispensary_count)                 AS avg_store_count,
    COUNT(DISTINCT prod.master_dispensary_id)  AS unique_stores
FROM market_intel.products prod
GROUP BY
    prod.brand,
    prod.d_state,
    prod.category_0;
-- ============================================================
-- COMMENTS
-- Catalog descriptions for the market_intel schema and each of its tables.
-- ============================================================
COMMENT ON SCHEMA market_intel IS 'Market intelligence data from external sources';
COMMENT ON TABLE market_intel.brands IS 'Brand/company data with multi-state presence';
COMMENT ON TABLE market_intel.dispensaries IS 'Dispensary data with sales estimates and demographics';
COMMENT ON TABLE market_intel.products IS 'Product/SKU data with cannabinoid and category info';
COMMENT ON TABLE market_intel.product_variants IS 'Variant-level pricing and stock data';
COMMENT ON TABLE market_intel.fetch_log IS 'Log of data fetches for monitoring';

View File

@@ -1,159 +0,0 @@
-- Migration 123: Extract unmapped fields from provider_data
-- These fields exist in our crawl payloads but weren't being stored in columns
-- ============================================================
-- ADD NEW COLUMNS TO store_products
-- ============================================================
-- New store_products columns, added with a single ALTER TABLE.
ALTER TABLE store_products
    -- Cannabis classification (SATIVA, INDICA, HYBRID, CBD)
    ADD COLUMN IF NOT EXISTS cannabis_type VARCHAR(50),
    -- Canonical IDs from POS systems
    ADD COLUMN IF NOT EXISTS canonical_strain_id VARCHAR(100),
    ADD COLUMN IF NOT EXISTS canonical_vendor_id VARCHAR(100),
    ADD COLUMN IF NOT EXISTS canonical_brand_id VARCHAR(100),
    ADD COLUMN IF NOT EXISTS canonical_category_id VARCHAR(100),
    -- Lab results
    ADD COLUMN IF NOT EXISTS lab_result_url TEXT,
    -- Flavors (extracted from JSONB to text array for easier querying)
    ADD COLUMN IF NOT EXISTS flavors_list TEXT[];
-- ============================================================
-- BACKFILL FROM provider_data
-- ============================================================
-- Fill cannabis_type from provider_data.classification for rows that do not
-- have one yet. Known codes are normalized; any other value is copied as-is.
UPDATE store_products AS sp
SET cannabis_type = CASE sp.provider_data->>'classification'
        WHEN 'HYBRID' THEN 'HYBRID'
        WHEN 'H'      THEN 'HYBRID'
        WHEN 'INDICA' THEN 'INDICA'
        WHEN 'I'      THEN 'INDICA'
        WHEN 'SATIVA' THEN 'SATIVA'
        WHEN 'S'      THEN 'SATIVA'
        WHEN 'I/S'    THEN 'INDICA_DOMINANT'
        WHEN 'S/I'    THEN 'SATIVA_DOMINANT'
        WHEN 'CBD'    THEN 'CBD'
        ELSE sp.provider_data->>'classification'
    END
WHERE sp.cannabis_type IS NULL
  AND sp.provider_data->>'classification' IS NOT NULL;
-- Second pass: rows still without cannabis_type fall back to pattern-matching
-- strain_type. The hybrid combinations are tested before the plain
-- indica/sativa/hybrid patterns; unmatched values leave the column NULL.
UPDATE store_products AS sp
SET cannabis_type = CASE
        WHEN sp.strain_type ILIKE '%indica%hybrid%'
          OR sp.strain_type ILIKE '%hybrid%indica%' THEN 'INDICA_DOMINANT'
        WHEN sp.strain_type ILIKE '%sativa%hybrid%'
          OR sp.strain_type ILIKE '%hybrid%sativa%' THEN 'SATIVA_DOMINANT'
        WHEN sp.strain_type ILIKE '%indica%' THEN 'INDICA'
        WHEN sp.strain_type ILIKE '%sativa%' THEN 'SATIVA'
        WHEN sp.strain_type ILIKE '%hybrid%' THEN 'HYBRID'
        WHEN sp.strain_type ILIKE '%cbd%'    THEN 'CBD'
    END
WHERE sp.cannabis_type IS NULL
  AND sp.strain_type IS NOT NULL;
-- Copy the four canonical IDs out of provider_data.POSMetaData.
-- NOTE(review): the guard only looks at canonical_strain_id, so a row whose
-- POSMetaData has no strain id is rewritten on every run, and a row that
-- already has a strain id never gets its other three IDs filled.
UPDATE store_products AS sp
SET (
        canonical_strain_id,
        canonical_vendor_id,
        canonical_brand_id,
        canonical_category_id
    ) = (
        sp.provider_data->'POSMetaData'->>'canonicalStrainId',
        sp.provider_data->'POSMetaData'->>'canonicalVendorId',
        sp.provider_data->'POSMetaData'->>'canonicalBrandId',
        sp.provider_data->'POSMetaData'->>'canonicalCategoryId'
    )
WHERE sp.canonical_strain_id IS NULL
  AND sp.provider_data->'POSMetaData' IS NOT NULL;
-- Fill lab_result_url from POSMetaData.canonicalLabResultUrl where the
-- payload has one and the column is still empty.
UPDATE store_products AS sp
SET lab_result_url = sp.provider_data->'POSMetaData'->>'canonicalLabResultUrl'
WHERE sp.lab_result_url IS NULL
  AND sp.provider_data->'POSMetaData'->>'canonicalLabResultUrl' IS NOT NULL;
-- ============================================================
-- INDEXES
-- ============================================================
-- Lookup indexes for the new classification and canonical-ID columns.
CREATE INDEX IF NOT EXISTS idx_store_products_cannabis_type
    ON store_products (cannabis_type);
CREATE INDEX IF NOT EXISTS idx_store_products_vendor_id
    ON store_products (canonical_vendor_id);
CREATE INDEX IF NOT EXISTS idx_store_products_strain_id
    ON store_products (canonical_strain_id);
-- ============================================================
-- ADD MSO FLAG TO DISPENSARIES
-- ============================================================
-- Multi-State Operator flag, derived from a chain having stores in more
-- than one state.
ALTER TABLE dispensaries
    ADD COLUMN IF NOT EXISTS is_mso BOOLEAN DEFAULT FALSE;

-- Flag every dispensary whose chain appears in at least two distinct states.
UPDATE dispensaries AS disp
SET is_mso = TRUE
WHERE disp.chain_id IN (
    SELECT member.chain_id
    FROM dispensaries AS member
    WHERE member.chain_id IS NOT NULL
    GROUP BY member.chain_id
    HAVING COUNT(DISTINCT member.state) > 1
);

-- Partial index: only the flagged rows are indexed.
CREATE INDEX IF NOT EXISTS idx_dispensaries_is_mso
    ON dispensaries (is_mso)
    WHERE is_mso = TRUE;
-- ============================================================
-- PRODUCT DISTRIBUTION VIEW
-- ============================================================
-- Distribution per (brand, canonical product name): in-stock products carried
-- by more than one store, with store/state reach and recreational price
-- statistics. Widest distribution first.
CREATE OR REPLACE VIEW v_product_distribution AS
SELECT
    item.brand_name_raw                 AS brand,
    item.c_name                         AS product_canonical_name,
    COUNT(DISTINCT item.dispensary_id)  AS store_count,
    COUNT(DISTINCT store.state)         AS state_count,
    ARRAY_AGG(DISTINCT store.state)     AS states,
    AVG(item.price_rec)                 AS avg_price,
    MIN(item.price_rec)                 AS min_price,
    MAX(item.price_rec)                 AS max_price
FROM store_products AS item
JOIN dispensaries AS store
  ON store.id = item.dispensary_id
WHERE item.is_in_stock
  AND item.brand_name_raw IS NOT NULL
  AND item.c_name IS NOT NULL
GROUP BY
    item.brand_name_raw,
    item.c_name
HAVING COUNT(DISTINCT item.dispensary_id) > 1
ORDER BY store_count DESC;
-- ============================================================
-- MSO SUMMARY VIEW
-- ============================================================
-- One row per chain that has dispensaries in more than one state: store and
-- state counts, the sorted list of states, and the summed product_count.
CREATE OR REPLACE VIEW v_mso_summary AS
WITH multi_state_chains AS (
    SELECT member.chain_id
    FROM dispensaries AS member
    WHERE member.chain_id IS NOT NULL
    GROUP BY member.chain_id
    HAVING COUNT(DISTINCT member.state) > 1
)
SELECT
    chain.name                                          AS chain_name,
    COUNT(DISTINCT store.id)                            AS store_count,
    COUNT(DISTINCT store.state)                         AS state_count,
    ARRAY_AGG(DISTINCT store.state ORDER BY store.state) AS states,
    SUM(store.product_count)                            AS total_products,
    TRUE                                                AS is_mso
FROM dispensaries AS store
JOIN chains AS chain
  ON chain.id = store.chain_id
WHERE store.chain_id IN (SELECT chain_id FROM multi_state_chains)
GROUP BY
    chain.id,
    chain.name
ORDER BY
    state_count DESC,
    store_count DESC;
-- ============================================================
-- COMMENTS
-- ============================================================
-- Catalog descriptions for the columns and views introduced above.
COMMENT ON COLUMN store_products.cannabis_type
    IS 'Normalized cannabis classification: SATIVA, INDICA, HYBRID, INDICA_DOMINANT, SATIVA_DOMINANT, CBD';
COMMENT ON COLUMN store_products.canonical_strain_id
    IS 'POS system strain identifier for cross-store matching';
COMMENT ON COLUMN store_products.canonical_vendor_id
    IS 'POS system vendor/supplier identifier';
COMMENT ON COLUMN store_products.lab_result_url
    IS 'Link to Certificate of Analysis / lab test results';
COMMENT ON COLUMN dispensaries.is_mso
    IS 'Multi-State Operator: chain operates in 2+ states';
COMMENT ON VIEW v_product_distribution
    IS 'Shows how many stores carry each product for distribution analysis';
COMMENT ON VIEW v_mso_summary
    IS 'Summary of multi-state operator chains';

View File

@@ -1,73 +0,0 @@
-- Migration 124: Convert inventory_snapshots to TimescaleDB hypertable
-- Requires: CREATE EXTENSION timescaledb; (run after installing TimescaleDB)
-- ============================================================
-- STEP 1: Enable TimescaleDB extension
-- ============================================================
CREATE EXTENSION IF NOT EXISTS timescaledb;
-- ============================================================
-- STEP 2: Convert to hypertable
-- ============================================================
-- Per the migration author's note, the table needs a time column and no
-- foreign key constraints, so the dispensary FK (if present) is removed first.
ALTER TABLE inventory_snapshots
    DROP CONSTRAINT IF EXISTS inventory_snapshots_dispensary_id_fkey;
-- Partition on captured_at in one-day chunks. Existing rows are migrated,
-- and the call is skipped if the table is already a hypertable.
SELECT create_hypertable(
    'inventory_snapshots', 'captured_at',
    chunk_time_interval => INTERVAL '1 day',
    if_not_exists       => TRUE,
    migrate_data        => TRUE
);
-- ============================================================
-- STEP 3: Enable compression
-- ============================================================
-- Enable compression: segment by dispensary_id and product_id (the common
-- query filters) and order rows inside each segment newest-first.
ALTER TABLE inventory_snapshots SET (
    timescaledb.compress,
    timescaledb.compress_segmentby = 'dispensary_id, product_id',
    timescaledb.compress_orderby = 'captured_at DESC'
);
-- ============================================================
-- STEP 4: Compression policy (compress chunks older than 1 day)
-- ============================================================
-- if_not_exists keeps this step re-runnable, matching the IF NOT EXISTS /
-- if_not_exists guards used by the other steps of this migration. Without it
-- a second run fails because the policy is already registered.
SELECT add_compression_policy(
    'inventory_snapshots',
    INTERVAL '1 day',
    if_not_exists => TRUE
);
-- ============================================================
-- STEP 5: Retention policy (optional - drop chunks older than 90 days)
-- ============================================================
-- Uncomment if you want automatic cleanup:
-- SELECT add_retention_policy('inventory_snapshots', INTERVAL '90 days');
-- ============================================================
-- STEP 6: Optimize indexes for time-series queries
-- ============================================================
-- The migration author notes that TimescaleDB creates the time-based index
-- itself; this adds the composite index for per-store, per-product lookups
-- ordered newest first.
CREATE INDEX IF NOT EXISTS idx_snapshots_disp_prod_time
    ON inventory_snapshots (
        dispensary_id,
        product_id,
        captured_at DESC
    );
-- ============================================================
-- VERIFICATION QUERIES (run after migration)
-- ============================================================
-- Check hypertable status:
-- SELECT * FROM timescaledb_information.hypertables WHERE hypertable_name = 'inventory_snapshots';
-- Check compression status:
-- SELECT * FROM timescaledb_information.compression_settings WHERE hypertable_name = 'inventory_snapshots';
-- Check chunk sizes:
-- SELECT chunk_name, pg_size_pretty(before_compression_total_bytes) as before,
-- pg_size_pretty(after_compression_total_bytes) as after,
-- round(100 - (after_compression_total_bytes::numeric / before_compression_total_bytes * 100), 1) as compression_pct
-- FROM chunk_compression_stats('inventory_snapshots');
-- ============================================================
-- COMMENTS
-- ============================================================
-- Catalog description for the converted table.
COMMENT ON TABLE inventory_snapshots
    IS 'TimescaleDB hypertable for inventory time-series data. Compressed after 1 day.';

View File

@@ -1,402 +0,0 @@
-- Migration 125: Delta-only inventory snapshots
-- Only store a row when something meaningful changes
-- Revenue calculated as: effective_price × qty_sold
-- ============================================================
-- ADD DELTA TRACKING COLUMNS
-- ============================================================
-- Values from the previous snapshot, kept so each row shows what changed.
ALTER TABLE inventory_snapshots
    ADD COLUMN IF NOT EXISTS prev_quantity INT;
ALTER TABLE inventory_snapshots
    ADD COLUMN IF NOT EXISTS prev_price_rec NUMERIC(10,2);
ALTER TABLE inventory_snapshots
    ADD COLUMN IF NOT EXISTS prev_price_med NUMERIC(10,2);
ALTER TABLE inventory_snapshots
    ADD COLUMN IF NOT EXISTS prev_status CHARACTER VARYING(50);
-- Computed differences against the previous snapshot.
-- qty_delta: negative = sold, positive = restocked
ALTER TABLE inventory_snapshots
    ADD COLUMN IF NOT EXISTS qty_delta INT;
ALTER TABLE inventory_snapshots
    ADD COLUMN IF NOT EXISTS price_delta NUMERIC(10,2);
-- Label for the kind of change:
-- 'sale', 'restock', 'price_change', 'oos', 'back_in_stock'
ALTER TABLE inventory_snapshots
    ADD COLUMN IF NOT EXISTS change_type CHARACTER VARYING(50);
-- ============================================================
-- INDEX FOR CHANGE TYPE QUERIES
-- ============================================================
CREATE INDEX IF NOT EXISTS idx_snapshots_change_type
    ON inventory_snapshots (change_type);
-- Partial index: rows with a zero (or NULL) delta are left out.
CREATE INDEX IF NOT EXISTS idx_snapshots_qty_delta
    ON inventory_snapshots (qty_delta)
    WHERE qty_delta <> 0;
-- ============================================================
-- VIEW: Latest product state (for delta comparison)
-- ============================================================
-- Most recent snapshot per (dispensary, product): DISTINCT ON keeps the first
-- row of each pair under the captured_at DESC ordering.
CREATE OR REPLACE VIEW v_product_latest_state AS
SELECT DISTINCT ON (snap.dispensary_id, snap.product_id)
    snap.dispensary_id,
    snap.product_id,
    snap.quantity_available,
    snap.price_rec,
    snap.price_med,
    snap.status,
    snap.captured_at
FROM inventory_snapshots AS snap
ORDER BY
    snap.dispensary_id,
    snap.product_id,
    snap.captured_at DESC;
-- ============================================================
-- FUNCTION: Check if product state changed
-- ============================================================
-- Decide whether an incoming product observation should be stored as a new
-- delta snapshot.
--
-- Compares the supplied quantity, prices and status with the latest row in
-- v_product_latest_state for (p_dispensary_id, p_product_id) and returns
-- exactly one row:
--   * no previous row     -> should_capture = TRUE, change_type 'new_product',
--                            all prev_*/delta columns NULL
--   * any field differs   -> should_capture = TRUE plus the previous values,
--                            the deltas (NULL treated as 0) and a change_type
--   * nothing differs     -> should_capture = FALSE, every other column NULL
CREATE OR REPLACE FUNCTION should_capture_snapshot(
    p_dispensary_id INTEGER,
    p_product_id TEXT,
    p_quantity INTEGER,
    p_price_rec DECIMAL,
    p_price_med DECIMAL,
    p_status VARCHAR
) RETURNS TABLE (
    should_capture BOOLEAN,
    prev_quantity INTEGER,
    prev_price_rec DECIMAL,
    prev_price_med DECIMAL,
    prev_status VARCHAR,
    qty_delta INTEGER,
    price_delta DECIMAL,
    change_type VARCHAR
) AS $$
DECLARE
    v_prev RECORD;
BEGIN
    -- Get previous state
    SELECT
        ls.quantity_available,
        ls.price_rec,
        ls.price_med,
        ls.status
    INTO v_prev
    FROM v_product_latest_state ls
    WHERE ls.dispensary_id = p_dispensary_id
      AND ls.product_id = p_product_id;

    -- First time seeing this product
    IF NOT FOUND THEN
        RETURN QUERY SELECT
            TRUE,
            NULL::INTEGER,
            NULL::DECIMAL,
            NULL::DECIMAL,
            NULL::VARCHAR,
            NULL::INTEGER,
            NULL::DECIMAL,
            'new_product'::VARCHAR;
        RETURN;
    END IF;

    -- Check for changes (IS DISTINCT FROM treats NULL as a comparable value)
    IF v_prev.quantity_available IS DISTINCT FROM p_quantity
       OR v_prev.price_rec IS DISTINCT FROM p_price_rec
       OR v_prev.price_med IS DISTINCT FROM p_price_med
       OR v_prev.status IS DISTINCT FROM p_status THEN
        RETURN QUERY SELECT
            TRUE,
            v_prev.quantity_available,
            v_prev.price_rec,
            v_prev.price_med,
            v_prev.status,
            COALESCE(p_quantity, 0) - COALESCE(v_prev.quantity_available, 0),
            COALESCE(p_price_rec, 0) - COALESCE(v_prev.price_rec, 0),
            -- NOTE(review): as ordered, 'oos' and 'back_in_stock' can never be
            -- returned: a drop to zero already matches 'sale' and a rise from
            -- zero already matches 'restock'. Moving them up would relabel
            -- sell-out events away from 'sale', which the sales views filter
            -- on, so the order is left unchanged here pending a decision.
            (CASE
                WHEN COALESCE(p_quantity, 0) < COALESCE(v_prev.quantity_available, 0) THEN 'sale'
                WHEN COALESCE(p_quantity, 0) > COALESCE(v_prev.quantity_available, 0) THEN 'restock'
                WHEN p_quantity = 0 AND v_prev.quantity_available > 0 THEN 'oos'
                WHEN p_quantity > 0 AND v_prev.quantity_available = 0 THEN 'back_in_stock'
                WHEN p_price_rec IS DISTINCT FROM v_prev.price_rec THEN 'price_change'
                ELSE 'status_change'
            -- A CASE over string literals is typed text; RETURN QUERY does not
            -- coerce it to the declared VARCHAR column, so cast explicitly as
            -- the other two branches do.
            END)::VARCHAR;
        RETURN;
    END IF;

    -- No change
    RETURN QUERY SELECT
        FALSE,
        NULL::INTEGER,
        NULL::DECIMAL,
        NULL::DECIMAL,
        NULL::VARCHAR,
        NULL::INTEGER,
        NULL::DECIMAL,
        NULL::VARCHAR;
END;
$$ LANGUAGE plpgsql;
-- ============================================================
-- REVENUE CALCULATION COLUMNS
-- ============================================================
-- Effective price per market: the sale price when on special, otherwise the
-- regular price.
ALTER TABLE inventory_snapshots
    ADD COLUMN IF NOT EXISTS effective_price_rec NUMERIC(10,2);
ALTER TABLE inventory_snapshots
    ADD COLUMN IF NOT EXISTS effective_price_med NUMERIC(10,2);
ALTER TABLE inventory_snapshots
    ADD COLUMN IF NOT EXISTS is_on_special BOOLEAN DEFAULT FALSE;
-- Revenue split by market type.
ALTER TABLE inventory_snapshots
    ADD COLUMN IF NOT EXISTS revenue_rec NUMERIC(10,2);
ALTER TABLE inventory_snapshots
    ADD COLUMN IF NOT EXISTS revenue_med NUMERIC(10,2);
-- Gap since the previous snapshot, as an interval and in hours (for velocity).
ALTER TABLE inventory_snapshots
    ADD COLUMN IF NOT EXISTS time_since_last_snapshot INTERVAL;
ALTER TABLE inventory_snapshots
    ADD COLUMN IF NOT EXISTS hours_since_last NUMERIC(10,2);
-- ============================================================
-- VIEW: Hourly Sales Velocity
-- ============================================================
-- 'sale' snapshots bucketed per dispensary, calendar date and hour of day.
-- NOTE(review): revenue_estimate is not added by this migration; it is
-- presumably defined by an earlier one.
CREATE OR REPLACE VIEW v_hourly_sales AS
SELECT
    snap.dispensary_id,
    DATE(snap.captured_at)                                        AS sale_date,
    EXTRACT(HOUR FROM snap.captured_at)                           AS sale_hour,
    COUNT(*) FILTER (WHERE snap.qty_delta < 0)                    AS transactions,
    SUM(ABS(snap.qty_delta)) FILTER (WHERE snap.qty_delta < 0)    AS units_sold,
    SUM(snap.revenue_estimate) FILTER (WHERE snap.qty_delta < 0)  AS revenue,
    COUNT(DISTINCT snap.product_id) FILTER (WHERE snap.qty_delta < 0) AS unique_products_sold
FROM inventory_snapshots AS snap
WHERE snap.change_type = 'sale'
GROUP BY
    snap.dispensary_id,
    DATE(snap.captured_at),
    EXTRACT(HOUR FROM snap.captured_at);
-- ============================================================
-- VIEW: Daily Sales by Store
-- ============================================================
-- Per-store daily totals over 'sale' snapshots: units, revenue, number of
-- sale events and distinct products.
CREATE OR REPLACE VIEW v_daily_store_sales AS
SELECT
    snap.dispensary_id,
    store.name                       AS store_name,
    store.state,
    DATE(snap.captured_at)           AS sale_date,
    SUM(ABS(snap.qty_delta))         AS units_sold,
    SUM(snap.revenue_estimate)       AS revenue,
    COUNT(*)                         AS sale_events,
    COUNT(DISTINCT snap.product_id)  AS unique_products
FROM inventory_snapshots AS snap
JOIN dispensaries AS store
  ON store.id = snap.dispensary_id
WHERE snap.change_type = 'sale'
GROUP BY
    snap.dispensary_id,
    store.name,
    store.state,
    DATE(snap.captured_at);
-- ============================================================
-- VIEW: Daily Sales by Brand
-- ============================================================
-- Per-brand, per-state daily totals over 'sale' snapshots that carry a
-- brand name.
CREATE OR REPLACE VIEW v_daily_brand_sales AS
SELECT
    snap.brand_name,
    store.state,
    DATE(snap.captured_at)              AS sale_date,
    SUM(ABS(snap.qty_delta))            AS units_sold,
    SUM(snap.revenue_estimate)          AS revenue,
    COUNT(DISTINCT snap.dispensary_id)  AS stores_with_sales,
    COUNT(DISTINCT snap.product_id)     AS unique_skus_sold
FROM inventory_snapshots AS snap
JOIN dispensaries AS store
  ON store.id = snap.dispensary_id
WHERE snap.brand_name IS NOT NULL
  AND snap.change_type = 'sale'
GROUP BY
    snap.brand_name,
    store.state,
    DATE(snap.captured_at);
-- ============================================================
-- VIEW: Product Velocity Rankings
-- ============================================================
-- 30-day sales velocity per product and store. The inner query totals the
-- 'sale' snapshots once; the outer query derives per-active-day averages and
-- a tier from those totals (>= 10 'hot', >= 3 'steady', >= 1 'slow',
-- otherwise 'stale').
CREATE OR REPLACE VIEW v_product_velocity AS
SELECT
    totals.product_id,
    totals.brand_name,
    totals.category,
    totals.dispensary_id,
    totals.store_name,
    totals.state,
    totals.units_sold_30d,
    totals.revenue_30d,
    totals.sale_events,
    ROUND(totals.units_sold_30d::NUMERIC / totals.active_days, 2) AS avg_daily_units,
    ROUND(totals.revenue_30d / totals.active_days, 2)             AS avg_daily_revenue,
    CASE
        WHEN totals.units_sold_30d / totals.active_days >= 10 THEN 'hot'
        WHEN totals.units_sold_30d / totals.active_days >= 3  THEN 'steady'
        WHEN totals.units_sold_30d / totals.active_days >= 1  THEN 'slow'
        ELSE 'stale'
    END AS velocity_tier
FROM (
    SELECT
        snap.product_id,
        snap.brand_name,
        snap.category,
        snap.dispensary_id,
        store.name                  AS store_name,
        store.state,
        SUM(ABS(snap.qty_delta))    AS units_sold_30d,
        SUM(snap.revenue_estimate)  AS revenue_30d,
        COUNT(*)                    AS sale_events,
        -- distinct calendar days with a sale; NULL instead of 0 so the
        -- divisions above can never divide by zero
        NULLIF(COUNT(DISTINCT DATE(snap.captured_at)), 0) AS active_days
    FROM inventory_snapshots AS snap
    JOIN dispensaries AS store
      ON store.id = snap.dispensary_id
    WHERE snap.change_type = 'sale'
      AND snap.captured_at >= NOW() - INTERVAL '30 days'
    GROUP BY
        snap.product_id,
        snap.brand_name,
        snap.category,
        snap.dispensary_id,
        store.name,
        store.state
) AS totals;
-- ============================================================
-- VIEW: Busiest Hours by Store
-- ============================================================
-- Collapses v_hourly_sales across dates: per store and hour of day, the
-- average and total units/revenue, the number of contributing rows, and the
-- hour's rank within the store by average revenue.
CREATE OR REPLACE VIEW v_busiest_hours AS
SELECT
    hourly.dispensary_id,
    hourly.sale_hour,
    AVG(hourly.units_sold)  AS avg_units_per_hour,
    AVG(hourly.revenue)     AS avg_revenue_per_hour,
    SUM(hourly.units_sold)  AS total_units,
    SUM(hourly.revenue)     AS total_revenue,
    COUNT(*)                AS days_with_data,
    RANK() OVER (
        PARTITION BY hourly.dispensary_id
        ORDER BY AVG(hourly.revenue) DESC
    ) AS hour_rank
FROM v_hourly_sales AS hourly
GROUP BY
    hourly.dispensary_id,
    hourly.sale_hour;
-- ============================================================
-- VIEW: Promotion Effectiveness (compare sale vs non-sale prices)
-- ============================================================
-- Daily units and revenue per store/product, split by whether the snapshot's
-- price_rec is below its prev_price_rec ("discount") or not ("full price";
-- a missing previous price counts as full price).
CREATE OR REPLACE VIEW v_promotion_effectiveness AS
SELECT
    snap.dispensary_id,
    store.name              AS store_name,
    snap.product_id,
    snap.brand_name,
    DATE(snap.captured_at)  AS sale_date,
    SUM(ABS(snap.qty_delta))
        FILTER (WHERE snap.price_rec < snap.prev_price_rec)
        AS units_on_discount,
    SUM(ABS(snap.qty_delta))
        FILTER (WHERE snap.price_rec >= COALESCE(snap.prev_price_rec, snap.price_rec))
        AS units_full_price,
    SUM(snap.revenue_estimate)
        FILTER (WHERE snap.price_rec < snap.prev_price_rec)
        AS revenue_discounted,
    SUM(snap.revenue_estimate)
        FILTER (WHERE snap.price_rec >= COALESCE(snap.prev_price_rec, snap.price_rec))
        AS revenue_full_price
FROM inventory_snapshots AS snap
JOIN dispensaries AS store
  ON store.id = snap.dispensary_id
WHERE snap.change_type = 'sale'
GROUP BY
    snap.dispensary_id,
    store.name,
    snap.product_id,
    snap.brand_name,
    DATE(snap.captured_at);
-- ============================================================
-- COMMENTS
-- ============================================================
-- Catalog descriptions for the delta columns, the capture function and the
-- sales views.
COMMENT ON COLUMN inventory_snapshots.qty_delta
    IS 'Quantity change: negative=sold, positive=restocked';
COMMENT ON COLUMN inventory_snapshots.revenue_estimate
    IS 'Estimated revenue: ABS(qty_delta) * price_rec when qty_delta < 0';
COMMENT ON COLUMN inventory_snapshots.change_type
    IS 'Type of change: sale, restock, price_change, oos, back_in_stock, new_product';
COMMENT ON FUNCTION should_capture_snapshot
    IS 'Returns whether a snapshot should be captured and delta values';
COMMENT ON VIEW v_hourly_sales
    IS 'Sales aggregated by hour - find busiest times';
COMMENT ON VIEW v_daily_store_sales
    IS 'Daily revenue by store';
COMMENT ON VIEW v_daily_brand_sales
    IS 'Daily brand performance by state';
COMMENT ON VIEW v_product_velocity
    IS 'Product sales velocity rankings (hot/steady/slow/stale)';
COMMENT ON VIEW v_busiest_hours
    IS 'Rank hours by sales volume per store';
-- ============================================================
-- VIEW: Days Until Stock Out (Predictive)
-- ============================================================
-- Projects days of stock remaining for products that are in stock and had at
-- least one sale in the last 7 days: latest quantity divided by the average
-- units sold per sale-day. stock_health buckets the projection at 3, 7 and
-- 14 days.
CREATE OR REPLACE VIEW v_stock_out_prediction AS
WITH sales_rate AS (
    SELECT
        snap.dispensary_id,
        snap.product_id,
        snap.brand_name,
        -- Average units sold per day (last 7 days)
        ROUND(
            SUM(ABS(snap.qty_delta))::NUMERIC
                / NULLIF(COUNT(DISTINCT DATE(snap.captured_at)), 0),
            2
        ) AS daily_velocity,
        -- Hours between sales
        AVG(snap.hours_since_last) FILTER (WHERE snap.qty_delta < 0) AS avg_hours_between_sales
    FROM inventory_snapshots AS snap
    WHERE snap.change_type = 'sale'
      AND snap.captured_at >= NOW() - INTERVAL '7 days'
    GROUP BY
        snap.dispensary_id,
        snap.product_id,
        snap.brand_name
),
latest_qty AS (
    -- newest snapshot per (dispensary, product)
    SELECT DISTINCT ON (snap.dispensary_id, snap.product_id)
        snap.dispensary_id,
        snap.product_id,
        snap.quantity_available AS current_qty,
        snap.captured_at        AS last_seen
    FROM inventory_snapshots AS snap
    ORDER BY
        snap.dispensary_id,
        snap.product_id,
        snap.captured_at DESC
)
SELECT
    stock.dispensary_id,
    store.name AS store_name,
    stock.product_id,
    rate.brand_name,
    stock.current_qty,
    rate.daily_velocity,
    CASE
        WHEN rate.daily_velocity > 0
            THEN ROUND(stock.current_qty / rate.daily_velocity, 1)
        ELSE NULL
    END AS days_until_stock_out,
    CASE
        WHEN rate.daily_velocity > 0
         AND stock.current_qty / rate.daily_velocity <= 3  THEN 'critical'
        WHEN rate.daily_velocity > 0
         AND stock.current_qty / rate.daily_velocity <= 7  THEN 'low'
        WHEN rate.daily_velocity > 0
         AND stock.current_qty / rate.daily_velocity <= 14 THEN 'moderate'
        ELSE 'healthy'
    END AS stock_health,
    stock.last_seen
FROM latest_qty AS stock
JOIN sales_rate AS rate
  ON rate.dispensary_id = stock.dispensary_id
 AND rate.product_id = stock.product_id
JOIN dispensaries AS store
  ON store.id = stock.dispensary_id
WHERE stock.current_qty > 0
  AND rate.daily_velocity > 0;
-- ============================================================
-- VIEW: Days Since OOS (for products currently out of stock)
-- ============================================================
-- 'oos' snapshots that have no later 'back_in_stock' snapshot for the same
-- store and product, with the elapsed time in days (seconds / 86400).
CREATE OR REPLACE VIEW v_days_since_oos AS
SELECT
    oos.dispensary_id,
    store.name          AS store_name,
    oos.product_id,
    oos.brand_name,
    oos.captured_at     AS went_oos_at,
    EXTRACT(EPOCH FROM (NOW() - oos.captured_at)) / 86400 AS days_since_oos,
    oos.prev_quantity   AS last_known_qty
FROM inventory_snapshots AS oos
JOIN dispensaries AS store
  ON store.id = oos.dispensary_id
WHERE oos.change_type = 'oos'
  AND NOT EXISTS (
      -- a restock event recorded after this OOS event
      SELECT 1
      FROM inventory_snapshots AS restock
      WHERE restock.change_type = 'back_in_stock'
        AND restock.dispensary_id = oos.dispensary_id
        AND restock.product_id = oos.product_id
        AND restock.captured_at > oos.captured_at
  );
-- ============================================================
-- VIEW: Brand Variant Counts (track brand growth)
-- ============================================================
-- Per brand and state: total, in-stock and out-of-stock store_products rows,
-- stores carrying the brand, distinct raw categories, and the first/last
-- time any of the brand's rows was seen.
CREATE OR REPLACE VIEW v_brand_variants AS
SELECT
    item.brand_name_raw AS brand_name,
    store.state,
    COUNT(DISTINCT item.id)                                       AS total_variants,
    COUNT(DISTINCT item.id) FILTER (WHERE item.is_in_stock)       AS active_variants,
    COUNT(DISTINCT item.id) FILTER (WHERE NOT item.is_in_stock)   AS inactive_variants,
    COUNT(DISTINCT item.dispensary_id)                            AS stores_carrying,
    COUNT(DISTINCT item.category_raw)                             AS categories,
    MIN(item.first_seen_at)                                       AS brand_first_seen,
    MAX(item.last_seen_at)                                        AS brand_last_seen
FROM store_products AS item
JOIN dispensaries AS store
  ON store.id = item.dispensary_id
WHERE item.brand_name_raw IS NOT NULL
GROUP BY
    item.brand_name_raw,
    store.state;
-- ============================================================
-- VIEW: Brand Growth (compare variant counts over time)
-- ============================================================
-- Week-over-week variant counts per brand for the last 90 days. Each week is
-- paired with the same brand's row for the preceding week; a brand-week with
-- no preceding row is labelled 'new'.
CREATE OR REPLACE VIEW v_brand_growth AS
WITH per_week AS (
    SELECT
        item.brand_name_raw                     AS brand_name,
        DATE_TRUNC('week', item.last_seen_at)   AS week,
        COUNT(DISTINCT item.id)                 AS variant_count
    FROM store_products AS item
    WHERE item.brand_name_raw IS NOT NULL
      AND item.last_seen_at >= NOW() - INTERVAL '90 days'
    GROUP BY
        item.brand_name_raw,
        DATE_TRUNC('week', item.last_seen_at)
)
SELECT
    cur.brand_name,
    cur.week            AS current_week,
    cur.variant_count   AS current_variants,
    prior.variant_count AS prev_week_variants,
    cur.variant_count - COALESCE(prior.variant_count, 0) AS variant_change,
    CASE
        WHEN prior.variant_count IS NULL               THEN 'new'
        WHEN cur.variant_count > prior.variant_count   THEN 'growing'
        WHEN cur.variant_count < prior.variant_count   THEN 'declining'
        ELSE 'stable'
    END AS growth_status
FROM per_week AS cur
LEFT JOIN per_week AS prior
       ON prior.brand_name = cur.brand_name
      AND prior.week = cur.week - INTERVAL '1 week'
ORDER BY
    cur.brand_name,
    cur.week DESC;
-- Catalog descriptions for the stock-out and brand views.
COMMENT ON VIEW v_stock_out_prediction
    IS 'Predict days until stock out based on velocity';
COMMENT ON VIEW v_days_since_oos
    IS 'Products currently OOS and how long they have been out';
COMMENT ON VIEW v_brand_variants
    IS 'Active vs inactive SKU counts per brand per state';
COMMENT ON VIEW v_brand_growth
    IS 'Week-over-week brand variant growth tracking';

View File

@@ -1,53 +0,0 @@
-- Migration 126: Set AZ stores to 5-minute high-frequency crawls
-- Other states default to 60-minute (1 hour) intervals
-- ============================================================
-- SET AZ STORES TO 5-MINUTE INTERVALS (with 3-min jitter)
-- ============================================================
-- AZ stores with crawling enabled: 5-minute base interval. Each store's next
-- crawl is pushed out by a random 0-5 minute offset so the first crawls do
-- not all start together.
-- The migration author describes a +/- 3 minute jitter (2-8 minute effective
-- range); this statement does not apply it -- presumably the scheduler does
-- (TODO confirm).
UPDATE dispensaries
SET
    crawl_interval_minutes = 5,
    next_crawl_at = NOW() + INTERVAL '5 minutes' * RANDOM()
WHERE crawl_enabled = TRUE
  AND state = 'AZ';
-- ============================================================
-- SET OTHER STATES TO 60-MINUTE INTERVALS (with 3-min jitter)
-- ============================================================
-- Non-AZ stores with crawling enabled and no interval yet: 60-minute
-- interval, first crawl staggered by a random 0-60 minute offset.
-- Rows with a NULL state do not match `<> 'AZ'` and are left untouched.
UPDATE dispensaries
SET
    crawl_interval_minutes = 60,
    next_crawl_at = NOW() + INTERVAL '60 minutes' * RANDOM()
WHERE crawl_enabled = TRUE
  AND crawl_interval_minutes IS NULL
  AND state <> 'AZ';
-- ============================================================
-- VERIFY RESULTS
-- ============================================================
-- SELECT state, crawl_interval_minutes, COUNT(*)
-- FROM dispensaries
-- WHERE crawl_enabled = TRUE
-- GROUP BY state, crawl_interval_minutes
-- ORDER BY state;
-- ============================================================
-- CREATE VIEW FOR MONITORING CRAWL LOAD
-- ============================================================
-- Crawl load per (state, interval) for stores with crawling enabled.
--   crawls_per_hour = stores * (60 / interval); a NULL interval counts as 60
--   workers_needed  = crawls_per_hour / 120 (assumes 30 seconds per crawl)
-- A zero interval would otherwise raise a division-by-zero error and make the
-- whole view unusable; NULLIF turns it into NULL for that group instead, and
-- NULLS LAST keeps such groups from floating to the top of the ordering.
CREATE OR REPLACE VIEW v_crawl_load AS
SELECT
    state,
    crawl_interval_minutes,
    COUNT(*) as store_count,
    -- Crawls per hour = stores * (60 / interval)
    ROUND(COUNT(*) * (60.0 / NULLIF(COALESCE(crawl_interval_minutes, 60), 0))) as crawls_per_hour,
    -- Assuming 30 sec per crawl, workers needed = crawls_per_hour / 120
    ROUND(COUNT(*) * (60.0 / NULLIF(COALESCE(crawl_interval_minutes, 60), 0)) / 120, 1) as workers_needed
FROM dispensaries
WHERE crawl_enabled = TRUE
GROUP BY state, crawl_interval_minutes
ORDER BY crawls_per_hour DESC NULLS LAST;
COMMENT ON VIEW v_crawl_load IS 'Monitor crawl load by state and interval';

View File

@@ -1,164 +0,0 @@
-- Migration 127: Fix worker task concurrency limit
-- Problem: claim_task function checks session_task_count but never increments it
-- Solution: Increment on claim, decrement on complete/fail/release
-- =============================================================================
-- STEP 1: Set max tasks to 5 for all workers
-- =============================================================================
-- Cap every registered worker at 5 tasks.
UPDATE worker_registry
SET session_max_tasks = 5;
-- New workers get the same cap by default.
ALTER TABLE worker_registry
    ALTER COLUMN session_max_tasks SET DEFAULT 5;
-- =============================================================================
-- STEP 2: Reset all session_task_count to match actual active tasks
-- =============================================================================
-- Recount from worker_tasks: tasks in 'claimed' or 'running' for that worker.
UPDATE worker_registry AS reg
SET session_task_count = (
    SELECT COUNT(*)
    FROM worker_tasks AS task
    WHERE task.status IN ('claimed', 'running')
      AND task.worker_id = reg.worker_id
);
-- =============================================================================
-- STEP 3: Update claim_task function to increment session_task_count
-- =============================================================================
-- Claim the next pending task for a worker.
--
-- Parameters:
--   p_role         role the task must have
--   p_worker_id    worker_registry.worker_id of the caller
--   p_curl_passed  whether the worker passed the curl preflight
--   p_http_passed  whether the worker passed the http preflight
--
-- Returns the claimed worker_tasks row, or NULL when the worker is at its
-- task limit, has no valid geo session (none, or older than 60 minutes), or
-- no eligible task exists. On success the worker's session_task_count is
-- incremented.
CREATE OR REPLACE FUNCTION claim_task(
    p_role VARCHAR(50),
    p_worker_id VARCHAR(100),
    p_curl_passed BOOLEAN DEFAULT TRUE,
    p_http_passed BOOLEAN DEFAULT FALSE
) RETURNS worker_tasks AS $$
DECLARE
    claimed_task worker_tasks;
    worker_state VARCHAR(2);
    session_valid BOOLEAN;
    session_tasks INT;
    max_tasks INT;
BEGIN
    -- Get worker's current geo session info
    SELECT
        current_state,
        session_task_count,
        session_max_tasks,
        (geo_session_started_at IS NOT NULL AND geo_session_started_at > NOW() - INTERVAL '60 minutes')
    INTO worker_state, session_tasks, max_tasks, session_valid
    FROM worker_registry
    WHERE worker_id = p_worker_id;

    -- Check if worker has reached max concurrent tasks (default 5)
    IF session_tasks >= COALESCE(max_tasks, 5) THEN
        RETURN NULL;
    END IF;

    -- If no valid geo session, or session expired, worker can't claim tasks
    -- Worker must re-qualify first (an unknown worker also lands here, since
    -- worker_state stays NULL when no registry row matches)
    IF worker_state IS NULL OR NOT session_valid THEN
        RETURN NULL;
    END IF;

    -- Claim task matching worker's state
    UPDATE worker_tasks
    SET
        status = 'claimed',
        worker_id = p_worker_id,
        claimed_at = NOW(),
        updated_at = NOW()
    WHERE id = (
        SELECT wt.id FROM worker_tasks wt
        JOIN dispensaries d ON wt.dispensary_id = d.id
        WHERE wt.role = p_role
          AND wt.status = 'pending'
          AND (wt.scheduled_for IS NULL OR wt.scheduled_for <= NOW())
          -- GEO FILTER: Task's dispensary must match worker's state
          AND d.state = worker_state
          -- Method compatibility: worker must have passed the required preflight
          AND (
              wt.method IS NULL  -- No preference, any worker can claim
              OR (wt.method = 'curl' AND p_curl_passed = TRUE)
              OR (wt.method = 'http' AND p_http_passed = TRUE)
          )
          -- Exclude stores that already have an active task.
          -- The previous NOT IN subquery also required
          -- dispensary_id != wt.dispensary_id, which removed the candidate's
          -- own store from the list and made the exclusion always pass.
          AND NOT EXISTS (
              SELECT 1 FROM worker_tasks active
              WHERE active.status IN ('claimed', 'running')
                AND active.dispensary_id = wt.dispensary_id
          )
        ORDER BY wt.priority DESC, wt.created_at ASC
        LIMIT 1
        -- NOTE(review): without an OF clause this locks the joined
        -- dispensaries row as well as the task row; confirm that is intended.
        FOR UPDATE SKIP LOCKED
    )
    RETURNING * INTO claimed_task;

    -- INCREMENT session_task_count if we claimed a task
    IF claimed_task.id IS NOT NULL THEN
        UPDATE worker_registry
        SET session_task_count = session_task_count + 1
        WHERE worker_id = p_worker_id;
    END IF;

    RETURN claimed_task;
END;
$$ LANGUAGE plpgsql;
-- =============================================================================
-- STEP 4: Create trigger to decrement on task completion/failure/release
-- =============================================================================
-- Row trigger for UPDATEs on worker_tasks: gives a task slot back to the
-- worker when a task it held ('claimed' or 'running') moves to 'pending',
-- 'completed' or 'failed', or has its worker_id cleared. The counter never
-- goes below zero.
CREATE OR REPLACE FUNCTION decrement_worker_task_count()
RETURNS TRIGGER AS $$
DECLARE
    was_held BOOLEAN;
    now_free BOOLEAN;
BEGIN
    -- the old row was an active task assigned to a worker
    was_held := OLD.worker_id IS NOT NULL
                AND OLD.status IN ('claimed', 'running');
    -- the new row is finished, released back to the pool, or unassigned
    now_free := NEW.status IN ('pending', 'completed', 'failed')
                OR NEW.worker_id IS NULL;

    IF was_held AND now_free THEN
        UPDATE worker_registry
        SET session_task_count = GREATEST(0, session_task_count - 1)
        WHERE worker_id = OLD.worker_id;
    END IF;

    RETURN NEW;
END;
$$ LANGUAGE plpgsql;

-- Recreate the trigger so the migration can be re-run.
DROP TRIGGER IF EXISTS trg_decrement_worker_task_count ON worker_tasks;

CREATE TRIGGER trg_decrement_worker_task_count
    AFTER UPDATE ON worker_tasks
    FOR EACH ROW
    EXECUTE FUNCTION decrement_worker_task_count();
-- Row trigger for DELETEs on worker_tasks (the migration author notes that
-- completed tasks are deleted from the pool): if the deleted row was still
-- 'claimed' or 'running' for a worker, give that worker its slot back.
CREATE OR REPLACE FUNCTION decrement_worker_task_count_delete()
RETURNS TRIGGER AS $$
DECLARE
    held_slot BOOLEAN;
BEGIN
    held_slot := OLD.worker_id IS NOT NULL
                 AND OLD.status IN ('claimed', 'running');

    IF held_slot THEN
        UPDATE worker_registry
        SET session_task_count = GREATEST(0, session_task_count - 1)
        WHERE worker_id = OLD.worker_id;
    END IF;

    RETURN OLD;
END;
$$ LANGUAGE plpgsql;

DROP TRIGGER IF EXISTS trg_decrement_worker_task_count_delete ON worker_tasks;

CREATE TRIGGER trg_decrement_worker_task_count_delete
    AFTER DELETE ON worker_tasks
    FOR EACH ROW
    EXECUTE FUNCTION decrement_worker_task_count_delete();
-- =============================================================================
-- STEP 5: Verify current state
-- =============================================================================
-- Sanity check: stored counter next to the live count of 'claimed'/'running'
-- tasks for every active worker.
SELECT
    reg.worker_id,
    reg.friendly_name,
    reg.session_task_count,
    reg.session_max_tasks,
    (
        SELECT COUNT(*)
        FROM worker_tasks AS task
        WHERE task.status IN ('claimed', 'running')
          AND task.worker_id = reg.worker_id
    ) AS actual_count
FROM worker_registry AS reg
WHERE reg.status = 'active'
ORDER BY reg.friendly_name;

View File

@@ -1,109 +0,0 @@
-- Migration 128: Pool configuration table
-- Controls whether workers can claim tasks from the pool
-- pool_config: switch that controls whether workers may claim tasks.
--   pool_open      TRUE when claiming is allowed
--   closed_reason  free-text explanation recorded when the pool is closed
--   closed_at/by   when and by whom the pool was closed
--   opened_at      when the pool was last opened
CREATE TABLE IF NOT EXISTS pool_config (
    id SERIAL PRIMARY KEY,
    pool_open BOOLEAN NOT NULL DEFAULT true,
    closed_reason TEXT,
    closed_at TIMESTAMPTZ,
    closed_by VARCHAR(100),
    opened_at TIMESTAMPTZ DEFAULT NOW(),
    created_at TIMESTAMPTZ DEFAULT NOW(),
    updated_at TIMESTAMPTZ DEFAULT NOW()
);

-- Insert default config (pool open), but only when the table is still empty.
-- "ON CONFLICT DO NOTHING" cannot provide idempotency here: the only unique
-- constraint is the SERIAL primary key, which never conflicts on insert, so
-- every re-run of this migration would add another config row.
INSERT INTO pool_config (pool_open, opened_at)
SELECT true, NOW()
WHERE NOT EXISTS (SELECT 1 FROM pool_config);
-- Update claim_task function to check pool status
--
-- claim_task: atomically claims at most one pending task for a worker.
--   p_role         role the task must have
--   p_worker_id    worker_registry.worker_id of the claiming worker
--   p_curl_passed  TRUE if the worker passed the 'curl' preflight
--   p_http_passed  TRUE if the worker passed the 'http' preflight
-- Returns the claimed worker_tasks row, or NULL when nothing is claimed:
-- pool closed, worker at its task limit, worker has no state or its geo
-- session is older than 60 minutes, or no eligible task exists.
CREATE OR REPLACE FUNCTION claim_task(
    p_role VARCHAR(50),
    p_worker_id VARCHAR(100),
    p_curl_passed BOOLEAN DEFAULT TRUE,
    p_http_passed BOOLEAN DEFAULT FALSE
) RETURNS worker_tasks AS $$
DECLARE
    claimed_task worker_tasks;
    worker_state VARCHAR(2);
    session_valid BOOLEAN;
    session_tasks INT;
    max_tasks INT;
    is_pool_open BOOLEAN;
BEGIN
    -- Check if pool is open. ORDER BY makes the choice deterministic should
    -- the table ever hold more than one row; a missing row counts as "open".
    SELECT pool_open INTO is_pool_open FROM pool_config ORDER BY id LIMIT 1;
    IF NOT COALESCE(is_pool_open, true) THEN
        RETURN NULL; -- Pool is closed, no claiming allowed
    END IF;

    -- Get worker's current geo session info
    SELECT
        current_state,
        session_task_count,
        session_max_tasks,
        (geo_session_started_at IS NOT NULL AND geo_session_started_at > NOW() - INTERVAL '60 minutes')
    INTO worker_state, session_tasks, max_tasks, session_valid
    FROM worker_registry
    WHERE worker_id = p_worker_id;

    -- Check if worker has reached max concurrent tasks (default 5)
    IF session_tasks >= COALESCE(max_tasks, 5) THEN
        RETURN NULL;
    END IF;

    -- If no valid geo session, or session expired, worker can't claim tasks.
    -- Worker must re-qualify first. An unknown worker also lands here because
    -- the lookup above leaves worker_state NULL.
    IF worker_state IS NULL OR NOT session_valid THEN
        RETURN NULL;
    END IF;

    -- Claim task matching worker's state
    UPDATE worker_tasks
    SET
        status = 'claimed',
        worker_id = p_worker_id,
        claimed_at = NOW(),
        updated_at = NOW()
    WHERE id = (
        SELECT wt.id FROM worker_tasks wt
        JOIN dispensaries d ON wt.dispensary_id = d.id
        WHERE wt.role = p_role
          AND wt.status = 'pending'
          AND (wt.scheduled_for IS NULL OR wt.scheduled_for <= NOW())
          -- GEO FILTER: Task's dispensary must match worker's state
          AND d.state = worker_state
          -- Method compatibility: worker must have passed the required preflight
          AND (
              wt.method IS NULL -- No preference, any worker can claim
              OR (wt.method = 'curl' AND p_curl_passed = TRUE)
              OR (wt.method = 'http' AND p_http_passed = TRUE)
          )
          -- Exclude stores that already have an active task.
          -- The previous NOT IN subquery filtered on
          -- "dispensary_id != wt.dispensary_id", so it could never contain the
          -- candidate's own store and the exclusion never took effect.
          AND NOT EXISTS (
              SELECT 1
              FROM worker_tasks active
              WHERE active.dispensary_id = wt.dispensary_id
                AND active.status IN ('claimed', 'running')
          )
        ORDER BY wt.priority DESC, wt.created_at ASC
        LIMIT 1
        FOR UPDATE SKIP LOCKED
    )
    RETURNING * INTO claimed_task;

    -- INCREMENT session_task_count if we claimed a task
    IF claimed_task.id IS NOT NULL THEN
        UPDATE worker_registry
        SET session_task_count = session_task_count + 1
        WHERE worker_id = p_worker_id;
    END IF;

    RETURN claimed_task;
END;
$$ LANGUAGE plpgsql;
-- Verify: emit a status message and show the current pool configuration row(s)
SELECT 'pool_config table created' as status;
SELECT * FROM pool_config;

View File

@@ -1,60 +0,0 @@
-- Migration 129: Claim tasks for specific geo
-- Used after worker gets IP to claim more tasks for same geo
-- Function: Claim up to N tasks for a SPECIFIC geo (state/city)
-- Different from claim_tasks_batch which picks the geo with most tasks
--
--   p_worker_id   worker that will own the claimed tasks
--   p_max_tasks   maximum number of tasks to claim (default 4)
--   p_state_code  required; matched against dispensaries.state
--   p_city        optional; when given, matched against dispensaries.city
--   p_role        optional; when given, only tasks with this role are claimed
-- Returns one row per claimed task, joined with its dispensary details.
--
-- p_state_code carries DEFAULT NULL only because PostgreSQL requires every
-- parameter that follows a defaulted one (p_max_tasks) to have a default too;
-- without it CREATE FUNCTION fails. Positional calls are unaffected, and a
-- missing state is rejected explicitly below.
CREATE OR REPLACE FUNCTION claim_tasks_batch_for_geo(
    p_worker_id VARCHAR(255),
    p_max_tasks INTEGER DEFAULT 4,
    p_state_code VARCHAR(2) DEFAULT NULL,
    p_city VARCHAR(100) DEFAULT NULL,
    p_role VARCHAR(50) DEFAULT NULL
) RETURNS TABLE (
    task_id INTEGER,
    role VARCHAR(50),
    dispensary_id INTEGER,
    dispensary_name VARCHAR(255),
    city VARCHAR(100),
    state_code VARCHAR(2),
    platform VARCHAR(50),
    method VARCHAR(20)
) AS $$
BEGIN
    IF p_state_code IS NULL THEN
        RAISE EXCEPTION 'claim_tasks_batch_for_geo: p_state_code is required';
    END IF;

    -- Claim up to p_max_tasks for the specified geo
    RETURN QUERY
    WITH claimed AS (
        UPDATE worker_tasks t SET
            status = 'claimed',
            worker_id = p_worker_id,
            claimed_at = NOW(),
            updated_at = NOW() -- keep in step with claim_task, which also stamps updated_at
        FROM (
            SELECT t2.id
            FROM worker_tasks t2
            JOIN dispensaries d ON t2.dispensary_id = d.id
            WHERE t2.status = 'pending'
              AND d.state = p_state_code
              AND (p_city IS NULL OR d.city = p_city)
              AND (p_role IS NULL OR t2.role = p_role)
            ORDER BY t2.priority DESC, t2.created_at ASC
            LIMIT p_max_tasks
            FOR UPDATE SKIP LOCKED
        ) sub
        WHERE t.id = sub.id
        RETURNING t.id, t.role, t.dispensary_id, t.method
    )
    SELECT
        c.id as task_id,
        c.role,
        c.dispensary_id,
        d.name as dispensary_name,
        d.city,
        d.state as state_code,
        d.platform,
        c.method
    FROM claimed c
    JOIN dispensaries d ON c.dispensary_id = d.id;
    -- NOTE(review): unlike claim_task, this function does not increment
    -- worker_registry.session_task_count — confirm whether the caller does.
END;
$$ LANGUAGE plpgsql;
-- Verify: emits a one-row status message once the script reaches this point
SELECT 'claim_tasks_batch_for_geo function created' as status;

View File

@@ -1,53 +0,0 @@
-- Migration 130: Worker qualification badge
-- Session-scoped badge showing worker qualification status
-- Add the qualification columns to worker_registry in a single statement:
--   badge               qualification badge (see values below)
--   qualified_at        timestamp of qualification
--   current_session_id  links the worker to their active session
ALTER TABLE worker_registry
    ADD COLUMN IF NOT EXISTS badge VARCHAR(20) DEFAULT NULL,
    ADD COLUMN IF NOT EXISTS qualified_at TIMESTAMPTZ DEFAULT NULL,
    ADD COLUMN IF NOT EXISTS current_session_id INTEGER DEFAULT NULL;

-- Badge values:
--   'gold' = preflight passed, actively qualified with valid session
--   NULL   = not qualified (no active session or session expired)
-- Function: mark a worker as qualified.
-- Sets badge to 'gold', stamps qualified_at with the current time and records
-- the given session id. Returns TRUE when a worker_registry row was updated,
-- FALSE when no row matches p_worker_id.
CREATE OR REPLACE FUNCTION set_worker_qualified(
    p_worker_id VARCHAR(255),
    p_session_id INTEGER
) RETURNS BOOLEAN AS $$
DECLARE
    rows_touched INTEGER;
BEGIN
    UPDATE worker_registry
       SET badge = 'gold',
           qualified_at = NOW(),
           current_session_id = p_session_id
     WHERE worker_id = p_worker_id;

    GET DIAGNOSTICS rows_touched = ROW_COUNT;
    RETURN rows_touched > 0;
END;
$$ LANGUAGE plpgsql;
-- Function: remove a worker's qualification when its session ends.
-- Resets badge, qualified_at and current_session_id to NULL. Returns TRUE when
-- a worker_registry row was updated, FALSE when no row matches p_worker_id.
CREATE OR REPLACE FUNCTION clear_worker_badge(p_worker_id VARCHAR(255))
RETURNS BOOLEAN AS $$
DECLARE
    rows_touched INTEGER;
BEGIN
    UPDATE worker_registry
       SET badge = NULL,
           qualified_at = NULL,
           current_session_id = NULL
     WHERE worker_id = p_worker_id;

    GET DIAGNOSTICS rows_touched = ROW_COUNT;
    RETURN rows_touched > 0;
END;
$$ LANGUAGE plpgsql;
-- Index for finding qualified workers.
-- Partial index: only rows whose badge is non-NULL are indexed.
CREATE INDEX IF NOT EXISTS idx_worker_registry_badge
ON worker_registry(badge) WHERE badge IS NOT NULL;
-- Verify: emits a one-row status message once the script reaches this point
SELECT 'worker_registry badge column added' as status;

1784
backend/node_modules/.package-lock.json generated vendored

File diff suppressed because it is too large Load Diff

1789
backend/package-lock.json generated

File diff suppressed because it is too large Load Diff

View File

@@ -22,7 +22,6 @@
"seed:dt:cities:bulk": "tsx src/scripts/seed-dt-cities-bulk.ts"
},
"dependencies": {
"@aws-sdk/client-s3": "^3.953.0",
"@kubernetes/client-node": "^1.4.0",
"@types/bcryptjs": "^3.0.0",
"axios": "^1.6.2",
@@ -50,8 +49,6 @@
"puppeteer-extra-plugin-stealth": "^2.11.2",
"sharp": "^0.32.0",
"socks-proxy-agent": "^8.0.2",
"swagger-jsdoc": "^6.2.8",
"swagger-ui-express": "^5.0.1",
"user-agents": "^1.1.669",
"uuid": "^9.0.1",
"zod": "^3.22.4"
@@ -64,8 +61,6 @@
"@types/node": "^20.10.5",
"@types/node-cron": "^3.0.11",
"@types/pg": "^8.15.6",
"@types/swagger-jsdoc": "^6.0.4",
"@types/swagger-ui-express": "^4.1.8",
"@types/uuid": "^9.0.7",
"tsx": "^4.7.0",
"typescript": "^5.3.3"

View File

@@ -1 +1 @@
cannaiq-menus-2.3.0.zip
cannaiq-menus-1.6.0.zip

View File

@@ -1,130 +0,0 @@
/**
* Count Jane stores - v2: Try Algolia store search
* Usage: npx ts-node scripts/count-jane-stores-v2.ts
*/
import puppeteer from 'puppeteer-extra';
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
puppeteer.use(StealthPlugin());
// Two-letter state codes.
// NOTE(review): STATES is not referenced anywhere in the visible part of this
// script — confirm whether per-state crawling was intended or it can be removed.
const STATES = [
'AZ', 'CA', 'CO', 'FL', 'IL', 'MA', 'MI', 'NV', 'NJ', 'NY', 'OH', 'PA', 'WA', 'OR'
];
/**
 * Loads the public Jane store directory and tallies every store seen in JSON
 * responses from iheartjane.com, then prints totals (overall, per state, and
 * for Arizona).
 *
 * Stores are collected passively from network responses — both `stores`
 * arrays and Algolia-style `hits` arrays — and de-duplicated by `id`.
 * The DOM element count is printed only as a rough cross-check.
 */
async function main() {
  console.log('Counting Jane stores by exploring state pages...\n');

  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  const allStores: Map<number, any> = new Map();

  // try/finally guarantees Chromium is shut down even when navigation throws;
  // otherwise a timeout would leave the browser process running.
  try {
    const page = await browser.newPage();

    // Skip heavy assets; only documents, scripts and XHR are needed
    await page.setRequestInterception(true);
    page.on('request', (req) => {
      const type = req.resourceType();
      if (['image', 'font', 'media', 'stylesheet'].includes(type)) {
        req.abort();
      } else {
        req.continue();
      }
    });

    page.on('response', async (response) => {
      const url = response.url();
      const contentType = response.headers()['content-type'] || '';
      if (url.includes('iheartjane.com') && contentType.includes('json')) {
        try {
          const json = await response.json();
          // Look for stores in any response
          if (json.stores && Array.isArray(json.stores)) {
            for (const s of json.stores) {
              if (s.id) allStores.set(s.id, s);
            }
          }
          // Also check hits (Algolia format)
          if (json.hits && Array.isArray(json.hits)) {
            for (const s of json.hits) {
              if (s.id) allStores.set(s.id, s);
            }
          }
        } catch {
          // Best effort: bodies that are unavailable or not valid JSON are ignored
        }
      }
    });

    // First visit the main stores page
    console.log('Visiting main stores page...');
    await page.goto('https://www.iheartjane.com/stores', {
      waitUntil: 'networkidle0',
      timeout: 60000,
    });
    await new Promise(r => setTimeout(r, 3000));

    // Try to scroll to load more stores
    console.log('Scrolling to load more...');
    for (let i = 0; i < 5; i++) {
      await page.evaluate(() => window.scrollBy(0, 1000));
      await new Promise(r => setTimeout(r, 1000));
    }

    // Try clicking "Load More" if it exists.
    // `:has-text()` is a Playwright extension, not a CSS selector, so the
    // previous page.$() call always threw and the button was never clicked.
    // Match on text inside the page instead.
    try {
      const clicked = await page.evaluate(() => {
        const candidates = Array.from(
          document.querySelectorAll('button, [class*="load-more"]')
        );
        const target = candidates.find(el =>
          el.matches('[class*="load-more"]') ||
          (el.textContent || '').toLowerCase().includes('load more')
        );
        if (!target) return false;
        (target as HTMLElement).click();
        return true;
      });
      if (clicked) {
        console.log('Clicking Load More...');
        await new Promise(r => setTimeout(r, 3000));
      }
    } catch (error: any) {
      // Optional step: report and carry on with whatever was captured
      console.log(`Load More step skipped: ${error.message}`);
    }

    // Extract stores from DOM as fallback
    const domStores = await page.evaluate(() => {
      const storeElements = document.querySelectorAll('[data-store-id], [class*="StoreCard"], [class*="store-card"]');
      return storeElements.length;
    });
    console.log(`\nStores from DOM elements: ${domStores}`);
  } finally {
    await browser.close();
  }

  // Count by state
  const byState: Record<string, number> = {};
  for (const store of allStores.values()) {
    const state = store.state || 'Unknown';
    byState[state] = (byState[state] || 0) + 1;
  }

  console.log('\n=== JANE STORE COUNTS ===\n');
  console.log(`Unique stores captured: ${allStores.size}`);

  if (allStores.size > 0) {
    console.log('\nBy State:');
    const sorted = Object.entries(byState).sort((a, b) => b[1] - a[1]);
    for (const [state, count] of sorted.slice(0, 20)) {
      console.log(` ${state}: ${count}`);
    }

    // Check Arizona specifically
    const azStores = Array.from(allStores.values()).filter(s =>
      s.state === 'Arizona' || s.state === 'AZ'
    );
    console.log(`\nArizona stores: ${azStores.length}`);
    if (azStores.length > 0) {
      console.log('AZ stores:');
      for (const s of azStores.slice(0, 10)) {
        console.log(` - ${s.name} (ID: ${s.id}) - ${s.city}`);
      }
    }
  }

  // Note about total
  console.log('\n--- Note ---');
  console.log('Jane uses server-side rendering. To get full store count,');
  console.log('you may need to check their public marketing materials or');
  console.log('iterate through known store IDs.');
}
main().catch(console.error);

View File

@@ -1,98 +0,0 @@
/**
* Count Jane stores by state
* Usage: npx ts-node scripts/count-jane-stores.ts
*/
import puppeteer from 'puppeteer-extra';
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
puppeteer.use(StealthPlugin());
/**
 * Loads the Jane store directory, collects every `stores` array found in
 * responses whose URL contains both "iheartjane.com" and "stores", and prints
 * counts overall, per state, and for Arizona.
 *
 * Stores are appended as they arrive; they are not de-duplicated, so a store
 * returned by several responses is counted more than once.
 */
async function main() {
  console.log('Counting Jane stores...\n');

  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  // Capture store data from API
  const stores: any[] = [];
  let pageStoreCount: number | null = null;

  // try/finally guarantees Chromium is shut down even when navigation throws;
  // otherwise a timeout would leave the browser process running.
  try {
    const page = await browser.newPage();

    await page.setRequestInterception(true);
    page.on('request', (req) => {
      const type = req.resourceType();
      if (['image', 'font', 'media', 'stylesheet'].includes(type)) {
        req.abort();
      } else {
        req.continue();
      }
    });

    page.on('response', async (response) => {
      const url = response.url();
      if (url.includes('iheartjane.com') && url.includes('stores')) {
        try {
          const json = await response.json();
          if (json.stores && Array.isArray(json.stores)) {
            stores.push(...json.stores);
          }
        } catch {
          // Best effort: non-JSON bodies (HTML, scripts) are expected and ignored
        }
      }
    });

    // Visit the store directory
    console.log('Loading Jane store directory...');
    await page.goto('https://www.iheartjane.com/stores', {
      waitUntil: 'networkidle2',
      timeout: 60000,
    });

    // Wait for stores to load
    await new Promise(r => setTimeout(r, 5000));

    // Also try to get store count from page content
    pageStoreCount = await page.evaluate(() => {
      // Look for store count in page text
      const text = document.body.innerText;
      const match = text.match(/(\d+)\s*stores?/i);
      // Explicit radix: the captured group is always a decimal number
      return match ? parseInt(match[1], 10) : null;
    });
  } finally {
    await browser.close();
  }

  // Count by state
  const byState: Record<string, number> = {};
  for (const store of stores) {
    const state = store.state || 'Unknown';
    byState[state] = (byState[state] || 0) + 1;
  }

  console.log('\n=== JANE STORE COUNTS ===\n');
  console.log(`Total stores captured from API: ${stores.length}`);
  if (pageStoreCount) {
    console.log(`Page claims: ${pageStoreCount} stores`);
  }

  console.log('\nBy State:');
  const sorted = Object.entries(byState).sort((a, b) => b[1] - a[1]);
  for (const [state, count] of sorted) {
    console.log(` ${state}: ${count}`);
  }

  // Check Arizona specifically
  const azStores = stores.filter(s =>
    s.state === 'Arizona' || s.state === 'AZ'
  );
  console.log(`\nArizona stores: ${azStores.length}`);
  if (azStores.length > 0) {
    console.log('Sample AZ stores:');
    for (const s of azStores.slice(0, 5)) {
      console.log(` - ${s.name} (ID: ${s.id}) - ${s.city}`);
    }
  }
}
main().catch(console.error);

View File

@@ -1,184 +0,0 @@
/**
* Explore all Treez page URLs to find the full product catalog
*/
import puppeteer, { Page } from 'puppeteer';
const STORE_ID = 'best';
/**
 * Pauses the caller for roughly `ms` milliseconds.
 *
 * @param ms delay passed to `setTimeout`
 * @returns a promise that resolves (with no value) once the timer fires
 */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((wake) => {
    setTimeout(wake, ms);
  });
}
/**
 * Dismisses the age-gate modal when one is present on the page.
 *
 * Does nothing if no element matches `[data-testid="age-gate-modal"]`.
 * Otherwise clicks the submit button (if found) and waits two seconds.
 *
 * @param page Puppeteer page to inspect
 */
async function bypassAgeGate(page: Page): Promise<void> {
  const modal = await page.$('[data-testid="age-gate-modal"]');
  if (!modal) {
    return;
  }

  console.log(' Age gate detected, bypassing...');
  const submitButton = await page.$('[data-testid="age-gate-submit-button"]');
  if (submitButton) {
    await submitButton.click();
  }
  // The wait happens whether or not the button was found
  await sleep(2000);
}
/**
 * Counts the elements on the page whose class attribute contains
 * `product_product__`.
 *
 * @param page Puppeteer page to query
 * @returns number of matching elements currently in the DOM
 */
async function countProducts(page: Page): Promise<number> {
  const productCards = await page.evaluate(() => {
    const cards = document.querySelectorAll('[class*="product_product__"]');
    return cards.length;
  });
  return productCards;
}
/**
 * Scrolls to the bottom of the page repeatedly, then counts products.
 *
 * Stops after `maxScrolls` scrolls, or earlier once the document height has
 * been unchanged for three consecutive checks.
 *
 * @param page Puppeteer page to scroll
 * @param maxScrolls upper bound on scroll iterations (default 30)
 * @returns product count after scrolling and the number of scrolls performed
 */
async function scrollAndCount(page: Page, maxScrolls: number = 30): Promise<{ products: number; scrolls: number }> {
  let lastHeight = 0;
  let stableChecks = 0;
  let scrolls = 0;

  for (; scrolls < maxScrolls; scrolls++) {
    const height = await page.evaluate(() => document.body.scrollHeight);

    stableChecks = height === lastHeight ? stableChecks + 1 : 0;
    if (stableChecks >= 3) {
      break;
    }

    await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
    await sleep(1500);
    lastHeight = height;
  }

  return { products: await countProducts(page), scrolls };
}
/**
 * Opens one path on the store's Treez site and reports how many products it
 * shows.
 *
 * If products are present initially, scrolls the page and returns the final
 * count. If none are present, logs how many brand/category/card-like
 * elements exist (one element may be counted by several selectors) and
 * returns the initial count with zero scrolls. Any error is logged and
 * reported through the `error` field rather than thrown.
 *
 * @param page Puppeteer page to navigate
 * @param path path and query string appended to the store's base URL
 */
async function testUrl(page: Page, path: string): Promise<{ products: number; scrolls: number; error?: string }> {
  const target = `https://${STORE_ID}.treez.io${path}`;
  console.log(`\nTesting: ${target}`);

  try {
    await page.goto(target, { waitUntil: 'networkidle2', timeout: 30000 });
    await sleep(2000);
    await bypassAgeGate(page);
    await sleep(1000);

    const initialCount = await countProducts(page);
    console.log(` Initial products: ${initialCount}`);

    if (initialCount <= 0) {
      // Check for brand/category cards instead
      const cardCount = await page.evaluate(() =>
        [
          '[class*="brand"]',
          '[class*="Brand"]',
          '[class*="category"]',
          '[class*="Category"]',
          '[class*="card"]',
          'a[href*="/brand/"]',
          'a[href*="/category/"]',
        ].reduce(
          (total, selector) => total + document.querySelectorAll(selector).length,
          0
        )
      );
      console.log(` Cards/links found: ${cardCount}`);
      return { products: initialCount, scrolls: 0 };
    }

    const scrolled = await scrollAndCount(page);
    console.log(` After scroll: ${scrolled.products} products (${scrolled.scrolls} scrolls)`);
    return scrolled;
  } catch (error: any) {
    console.log(` Error: ${error.message}`);
    return { products: 0, scrolls: 0, error: error.message };
  }
}
/**
 * Probes a fixed list of candidate menu URLs on the store's Treez site,
 * lists the `/onlinemenu/` links found on the homepage, and prints the
 * candidates sorted by product count (highest first).
 */
async function main() {
  console.log('='.repeat(60));
  console.log('Exploring Treez Page URLs');
  console.log('='.repeat(60));

  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  // try/finally guarantees Chromium is shut down even when a step throws
  // (the homepage navigation below is not wrapped by testUrl's own catch).
  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // Block images to speed up
    await page.setRequestInterception(true);
    page.on('request', (req) => {
      if (['image', 'font', 'media', 'stylesheet'].includes(req.resourceType())) {
        req.abort();
      } else {
        req.continue();
      }
    });

    const urlsToTest = [
      '/onlinemenu/?customerType=ADULT', // Homepage
      '/onlinemenu/brands?customerType=ADULT', // Brands page
      '/onlinemenu/shop?customerType=ADULT', // Shop page?
      '/onlinemenu/products?customerType=ADULT', // Products page?
      '/onlinemenu/menu?customerType=ADULT', // Menu page?
      '/onlinemenu/all?customerType=ADULT', // All products?
      '/onlinemenu/flower?customerType=ADULT', // Flower category
      '/onlinemenu/vapes?customerType=ADULT', // Vapes category
      '/onlinemenu/edibles?customerType=ADULT', // Edibles category
      '/onlinemenu/concentrates?customerType=ADULT', // Concentrates category
    ];

    const results: { path: string; products: number; scrolls: number }[] = [];
    // Sequential on purpose: every probe reuses the same page
    for (const path of urlsToTest) {
      const result = await testUrl(page, path);
      results.push({ path, ...result });
    }

    // Look for navigation links on the main page
    console.log('\n' + '='.repeat(60));
    console.log('Checking navigation structure on homepage...');
    console.log('='.repeat(60));

    await page.goto(`https://${STORE_ID}.treez.io/onlinemenu/?customerType=ADULT`, {
      waitUntil: 'networkidle2',
      timeout: 30000,
    });
    await sleep(2000);
    await bypassAgeGate(page);
    await sleep(1000);

    const navLinks = await page.evaluate(() => {
      const links: { text: string; href: string }[] = [];
      document.querySelectorAll('a[href*="/onlinemenu/"]').forEach(el => {
        const text = el.textContent?.trim() || '';
        const href = el.getAttribute('href') || '';
        // Keep the first non-empty label seen for each distinct href
        if (text && !links.some(l => l.href === href)) {
          links.push({ text: text.slice(0, 50), href });
        }
      });
      return links;
    });

    console.log('\nNavigation links found:');
    navLinks.forEach(l => console.log(` "${l.text}" → ${l.href}`));

    // Summary
    console.log('\n' + '='.repeat(60));
    console.log('Summary');
    console.log('='.repeat(60));
    results.sort((a, b) => b.products - a.products);
    results.forEach(r => {
      console.log(`${r.products.toString().padStart(4)} products | ${r.path}`);
    });
  } finally {
    await browser.close();
  }
}
main().catch(console.error);

View File

@@ -1,247 +0,0 @@
/**
* Explore Treez site structure to find full product catalog
*
* Usage: npx ts-node scripts/explore-treez-structure.ts
*/
import puppeteer from 'puppeteer';
const STORE_ID = 'best';
/**
 * Resolves after approximately `ms` milliseconds.
 *
 * @param ms delay passed to `setTimeout`
 */
async function sleep(ms: number): Promise<void> {
  const timer = new Promise<void>((done) => {
    setTimeout(done, ms);
  });
  return timer;
}
/**
 * Explores the store's Treez online menu to locate the full product catalog.
 *
 * Steps, mirrored by the numbered log lines:
 *   [1] open the base menu URL and dismiss the age gate
 *   [2] list navigation links
 *   [3] list category-like elements
 *   [4] click the first of "Flower" / "All" / ... that exists
 *   [5] scroll to see whether more products load
 *   [6] probe a few direct URL patterns
 *   [7] save a full-page screenshot to /tmp/treez-explore.png
 */
async function main() {
  console.log('='.repeat(60));
  console.log('Exploring Treez Site Structure');
  console.log('='.repeat(60));

  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });
  const page = await browser.newPage();
  await page.setViewport({ width: 1920, height: 1080 });

  try {
    // Navigate to base menu URL
    const baseUrl = `https://${STORE_ID}.treez.io/onlinemenu/?customerType=ADULT`;
    console.log(`\n[1] Navigating to: ${baseUrl}`);
    await page.goto(baseUrl, { waitUntil: 'networkidle2', timeout: 60000 });
    await sleep(3000);

    // Bypass age gate if present
    const ageGate = await page.$('[data-testid="age-gate-modal"]');
    if (ageGate) {
      console.log('[1] Age gate detected, bypassing...');
      const btn = await page.$('[data-testid="age-gate-submit-button"]');
      if (btn) await btn.click();
      await sleep(2000);
    }

    // Get all navigation links
    console.log('\n[2] Extracting navigation structure...');
    const navInfo = await page.evaluate(() => {
      const links: { text: string; href: string }[] = [];

      // Look for nav links (de-duplicated by href)
      document.querySelectorAll('nav a, [class*="nav"] a, [class*="menu"] a, header a').forEach(el => {
        const text = el.textContent?.trim() || '';
        const href = el.getAttribute('href') || '';
        if (text && href && !links.some(l => l.href === href)) {
          links.push({ text, href });
        }
      });

      // Look for category tabs/buttons (de-duplicated by label)
      document.querySelectorAll('[class*="category"], [class*="tab"], [role="tab"]').forEach(el => {
        const text = el.textContent?.trim() || '';
        const href = el.getAttribute('href') || el.getAttribute('data-href') || '';
        if (text && !links.some(l => l.text === text)) {
          links.push({ text, href: href || `(click: ${el.className})` });
        }
      });

      // Get current URL
      const currentUrl = window.location.href;
      // Count products on page
      const productCount = document.querySelectorAll('[class*="product_product__"]').length;

      return { links, currentUrl, productCount };
    });

    console.log(`Current URL: ${navInfo.currentUrl}`);
    console.log(`Products on homepage: ${navInfo.productCount}`);
    console.log('\nNavigation links found:');
    navInfo.links.forEach(l => {
      console.log(` "${l.text}" → ${l.href}`);
    });

    // Look for category buttons/tabs specifically
    console.log('\n[3] Looking for category navigation...');
    const categories = await page.evaluate(() => {
      const cats: { text: string; className: string; tagName: string }[] = [];
      // Find all clickable elements that might be categories
      const selectors = [
        '[class*="CategoryNav"]',
        '[class*="category"]',
        '[class*="Category"]',
        '[class*="nav"] button',
        '[class*="tab"]',
        '[role="tablist"] *',
        '.MuiTab-root',
        '[class*="filter"]',
      ];
      selectors.forEach(sel => {
        document.querySelectorAll(sel).forEach(el => {
          const text = el.textContent?.trim() || '';
          // Short, not-yet-seen labels only
          if (text && text.length < 50 && !cats.some(c => c.text === text)) {
            cats.push({
              text,
              className: el.className?.toString().slice(0, 80) || '',
              tagName: el.tagName,
            });
          }
        });
      });
      return cats;
    });

    console.log('Category-like elements:');
    categories.forEach(c => {
      console.log(` [${c.tagName}] "${c.text}" (class: ${c.className})`);
    });

    // Try clicking on "Flower" or "All" if found
    console.log('\n[4] Looking for "Flower" or "All Products" link...');
    const clickTargets = ['Flower', 'All', 'All Products', 'Shop All', 'View All'];
    for (const target of clickTargets) {
      const element = await page.evaluate((targetText) => {
        const els = Array.from(document.querySelectorAll('a, button, [role="tab"], [class*="category"]'));
        const match = els.find(el =>
          el.textContent?.trim().toLowerCase() === targetText.toLowerCase()
        );
        if (match) {
          return {
            found: true,
            text: match.textContent?.trim(),
            tag: match.tagName,
          };
        }
        return { found: false };
      }, target);

      if (element.found) {
        console.log(`Found "${element.text}" (${element.tag}), clicking...`);
        await page.evaluate((targetText) => {
          const els = Array.from(document.querySelectorAll('a, button, [role="tab"], [class*="category"]'));
          const match = els.find(el =>
            el.textContent?.trim().toLowerCase() === targetText.toLowerCase()
          );
          if (match) (match as HTMLElement).click();
        }, target);
        await sleep(3000);

        const newUrl = page.url();
        const newCount = await page.evaluate(() =>
          document.querySelectorAll('[class*="product_product__"]').length
        );
        console.log(` New URL: ${newUrl}`);
        console.log(` Products after click: ${newCount}`);
        if (newCount > navInfo.productCount) {
          // Separate the before/after counts; they were previously printed
          // back-to-back (e.g. "1020"), which made the message unreadable.
          console.log(` ✓ Found more products! (${navInfo.productCount} → ${newCount})`);
        }
        // Only the first matching target is clicked
        break;
      }
    }

    // Check page height and scroll behavior
    console.log('\n[5] Checking scroll behavior on current page...');
    let previousHeight = 0;
    let scrollCount = 0;
    let previousProductCount = await page.evaluate(() =>
      document.querySelectorAll('[class*="product_product__"]').length
    );
    while (scrollCount < 10) {
      const currentHeight = await page.evaluate(() => document.body.scrollHeight);
      if (currentHeight === previousHeight) {
        console.log(` Scroll ${scrollCount + 1}: No height change, stopping`);
        break;
      }
      await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
      await sleep(1500);

      const currentProductCount = await page.evaluate(() =>
        document.querySelectorAll('[class*="product_product__"]').length
      );
      console.log(` Scroll ${scrollCount + 1}: height=${currentHeight}, products=${currentProductCount}`);

      // Allow the first few scrolls to settle before giving up
      if (currentProductCount === previousProductCount && scrollCount > 2) {
        console.log(' No new products loading, stopping');
        break;
      }
      previousHeight = currentHeight;
      previousProductCount = currentProductCount;
      scrollCount++;
    }

    // Try direct URL patterns
    console.log('\n[6] Testing URL patterns...');
    const urlPatterns = [
      '/onlinemenu/flower?customerType=ADULT',
      '/onlinemenu/all?customerType=ADULT',
      '/onlinemenu?category=flower&customerType=ADULT',
      '/onlinemenu?view=all&customerType=ADULT',
    ];
    for (const pattern of urlPatterns) {
      const testUrl = `https://${STORE_ID}.treez.io${pattern}`;
      console.log(`\nTrying: ${testUrl}`);
      // One failing pattern (e.g. a navigation timeout) must not abort the
      // remaining probes or the final screenshot.
      try {
        await page.goto(testUrl, { waitUntil: 'networkidle2', timeout: 30000 });
        await sleep(2000);

        // Bypass age gate again if needed
        const gate = await page.$('[data-testid="age-gate-modal"]');
        if (gate) {
          const btn = await page.$('[data-testid="age-gate-submit-button"]');
          if (btn) await btn.click();
          await sleep(2000);
        }

        const productCount = await page.evaluate(() =>
          document.querySelectorAll('[class*="product_product__"]').length
        );
        console.log(` Products found: ${productCount}`);
      } catch (error: any) {
        console.log(` Error: ${error.message}`);
      }
    }

    // Screenshot the final state
    await page.screenshot({ path: '/tmp/treez-explore.png', fullPage: true });
    console.log('\n[7] Screenshot saved to /tmp/treez-explore.png');
  } catch (error: any) {
    console.error('Error:', error.message);
  } finally {
    await browser.close();
  }
}
main().catch(console.error);

View File

@@ -1,138 +0,0 @@
/**
* Run Jane product discovery for stores in database
* Usage: npx ts-node scripts/run-jane-product-discovery.ts [DISPENSARY_ID]
* Example: npx ts-node scripts/run-jane-product-discovery.ts 4220
* Or run for all Jane stores: npx ts-node scripts/run-jane-product-discovery.ts all
*/
import { Pool } from 'pg';
import { fetchProductsByStoreIdDirect } from '../src/platforms/jane';
import { saveRawPayload } from '../src/utils/payload-storage';
/**
 * Fetches Jane products for one or more dispensaries and stores the raw
 * payload.
 *
 * The first CLI argument selects the targets:
 *   - `all`      every dispensary with platform 'jane' and a menu_url
 *   - a number   the dispensary with that database id
 *   - (omitted)  the first Jane dispensary by id
 *
 * For each dispensary with at least one product, the raw payload is saved and
 * the dispensaries row is updated (stage, last_fetch_at, product_count,
 * success/failure counters). Per-dispensary errors are logged and counted;
 * they do not stop the run.
 */
async function main() {
  const arg = process.argv[2];

  console.log('='.repeat(60));
  console.log('Jane Product Discovery');
  console.log('='.repeat(60));

  const pool = new Pool({
    connectionString: process.env.DATABASE_URL,
  });

  try {
    // Get dispensaries to process
    let dispensaries: any[];
    if (arg === 'all') {
      const result = await pool.query(
        `SELECT id, name, menu_url, platform_dispensary_id
         FROM dispensaries
         WHERE platform = 'jane' AND menu_url IS NOT NULL
         ORDER BY id`
      );
      dispensaries = result.rows;
    } else if (arg) {
      // Validate before querying: a non-numeric argument used to reach the
      // database as NaN and surface as an opaque SQL error.
      const dispensaryId = Number.parseInt(arg, 10);
      if (Number.isNaN(dispensaryId)) {
        console.error(`Error: invalid dispensary ID "${arg}" (expected a number or "all")`);
        process.exitCode = 1;
        return;
      }
      const result = await pool.query(
        `SELECT id, name, menu_url, platform_dispensary_id
         FROM dispensaries
         WHERE id = $1`,
        [dispensaryId]
      );
      dispensaries = result.rows;
    } else {
      // Default: get first Jane store
      const result = await pool.query(
        `SELECT id, name, menu_url, platform_dispensary_id
         FROM dispensaries
         WHERE platform = 'jane' AND menu_url IS NOT NULL
         ORDER BY id LIMIT 1`
      );
      dispensaries = result.rows;
    }

    if (dispensaries.length === 0) {
      console.log('No Jane dispensaries found');
      return;
    }

    console.log(`Processing ${dispensaries.length} dispensary(ies)...\n`);

    let successCount = 0;
    let failCount = 0;

    // Sequential on purpose: one store is fetched and saved at a time
    for (const disp of dispensaries) {
      console.log(`\n${'─'.repeat(60)}`);
      console.log(`${disp.name} (ID: ${disp.id}, Jane ID: ${disp.platform_dispensary_id})`);
      console.log('─'.repeat(60));

      try {
        const result = await fetchProductsByStoreIdDirect(disp.platform_dispensary_id);
        if (result.products.length === 0) {
          console.log(' ✗ No products captured');
          failCount++;
          continue;
        }
        console.log(` ✓ Captured ${result.products.length} products`);

        // Build payload
        const rawPayload = {
          hits: result.products.map(p => p.raw),
          store: result.store?.raw || null,
          capturedAt: new Date().toISOString(),
          platform: 'jane',
          dispensaryId: disp.id,
          storeId: disp.platform_dispensary_id,
        };

        // Save payload
        const { id: payloadId, sizeBytes } = await saveRawPayload(
          pool,
          disp.id,
          rawPayload,
          null,
          result.products.length,
          'jane'
        );
        console.log(` ✓ Saved payload ${payloadId} (${Math.round(sizeBytes / 1024)}KB)`);

        // Update dispensary
        await pool.query(
          `UPDATE dispensaries
           SET stage = 'hydrating',
               last_fetch_at = NOW(),
               product_count = $2,
               consecutive_successes = consecutive_successes + 1,
               consecutive_failures = 0,
               updated_at = NOW()
           WHERE id = $1`,
          [disp.id, result.products.length]
        );
        console.log(` ✓ Updated dispensary (product_count: ${result.products.length})`);
        successCount++;
      } catch (error: any) {
        console.log(` ✗ Error: ${error.message}`);
        failCount++;
      }
    }

    console.log('\n' + '='.repeat(60));
    console.log('RESULTS');
    console.log('='.repeat(60));
    console.log(`Success: ${successCount}`);
    console.log(`Failed: ${failCount}`);
  } catch (error: any) {
    console.error('Error:', error.message);
    // Set the exit code instead of calling process.exit(): exit() terminates
    // immediately and would skip the finally block that closes the pool.
    process.exitCode = 1;
  } finally {
    await pool.end();
  }
}
main();

View File

@@ -1,137 +0,0 @@
/**
* Run Jane store discovery and insert into database
* Usage: npx ts-node scripts/run-jane-store-discovery.ts [STATE_CODE]
* Example: npx ts-node scripts/run-jane-store-discovery.ts AZ
*/
import { Pool } from 'pg';
import { discoverStoresByState } from '../src/platforms/jane';
/**
 * Builds a URL slug from a store name.
 *
 * The name is lower-cased, every character other than a-z, 0-9, whitespace
 * and hyphen is dropped, and the remaining words are joined with single
 * hyphens (no leading or trailing hyphen).
 *
 * @example generateSlug("Hana Meds - Phoenix (REC)") // "hana-meds-phoenix-rec"
 * @param name store display name
 * @returns the slug; empty when the name has no usable characters
 */
function generateSlug(name: string): string {
  const allowedOnly = name.toLowerCase().replace(/[^a-z0-9\s-]/g, '');
  // Any run of whitespace and/or hyphens acts as one word separator
  const words = allowedOnly.split(/[\s-]+/).filter((word) => word.length > 0);
  return words.join('-');
}
/**
 * Discovers Jane stores for one state and upserts them into `dispensaries`.
 *
 * The state code is the first CLI argument (default 'AZ'). Each store is
 * inserted, or updated when a row with the same platform_dispensary_id
 * already exists. A failure on one store is logged and does not stop the
 * run. Prints insert/update totals and the final Jane store count.
 */
async function main() {
  const stateCode = process.argv[2] || 'AZ';

  console.log('='.repeat(60));
  console.log(`Jane Store Discovery - ${stateCode}`);
  console.log('='.repeat(60));

  // Connect to database
  const pool = new Pool({
    connectionString: process.env.DATABASE_URL,
  });

  try {
    // Test connection
    const testResult = await pool.query('SELECT COUNT(*) FROM dispensaries WHERE platform = $1', ['jane']);
    console.log(`Current Jane stores in DB: ${testResult.rows[0].count}`);

    // Discover stores
    console.log(`\nDiscovering Jane stores in ${stateCode}...`);
    const stores = await discoverStoresByState(stateCode);

    if (stores.length === 0) {
      console.log(`No stores found in ${stateCode}`);
      return;
    }

    console.log(`\nFound ${stores.length} stores. Inserting into database...`);

    // Insert stores
    let inserted = 0;
    let updated = 0;
    const newIds: number[] = [];

    for (const store of stores) {
      const menuUrl = `https://www.iheartjane.com/stores/${store.storeId}/${store.urlSlug || 'menu'}`;
      const slug = generateSlug(store.name);

      try {
        // `xmax = 0` is used to tell a fresh insert from a conflict update
        const result = await pool.query(
          `INSERT INTO dispensaries (
             name, slug, address1, city, state, zipcode,
             latitude, longitude, menu_url, menu_type, platform,
             platform_dispensary_id, is_medical, is_recreational,
             stage, created_at, updated_at
           ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, NOW(), NOW())
           ON CONFLICT (platform_dispensary_id) WHERE platform_dispensary_id IS NOT NULL
           DO UPDATE SET
             name = EXCLUDED.name,
             slug = EXCLUDED.slug,
             address1 = EXCLUDED.address1,
             city = EXCLUDED.city,
             latitude = EXCLUDED.latitude,
             longitude = EXCLUDED.longitude,
             menu_url = EXCLUDED.menu_url,
             is_medical = EXCLUDED.is_medical,
             is_recreational = EXCLUDED.is_recreational,
             updated_at = NOW()
           RETURNING id, (xmax = 0) AS is_new`,
          [
            store.name,
            slug,
            store.address,
            store.city,
            stateCode,
            store.zip,
            store.lat,
            store.long,
            menuUrl,
            'embedded', // menu_type: how it's displayed
            'jane', // platform: who provides the menu
            store.storeId,
            store.medical,
            store.recreational,
            'discovered',
          ]
        );

        if (result.rows.length > 0) {
          const { id, is_new } = result.rows[0];
          if (is_new) {
            inserted++;
            newIds.push(id);
            console.log(` + Inserted: ${store.name} (DB ID: ${id}, Jane ID: ${store.storeId})`);
          } else {
            updated++;
            console.log(` ~ Updated: ${store.name} (DB ID: ${id})`);
          }
        }
      } catch (error: any) {
        console.error(` ! Error inserting ${store.name}: ${error.message}`);
      }
    }

    console.log('\n' + '='.repeat(60));
    console.log('RESULTS');
    console.log('='.repeat(60));
    console.log(`Stores discovered: ${stores.length}`);
    console.log(`New stores inserted: ${inserted}`);
    console.log(`Existing stores updated: ${updated}`);
    console.log(`New dispensary IDs: ${newIds.join(', ') || '(none)'}`);

    // Show final count
    const finalResult = await pool.query('SELECT COUNT(*) FROM dispensaries WHERE platform = $1', ['jane']);
    console.log(`\nTotal Jane stores in DB: ${finalResult.rows[0].count}`);
  } catch (error: any) {
    console.error('Error:', error.message);
    // Set the exit code instead of calling process.exit(): exit() terminates
    // immediately and would skip the finally block that closes the pool.
    process.exitCode = 1;
  } finally {
    await pool.end();
  }
}
main();

View File

@@ -1,179 +0,0 @@
import puppeteer from 'puppeteer';
/** Pauses the caller for `ms` milliseconds. */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
/**
 * Lists every brand on the bestdispensary brands page and counts products per brand.
 *
 * Steps: open the brands index, dismiss the age gate if present, scroll until
 * the number of distinct brand links stops changing (5 identical readings in a
 * row, or 50 scrolls at most), print the brand list, then visit each brand
 * page and count distinct product names.
 *
 * The browser is closed in a `finally` block so that a failed navigation or
 * evaluation does not leak the launched browser.
 */
async function main() {
  console.log('Loading ALL brands from https://shop.bestdispensary.com/brands');
  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });
  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });
    // Abort image/font/media requests; only the link markup is read below.
    await page.setRequestInterception(true);
    page.on('request', (req) => {
      if (['image', 'font', 'media'].includes(req.resourceType())) {
        req.abort();
      } else {
        req.continue();
      }
    });
    await page.goto('https://shop.bestdispensary.com/brands', {
      waitUntil: 'networkidle2',
      timeout: 60000,
    });
    await sleep(3000);

    // Bypass age gate
    const ageGate = await page.$('[data-testid="age-gate-modal"]');
    if (ageGate) {
      console.log('Age gate detected, bypassing...');
      const btn = await page.$('[data-testid="age-gate-submit-button"]');
      if (btn) await btn.click();
      await sleep(2000);
    }
    console.log('Current URL:', page.url());

    // Number of distinct brand hrefs currently present in the DOM.
    const countBrandLinks = () =>
      page.evaluate(() => {
        const seen = new Set<string>();
        document.querySelectorAll('a[href*="/brand/"]').forEach((a: Element) => {
          const href = a.getAttribute('href');
          if (href) seen.add(href);
        });
        return seen.size;
      });

    // Get initial brand count
    let brandCount = await countBrandLinks();
    console.log(`Initial brand count: ${brandCount}`);

    // Aggressive scrolling
    console.log('\nScrolling to load ALL brands...');
    let previousCount = 0;
    let sameCount = 0;
    for (let i = 0; i < 50; i++) {
      // Scroll to bottom
      await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
      await sleep(1000);
      brandCount = await countBrandLinks();
      if (brandCount === previousCount) {
        // Stop only after several unchanged readings in a row.
        sameCount++;
        if (sameCount >= 5) {
          console.log(` Scroll ${i + 1}: ${brandCount} brands (stopping - no change)`);
          break;
        }
      } else {
        sameCount = 0;
        console.log(` Scroll ${i + 1}: ${brandCount} brands`);
      }
      previousCount = brandCount;
    }

    // Get all unique brands
    const brands = await page.evaluate(() => {
      const results: { name: string; href: string }[] = [];
      const seen = new Set<string>();
      document.querySelectorAll('a[href*="/brand/"]').forEach((a: Element) => {
        const href = a.getAttribute('href') || '';
        // De-duplicate case-insensitively, but keep the original href for navigation.
        const normalizedHref = href.toLowerCase();
        if (seen.has(normalizedHref)) return;
        seen.add(normalizedHref);
        // Get brand name: heading text, then first line of link text, then the URL slug.
        let name = '';
        const heading = a.querySelector('h3, h4, h5, [class*="name"]');
        if (heading) {
          name = heading.textContent?.trim() || '';
        }
        if (!name) {
          name = a.textContent?.trim().split('\n')[0] || '';
        }
        if (!name) {
          name = href.split('/brand/')[1]?.replace(/-/g, ' ') || '';
        }
        results.push({ name: name.slice(0, 50), href });
      });
      return results.sort((a, b) => a.name.localeCompare(b.name));
    });
    console.log('\n' + '='.repeat(60));
    console.log('TOTAL BRANDS FOUND: ' + brands.length);
    console.log('='.repeat(60));
    brands.forEach((b, i) => {
      const num = (i + 1).toString().padStart(3, ' ');
      console.log(`${num}. ${b.name} (${b.href})`);
    });

    // Now visit each brand page and count products
    console.log('\n' + '='.repeat(60));
    console.log('PRODUCTS PER BRAND');
    console.log('='.repeat(60));
    const brandProducts: { brand: string; products: number }[] = [];
    for (let i = 0; i < brands.length; i++) {
      const brand = brands[i];
      try {
        const brandUrl = brand.href.startsWith('http')
          ? brand.href
          : `https://shop.bestdispensary.com${brand.href}`;
        await page.goto(brandUrl, { waitUntil: 'networkidle2', timeout: 30000 });
        await sleep(1500);
        // Scroll to load products
        for (let j = 0; j < 10; j++) {
          await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
          await sleep(800);
        }
        // Products are de-duplicated by image alt text, falling back to the link text.
        const productCount = await page.evaluate(() => {
          const seen = new Set<string>();
          document.querySelectorAll('a[href*="/product/"]').forEach((a: Element) => {
            const img = a.querySelector('img');
            const name = img?.getAttribute('alt') || a.textContent?.trim() || '';
            if (name) seen.add(name);
          });
          return seen.size;
        });
        brandProducts.push({ brand: brand.name, products: productCount });
        console.log(`${(i + 1).toString().padStart(3)}. ${brand.name}: ${productCount} products`);
      } catch (err: any) {
        // A failing brand page is recorded as 0 products so the run continues.
        console.log(`${(i + 1).toString().padStart(3)}. ${brand.name}: ERROR - ${err.message?.slice(0, 30)}`);
        brandProducts.push({ brand: brand.name, products: 0 });
      }
    }

    // Summary
    const totalProducts = brandProducts.reduce((sum, b) => sum + b.products, 0);
    console.log('\n' + '='.repeat(60));
    console.log('SUMMARY');
    console.log('='.repeat(60));
    console.log(`Total brands: ${brands.length}`);
    console.log(`Total products: ${totalProducts}`);
  } finally {
    await browser.close();
  }
}
main().catch(console.error);

View File

@@ -1,92 +0,0 @@
import puppeteer from 'puppeteer';
/** Returns a promise that settles once `ms` milliseconds have elapsed. */
async function sleep(ms: number): Promise<void> {
  const timer = new Promise<void>((wake) => setTimeout(wake, ms));
  return timer;
}
/**
 * Opens the bestdispensary brands page, scrolls until the document height
 * stops growing (20 scrolls at most), prints every distinct brand link and
 * saves a full-page screenshot to /tmp/bestdispensary-brands.png.
 *
 * The browser is closed in a `finally` block so that a failed navigation or
 * evaluation does not leak the launched browser.
 */
async function main() {
  console.log('Navigating to https://shop.bestdispensary.com/brands');
  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });
  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });
    // Abort image/font/media requests before navigating.
    await page.setRequestInterception(true);
    page.on('request', (req) => {
      if (['image', 'font', 'media'].includes(req.resourceType())) {
        req.abort();
      } else {
        req.continue();
      }
    });

    // Go directly to the brands page
    await page.goto('https://shop.bestdispensary.com/brands', {
      waitUntil: 'networkidle2',
      timeout: 60000,
    });
    await sleep(3000);

    // Bypass age gate if present
    const ageGate = await page.$('[data-testid="age-gate-modal"]');
    if (ageGate) {
      console.log('Age gate detected, bypassing...');
      const btn = await page.$('[data-testid="age-gate-submit-button"]');
      if (btn) await btn.click();
      await sleep(2000);
    }
    console.log('Current URL:', page.url());

    // Scroll to load all content
    console.log('\nScrolling to load all brands...');
    let previousHeight = 0;
    for (let i = 0; i < 20; i++) {
      await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
      await sleep(1500);
      const currentHeight = await page.evaluate(() => document.body.scrollHeight);
      // An unchanged document height is treated as "nothing more to load".
      if (currentHeight === previousHeight) {
        console.log(` Scroll ${i + 1}: No new content`);
        break;
      }
      previousHeight = currentHeight;
      const brandCount = await page.evaluate(
        () => document.querySelectorAll('a[href*="/brand/"]').length
      );
      console.log(` Scroll ${i + 1}: height=${currentHeight}, brand links=${brandCount}`);
    }

    // Get all brand links (first occurrence of each href wins)
    const brands = await page.evaluate(() => {
      const results: { name: string; href: string }[] = [];
      const seen = new Set<string>();
      document.querySelectorAll('a[href*="/brand/"]').forEach((a: Element) => {
        const href = a.getAttribute('href') || '';
        if (seen.has(href)) return;
        seen.add(href);
        const name = a.textContent?.trim() || href.split('/brand/')[1] || '';
        results.push({ name, href });
      });
      return results;
    });
    console.log(`\nFound ${brands.length} brands:`);
    brands.forEach((b) => console.log(` - ${b.name} (${b.href})`));

    // Take screenshot
    await page.screenshot({ path: '/tmp/bestdispensary-brands.png', fullPage: true });
    console.log('\nScreenshot saved to /tmp/bestdispensary-brands.png');
  } finally {
    await browser.close();
  }
}
main().catch(console.error);

View File

@@ -1,108 +0,0 @@
import puppeteer from 'puppeteer';
/** Delay helper: the returned promise resolves after `ms` milliseconds. */
async function sleep(ms: number): Promise<void> {
  return new Promise<void>((finish) => {
    setTimeout(finish, ms);
  });
}
/**
 * Probes the "Load More" button on the bestdispensary brands page: prints its
 * state, then scrolls it into view and clicks it up to 10 times, logging the
 * brand-link count after each click, and finally prints all brand names.
 *
 * The browser is closed in a `finally` block so that a failed navigation,
 * click or evaluation does not leak the launched browser.
 */
async function main() {
  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });
  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });
    // Abort image/font/media requests before navigating.
    await page.setRequestInterception(true);
    page.on('request', (req) => {
      if (['image', 'font', 'media'].includes(req.resourceType())) {
        req.abort();
      } else {
        req.continue();
      }
    });
    await page.goto('https://shop.bestdispensary.com/brands', {
      waitUntil: 'networkidle2',
      timeout: 60000,
    });
    await sleep(3000);

    // Bypass age gate
    const ageGate = await page.$('[data-testid="age-gate-modal"]');
    if (ageGate) {
      const btn = await page.$('[data-testid="age-gate-submit-button"]');
      if (btn) await btn.click();
      await sleep(2000);
    }

    // Check Load More button
    const btnInfo = await page.evaluate(() => {
      const btn = document.querySelector('button.collection__load-more');
      if (!btn) return { found: false };
      const rect = btn.getBoundingClientRect();
      return {
        found: true,
        text: btn.textContent?.trim(),
        visible: rect.width > 0 && rect.height > 0,
        top: rect.top,
        disabled: (btn as HTMLButtonElement).disabled,
        class: btn.className,
      };
    });
    console.log('Load More button:', btnInfo);

    // Scroll to button and click
    console.log('\nScrolling to button and clicking...');
    for (let i = 0; i < 10; i++) {
      const btn = await page.$('button.collection__load-more');
      if (!btn) {
        console.log('Button not found');
        break;
      }
      // Scroll button into view
      await page.evaluate((b) => b.scrollIntoView({ behavior: 'smooth', block: 'center' }), btn);
      await sleep(500);
      // Check if button is still there and clickable (re-queried after the scroll delay)
      const stillThere = await page.evaluate(() => {
        const b = document.querySelector('button.collection__load-more');
        return b ? b.textContent?.trim() : null;
      });
      if (!stillThere) {
        console.log('Button disappeared - all loaded');
        break;
      }
      // Click it
      await btn.click();
      console.log(`Click ${i + 1}...`);
      await sleep(2000);
      const count = await page.evaluate(
        () => document.querySelectorAll('.brands-page__list a[href*="/brand/"]').length
      );
      console.log(` Brands: ${count}`);
    }

    // Final count
    const brands = await page.evaluate(() => {
      const list: string[] = [];
      document.querySelectorAll('.brands-page__list a[href*="/brand/"]').forEach((a: Element) => {
        list.push(a.textContent?.trim() || '');
      });
      return list;
    });
    console.log(`\nTotal brands: ${brands.length}`);
    console.log(brands.join(', '));
  } finally {
    await browser.close();
  }
}
main().catch(console.error);

View File

@@ -1,157 +0,0 @@
import puppeteer from 'puppeteer';
/** Waits `ms` milliseconds before resolving. */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((release) => setTimeout(release, ms));
}
/**
 * Loads every brand on the bestdispensary brands page by clicking "Load More"
 * until the button is gone or not visible, then visits each brand page, clicks
 * its "Load More" button up to 10 times, and counts distinct product hrefs.
 * Prints per-brand counts, a summary, and the top 20 brands by product count.
 *
 * The browser is closed in a `finally` block so that a failed navigation,
 * click or evaluation does not leak the launched browser.
 */
async function main() {
  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });
  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });
    // Abort image/font/media requests before navigating.
    await page.setRequestInterception(true);
    page.on('request', (req) => {
      if (['image', 'font', 'media'].includes(req.resourceType())) {
        req.abort();
      } else {
        req.continue();
      }
    });
    await page.goto('https://shop.bestdispensary.com/brands', {
      waitUntil: 'networkidle2',
      timeout: 60000,
    });
    await sleep(3000);

    // Bypass age gate
    const ageGate = await page.$('[data-testid="age-gate-modal"]');
    if (ageGate) {
      console.log('Bypassing age gate...');
      const btn = await page.$('[data-testid="age-gate-submit-button"]');
      if (btn) await btn.click();
      await sleep(2000);
    }

    // Click "LOAD MORE" until all brands are loaded
    console.log('Loading all brands...\n');
    let loadMoreClicks = 0;
    while (true) {
      const loadMoreBtn = await page.$('button.collection__load-more');
      if (!loadMoreBtn) {
        console.log('No more "Load More" button - all brands loaded!');
        break;
      }
      // A zero-sized bounding box is treated as "button hidden".
      const isVisible = await page.evaluate((btn) => {
        const rect = btn.getBoundingClientRect();
        return rect.width > 0 && rect.height > 0;
      }, loadMoreBtn);
      if (!isVisible) {
        console.log('Load More button not visible - all brands loaded!');
        break;
      }
      await loadMoreBtn.click();
      loadMoreClicks++;
      await sleep(1500);
      const brandCount = await page.evaluate(
        () => document.querySelectorAll('.brands-page__list a[href*="/brand/"]').length
      );
      console.log(` Click ${loadMoreClicks}: ${brandCount} brands loaded`);
      if (loadMoreClicks > 20) break; // Safety limit
    }

    // Get all brands
    const brands = await page.evaluate(() => {
      const results: { name: string; href: string }[] = [];
      document.querySelectorAll('.brands-page__list a[href*="/brand/"]').forEach((a: Element) => {
        const href = a.getAttribute('href') || '';
        const name = a.textContent?.trim() || '';
        if (name && href) {
          results.push({ name, href });
        }
      });
      return results;
    });
    console.log('\n' + '='.repeat(60));
    console.log(`TOTAL BRANDS: ${brands.length}`);
    console.log('='.repeat(60));

    // Visit each brand and count products
    console.log('\nCounting products per brand...\n');
    const results: { brand: string; products: number }[] = [];
    for (let i = 0; i < brands.length; i++) {
      const brand = brands[i];
      const brandUrl = `https://shop.bestdispensary.com${brand.href}`;
      try {
        await page.goto(brandUrl, { waitUntil: 'networkidle2', timeout: 30000 });
        await sleep(1000);
        // Click load more on brand page too
        for (let j = 0; j < 10; j++) {
          const loadMore = await page.$('button.collection__load-more');
          if (!loadMore) break;
          const isVisible = await page.evaluate((btn) => {
            const rect = btn.getBoundingClientRect();
            return rect.width > 0 && rect.height > 0;
          }, loadMore);
          if (!isVisible) break;
          await loadMore.click();
          await sleep(1000);
        }
        const productCount = await page.evaluate(() => {
          const seen = new Set<string>();
          document.querySelectorAll('a[href*="/product/"]').forEach((a: Element) => {
            const href = a.getAttribute('href');
            if (href) seen.add(href);
          });
          return seen.size;
        });
        results.push({ brand: brand.name, products: productCount });
        console.log(`${(i + 1).toString().padStart(3)}. ${brand.name}: ${productCount} products`);
      } catch (err: any) {
        // A failing brand page is recorded as 0 products so the run continues.
        console.log(`${(i + 1).toString().padStart(3)}. ${brand.name}: ERROR`);
        results.push({ brand: brand.name, products: 0 });
      }
    }

    // Summary
    const totalProducts = results.reduce((sum, r) => sum + r.products, 0);
    const brandsWithProducts = results.filter((r) => r.products > 0).length;
    console.log('\n' + '='.repeat(60));
    console.log('SUMMARY');
    console.log('='.repeat(60));
    console.log(`Total brands: ${brands.length}`);
    console.log(`Brands with products: ${brandsWithProducts}`);
    console.log(`Total products: ${totalProducts}`);

    // Top brands by product count (sort a copy so `results` keeps visit order)
    console.log('\nTop 20 brands by product count:');
    [...results]
      .sort((a, b) => b.products - a.products)
      .slice(0, 20)
      .forEach((r, i) => console.log(` ${i + 1}. ${r.brand}: ${r.products}`));
  } finally {
    await browser.close();
  }
}
main().catch(console.error);

View File

@@ -1,108 +0,0 @@
import puppeteer from 'puppeteer';
/** Promise-based timeout: resolves with no value after `ms` milliseconds. */
async function sleep(ms: number): Promise<void> {
  const elapsed = new Promise<void>((onElapsed) => {
    setTimeout(onElapsed, ms);
  });
  return elapsed;
}
/**
 * Loads all brands on the bestdispensary brands page by clicking "Load More"
 * from page-side JavaScript (up to 15 times), then visits each brand page,
 * clicks its "Load More" button up to 20 times, and counts distinct product
 * hrefs. Prints one line per brand and a final total.
 *
 * The browser is closed in a `finally` block so that a failed navigation or
 * evaluation does not leak the launched browser.
 */
async function main() {
  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });
  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });
    await page.goto('https://shop.bestdispensary.com/brands', {
      waitUntil: 'networkidle2',
      timeout: 60000,
    });
    await sleep(3000);

    // Bypass age gate
    const ageGate = await page.$('[data-testid="age-gate-modal"]');
    if (ageGate) {
      const btn = await page.$('[data-testid="age-gate-submit-button"]');
      if (btn) await btn.click();
      await sleep(2000);
    }

    // Clicks the "Load More" button in the page; resolves to false when no button exists.
    const clickLoadMore = () =>
      page.evaluate(() => {
        const btn = document.querySelector('button.collection__load-more') as HTMLButtonElement;
        if (btn) {
          btn.click();
          return true;
        }
        return false;
      });

    // Try clicking Load More multiple times with JS
    console.log('Loading all brands...');
    for (let i = 0; i < 15; i++) {
      const clicked = await clickLoadMore();
      if (!clicked) break;
      await sleep(2000);
    }

    // Get all brands
    const brands = await page.evaluate(() => {
      const list: { name: string; href: string }[] = [];
      document.querySelectorAll('.brands-page__list a[href*="/brand/"]').forEach((a: Element) => {
        list.push({
          name: a.textContent?.trim() || '',
          href: a.getAttribute('href') || '',
        });
      });
      return list;
    });
    console.log('Total brands found: ' + brands.length + '\n');
    console.log('PRODUCTS PER BRAND');
    console.log('==================\n');
    const results: { brand: string; products: number }[] = [];
    for (let i = 0; i < brands.length; i++) {
      const brand = brands[i];
      const url = 'https://shop.bestdispensary.com' + brand.href;
      try {
        await page.goto(url, { waitUntil: 'networkidle2', timeout: 30000 });
        await sleep(1000);
        // Click load more on brand page
        for (let j = 0; j < 20; j++) {
          const clicked = await clickLoadMore();
          if (!clicked) break;
          await sleep(1000);
        }
        const productCount = await page.evaluate(() => {
          const seen = new Set<string>();
          document.querySelectorAll('a[href*="/product/"]').forEach((a: Element) => {
            const href = a.getAttribute('href');
            if (href) seen.add(href);
          });
          return seen.size;
        });
        results.push({ brand: brand.name, products: productCount });
        const num = (i + 1).toString().padStart(2, ' ');
        console.log(num + '. ' + brand.name + ': ' + productCount);
      } catch (err) {
        // A failing brand page is recorded as 0 products so the run continues.
        results.push({ brand: brand.name, products: 0 });
        const num = (i + 1).toString().padStart(2, ' ');
        console.log(num + '. ' + brand.name + ': ERROR');
      }
    }

    // Summary
    const total = results.reduce((s, r) => s + r.products, 0);
    console.log('\n==================');
    console.log('TOTAL: ' + brands.length + ' brands, ' + total + ' products');
    console.log('==================');
  } finally {
    await browser.close();
  }
}
main().catch(console.error);

View File

@@ -1,130 +0,0 @@
import puppeteer from 'puppeteer';
/** Suspends an awaiting caller for `ms` milliseconds. */
async function sleep(ms: number): Promise<void> {
  return new Promise<void>((proceed) => setTimeout(proceed, ms));
}
/**
 * Explores the DOM structure of the bestdispensary brands page: dumps the
 * `<section>` elements under `<main>`, lists the links in the first
 * `main > section`, and reports grid-like containers holding more than five
 * brand links.
 *
 * The browser is closed in a `finally` block so that a failed navigation or
 * evaluation does not leak the launched browser.
 */
async function main() {
  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });
  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });
    // Abort image/font/media requests before navigating.
    await page.setRequestInterception(true);
    page.on('request', (req) => {
      if (['image', 'font', 'media'].includes(req.resourceType())) {
        req.abort();
      } else {
        req.continue();
      }
    });
    await page.goto('https://shop.bestdispensary.com/brands', {
      waitUntil: 'networkidle2',
      timeout: 60000,
    });
    await sleep(3000);

    // Bypass age gate
    const ageGate = await page.$('[data-testid="age-gate-modal"]');
    if (ageGate) {
      const btn = await page.$('[data-testid="age-gate-submit-button"]');
      if (btn) await btn.click();
      await sleep(2000);
    }

    // Use the selector hint: /html/body/main/section
    console.log('Looking at main > section structure...\n');
    const sectionInfo = await page.evaluate(() => {
      const main = document.querySelector('main');
      if (!main) return { error: 'No main element' };
      const sections = main.querySelectorAll('section');
      const results: any[] = [];
      sections.forEach((section, i) => {
        const children = section.children;
        const childInfo: string[] = [];
        // Sample at most the first 10 children of each section.
        for (let j = 0; j < Math.min(children.length, 10); j++) {
          const child = children[j];
          childInfo.push(child.tagName + '.' + (child.className?.slice(0, 30) || ''));
        }
        results.push({
          index: i,
          class: section.className?.slice(0, 50),
          childCount: children.length,
          sampleChildren: childInfo,
        });
      });
      return results;
    });
    console.log('Sections in main:');
    console.log(JSON.stringify(sectionInfo, null, 2));

    // Look for brand cards within the section
    console.log('\nLooking for brand cards in main > section...');
    const brandCards = await page.evaluate(() => {
      const section = document.querySelector('main > section');
      if (!section) return [];
      // Get all child elements that might be brand cards
      const cards: { tag: string; text: string; href: string }[] = [];
      section.querySelectorAll('a').forEach((a: Element) => {
        const href = a.getAttribute('href') || '';
        const text = a.textContent?.trim().slice(0, 50) || '';
        cards.push({ tag: 'a', text, href });
      });
      return cards;
    });
    console.log(`Found ${brandCards.length} links in section:`);
    brandCards.slice(0, 30).forEach((c) => console.log(` ${c.text} -> ${c.href}`));

    // Get the grid of brand cards
    console.log('\nLooking for grid container...');
    const gridCards = await page.evaluate(() => {
      // Look for grid-like containers
      const grids = document.querySelectorAll('[class*="grid"], [class*="Grid"], main section > div');
      const results: any[] = [];
      grids.forEach((grid, i) => {
        const links = grid.querySelectorAll('a[href*="/brand/"]');
        // Only report containers that hold more than five brand links.
        if (links.length > 5) {
          const brands: string[] = [];
          links.forEach((a: Element) => {
            const text = a.textContent?.trim().split('\n')[0] || '';
            if (text && !brands.includes(text)) brands.push(text);
          });
          results.push({
            class: grid.className?.slice(0, 40),
            brandCount: brands.length,
            brands: brands.slice(0, 50),
          });
        }
      });
      return results;
    });
    console.log('Grid containers with brands:');
    gridCards.forEach((g) => {
      console.log(`\n[${g.brandCount} brands] class="${g.class}"`);
      g.brands.forEach((b: string, i: number) => console.log(` ${i + 1}. ${b}`));
    });
  } finally {
    await browser.close();
  }
}
main().catch(console.error);

View File

@@ -1,188 +0,0 @@
/**
* One-off script to test iHeartJane scraping
* Mimics remote worker: Puppeteer + stealth (the worker's proxy is omitted for local testing)
*
* Usage: npx ts-node scripts/test-iheartjane.ts
*/
import puppeteer from 'puppeteer-extra';
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
// Register the stealth plugin on the puppeteer-extra instance before any browser is launched.
puppeteer.use(StealthPlugin());
// Menu page that main() navigates to.
const TARGET_URL = 'https://theflowershopusa.com/mesa/menu/';
// Store id interpolated into the api.iheartjane.com/v1/stores/<id> request made by main().
const STORE_ID = 2788;
/**
 * One-off probe of an iHeartJane-backed menu page.
 *
 * Loads TARGET_URL, records every JSON response whose URL mentions
 * iheartjane.com or algolia, fetches the store record for STORE_ID from the
 * page context, samples product data from the DOM and inline scripts, prints
 * a summary, and writes the captured responses and a screenshot to /tmp.
 *
 * Page setup now runs inside the try/finally so the browser is closed even
 * when setup fails, and the in-page product samples (previously extracted
 * and then discarded) are included in the printed results.
 */
async function main() {
  console.log('[iHeartJane Test] Starting...');
  // No proxy for local testing
  const browser = await puppeteer.launch({
    headless: true,
    args: [
      '--no-sandbox',
      '--disable-setuid-sandbox',
      '--disable-dev-shm-usage',
      '--disable-blink-features=AutomationControlled',
    ],
  });
  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // Intercept network requests to capture API calls
    const apiResponses: any[] = [];
    await page.setRequestInterception(true);
    page.on('request', (req) => {
      // Block heavy resources
      const type = req.resourceType();
      if (['image', 'font', 'media', 'stylesheet'].includes(type)) {
        req.abort();
      } else {
        req.continue();
      }
    });
    page.on('response', async (response) => {
      const url = response.url();
      const contentType = response.headers()['content-type'] || '';
      // Capture any JSON response from iheartjane domains
      if ((url.includes('iheartjane.com') || url.includes('algolia')) && contentType.includes('json')) {
        try {
          const json = await response.json();
          // Label by the first matching URL substring; order matters.
          const type = url.includes('store') ? 'STORE' :
            url.includes('product') ? 'PRODUCT' :
            url.includes('algolia') ? 'ALGOLIA' : 'API';
          apiResponses.push({ type, url, data: json });
          console.log(`[${type}] ${url.substring(0, 120)}...`);
        } catch {
          // Not JSON
        }
      }
    });

    console.log(`[iHeartJane Test] Navigating to ${TARGET_URL}`);
    try {
      await page.goto(TARGET_URL, {
        waitUntil: 'networkidle2',
        timeout: 60000,
      });
      console.log('[iHeartJane Test] Menu page loaded, waiting for data...');
      // Wait a bit for all API calls to complete
      await new Promise((r) => setTimeout(r, 3000));

      // Also try to get store info by visiting the store page
      console.log('[iHeartJane Test] Fetching store info...');
      const storeInfoUrl = `https://api.iheartjane.com/v1/stores/${STORE_ID}`;
      // Try to fetch store info via page.evaluate (uses browser context)
      const storeInfo = await page.evaluate(async (url: string) => {
        try {
          const resp = await fetch(url);
          if (resp.ok) return await resp.json();
          return { error: resp.status };
        } catch (e: any) {
          return { error: e.message };
        }
      }, storeInfoUrl);
      if (storeInfo && !storeInfo.error) {
        apiResponses.push({ type: 'STORE_DIRECT', url: storeInfoUrl, data: storeInfo });
        console.log('[STORE_DIRECT] Got store info via fetch');
      } else {
        console.log(`[STORE_DIRECT] Failed: ${JSON.stringify(storeInfo)}`);
      }
      console.log('[iHeartJane Test] Processing results...');

      // Wait for products to load; a timeout is logged and the run continues.
      await page.waitForSelector('[data-testid="product-card"], .product-card, [class*="ProductCard"]', {
        timeout: 30000,
      }).catch(() => console.log('[iHeartJane Test] No product cards found via selector'));

      // Try to extract product data from the page
      const products = await page.evaluate(() => {
        // Look for product data in various places
        const results: any[] = [];
        // Method 1: Look for __INITIAL_STATE__ or similar
        const scripts = Array.from(document.querySelectorAll('script'));
        for (const script of scripts) {
          const text = script.textContent || '';
          if (text.includes('products') && text.includes('price')) {
            try {
              // Try to find JSON object
              const match = text.match(/\{[\s\S]*"products"[\s\S]*\}/);
              if (match) {
                results.push({ source: 'script', data: match[0].substring(0, 500) });
              }
            } catch {}
          }
        }
        // Method 2: Look for product elements in DOM (first 5 only)
        const productElements = document.querySelectorAll('[data-testid="product-card"], .product-card, [class*="product"]');
        for (const el of Array.from(productElements).slice(0, 5)) {
          const name = el.querySelector('[class*="name"], h3, h4')?.textContent;
          const price = el.querySelector('[class*="price"]')?.textContent;
          if (name) {
            results.push({ source: 'dom', name, price });
          }
        }
        return results;
      });

      console.log('\n[iHeartJane Test] === RESULTS ===');
      console.log(`Total API responses captured: ${apiResponses.length}`);
      // Report the in-page samples so the extraction above is not wasted work.
      console.log(`In-page product samples: ${products.length}`);
      for (const sample of products) {
        console.log(`  ${JSON.stringify(sample)}`);
      }

      // Group by type
      const byType: Record<string, any[]> = {};
      for (const r of apiResponses) {
        byType[r.type] = byType[r.type] || [];
        byType[r.type].push(r);
      }
      for (const [type, items] of Object.entries(byType)) {
        console.log(`\n--- ${type} (${items.length} responses) ---`);
        for (const item of items) {
          console.log(`URL: ${item.url}`);
          // Show structure
          if (item.data.hits) {
            console.log(` Products: ${item.data.hits.length} hits`);
            if (item.data.hits[0]) {
              console.log(` Fields: ${Object.keys(item.data.hits[0]).join(', ')}`);
            }
          } else if (item.data.store) {
            console.log(` Store: ${JSON.stringify(item.data.store, null, 2).substring(0, 1000)}`);
          } else {
            console.log(` Keys: ${Object.keys(item.data).join(', ')}`);
          }
        }
      }

      // Write full data to file
      const fs = await import('fs');
      fs.writeFileSync('/tmp/iheartjane-data.json', JSON.stringify(apiResponses, null, 2));
      console.log('\n[iHeartJane Test] Full data saved to /tmp/iheartjane-data.json');

      // Take screenshot
      await page.screenshot({ path: '/tmp/iheartjane-test.png', fullPage: false });
      console.log('[iHeartJane Test] Screenshot saved to /tmp/iheartjane-test.png');
    } catch (error: any) {
      console.error('[iHeartJane Test] Error:', error.message);
      // Keep a screenshot of the page state at the point of failure.
      await page.screenshot({ path: '/tmp/iheartjane-error.png' });
    }
  } finally {
    await browser.close();
  }
  console.log('[iHeartJane Test] Done');
}
main().catch(console.error);

View File

@@ -1,224 +0,0 @@
/**
* Explore Jane API to understand data structure
* Usage: npx ts-node scripts/test-jane-api-explore.ts
*/
import puppeteer from 'puppeteer-extra';
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
// Register the stealth plugin on the puppeteer-extra instance before any browser is launched.
puppeteer.use(StealthPlugin());
/**
 * Explores Jane (iheartjane.com) store data from a browser context.
 *
 * Loads the public stores page while recording JSON responses whose URL
 * mentions /stores, /search or algolia, looks for embedded page data
 * (`__NEXT_DATA__`, `window.stores`, `window.__stores`), then probes several
 * candidate API endpoints via in-page `fetch` and prints the shape of
 * whatever comes back.
 *
 * The browser is closed in a `finally` block so that a failed navigation or
 * evaluation does not leak the launched browser.
 */
async function main() {
  console.log('Exploring Jane API from browser context...\n');
  const browser = await puppeteer.launch({
    headless: 'new',
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });
  try {
    const page = await browser.newPage();

    // Intercept network requests to find store data API calls
    const capturedResponses: Array<{ url: string; data: any }> = [];
    await page.setRequestInterception(true);
    page.on('request', (req) => req.continue());
    page.on('response', async (response) => {
      const url = response.url();
      if (url.includes('iheartjane.com') &&
        (url.includes('/stores') || url.includes('/search') || url.includes('algolia'))) {
        try {
          const text = await response.text();
          // Only bodies that look like a JSON object or array are parsed.
          if (text.startsWith('{') || text.startsWith('[')) {
            const data = JSON.parse(text);
            capturedResponses.push({ url, data });
            console.log(`Captured: ${url.substring(0, 100)}...`);
          }
        } catch {
          // Not JSON
        }
      }
    });

    // GETs `endpoint` from the page context; resolves to the parsed JSON body,
    // or null on a non-OK status or any fetch/parse failure (best-effort probe).
    const fetchJson = (endpoint: string) =>
      page.evaluate(async (url: string) => {
        try {
          const res = await fetch(url);
          if (res.ok) return await res.json();
        } catch {}
        return null;
      }, endpoint);

    // Visit Jane to establish session
    console.log('Visiting Jane stores page to capture network requests...');
    await page.goto('https://www.iheartjane.com/stores', {
      waitUntil: 'networkidle2',
      timeout: 60000,
    });
    console.log(`\nCaptured ${capturedResponses.length} API responses`);
    for (const resp of capturedResponses) {
      console.log(`\n--- ${resp.url.substring(0, 80)} ---`);
      const keys = Object.keys(resp.data);
      console.log('Keys:', keys);
      // Check for stores array
      if (resp.data.stores && Array.isArray(resp.data.stores)) {
        console.log(`Stores count: ${resp.data.stores.length}`);
        const firstStore = resp.data.stores[0];
        if (firstStore) {
          console.log('First store keys:', Object.keys(firstStore));
          console.log('Sample:', JSON.stringify(firstStore, null, 2).substring(0, 500));
        }
      }
      // Check for hits (Algolia)
      if (resp.data.hits && Array.isArray(resp.data.hits)) {
        console.log(`Hits count: ${resp.data.hits.length}`);
        const firstHit = resp.data.hits[0];
        if (firstHit) {
          console.log('First hit keys:', Object.keys(firstHit));
        }
      }
    }

    // Look for __NEXT_DATA__ or similar embedded data
    console.log('\n--- Checking for embedded page data ---');
    const pageData = await page.evaluate(() => {
      // Check for Next.js data
      const nextData = (window as any).__NEXT_DATA__;
      if (nextData?.props?.pageProps?.stores) {
        return {
          source: '__NEXT_DATA__',
          storeCount: nextData.props.pageProps.stores.length,
          firstStore: nextData.props.pageProps.stores[0],
        };
      }
      // Check for any global store data
      const win = window as any;
      if (win.stores) return { source: 'window.stores', data: win.stores };
      if (win.__stores) return { source: 'window.__stores', data: win.__stores };
      return null;
    });
    if (pageData) {
      console.log('Found embedded data:', pageData.source);
      console.log('Store count:', pageData.storeCount);
      if (pageData.firstStore) {
        console.log('First store keys:', Object.keys(pageData.firstStore));
        console.log('Sample:', JSON.stringify({
          id: pageData.firstStore.id,
          name: pageData.firstStore.name,
          city: pageData.firstStore.city,
          state: pageData.firstStore.state,
        }, null, 2));
      }
    } else {
      console.log('No embedded page data found');
    }

    // Try alternative API endpoints from browser context
    console.log('\n--- Testing alternative API endpoints ---');

    // Try the map endpoint
    const mapData = await fetchJson('https://api.iheartjane.com/v1/stores/map?per_page=100');
    if (mapData) {
      console.log('\n/v1/stores/map response:');
      console.log('Keys:', Object.keys(mapData));
      if (mapData.stores?.[0]) {
        console.log('First store keys:', Object.keys(mapData.stores[0]));
      }
    }

    // Try index endpoint
    const indexData = await fetchJson('https://api.iheartjane.com/v1/stores/index?per_page=10');
    if (indexData) {
      console.log('\n/v1/stores/index response:');
      console.log('Keys:', Object.keys(indexData));
      if (indexData.stores?.[0]) {
        console.log('First store keys:', Object.keys(indexData.stores[0]));
      }
    }

    // Try with state parameter
    const stateData = await fetchJson('https://api.iheartjane.com/v1/stores?state=AZ&per_page=10');
    if (stateData) {
      console.log('\n/v1/stores?state=AZ response:');
      console.log('Keys:', Object.keys(stateData));
      console.log('Stores count:', stateData.stores?.length);
      if (stateData.stores?.[0]) {
        console.log('First store keys:', Object.keys(stateData.stores[0]));
        console.log('Sample:', JSON.stringify(stateData.stores[0], null, 2).substring(0, 300));
      }
    }

    // Try Algolia directly for stores
    console.log('\n--- Testing Algolia for stores ---');
    const algoliaStores = await page.evaluate(async () => {
      try {
        // Common Algolia search pattern
        // NOTE(review): application id and API key are hard-coded here; confirm this is a
        // public search-only key before keeping it in version control.
        const res = await fetch('https://search.iheartjane.com/1/indexes/stores-production/query', {
          method: 'POST',
          headers: {
            'Content-Type': 'application/json',
            'X-Algolia-Application-Id': 'HKXSXRD7RA',
            'X-Algolia-API-Key': 'YjZhYjQxZjU4ZTNjMTRhYzExZTk2YjU2MzliMGE4ZTE5YjJkMmZkZTI2ODllYTY2MThlMzQ3Y2QxOTFkMjI5Y3RhZ0ZpbHRlcnM9',
          },
          body: JSON.stringify({
            query: 'Arizona',
            hitsPerPage: 20,
          }),
        });
        if (res.ok) return await res.json();
      } catch {}
      return null;
    });
    if (algoliaStores) {
      console.log('Algolia stores-production response:');
      console.log('Keys:', Object.keys(algoliaStores));
      console.log('Hits count:', algoliaStores.hits?.length);
      if (algoliaStores.hits?.[0]) {
        console.log('First hit keys:', Object.keys(algoliaStores.hits[0]));
        console.log('Sample:', JSON.stringify(algoliaStores.hits[0], null, 2).substring(0, 500));
      }
    }

    // Check if there's a /v2 endpoint
    const v2Data = await fetchJson('https://api.iheartjane.com/v2/stores?per_page=10');
    if (v2Data) {
      console.log('\n/v2/stores response:');
      console.log('Keys:', Object.keys(v2Data));
      if (v2Data.stores?.[0]) {
        console.log('First store keys:', Object.keys(v2Data.stores[0]));
      }
    }
  } finally {
    await browser.close();
  }
  console.log('\nDone!');
}
main().catch(console.error);

View File

@@ -1,126 +0,0 @@
/**
* Test script for Jane platform client
* Tests the new Jane integration with The Flower Shop Mesa
*
* Usage: npx ts-node scripts/test-jane-client.ts
*/
import {
startSession,
endSession,
fetchProductsFromUrl,
resolveStoreFromUrl,
} from '../src/platforms/jane';
import { JaneNormalizer } from '../src/hydration/normalizers/jane';
/** Live menu page that this manual test is run against. */
const TEST_URL = 'https://theflowershopusa.com/mesa/menu/';

/**
 * Manual end-to-end check of the Jane platform client.
 *
 * Stage 1 fetches the menu at TEST_URL via `fetchProductsFromUrl` and prints the
 * captured store plus the first five products. Stage 2 runs only when at least one
 * product was captured: it wraps the raw hits in a synthetic payload record and feeds
 * it to `JaneNormalizer`, printing the resulting counts and the first normalized
 * product.
 *
 * Any thrown error is reported under a "TEST FAILED" banner and the process exits
 * with code 1.
 */
async function main() {
  const rule = '='.repeat(60);

  console.log(rule);
  console.log('Jane Platform Client Test');
  console.log(rule);
  console.log(`Test URL: ${TEST_URL}`);
  console.log('');

  try {
    // Test 1: Fetch products from URL
    console.log('[Test 1] Fetching products from menu URL...');
    const result = await fetchProductsFromUrl(TEST_URL);

    console.log('');
    console.log('[Results]');
    const store = result.store;
    console.log(` Store: ${store?.name || 'Not captured'}`);
    console.log(` Store ID: ${store?.id || 'N/A'}`);
    console.log(` Products captured: ${result.products.length}`);
    console.log(` API responses: ${result.responses.length}`);

    if (store) {
      console.log('');
      console.log('[Store Info]');
      console.log(` Address: ${store.address}, ${store.city}, ${store.state} ${store.zip}`);
      console.log(` Phone: ${store.phone}`);
      console.log(` Coordinates: ${store.lat}, ${store.long}`);
      console.log(` Medical: ${store.medical}, Recreational: ${store.recreational}`);
      console.log(` Rating: ${store.rating} (${store.reviews_count} reviews)`);
      console.log(` Product count (store): ${store.product_count}`);
    }

    if (result.products.length > 0) {
      console.log('');
      console.log('[Sample Products (first 5)]');
      result.products.slice(0, 5).forEach((product) => {
        // Per-gram price wins; per-unit price is the fallback.
        const shownPrice = product.price_gram || product.price_each || 'N/A';
        console.log(` - ${product.name} (${product.brand}) - $${shownPrice}`);
        console.log(` Kind: ${product.kind}, Category: ${product.category}, THC: ${product.percent_thc}%`);
      });

      // Test 2: Normalize products
      console.log('');
      console.log('[Test 2] Testing normalizer...');
      const normalizer = new JaneNormalizer();

      // Synthetic payload record: only raw_json and product_count carry real data.
      const fakePayload = {
        id: 'test-payload',
        dispensary_id: 9999,
        crawl_run_id: null,
        platform: 'jane',
        payload_version: 1,
        raw_json: { hits: result.products.map((product) => product.raw) },
        product_count: result.products.length,
        pricing_type: null,
        crawl_mode: null,
        fetched_at: new Date(),
        processed: false,
        normalized_at: null,
        hydration_error: null,
        hydration_attempts: 0,
        created_at: new Date(),
      };

      const normalized = normalizer.normalize(fakePayload);
      console.log(` Products normalized: ${normalized.products.length}`);
      console.log(` Brands extracted: ${normalized.brands.length}`);
      console.log(` Categories extracted: ${normalized.categories.length}`);
      console.log(` Errors: ${normalized.errors.length}`);

      if (normalized.products.length > 0) {
        console.log('');
        console.log('[Sample Normalized Product]');
        const first = normalized.products[0];
        console.log(` External ID: ${first.externalProductId}`);
        console.log(` Name: ${first.name}`);
        console.log(` Brand: ${first.brandName}`);
        console.log(` Category: ${first.category}`);
        console.log(` Type: ${first.type}`);
        console.log(` Strain: ${first.strainType}`);
        console.log(` THC: ${first.thcPercent}%`);
        console.log(` CBD: ${first.cbdPercent}%`);
        console.log(` Image: ${first.primaryImageUrl?.slice(0, 60)}...`);

        // Pricing is keyed by the product's external id.
        const pricing = normalized.pricing.get(first.externalProductId);
        if (pricing) {
          console.log(` Price (cents): ${pricing.priceRec}`);
          console.log(` On Special: ${pricing.isOnSpecial}`);
        }
      }
    }

    console.log('');
    console.log(rule);
    console.log('TEST PASSED');
    console.log(rule);
  } catch (error: any) {
    console.error('');
    console.error(rule);
    console.error('TEST FAILED');
    console.error(rule);
    console.error(`Error: ${error.message}`);
    console.error(error.stack);
    process.exit(1);
  }
}

main().catch(console.error);

View File

@@ -1,50 +0,0 @@
/**
* Smoke test: Discover Jane stores in Arizona
* Usage: npx ts-node scripts/test-jane-discovery-az.ts
*/
import { discoverStoresByState } from '../src/platforms/jane';
/**
 * Smoke test for Jane store discovery.
 *
 * Calls `discoverStoresByState('AZ')`, prints the total number of stores found and
 * details for at most the first ten, then a "SMOKE TEST PASSED" banner. Any thrown
 * error is printed under a "SMOKE TEST FAILED" banner and the process exits with
 * code 1.
 */
async function main() {
  const rule = '='.repeat(60);
  const previewSize = 10;

  console.log(rule);
  console.log('Jane Store Discovery - Arizona Smoke Test');
  console.log(rule);
  console.log('Using local IP (no proxy)\n');

  try {
    const stores = await discoverStoresByState('AZ');

    console.log(`\n${rule}`);
    console.log(`RESULTS: Found ${stores.length} Jane stores in Arizona`);
    console.log(rule);

    if (stores.length > 0) {
      console.log('\nSample stores:');
      stores.slice(0, previewSize).forEach((store) => {
        console.log(` - ${store.name}`);
        console.log(` ID: ${store.storeId} | ${store.city}, AZ`);
        console.log(` Types: ${store.storeTypes?.join(', ') || 'unknown'}`);
        console.log(` Products: ${store.productCount || 'N/A'}`);
        console.log('');
      });

      const hidden = stores.length - previewSize;
      if (hidden > 0) {
        console.log(` ... and ${hidden} more stores`);
      }
    }

    console.log('\n' + rule);
    console.log('SMOKE TEST PASSED');
    console.log(rule);
  } catch (error: any) {
    console.error('\n' + rule);
    console.error('SMOKE TEST FAILED');
    console.error(rule);
    console.error(`Error: ${error.message}`);
    console.error(error.stack);
    process.exit(1);
  }
}

main();

View File

@@ -1,55 +0,0 @@
/**
* Compare MED vs REC product menus for same location
*/
import puppeteer from 'puppeteer-extra';
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
puppeteer.use(StealthPlugin());

/**
 * Classifies how two product-id samples overlap.
 *
 * @param recOnly number of REC entries not present in the MED sample
 * @param medOnly number of MED entries not present in the REC sample
 * @param shared  number of REC entries also present in the MED sample
 * @returns a human-readable verdict for the log line
 */
function describeMenuOverlap(recOnly: number, medOnly: number, shared: number): string {
  if (shared === 0) {
    // Two empty samples say nothing about the menus; do not call them "different".
    return recOnly + medOnly === 0 ? 'UNKNOWN (no products fetched)' : 'COMPLETELY DIFFERENT';
  }
  // Identical only when NEITHER side has extras (the old check ignored MED-only items).
  return recOnly === 0 && medOnly === 0 ? 'IDENTICAL' : 'PARTIALLY OVERLAPPING';
}

/**
 * Fetches up to 100 menu products for the REC store (3379) and the MED store (4540)
 * from inside a browser page opened on iheartjane.com, then prints how the two
 * product-id samples overlap.
 *
 * The browser is closed even when navigation or a query fails.
 */
async function main() {
  const browser = await puppeteer.launch({ headless: 'new', args: ['--no-sandbox'] });
  try {
    const page = await browser.newPage();
    await page.goto('https://www.iheartjane.com/stores', { waitUntil: 'domcontentloaded' });
    await new Promise(r => setTimeout(r, 2000));

    // Runs the search request in page context; the callback must be self-contained
    // because it is serialized into the browser, so the store id is passed as an argument.
    const fetchProductIds = (storeId: number): Promise<number[]> =>
      page.evaluate(async (id: number) => {
        const res = await fetch('https://search.iheartjane.com/1/indexes/menu-products-production/query', {
          method: 'POST',
          headers: { 'Content-Type': 'application/json' },
          body: JSON.stringify({ query: '', hitsPerPage: 100, filters: `store_id=${id}` }),
        });
        if (!res.ok) {
          throw new Error(`menu-products query for store ${id} failed with HTTP ${res.status}`);
        }
        const data = await res.json();
        return data.hits?.map((h: any) => h.product_id) || [];
      }, storeId);

    // Fetch REC products (store 3379)
    const recProducts: number[] = await fetchProductIds(3379);
    // Fetch MED products (store 4540)
    const medProducts: number[] = await fetchProductIds(4540);

    const recSet = new Set(recProducts);
    const medSet = new Set(medProducts);
    const recOnly = recProducts.filter(id => !medSet.has(id)).length;
    const medOnly = medProducts.filter(id => !recSet.has(id)).length;
    const shared = recProducts.filter(id => medSet.has(id)).length;

    console.log('\nHana Phoenix - MED vs REC comparison (100 products each):');
    console.log(' REC products fetched:', recProducts.length);
    console.log(' MED products fetched:', medProducts.length);
    console.log(' REC-only:', recOnly);
    console.log(' MED-only:', medOnly);
    console.log(' Shared:', shared);
    console.log(' Menus are:', describeMenuOverlap(recOnly, medOnly, shared));
  } finally {
    await browser.close();
  }
}

main().catch((err: unknown) => {
  console.error(err);
  process.exitCode = 1;
});

View File

@@ -1,79 +0,0 @@
/**
* Find ALL differing fields between MED and REC product payloads
*/
import puppeteer from 'puppeteer-extra';
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
puppeteer.use(StealthPlugin());

/**
 * Takes the first product returned for the REC store (3379), looks the same
 * product_id up among the first 100 hits of the MED store (4540), and prints every
 * payload field whose JSON serialization differs, followed by the fields whose names
 * look limit/allowance related.
 *
 * The browser is closed even when navigation or a query fails.
 */
async function main() {
  const browser = await puppeteer.launch({ headless: 'new', args: ['--no-sandbox'] });
  try {
    const page = await browser.newPage();
    await page.goto('https://www.iheartjane.com/stores', { waitUntil: 'domcontentloaded' });
    await new Promise(r => setTimeout(r, 2000));

    // Runs one search query in page context. With pid === null the first hit is
    // returned; otherwise the hit whose product_id equals pid (or undefined).
    // The callback is serialized into the browser, so all inputs are passed as arguments.
    const queryStore = (storeId: number, hitsPerPage: number, productId: number | null): Promise<any> =>
      page.evaluate(async (sid: number, limit: number, pid: number | null) => {
        const res = await fetch('https://search.iheartjane.com/1/indexes/menu-products-production/query', {
          method: 'POST',
          headers: { 'Content-Type': 'application/json' },
          body: JSON.stringify({ query: '', hitsPerPage: limit, filters: `store_id=${sid}` }),
        });
        if (!res.ok) {
          throw new Error(`menu-products query for store ${sid} failed with HTTP ${res.status}`);
        }
        const data = await res.json();
        return pid === null ? data.hits?.[0] : data.hits?.find((h: any) => h.product_id === pid);
      }, storeId, hitsPerPage, productId);

    // Get full product payload from REC store
    const recProduct = await queryStore(3379, 1, null);
    if (!recProduct) {
      // Without a reference product there is nothing to compare.
      console.log('No product returned for REC store 3379 - nothing to compare');
      return;
    }
    const productId = recProduct.product_id;

    // Get same product from MED store
    const medProduct = await queryStore(4540, 100, productId);

    console.log('Product:', recProduct?.name, '(ID:', productId, ')\n');
    if (!medProduct) {
      console.log('NOTE: product not found in the first 100 MED hits; MED values below are undefined\n');
    }

    // Get all keys
    const allKeys = new Set([...Object.keys(recProduct || {}), ...Object.keys(medProduct || {})]);
    const sortedKeys = [...allKeys].sort();

    console.log('=== ALL KEYS IN PAYLOAD ===');
    console.log(sortedKeys.join(', '));

    console.log('\n=== FIELDS THAT DIFFER ===');
    let diffCount = 0;
    for (const key of sortedKeys) {
      // Compare by JSON text so nested objects/arrays are compared by value.
      const recVal = JSON.stringify(recProduct?.[key]);
      const medVal = JSON.stringify(medProduct?.[key]);
      if (recVal !== medVal) {
        diffCount++;
        console.log(`${key}:`);
        console.log(` REC: ${recVal?.substring(0, 100)}`);
        console.log(` MED: ${medVal?.substring(0, 100)}`);
      }
    }
    if (diffCount === 0) {
      console.log('(none - payloads are identical)');
    }

    // Check for limit/allowance related fields
    console.log('\n=== LIMIT-RELATED FIELDS ===');
    const needles = ['limit', 'max', 'allow', 'quantity', 'cart', 'medical', 'rec', 'weight'];
    const limitFields = sortedKeys.filter(k => needles.some(needle => k.includes(needle)));
    for (const key of limitFields) {
      console.log(`${key}: REC=${JSON.stringify(recProduct?.[key])} | MED=${JSON.stringify(medProduct?.[key])}`);
    }
  } finally {
    await browser.close();
  }
}

main().catch((err: unknown) => {
  console.error(err);
  process.exitCode = 1;
});

View File

@@ -1,35 +0,0 @@
/**
* Test script to capture and save full Jane payload
* Usage: npx ts-node scripts/test-jane-payload.ts
*/
import * as fs from 'fs';
import { fetchProductsFromUrl } from '../src/platforms/jane';
/** Menu page whose payload is captured. */
const TEST_URL = 'https://theflowershopusa.com/mesa/menu/';
/** Destination of the captured payload (pretty-printed JSON). */
const OUTPUT_FILE = '/tmp/jane-test-payload.json';

/**
 * Fetches the Jane menu at TEST_URL, assembles a payload object from the raw hits and
 * raw store record, writes it to OUTPUT_FILE and prints the product count and the
 * resulting file size in KB.
 */
async function main() {
  console.log('Fetching Jane payload...');
  const result = await fetchProductsFromUrl(TEST_URL);
  const rawHits = result.products.map((product) => product.raw);
  const rawStore = result.store?.raw || null;

  // Build payload structure matching what would be saved
  const payload = {
    hits: rawHits,
    store: rawStore,
    capturedAt: new Date().toISOString(),
    platform: 'jane',
    storeId: result.store?.id,
    productCount: result.products.length,
    responseCount: result.responses.length,
  };

  // Save to file
  const serialized = JSON.stringify(payload, null, 2);
  fs.writeFileSync(OUTPUT_FILE, serialized);

  console.log(`\nPayload saved to: ${OUTPUT_FILE}`);
  console.log(`Products: ${result.products.length}`);
  console.log(`Size: ${Math.round(fs.statSync(OUTPUT_FILE).size / 1024)}KB`);
}

main().catch(console.error);

View File

@@ -1,138 +0,0 @@
import puppeteer from 'puppeteer';
/**
 * Resolves after roughly `ms` milliseconds.
 *
 * @param ms delay handed to setTimeout
 */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
/**
 * Loads the storefront's /shop page, records the treez.io endpoints the page requests
 * and summarizes gapcommerceapi.com search responses as they arrive. It then issues
 * the product-search query from inside the page and prints the field list plus one
 * sample product.
 *
 * The browser is closed even when navigation, clicking or evaluation throws.
 */
async function main() {
  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // Capture ALL requests to treez.io (script and stylesheet URLs excluded)
    const treezRequests: { url: string; method: string }[] = [];
    page.on('request', (req) => {
      const url = req.url();
      if (url.includes('treez.io') && !url.includes('.js') && !url.includes('.css')) {
        treezRequests.push({
          url: url,
          method: req.method(),
        });
      }
    });

    // Also intercept and capture ES API responses
    page.on('response', async (res) => {
      const url = res.url();
      if (url.includes('gapcommerceapi.com') && res.status() === 200) {
        try {
          const json = await res.json();
          const total = json.hits?.total?.value;
          const count = json.hits?.hits?.length;
          if (total || count) {
            console.log('\nES Response: total=' + total + ', returned=' + count);
            if (json.hits?.hits?.[0]?._source) {
              const src = json.hits.hits[0]._source;
              console.log('First product fields: ' + Object.keys(src).slice(0, 20).join(', '));
            }
          }
        } catch {
          // Best effort: bodies that cannot be read as JSON are skipped on purpose.
        }
      }
    });

    console.log('Loading /shop page...\n');
    await page.goto('https://shop.bestdispensary.com/shop', {
      waitUntil: 'networkidle2',
      timeout: 60000
    });
    await sleep(3000);

    // Bypass age gate
    const ageGate = await page.$('[data-testid="age-gate-modal"]');
    if (ageGate) {
      const btn = await page.$('[data-testid="age-gate-submit-button"]');
      if (btn) await btn.click();
      await sleep(2000);
    }

    // Click load more several times (stops early once the button is gone)
    console.log('\nClicking Load More...');
    for (let i = 0; i < 5; i++) {
      const btn = await page.$('button.collection__load-more');
      if (!btn) break;
      await btn.click();
      await sleep(2000);
    }

    console.log('\n=== TREEZ API ENDPOINTS CALLED ===\n');
    // Query strings are dropped so each endpoint is listed once.
    const uniqueUrls = [...new Set(treezRequests.map(r => r.url.split('?')[0]))];
    uniqueUrls.forEach(url => console.log(url));

    // Now intercept the ES response data by making a request from browser context
    console.log('\n=== FETCHING ALL PRODUCTS VIA BROWSER ===\n');
    const allProducts = await page.evaluate(async () => {
      // NOTE(review): API key is hard-coded in source; confirm it is meant to be public.
      const apiKey = 'V3jHL9dFzi3Gj4UISM4lr38Nm0GSxcps5OBz1PbS';
      const url = 'https://search-kyrok9udlk.gapcommerceapi.com/product/search';
      const query = {
        from: 0,
        size: 1000,
        query: {
          bool: {
            must: [
              { bool: { filter: { range: { customMinPrice: { gte: 0.01, lte: 500000 }}}}},
              { bool: { should: [{ match: { isAboveThreshold: true }}]}},
              { bool: { should: [{ match: { isHideFromMenu: false }}]}}
            ]
          }
        }
      };
      try {
        const response = await fetch(url, {
          method: 'POST',
          headers: {
            'Content-Type': 'application/json',
            'x-api-key': apiKey,
          },
          body: JSON.stringify(query),
        });
        const data = await response.json();
        return {
          total: data.hits?.total?.value,
          count: data.hits?.hits?.length,
          sample: data.hits?.hits?.[0]?._source,
          allProducts: data.hits?.hits?.map((h: any) => h._source),
        };
      } catch (err: any) {
        // Errors are returned as data so they cross the page boundary as a plain message.
        return { error: err.message };
      }
    });

    if (allProducts.error) {
      console.log('Error: ' + allProducts.error);
    } else {
      console.log('Total products: ' + allProducts.total);
      console.log('Returned: ' + allProducts.count);
      if (allProducts.sample) {
        console.log('\n=== PRODUCT FIELDS ===\n');
        console.log(Object.keys(allProducts.sample).sort().join('\n'));
        console.log('\n=== SAMPLE PRODUCT ===\n');
        console.log(JSON.stringify(allProducts.sample, null, 2));
      }
    }
  } finally {
    await browser.close();
  }
}

// A bare `main()` would leave a rejection unhandled; report it and flag failure.
main().catch((err: unknown) => {
  console.error(err);
  process.exitCode = 1;
});

View File

@@ -1,203 +0,0 @@
/**
* Extract ALL product elements and find unique products
*/
import puppeteer, { Page } from 'puppeteer';
/** Subdomain of the Treez store under test (`https://<STORE_ID>.treez.io`). */
const STORE_ID = 'best';

/**
 * Pauses the caller for about `ms` milliseconds.
 *
 * @param ms delay handed to setTimeout
 */
async function sleep(ms: number): Promise<void> {
  const timer = new Promise<void>((wake) => setTimeout(wake, ms));
  return timer;
}
/**
 * Dismisses the age-gate modal when one is present.
 *
 * Does nothing if the modal element is missing. Otherwise clicks the submit button
 * (when found) and then waits two seconds, whether or not a click happened.
 *
 * @param page page to inspect
 */
async function bypassAgeGate(page: Page): Promise<void> {
  const modal = await page.$('[data-testid="age-gate-modal"]');
  if (!modal) {
    return;
  }

  const submit = await page.$('[data-testid="age-gate-submit-button"]');
  if (submit) {
    await submit.click();
  }
  await sleep(2000);
}
/**
 * Opens the store's /onlinemenu/brands page and reports, in three steps:
 *   [1] how many elements carry a `product_product__` class, grouped by tag;
 *   [2] how many items four different selector strategies find;
 *   [3] the unique products found via product links, keyed by image alt text.
 *
 * Image, font and media requests are aborted to speed up loading. The browser is
 * closed even when navigation or evaluation throws.
 */
async function main() {
  console.log('='.repeat(60));
  console.log('Extracting ALL product elements');
  console.log('='.repeat(60));

  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    await page.setRequestInterception(true);
    page.on('request', (req) => {
      if (['image', 'font', 'media'].includes(req.resourceType())) {
        req.abort();
      } else {
        req.continue();
      }
    });

    const url = `https://${STORE_ID}.treez.io/onlinemenu/brands?customerType=ADULT`;
    await page.goto(url, { waitUntil: 'networkidle2', timeout: 60000 });
    await sleep(3000);
    await bypassAgeGate(page);
    await sleep(2000);

    // Get ALL elements with product_product__ class
    console.log('\n[1] Counting all product_product__ elements...');
    const elementAnalysis = await page.evaluate(() => {
      const all = document.querySelectorAll('[class*="product_product__"]');
      const byTag: Record<string, number> = {};
      const anchorHrefs: string[] = [];
      const imgAlts: string[] = [];

      all.forEach(el => {
        const tag = el.tagName;
        byTag[tag] = (byTag[tag] || 0) + 1;
        if (tag === 'A') {
          const href = el.getAttribute('href');
          if (href && href.includes('/product/')) {
            anchorHrefs.push(href);
          }
        }
        if (tag === 'IMG') {
          const alt = el.getAttribute('alt');
          if (alt) imgAlts.push(alt);
        }
      });

      // Only the first 20 samples are sent back; unique counts cover the full lists.
      return {
        total: all.length,
        byTag,
        anchorHrefs: anchorHrefs.slice(0, 20),
        uniqueAnchors: new Set(anchorHrefs).size,
        imgAlts: imgAlts.slice(0, 20),
        uniqueImgAlts: new Set(imgAlts).size,
      };
    });

    console.log(`Total elements: ${elementAnalysis.total}`);
    console.log(`By tag:`, elementAnalysis.byTag);
    console.log(`Unique anchor hrefs: ${elementAnalysis.uniqueAnchors}`);
    console.log(`Unique image alts: ${elementAnalysis.uniqueImgAlts}`);
    console.log(`\nSample anchor hrefs:`, elementAnalysis.anchorHrefs.slice(0, 5));
    console.log(`Sample image alts:`, elementAnalysis.imgAlts.slice(0, 5));

    // Try to extract using different approaches
    console.log('\n[2] Testing extraction approaches...');
    const approaches = await page.evaluate(() => {
      const results: Record<string, { count: number; unique: number; sample: string[] }> = {};

      // Approach 1: Anchor elements with product links
      const anchors = document.querySelectorAll('a[href*="/product/"]');
      const anchorNames = new Set<string>();
      anchors.forEach(a => {
        const img = a.querySelector('img');
        const name = img?.getAttribute('alt') || a.textContent?.trim().split('\n')[0] || '';
        if (name) anchorNames.add(name);
      });
      results['a[href*="/product/"]'] = {
        count: anchors.length,
        unique: anchorNames.size,
        sample: Array.from(anchorNames).slice(0, 5),
      };

      // Approach 2: Images with alt text inside product areas
      const productImgs = document.querySelectorAll('[class*="product_product__"] img[alt]');
      const imgNames = new Set<string>();
      productImgs.forEach(img => {
        const alt = img.getAttribute('alt');
        if (alt && alt.length > 2) imgNames.add(alt);
      });
      results['[class*="product_product__"] img[alt]'] = {
        count: productImgs.length,
        unique: imgNames.size,
        sample: Array.from(imgNames).slice(0, 5),
      };

      // Approach 3: H5 elements (product names)
      const h5s = document.querySelectorAll('h5.product_product__name__JcEk0, h5[class*="product__name"]');
      const h5Names = new Set<string>();
      h5s.forEach(h5 => {
        const text = h5.textContent?.trim();
        if (text) h5Names.add(text);
      });
      results['h5[class*="product__name"]'] = {
        count: h5s.length,
        unique: h5Names.size,
        sample: Array.from(h5Names).slice(0, 5),
      };

      // Approach 4: Link class with product_product__
      const links = document.querySelectorAll('a.product_product__ERWtJ, a[class*="product_product__"][class*="link"]');
      const linkNames = new Set<string>();
      links.forEach(link => {
        const h5 = link.querySelector('h5');
        const img = link.querySelector('img');
        const name = h5?.textContent?.trim() || img?.getAttribute('alt') || '';
        if (name) linkNames.add(name);
      });
      results['a.product_product__ERWtJ'] = {
        count: links.length,
        unique: linkNames.size,
        sample: Array.from(linkNames).slice(0, 5),
      };

      return results;
    });

    Object.entries(approaches).forEach(([sel, data]) => {
      console.log(`\n${sel}:`);
      console.log(` Count: ${data.count}, Unique: ${data.unique}`);
      console.log(` Sample: ${data.sample.join(', ')}`);
    });

    // The best approach: use images with alt as the source of truth
    console.log('\n[3] Full product extraction using img[alt] approach...');
    const products = await page.evaluate(() => {
      const seen = new Set<string>();
      const products: { name: string; href: string; price: string }[] = [];

      // Get all product links; the first link seen for a given alt text wins.
      document.querySelectorAll('a[href*="/product/"]').forEach(a => {
        const img = a.querySelector('img');
        const name = img?.getAttribute('alt') || '';
        if (!name || seen.has(name)) return;
        seen.add(name);

        const href = a.getAttribute('href') || '';

        // Get price from within the link or parent
        let price = '';
        const priceEl = a.querySelector('[class*="price"]');
        if (priceEl) {
          const priceMatch = priceEl.textContent?.match(/\$(\d+(?:\.\d{2})?)/);
          price = priceMatch ? priceMatch[1] : '';
        }

        products.push({ name, href, price });
      });

      return products;
    });

    console.log(`Extracted ${products.length} unique products`);
    console.log('\nSample products:');
    products.slice(0, 10).forEach(p => {
      console.log(` - ${p.name} | ${p.price ? '$' + p.price : 'N/A'} | ${p.href.slice(0, 40)}...`);
    });
  } finally {
    await browser.close();
  }
}

main().catch((err: unknown) => {
  console.error(err);
  process.exitCode = 1;
});

View File

@@ -1,52 +0,0 @@
import axios from 'axios';
/**
 * Posts a product search (up to 500 hits) to the gapcommerceapi.com search endpoint
 * without an API key and prints the totals, the sorted field names of the first hit
 * and that hit as JSON. Request failures are logged, including the HTTP status and
 * response body when available; nothing is rethrown.
 */
async function main() {
  const endpoint = 'https://search-kyrok9udlk.gapcommerceapi.com/product/search';

  // Three must-clauses: price range, above-threshold flag, not hidden from menu.
  const priceRange = { bool: { filter: { range: { customMinPrice: { gte: 0.01, lte: 500000 }}}}};
  const aboveThreshold = { bool: { should: [{ match: { isAboveThreshold: true }}]}};
  const visibleOnMenu = { bool: { should: [{ match: { isHideFromMenu: false }}]}};
  const searchBody = {
    from: 0,
    size: 500,
    query: {
      bool: {
        must: [priceRange, aboveThreshold, visibleOnMenu]
      }
    }
  };

  console.log('Querying Treez Elasticsearch API...\n');

  try {
    const response = await axios.post(endpoint, searchBody, {
      headers: { 'Content-Type': 'application/json' }
    });
    const payload = response.data;
    // `total` may be an object with `.value` or a bare value; accept either.
    const total = payload.hits?.total?.value || payload.hits?.total;
    const hits = payload.hits?.hits || [];

    console.log('Total products: ' + total);
    console.log('Products returned: ' + hits.length + '\n');

    if (hits.length === 0) {
      return;
    }

    const firstSource = hits[0]._source;
    console.log('=== PRODUCT FIELDS AVAILABLE ===\n');
    console.log(Object.keys(firstSource).sort().join('\n'));
    console.log('\n=== SAMPLE PRODUCT ===\n');
    console.log(JSON.stringify(firstSource, null, 2));
  } catch (err: any) {
    console.log('Error: ' + err.message);
    const failed = err.response;
    if (failed) {
      console.log('Status: ' + failed.status);
      console.log('Data: ' + JSON.stringify(failed.data));
    }
  }
}

main();

View File

@@ -1,97 +0,0 @@
import axios from 'axios';
/**
 * Posts the product search (up to 1000 hits) to the search endpoint using an API key
 * plus storefront Origin/Referer headers, and prints totals, field names and the
 * first product. Failures are logged with the HTTP status when available.
 */
async function probeElasticsearch(): Promise<void> {
  console.log('=== ELASTICSEARCH API ===\n');

  const esUrl = 'https://search-kyrok9udlk.gapcommerceapi.com/product/search';
  // NOTE(review): credential is hard-coded in source; confirm it is meant to be public.
  const apiKey = 'V3jHL9dFzi3Gj4UISM4lr38Nm0GSxcps5OBz1PbS';
  const mustClauses = [
    { bool: { filter: { range: { customMinPrice: { gte: 0.01, lte: 500000 }}}}},
    { bool: { should: [{ match: { isAboveThreshold: true }}]}},
    { bool: { should: [{ match: { isHideFromMenu: false }}]}}
  ];
  const query = {
    from: 0,
    size: 1000,
    query: {
      bool: {
        must: mustClauses
      }
    }
  };

  try {
    const response = await axios.post(esUrl, query, {
      headers: {
        'Content-Type': 'application/json',
        'x-api-key': apiKey,
        'Origin': 'https://shop.bestdispensary.com',
        'Referer': 'https://shop.bestdispensary.com/',
      },
      timeout: 30000,
    });
    const body = response.data;
    const total = body.hits?.total?.value || body.hits?.total;
    const hits = body.hits?.hits || [];

    console.log('Total products: ' + total);
    console.log('Products returned: ' + hits.length);

    if (hits.length === 0) {
      return;
    }
    const firstSource = hits[0]._source;
    console.log('\n=== PRODUCT FIELDS ===\n');
    console.log(Object.keys(firstSource).sort().join('\n'));
    console.log('\n=== SAMPLE PRODUCT ===\n');
    console.log(JSON.stringify(firstSource, null, 2));
  } catch (err: any) {
    console.log('Elasticsearch Error: ' + err.message);
    if (err.response) {
      console.log('Status: ' + err.response.status);
    }
  }
}

/**
 * GETs the discounts endpoint of the Treez headless API for dispensary "best" and
 * prints a truncated view of the response: the first item for arrays, otherwise the
 * keys and data. Failures are logged with status and truncated body when available.
 */
async function probeTreezHeadless(): Promise<void> {
  console.log('\n\n=== TREEZ HEADLESS API ===\n');

  const treezUrl = 'https://headless.treez.io/v2.0/dispensary/best/ecommerce/discounts?excludeInactive=true&hideUnset=true&includeProdInfo=true';

  try {
    // NOTE(review): client_id/client_secret are hard-coded in source; confirm intent.
    const response = await axios.get(treezUrl, {
      headers: {
        'client_id': '29dce682258145c6b1cf71027282d083',
        'client_secret': 'A57bB49AfD7F4233B1750a0B501B4E16',
        'cache-control': 'max-age=0, no-cache, must-revalidate, proxy-revalidate',
        'Origin': 'https://shop.bestdispensary.com',
        'Referer': 'https://shop.bestdispensary.com/',
      },
      timeout: 30000,
    });
    const body = response.data;

    console.log('Response type: ' + typeof body);
    if (!Array.isArray(body)) {
      console.log('Keys: ' + Object.keys(body).join(', '));
      console.log('Data: ' + JSON.stringify(body, null, 2).slice(0, 2000));
      return;
    }

    console.log('Array length: ' + body.length);
    if (body.length > 0) {
      console.log('First item: ' + JSON.stringify(body[0], null, 2).slice(0, 1000));
    }
  } catch (err: any) {
    console.log('Treez Error: ' + err.message);
    if (err.response) {
      console.log('Status: ' + err.response.status);
      console.log('Data: ' + JSON.stringify(err.response.data).slice(0, 500));
    }
  }
}

/**
 * Runs the two API probes one after the other: first the Elasticsearch product
 * search, then the Treez headless discounts endpoint. Each probe handles its own
 * errors, so the second always runs.
 */
async function main() {
  // Test Elasticsearch API with API key
  await probeElasticsearch();
  // Test Treez Headless API
  await probeTreezHeadless();
}

main();

View File

@@ -1,243 +0,0 @@
/**
* Visit each brand page and extract products
*/
import puppeteer, { Page } from 'puppeteer';
/** Subdomain of the Treez store under test (`https://<STORE_ID>.treez.io`). */
const STORE_ID = 'best';

/**
 * Returns a promise that settles after about `ms` milliseconds.
 *
 * @param ms delay handed to setTimeout
 */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((finish) => {
    setTimeout(finish, ms);
  });
}
/**
 * Clicks through the age-gate modal if the page is showing one.
 *
 * Returns immediately when no modal element exists. When it does, the submit button
 * is clicked if found, and a two-second pause follows in either case.
 *
 * @param page page to inspect
 */
async function bypassAgeGate(page: Page): Promise<void> {
  const gate = await page.$('[data-testid="age-gate-modal"]');
  if (gate === null) {
    return;
  }

  const confirmButton = await page.$('[data-testid="age-gate-submit-button"]');
  if (confirmButton !== null) {
    await confirmButton.click();
  }
  await sleep(2000);
}
/**
 * Repeatedly scrolls to the bottom of the page so lazily loaded content appears.
 *
 * Each pass measures `document.body.scrollHeight`, scrolls to it and waits one
 * second. The loop ends after 30 passes, or as soon as the measured height has
 * matched the previous pass three times in a row.
 *
 * @param page page to scroll
 */
async function scrollToLoadAll(page: Page): Promise<void> {
  const maxPasses = 30;
  const stablePassesToStop = 3;

  let lastHeight = 0;
  let stableStreak = 0;
  let pass = 0;

  while (pass < maxPasses) {
    const height = await page.evaluate(() => document.body.scrollHeight);

    // Count consecutive passes with an unchanged height; any growth resets the streak.
    stableStreak = height === lastHeight ? stableStreak + 1 : 0;
    if (stableStreak >= stablePassesToStop) {
      break;
    }

    await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
    await sleep(1000);
    lastHeight = height;
    pass += 1;
  }
}
/**
 * Collects the products linked from the current page.
 *
 * Every `a[href*="/product/"]` anchor is inspected in document order. The product
 * name is the alt text of the anchor's image, falling back to its `h5` text; anchors
 * without a name, or with a name already seen, are skipped. The price is the first
 * `$<number>` match inside a descendant whose class contains "price" (empty string
 * when absent).
 *
 * @param page page to read from
 * @returns one entry per unique product name
 */
async function extractProducts(page: Page): Promise<{ name: string; price: string; href: string }[]> {
  const collected = await page.evaluate(() => {
    const namesSeen = new Set<string>();
    const found: { name: string; price: string; href: string }[] = [];

    for (const anchor of Array.from(document.querySelectorAll('a[href*="/product/"]'))) {
      const href = anchor.getAttribute('href') || '';
      const altText = anchor.querySelector('img')?.getAttribute('alt');
      const headingText = anchor.querySelector('h5')?.textContent?.trim();
      const name = altText || headingText || '';
      if (!name || namesSeen.has(name)) {
        continue;
      }
      namesSeen.add(name);

      const priceText = anchor.querySelector('[class*="price"]')?.textContent;
      const matched = priceText?.match(/\$(\d+(?:\.\d{2})?)/);
      found.push({ name, price: matched ? matched[1] : '', href });
    }

    return found;
  });
  return collected;
}
/**
 * Explores how brands are exposed on the store's /onlinemenu/brands page:
 *   [1] derives candidate brand slugs from product URLs;
 *   [2] lists links that point at /brand/ pages;
 *   [3] lists brand section headers with a sample product each;
 *   [4] for the first section, tries several brand-page URL patterns until one
 *       yields products;
 *   [5] reloads the brands page and reports whether section headers are links.
 *
 * Image, font and media requests are aborted to speed up loading. The browser is
 * closed even when navigation or evaluation throws.
 */
async function main() {
  console.log('='.repeat(60));
  console.log('Extracting Products from All Brands');
  console.log('='.repeat(60));

  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    await page.setRequestInterception(true);
    page.on('request', (req) => {
      if (['image', 'font', 'media'].includes(req.resourceType())) {
        req.abort();
      } else {
        req.continue();
      }
    });

    // Go to brands page and get all brand links
    const brandsUrl = `https://${STORE_ID}.treez.io/onlinemenu/brands?customerType=ADULT`;
    console.log(`\n[1] Getting brand list from ${brandsUrl}`);
    await page.goto(brandsUrl, { waitUntil: 'networkidle2', timeout: 60000 });
    await sleep(3000);
    await bypassAgeGate(page);
    await sleep(2000);

    // The 142 items on /brands ARE brands (shown as product cards with brand info)
    // Get the brand names from the product hrefs (they contain brand name in URL)
    const brandInfo = await page.evaluate(() => {
      const brands: { name: string; slug: string }[] = [];
      const seen = new Set<string>();

      // Extract brand info from product URLs
      // URL pattern: /product/{brand}-{product}-{details}
      document.querySelectorAll('a[href*="/product/"]').forEach(a => {
        const href = a.getAttribute('href') || '';
        // Try to extract brand from URL - first segment before product name
        // (the pattern captures one or two dash-separated tokens).
        const match = href.match(/\/product\/([^-]+(?:-[^-]+)?)-/);
        if (match) {
          const slug = match[1];
          if (!seen.has(slug)) {
            seen.add(slug);
            // Also look for brand text in the card
            const brandEl = a.querySelector('[class*="brand"], [class*="Brand"]');
            const name = brandEl?.textContent?.trim() || slug;
            brands.push({ name, slug });
          }
        }
      });

      return brands;
    });

    console.log(`Found ${brandInfo.length} potential brands from product URLs`);
    console.log('Sample:', brandInfo.slice(0, 5));

    // Actually, let's look for brand page links directly
    console.log('\n[2] Looking for brand page links...');
    const brandLinks = await page.evaluate(() => {
      const links: { name: string; href: string }[] = [];

      // Look for links to /brand/ pages (one entry per distinct href)
      document.querySelectorAll('a[href*="/brand/"]').forEach(a => {
        const href = a.getAttribute('href') || '';
        const text = a.textContent?.trim() || '';
        if (href && !links.some(l => l.href === href)) {
          links.push({ name: text, href });
        }
      });

      return links;
    });

    console.log(`Found ${brandLinks.length} brand page links`);
    if (brandLinks.length > 0) {
      console.log('Sample:', brandLinks.slice(0, 10));
    }

    // If no brand links, try to find them in section headers
    console.log('\n[3] Looking for brand sections...');
    const brandSections = await page.evaluate(() => {
      const sections: { brandName: string; sampleProduct: string }[] = [];

      document.querySelectorAll('[class*="products_product__section"]').forEach(section => {
        const header = section.querySelector('h2, h3, [class*="heading"]');
        const brandName = header?.textContent?.trim() || '';
        const firstProduct = section.querySelector('a[href*="/product/"]');
        const productName = firstProduct?.querySelector('h5')?.textContent?.trim() ||
          firstProduct?.querySelector('img')?.getAttribute('alt') || '';
        if (brandName) {
          sections.push({ brandName, sampleProduct: productName });
        }
      });

      return sections;
    });

    console.log(`Found ${brandSections.length} brand sections`);
    brandSections.slice(0, 10).forEach(s => {
      console.log(` - Brand: "${s.brandName}" | Sample: "${s.sampleProduct}"`);
    });

    // Try visiting a brand page directly using the section name
    if (brandSections.length > 0) {
      console.log('\n[4] Testing brand page URLs...');

      // Try different URL patterns for first brand
      const testBrand = brandSections[0].brandName;
      const testSlug = testBrand.toLowerCase().replace(/[^a-z0-9]+/g, '-');
      const urlPatterns = [
        `/onlinemenu/brand/${encodeURIComponent(testBrand)}`,
        `/onlinemenu/brand/${testSlug}`,
        `/brand/${encodeURIComponent(testBrand)}`,
        `/brand/${testSlug}`,
      ];

      for (const path of urlPatterns) {
        const testUrl = `https://${STORE_ID}.treez.io${path}?customerType=ADULT`;
        try {
          console.log(` Trying: ${testUrl}`);
          await page.goto(testUrl, { waitUntil: 'networkidle2', timeout: 15000 });
          await sleep(2000);
          const products = await extractProducts(page);
          console.log(` Products found: ${products.length}`);
          if (products.length > 0) {
            console.log(` ✓ Working URL pattern: ${path}`);
            break;
          }
        } catch (e: unknown) {
          // A failed pattern is expected; log a short reason and try the next one.
          const message = e instanceof Error ? e.message : String(e);
          console.log(` Error: ${message.slice(0, 50)}`);
        }
      }
    }

    // Check if clicking on a brand section leads to a brand page
    console.log('\n[5] Checking if brand sections have clickable headers...');
    await page.goto(brandsUrl, { waitUntil: 'networkidle2', timeout: 60000 });
    await sleep(3000);

    const clickableHeaders = await page.evaluate(() => {
      const results: { text: string; tag: string; href: string; clickable: boolean }[] = [];

      document.querySelectorAll('[class*="products_product__section"] h2, [class*="products_product__section"] h3').forEach(header => {
        // A header counts as clickable when it sits inside, or contains, an anchor.
        const link = header.closest('a') || header.querySelector('a');
        const text = header.textContent?.trim() || '';
        const href = link?.getAttribute('href') || '';
        results.push({
          text,
          tag: header.tagName,
          href,
          clickable: !!link,
        });
      });

      return results;
    });

    console.log('Section headers:');
    clickableHeaders.slice(0, 10).forEach(h => {
      console.log(` [${h.tag}] "${h.text}" - ${h.clickable ? `Link: ${h.href}` : 'Not clickable'}`);
    });
  } finally {
    await browser.close();
  }
}

main().catch((err: unknown) => {
  console.error(err);
  process.exitCode = 1;
});

View File

@@ -1,183 +0,0 @@
/**
* Detailed brand section analysis
*/
import puppeteer, { Page } from 'puppeteer';
const STORE_ID = 'best';
/**
 * Pause the calling async flow for roughly `ms` milliseconds.
 *
 * @param ms - Delay in milliseconds, handed straight to `setTimeout`.
 * @returns A promise that resolves (with no value) once the timer fires.
 */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((wake) => {
    setTimeout(wake, ms);
  });
}
/**
 * Dismiss the age-verification modal if the page is currently showing one.
 *
 * Does nothing when no `age-gate-modal` element exists. When the modal is
 * present, the submit button is clicked (if it can be found) and the function
 * then waits two seconds, whether or not a click happened.
 *
 * @param page - Puppeteer page to inspect.
 */
async function bypassAgeGate(page: Page): Promise<void> {
  const modal = await page.$('[data-testid="age-gate-modal"]');
  if (!modal) {
    return;
  }

  console.log(' Age gate detected, bypassing...');
  const submit = await page.$('[data-testid="age-gate-submit-button"]');
  if (submit) {
    await submit.click();
  }
  await sleep(2000);
}
/**
 * Entry point for the brand-section analysis script.
 *
 * Opens the store's `/onlinemenu/brands` page, scrolls until the document
 * height stops changing, and then prints:
 *   [2] every h2/h3 heading on the page,
 *   [3] the number of product links inside each
 *       `products_product__section` container,
 *   [4] a per-brand tally of unique product names.
 *
 * The browser is closed in a `finally` block so that a navigation timeout or
 * an evaluation error cannot leave a Chromium process running.
 */
async function main(): Promise<void> {
  console.log('='.repeat(60));
  console.log('Detailed Brand Section Analysis');
  console.log('='.repeat(60));

  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // Abort image/font/media requests; only the DOM is inspected.
    await page.setRequestInterception(true);
    page.on('request', (req) => {
      if (['image', 'font', 'media'].includes(req.resourceType())) {
        req.abort();
      } else {
        req.continue();
      }
    });

    const url = `https://${STORE_ID}.treez.io/onlinemenu/brands?customerType=ADULT`;
    console.log(`\nNavigating to ${url}`);
    await page.goto(url, { waitUntil: 'networkidle2', timeout: 60000 });
    await sleep(3000);
    await bypassAgeGate(page);
    await sleep(2000);

    // Scroll multiple times to load all content. Stop after at most 30
    // scrolls, or once the page height has been unchanged three times in a row.
    console.log('\n[1] Scrolling to load all content...');
    let previousHeight = 0;
    let unchangedScrolls = 0;
    for (let i = 0; i < 30; i++) {
      await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
      await sleep(1500);
      const currentHeight = await page.evaluate(() => document.body.scrollHeight);
      const productCount = await page.evaluate(() =>
        document.querySelectorAll('a[href*="/product/"]').length
      );
      console.log(` Scroll ${i + 1}: height=${currentHeight}, products=${productCount}`);
      if (currentHeight === previousHeight) {
        unchangedScrolls++;
        if (unchangedScrolls >= 3) break;
      } else {
        unchangedScrolls = 0;
      }
      previousHeight = currentHeight;
    }

    // Look at ALL h2/h3 headers on page
    console.log('\n[2] Finding ALL h2/h3 headers on page...');
    const headers = await page.evaluate(() => {
      const results: { tag: string; text: string; parentClass: string }[] = [];
      document.querySelectorAll('h2, h3').forEach((el: Element) => {
        results.push({
          tag: el.tagName,
          text: el.textContent?.trim().slice(0, 80) || '',
          parentClass: el.parentElement?.className?.slice(0, 50) || '',
        });
      });
      return results;
    });
    console.log(`Found ${headers.length} headers:`);
    headers.forEach((h: { tag: string; text: string }) =>
      console.log(` [${h.tag}] "${h.text}"`)
    );

    // Get products grouped by their section heading
    console.log('\n[3] Getting products per section...');
    const sectionProducts = await page.evaluate(() => {
      const results: { heading: string; products: number }[] = [];
      // Find all sections that contain products
      document.querySelectorAll('[class*="products_product__section"]').forEach((section: Element) => {
        const heading = section.querySelector('h2, h3');
        const headingText = heading?.textContent?.trim() || 'Unknown';
        const products = section.querySelectorAll('a[href*="/product/"]');
        results.push({
          heading: headingText,
          products: products.length,
        });
      });
      return results;
    });
    console.log(`Found ${sectionProducts.length} brand sections:`);
    let totalProducts = 0;
    sectionProducts.forEach((s: { heading: string; products: number }) => {
      console.log(` ${s.heading}: ${s.products} products`);
      totalProducts += s.products;
    });
    console.log(`\nTotal products across all sections: ${totalProducts}`);

    // Also extract brand from each product's URL/card
    console.log('\n[4] Extracting brand from product URLs/cards...');
    const brandCounts = await page.evaluate(() => {
      const byBrand: Record<string, number> = {};
      const seen = new Set<string>();
      document.querySelectorAll('a[href*="/product/"]').forEach((a: Element) => {
        // The image alt text serves as the product name and de-duplication key.
        const name = a.querySelector('img')?.getAttribute('alt') || '';
        if (!name || seen.has(name)) return;
        seen.add(name);

        // Heuristic: take the first short span/p text inside the card that is
        // neither the product name nor a price.
        let brand = '';
        a.querySelectorAll('span, p').forEach((span: Element) => {
          const text = span.textContent?.trim() || '';
          if (!brand && text && text.length < 50 && text !== name && !text.includes('$')) {
            brand = text;
          }
        });

        // Fallback: get brand from parent section heading
        if (!brand) {
          const section = a.closest('[class*="products_product__section"]');
          const heading = section?.querySelector('h2, h3');
          brand = heading?.textContent?.trim() || 'Unknown';
        }
        byBrand[brand] = (byBrand[brand] || 0) + 1;
      });
      return byBrand;
    });
    console.log('Products by brand:');
    Object.entries(brandCounts)
      .sort((a, b) => (b[1] as number) - (a[1] as number))
      .forEach(([brand, count]) => {
        console.log(` ${brand}: ${count}`);
      });
    const uniqueTotal = Object.values(brandCounts).reduce((sum: number, c) => sum + (c as number), 0);
    console.log(`\nTotal unique products: ${uniqueTotal}`);
  } finally {
    // Always release the Chromium process, even when a step above threw.
    await browser.close();
  }
}
main().catch(console.error);

View File

@@ -1,257 +0,0 @@
/**
* Test Treez brand-based product extraction
* 1. Load /brands page
* 2. Click "load more brands" to get all brands
* 3. Extract brand URLs
* 4. Visit each brand and extract products
*/
import puppeteer, { Page } from 'puppeteer';
const STORE_ID = 'best';
/**
 * Pause the calling async flow for roughly `ms` milliseconds.
 *
 * @param ms - Delay in milliseconds, handed straight to `setTimeout`.
 * @returns A promise that resolves (with no value) once the timer fires.
 */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((wake) => {
    setTimeout(wake, ms);
  });
}
/**
 * Dismiss the age-verification modal if the page is currently showing one.
 *
 * Does nothing when no `age-gate-modal` element exists. When the modal is
 * present, the submit button is clicked (if it can be found) and the function
 * then waits two seconds, whether or not a click happened.
 *
 * @param page - Puppeteer page to inspect.
 */
async function bypassAgeGate(page: Page): Promise<void> {
  const modal = await page.$('[data-testid="age-gate-modal"]');
  if (!modal) {
    return;
  }

  console.log('[AgeGate] Detected, bypassing...');
  const submit = await page.$('[data-testid="age-gate-submit-button"]');
  if (submit) {
    await submit.click();
  }
  await sleep(2000);
}
/**
 * Print diagnostics about controls that might reveal more brands.
 *
 * Despite its name, this function does not click or load anything. It only
 * inspects the DOM and logs three things:
 *   1. every `<select>` with its option labels,
 *   2. buttons/links whose text mentions "load more", "show all" or "view all",
 *   3. up to ten elements whose class name contains "brand" or "select".
 *
 * @param page - Puppeteer page already showing the brands listing.
 */
async function loadAllBrands(page: Page): Promise<void> {
  console.log('[Brands] Looking for "load more" option...');

  // 1. Dropdowns and their option labels.
  const selectInfo = await page.evaluate(() =>
    Array.from(document.querySelectorAll('select'), (dropdown, index) => ({
      selector: `select:nth-of-type(${index + 1})`,
      options: Array.from(dropdown.options).map((option) => option.text),
    }))
  );
  console.log('[Brands] Found selects:', JSON.stringify(selectInfo, null, 2));

  // 2. Clickable elements whose text hints at expanding the list.
  const loadMoreButtons = await page.evaluate(() =>
    Array.from(document.querySelectorAll('button, a, [role="button"]'))
      .filter((candidate) => {
        const lowered = candidate.textContent?.toLowerCase() || '';
        return (
          lowered.includes('load more') ||
          lowered.includes('show all') ||
          lowered.includes('view all')
        );
      })
      .map((candidate) => ({
        text: candidate.textContent?.trim() || '',
        tag: candidate.tagName,
      }))
  );
  console.log('[Brands] Found load more buttons:', loadMoreButtons);

  // 3. Any element whose class name mentions "brand" or "select"; the page
  //    side keeps the first 20 matches and only the first 10 are printed.
  const brandElements = await page.evaluate(() => {
    const hits: { tag: string; class: string; text: string }[] = [];
    for (const node of Array.from(document.querySelectorAll('*'))) {
      const classText = node.className?.toString?.() || '';
      const loweredClass = classText.toLowerCase();
      if (!loweredClass.includes('brand') && !loweredClass.includes('select')) {
        continue;
      }
      hits.push({
        tag: node.tagName,
        class: classText.slice(0, 100),
        text: (node.textContent?.trim() || '').slice(0, 50),
      });
    }
    return hits.slice(0, 20);
  });
  console.log('[Brands] Brand-related elements:', JSON.stringify(brandElements.slice(0, 10), null, 2));
}
/**
 * Collect candidate brand links from the current page.
 *
 * Four selectors are tried in order. An anchor is kept when it has both an
 * `href` and non-empty text, and only the first anchor seen for a given
 * `href` is retained.
 *
 * @param page - Puppeteer page to scan.
 * @returns `{ name, url }` pairs in discovery order; `url` is the raw `href`
 *          attribute and may be relative.
 */
async function extractBrandLinks(page: Page): Promise<{ name: string; url: string }[]> {
  return page.evaluate(() => {
    // Look for brand cards/links
    const candidateSelectors = [
      'a[href*="/brand/"]',
      'a[href*="/brands/"]',
      '[class*="brand"] a',
      '[class*="Brand"] a',
    ];

    // Keyed by href so each URL is reported once; Map keeps insertion order.
    const byUrl = new Map<string, { name: string; url: string }>();
    for (const selector of candidateSelectors) {
      for (const anchor of Array.from(document.querySelectorAll(selector))) {
        const url = anchor.getAttribute('href');
        const name = anchor.textContent?.trim() || '';
        if (!url || !name || byUrl.has(url)) {
          continue;
        }
        byUrl.set(url, { name, url });
      }
    }
    return Array.from(byUrl.values());
  });
}
/**
 * Scroll the current brand page to the bottom and scrape its product cards.
 *
 * Scrolling stops after 20 passes, or as soon as the document height has been
 * unchanged for three consecutive reads. Cards are de-duplicated by product
 * name (first occurrence wins).
 *
 * @param page - Puppeteer page already navigated to a brand listing.
 * @returns One `{ productId, name, price }` object per unique product name.
 *          `price` is the first `$`-prefixed number in the card's price
 *          element, or `null`. `productId` comes from the `/product/<id>` link
 *          when present, otherwise it is derived from the name.
 */
async function extractProductsFromBrandPage(page: Page): Promise<any[]> {
  // Scroll to load all products
  let lastHeight = 0;
  let stableReads = 0;
  for (let pass = 0; pass < 20; pass += 1) {
    const height = await page.evaluate(() => document.body.scrollHeight);
    stableReads = height === lastHeight ? stableReads + 1 : 0;
    if (stableReads >= 3) {
      break;
    }
    await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
    await sleep(1000);
    lastHeight = height;
  }

  // Extract products
  return page.evaluate(() => {
    // Keyed by name for de-duplication; Map keeps insertion order.
    const byName = new Map<string, any>();
    for (const card of Array.from(document.querySelectorAll('[class*="product_product__"]'))) {
      const name =
        card.querySelector('[class*="product__name"], [class*="name__"]')?.textContent?.trim() || '';
      if (!name || byName.has(name)) {
        continue;
      }

      const priceText = card.querySelector('[class*="price"]')?.textContent || '';
      const dollars = priceText.match(/\$(\d+(?:\.\d{2})?)/);

      const href = card.querySelector('a[href*="/product/"]')?.getAttribute('href') || '';
      const idFromLink = href.match(/\/product\/([^\/?]+)/);
      const productId = idFromLink ? idFromLink[1] : '';

      byName.set(name, {
        productId: productId || `treez_${name.replace(/\s+/g, '_').toLowerCase().slice(0, 30)}`,
        name,
        price: dollars ? parseFloat(dollars[1]) : null,
      });
    }
    return Array.from(byName.values());
  });
}
/**
 * Entry point: test brand-based product extraction.
 *
 * Steps: open the brands page, screenshot it, log candidate "load more"
 * controls, collect brand links, visit up to three of them and count their
 * products, then extrapolate a store-wide estimate.
 *
 * Changes from the previous version:
 * - The estimate divides by the number of brands actually sampled rather than
 *   a hard-coded 3, so it is no longer understated when fewer than three
 *   brand links are found.
 * - Page setup now happens inside the `try`, so the browser is closed even if
 *   `newPage()` or request interception fails.
 * - The write-only `allProducts` accumulator was removed.
 *
 * Errors are logged rather than rethrown, so the returned promise resolves
 * either way.
 */
async function main(): Promise<void> {
  console.log('='.repeat(60));
  console.log('Testing Treez Brand-Based Extraction');
  console.log('='.repeat(60));

  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // Block images
    await page.setRequestInterception(true);
    page.on('request', (req) => {
      if (['image', 'font', 'media'].includes(req.resourceType())) {
        req.abort();
      } else {
        req.continue();
      }
    });

    // Navigate to brands page
    const brandsUrl = `https://${STORE_ID}.treez.io/onlinemenu/brands?customerType=ADULT`;
    console.log(`\n[1] Navigating to ${brandsUrl}`);
    await page.goto(brandsUrl, { waitUntil: 'networkidle2', timeout: 60000 });
    await sleep(2000);
    await bypassAgeGate(page);
    await sleep(1000);

    // Screenshot to see what we're working with
    await page.screenshot({ path: '/tmp/treez-brands-page.png', fullPage: false });
    console.log('[1] Screenshot saved to /tmp/treez-brands-page.png');

    // Try to load all brands
    console.log('\n[2] Exploring brand selection options...');
    await loadAllBrands(page);

    // Extract brand links
    console.log('\n[3] Extracting brand links...');
    const brandLinks = await extractBrandLinks(page);
    console.log(`Found ${brandLinks.length} brand links:`);
    brandLinks.slice(0, 10).forEach(b => console.log(` - ${b.name}: ${b.url}`));

    // If we found brand links, visit a couple to test
    if (brandLinks.length > 0) {
      // At most three brands are sampled; there may be fewer available.
      const sampledBrands = brandLinks.slice(0, 3);
      console.log(`\n[4] Testing product extraction from first ${sampledBrands.length} brands...`);
      let totalProducts = 0;
      for (const brand of sampledBrands) {
        // hrefs may be relative; resolve them against the store host.
        const brandUrl = brand.url.startsWith('http')
          ? brand.url
          : `https://${STORE_ID}.treez.io${brand.url}`;
        console.log(`\n Visiting brand: ${brand.name}`);
        console.log(` URL: ${brandUrl}`);
        await page.goto(brandUrl, { waitUntil: 'networkidle2', timeout: 30000 });
        await sleep(2000);
        const products = await extractProductsFromBrandPage(page);
        console.log(` Products found: ${products.length}`);
        totalProducts += products.length;
      }
      console.log(`\n[5] Summary from ${sampledBrands.length} brands: ${totalProducts} products`);
      // Average per sampled brand, scaled to the number of brand links found.
      const estimatedTotal = Math.round((totalProducts / sampledBrands.length) * brandLinks.length);
      console.log(`Estimated total (${brandLinks.length} brands): ~${estimatedTotal} products`);
    }
  } catch (error: unknown) {
    console.error('Error:', error instanceof Error ? error.message : error);
  } finally {
    await browser.close();
  }
}
main().catch(console.error);

View File

@@ -1,113 +0,0 @@
import puppeteer from 'puppeteer';
/**
 * Pause the calling async flow for roughly `ms` milliseconds.
 *
 * @param ms - Delay in milliseconds, handed straight to `setTimeout`.
 * @returns A promise that resolves (with no value) once the timer fires.
 */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((wake) => {
    setTimeout(wake, ms);
  });
}
/**
 * Entry point: load the shop page and dump request headers that look like
 * credentials, then peek at page globals for embedded configuration.
 *
 * Every request whose URL contains `treez.io` or `gapcommerce` is recorded.
 * For each one, headers whose name contains "auth", "token", "key" or "api"
 * are printed; requests to `headless.treez.io` get all headers printed
 * (values truncated to 80 characters).
 *
 * Changes from the previous version:
 * - The browser is closed in `finally`, so failures no longer leak Chromium.
 * - Redundant exact-match header checks were dropped ("authorization" and
 *   "x-api-key" are already matched by the substring checks).
 * - The top-level call attaches a rejection handler, like the sibling scripts.
 */
async function main(): Promise<void> {
  interface CapturedRequest {
    url: string;
    method: string;
    headers: Record<string, string>;
    postData: string | undefined;
  }

  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // Capture request headers for API calls
    const apiRequests: CapturedRequest[] = [];
    page.on('request', (req) => {
      const url = req.url();
      if (url.includes('treez.io') || url.includes('gapcommerce')) {
        apiRequests.push({
          url,
          method: req.method(),
          headers: req.headers(),
          postData: req.postData(),
        });
      }
    });

    console.log('Loading page to capture API auth headers...\n');
    await page.goto('https://shop.bestdispensary.com/shop', {
      waitUntil: 'networkidle2',
      timeout: 60000,
    });
    await sleep(3000);

    // Bypass age gate
    const ageGate = await page.$('[data-testid="age-gate-modal"]');
    if (ageGate) {
      const btn = await page.$('[data-testid="age-gate-submit-button"]');
      if (btn) await btn.click();
      await sleep(2000);
    }

    console.log('=== API REQUESTS WITH HEADERS ===\n');
    // Header-name fragments that suggest a credential.
    const credentialHints = ['auth', 'token', 'key', 'api'];
    apiRequests.forEach((req, i) => {
      console.log((i + 1) + '. ' + req.method + ' ' + req.url.slice(0, 100));
      console.log(' Headers:');
      for (const [k, v] of Object.entries(req.headers)) {
        const lowered = k.toLowerCase();
        if (credentialHints.some((hint) => lowered.includes(hint))) {
          console.log(' >>> ' + k + ': ' + v);
        }
      }
      // Show all headers for treez.io requests
      if (req.url.includes('headless.treez.io')) {
        console.log(' ALL HEADERS:');
        for (const [k, v] of Object.entries(req.headers)) {
          console.log(' ' + k + ': ' + String(v).slice(0, 80));
        }
      }
      console.log('');
    });

    // Also check for API keys in page scripts
    console.log('=== CHECKING FOR API KEYS IN PAGE ===\n');
    const pageData = await page.evaluate(() => {
      // Untyped on purpose: the shape of these page globals is unknown.
      const data: any = {};
      // Check window object for API keys
      const win = window as any;
      if (win.__NEXT_DATA__) {
        data.nextData = win.__NEXT_DATA__;
      }
      // Check for any global config
      if (win.config || win.CONFIG) {
        data.config = win.config || win.CONFIG;
      }
      // Look for treez-related globals
      Object.keys(win).forEach(key => {
        const lowered = key.toLowerCase();
        if (lowered.includes('treez') || lowered.includes('api') || lowered.includes('config')) {
          try {
            data[key] = JSON.stringify(win[key]).slice(0, 500);
          } catch {
            // Best effort: skip globals that cannot be serialized.
          }
        }
      });
      return data;
    });

    if (pageData.nextData?.props?.pageProps) {
      console.log('Next.js pageProps keys: ' + Object.keys(pageData.nextData.props.pageProps).join(', '));
    }
    if (pageData.nextData?.runtimeConfig) {
      console.log('Runtime config: ' + JSON.stringify(pageData.nextData.runtimeConfig).slice(0, 500));
    }
  } finally {
    // Always release the Chromium process, even when a step above threw.
    await browser.close();
  }
}

main().catch(console.error);

View File

@@ -1,100 +0,0 @@
import puppeteer from 'puppeteer';
import fs from 'fs';
/**
 * Pause the calling async flow for roughly `ms` milliseconds.
 *
 * @param ms - Delay in milliseconds, handed straight to `setTimeout`.
 * @returns A promise that resolves (with no value) once the timer fires.
 */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((wake) => {
    setTimeout(wake, ms);
  });
}
/**
 * Entry point: capture product-search responses while paging through /shop.
 *
 * Listens for HTTP 200 responses whose URL contains
 * `gapcommerceapi.com/product/search`, collects `hits.hits[]._source` from
 * each, clicks the "Load More" button up to 50 times, de-duplicates the
 * captured products and writes them to `/tmp/treez-products.json`.
 *
 * Changes from the previous version:
 * - The browser is closed in `finally`, so failures no longer leak Chromium.
 * - Response parse failures are logged instead of being silently swallowed.
 * - The top-level call attaches a rejection handler, like the sibling scripts.
 */
async function main(): Promise<void> {
  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // Capture ES API responses
    let allProductData: any[] = [];
    page.on('response', async (res) => {
      const url = res.url();
      if (url.includes('gapcommerceapi.com/product/search') && res.status() === 200) {
        try {
          const json = await res.json();
          const products = json.hits?.hits?.map((h: any) => h._source) || [];
          allProductData = allProductData.concat(products);
          console.log('Captured ' + products.length + ' products (total: ' + allProductData.length + ')');
        } catch (err: unknown) {
          // Still best effort, but surface the failure so a missing batch is explainable.
          console.log('Parse error: ' + (err instanceof Error ? err.message : String(err)));
        }
      }
    });

    console.log('Loading /shop page to capture product data...\n');
    await page.goto('https://shop.bestdispensary.com/shop', {
      waitUntil: 'networkidle2',
      timeout: 60000,
    });
    await sleep(3000);

    // Bypass age gate
    const ageGate = await page.$('[data-testid="age-gate-modal"]');
    if (ageGate) {
      const btn = await page.$('[data-testid="age-gate-submit-button"]');
      if (btn) await btn.click();
      await sleep(2000);
    }

    // Click load more many times to get all products
    console.log('\nClicking Load More to capture all products...');
    for (let i = 0; i < 50; i++) {
      const btn = await page.$('button.collection__load-more');
      if (!btn) {
        console.log('No more Load More button');
        break;
      }
      // A zero-sized bounding box means the button is not rendered.
      const isVisible = await page.evaluate((b) => {
        const rect = b.getBoundingClientRect();
        return rect.width > 0 && rect.height > 0;
      }, btn);
      if (!isVisible) {
        console.log('Load More not visible');
        break;
      }
      await btn.click();
      await sleep(1500);
      console.log('Click ' + (i + 1) + ': ' + allProductData.length + ' total products');
    }

    console.log('\n=== RESULTS ===\n');
    console.log('Total products captured: ' + allProductData.length);
    if (allProductData.length > 0) {
      // Dedupe by the first available identifier: id, then productId, then name.
      const seen = new Set();
      const unique = allProductData.filter(p => {
        const id = p.id || p.productId || p.name;
        if (seen.has(id)) return false;
        seen.add(id);
        return true;
      });
      console.log('Unique products: ' + unique.length);
      console.log('\n=== PRODUCT FIELDS ===\n');
      console.log(Object.keys(unique[0]).sort().join('\n'));
      console.log('\n=== SAMPLE PRODUCT ===\n');
      console.log(JSON.stringify(unique[0], null, 2));

      // Save to file
      fs.writeFileSync('/tmp/treez-products.json', JSON.stringify(unique, null, 2));
      console.log('\nSaved to /tmp/treez-products.json');
    }
  } finally {
    // Always release the Chromium process, even when a step above threw.
    await browser.close();
  }
}

main().catch(console.error);

View File

@@ -1,88 +0,0 @@
import puppeteer from 'puppeteer';
import fs from 'fs';
/**
 * Pause the calling async flow for roughly `ms` milliseconds.
 *
 * @param ms - Delay in milliseconds, handed straight to `setTimeout`.
 * @returns A promise that resolves (with no value) once the timer fires.
 */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((wake) => {
    setTimeout(wake, ms);
  });
}
/**
 * Entry point: capture product-search responses while scrolling /shop.
 *
 * Logs the status of every response whose URL contains
 * `gapcommerceapi.com/product/search`. For HTTP 200 responses it parses the
 * body text as JSON and collects `hits.hits[]._source`. The page is scrolled
 * to the bottom 20 times, with a "load more" click attempted after each
 * scroll. Captured products are written, without de-duplication, to
 * `/tmp/treez-products.json`.
 *
 * Changes from the previous version:
 * - The browser is closed in `finally`, so failures no longer leak Chromium.
 * - The top-level call attaches a rejection handler, like the sibling scripts.
 */
async function main(): Promise<void> {
  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // Capture ES API responses as text
    let allProducts: any[] = [];
    page.on('response', async (res) => {
      const url = res.url();
      if (!url.includes('gapcommerceapi.com/product/search')) {
        return;
      }
      console.log('ES Response: status=' + res.status());
      if (res.status() !== 200) {
        return;
      }
      try {
        const text = await res.text();
        console.log('Response length: ' + text.length);
        const json = JSON.parse(text);
        const products = json.hits?.hits?.map((h: any) => h._source) || [];
        allProducts = allProducts.concat(products);
        console.log('Got ' + products.length + ' products (total: ' + allProducts.length + ')');
      } catch (err: unknown) {
        console.log('Parse error: ' + (err instanceof Error ? err.message : String(err)));
      }
    });

    console.log('Loading page...\n');
    await page.goto('https://shop.bestdispensary.com/shop', {
      waitUntil: 'networkidle2',
      timeout: 60000,
    });
    await sleep(5000);

    // Bypass age gate
    const ageGate = await page.$('[data-testid="age-gate-modal"]');
    if (ageGate) {
      console.log('Bypassing age gate...');
      const btn = await page.$('[data-testid="age-gate-submit-button"]');
      if (btn) await btn.click();
      await sleep(3000);
    }

    // Wait for initial products to load
    await sleep(3000);
    console.log('\nInitial products captured: ' + allProducts.length);

    // Try scrolling to trigger more loads
    console.log('\nScrolling...');
    for (let i = 0; i < 20; i++) {
      await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
      await sleep(1500);
      // Also click load more if present
      try {
        await page.click('button.collection__load-more');
        console.log('Clicked load more');
      } catch {
        // Deliberately ignored: page.click rejects when no element matches
        // the selector, which simply means there is nothing to click.
      }
    }

    console.log('\n=== FINAL RESULTS ===\n');
    console.log('Total products: ' + allProducts.length);
    if (allProducts.length > 0) {
      console.log('\nFields: ' + Object.keys(allProducts[0]).sort().join(', '));
      console.log('\nSample:\n' + JSON.stringify(allProducts[0], null, 2));
      fs.writeFileSync('/tmp/treez-products.json', JSON.stringify(allProducts, null, 2));
      console.log('\nSaved to /tmp/treez-products.json');
    }
  } finally {
    // Always release the Chromium process, even when a step above threw.
    await browser.close();
  }
}

main().catch(console.error);

View File

@@ -1,192 +0,0 @@
/**
* Navigate to each category page and count products
*/
import puppeteer, { Page } from 'puppeteer';
const STORE_ID = 'best';
/**
 * Pause the calling async flow for roughly `ms` milliseconds.
 *
 * @param ms - Delay in milliseconds, handed straight to `setTimeout`.
 * @returns A promise that resolves (with no value) once the timer fires.
 */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((wake) => {
    setTimeout(wake, ms);
  });
}
/**
 * Dismiss the age-verification modal if the page is currently showing one.
 *
 * Does nothing when no `age-gate-modal` element exists. When the modal is
 * present, the submit button is clicked (if it can be found) and the function
 * then waits two seconds, whether or not a click happened.
 *
 * @param page - Puppeteer page to inspect.
 */
async function bypassAgeGate(page: Page): Promise<void> {
  const modal = await page.$('[data-testid="age-gate-modal"]');
  if (!modal) {
    return;
  }

  const submit = await page.$('[data-testid="age-gate-submit-button"]');
  if (submit) {
    await submit.click();
  }
  await sleep(2000);
}
/**
 * Keep scrolling to the bottom of the page until it stops growing.
 *
 * Before each scroll the document height is read. The loop ends once that
 * height has matched the previous read three times in a row, or after 50
 * scrolls, whichever comes first. Each scroll is followed by a 1.5 s pause.
 *
 * @param page - Puppeteer page to scroll.
 */
async function scrollToLoadAll(page: Page): Promise<void> {
  let lastHeight = 0;
  let stableReads = 0;

  for (let pass = 0; pass < 50; pass += 1) {
    const height = await page.evaluate(() => document.body.scrollHeight);
    stableReads = height === lastHeight ? stableReads + 1 : 0;
    if (stableReads >= 3) {
      break;
    }

    await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
    await sleep(1500);
    lastHeight = height;
  }
}
/**
 * Count distinct products currently rendered on the page.
 *
 * Each `a[href*="/product/"]` link contributes a name taken from its image
 * `alt` text or, failing that, the trimmed text of an `<h5>` inside it. Links
 * with no name are ignored and duplicate names are counted once.
 *
 * @param page - Puppeteer page to inspect.
 * @returns Number of unique product names found.
 */
async function countProducts(page: Page): Promise<number> {
  return page.evaluate(() => {
    const names = Array.from(document.querySelectorAll('a[href*="/product/"]'))
      .map(
        (anchor) =>
          anchor.querySelector('img')?.getAttribute('alt') ||
          anchor.querySelector('h5')?.textContent?.trim() ||
          ''
      )
      .filter((name) => name !== '');
    return new Set<string>(names).size;
  });
}
/**
 * Entry point: probe category page URLs and count products on each.
 *
 * For every category slug, three URL patterns are tried in order. The first
 * one that renders any product link is scrolled to the end and its product
 * count recorded. The main shop page is counted the same way, the nav links
 * on the home page are listed, and a summary is printed.
 *
 * Changes from the previous version:
 * - The browser is closed in `finally`, so failures no longer leak Chromium.
 * - "Total across categories" no longer includes the whole-shop page count,
 *   which double counted products already attributed to a category. The shop
 *   line is still printed in the summary.
 */
async function main(): Promise<void> {
  // Label used for the whole-shop entry; it is not a category slug.
  const SHOP_LABEL = 'shop';

  console.log('='.repeat(60));
  console.log('Testing Treez Category Pages');
  console.log('='.repeat(60));

  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // Abort image/font/media requests; only the DOM is inspected.
    await page.setRequestInterception(true);
    page.on('request', (req) => {
      if (['image', 'font', 'media'].includes(req.resourceType())) {
        req.abort();
      } else {
        req.continue();
      }
    });

    // Categories from the nav menu
    const categories = [
      'cartridges',
      'flower',
      'pre-rolls',
      'edibles',
      'extracts',
      'tinctures',
      'capsules',
      'topicals',
      'accessories',
      'drink',
    ];
    const results: { category: string; products: number }[] = [];
    let ageGateBypassed = false;

    for (const category of categories) {
      // Try different URL patterns
      const urls = [
        `https://${STORE_ID}.treez.io/onlinemenu/${category}?customerType=ADULT`,
        `https://${STORE_ID}.treez.io/onlinemenu/category/${category}?customerType=ADULT`,
        `https://${STORE_ID}.treez.io/${category}?customerType=ADULT`,
      ];
      for (const url of urls) {
        try {
          console.log(`\nTrying: ${url}`);
          await page.goto(url, { waitUntil: 'networkidle2', timeout: 30000 });
          await sleep(2000);
          // The age gate is only handled on the first page that loads.
          if (!ageGateBypassed) {
            await bypassAgeGate(page);
            ageGateBypassed = true;
            await sleep(1000);
          }
          const initialCount = await countProducts(page);
          if (initialCount > 0) {
            console.log(` Initial: ${initialCount} products`);
            await scrollToLoadAll(page);
            const finalCount = await countProducts(page);
            console.log(` After scroll: ${finalCount} products`);
            results.push({ category, products: finalCount });
            break; // Found working URL, move to next category
          } else {
            console.log(` No products found`);
          }
        } catch (error: unknown) {
          console.log(` Error: ${error instanceof Error ? error.message : String(error)}`);
        }
      }
    }

    // Also try the main shop page
    console.log('\nTrying main shop page...');
    try {
      const shopUrl = `https://${STORE_ID}.treez.io/onlinemenu/shop?customerType=ADULT`;
      await page.goto(shopUrl, { waitUntil: 'networkidle2', timeout: 30000 });
      await sleep(2000);
      const initialCount = await countProducts(page);
      console.log(`Shop page initial: ${initialCount} products`);
      if (initialCount > 0) {
        await scrollToLoadAll(page);
        const finalCount = await countProducts(page);
        console.log(`Shop page after scroll: ${finalCount} products`);
        results.push({ category: SHOP_LABEL, products: finalCount });
      }
    } catch (error: unknown) {
      console.log(`Shop page error: ${error instanceof Error ? error.message : String(error)}`);
    }

    // Try to find and click on category links from the nav
    console.log('\n[Alternative] Trying to find nav category links...');
    const homeUrl = `https://${STORE_ID}.treez.io/onlinemenu/?customerType=ADULT`;
    await page.goto(homeUrl, { waitUntil: 'networkidle2', timeout: 30000 });
    await sleep(3000);
    await bypassAgeGate(page);
    await sleep(1000);
    const navLinks = await page.evaluate(() => {
      const links: { text: string; href: string }[] = [];
      document.querySelectorAll('nav a, [class*="nav"] a').forEach(a => {
        const text = a.textContent?.trim() || '';
        const href = a.getAttribute('href') || '';
        if (href && text && !links.some(l => l.href === href)) {
          links.push({ text, href });
        }
      });
      return links;
    });
    console.log('Nav links found:');
    navLinks.forEach(l => console.log(` - "${l.text}" → ${l.href}`));

    // Summary
    console.log('\n' + '='.repeat(60));
    console.log('Summary');
    console.log('='.repeat(60));
    let total = 0;
    results.forEach(r => {
      console.log(`${r.category}: ${r.products} products`);
      // The shop page lists the whole menu, so it is left out of the category total.
      if (r.category !== SHOP_LABEL) {
        total += r.products;
      }
    });
    console.log(`\nTotal across categories: ${total} products`);
  } finally {
    // Always release the Chromium process, even when a step above threw.
    await browser.close();
  }
}
main().catch(console.error);

View File

@@ -1,178 +0,0 @@
/**
* ============================================================
* TREEZ CLIENT TEST SCRIPT
* ============================================================
*
* Tests the Treez CDP interception client using Best Dispensary.
*
* This verifies:
* - Stealth plugin bypasses headless detection
* - CDP intercepts Elasticsearch API responses
* - Products are captured and normalized correctly
* - Inventory data is available
*
* Usage: npx ts-node scripts/test-treez-client.ts
*
* ============================================================
*/
import { fetchProductsFromUrl } from '../src/platforms/treez';
const TEST_URL = 'https://shop.bestdispensary.com/shop';
/**
 * Entry point: fetch products from TEST_URL via `fetchProductsFromUrl` and
 * print a human-readable report.
 *
 * The report contains capture totals, one raw and one normalized sample
 * product, brand and category breakdowns, inventory statistics, and coverage
 * of THC/CBD/image data. The process exits with status 1 when no products are
 * captured or when any step throws.
 */
async function main() {
  const rule = '='.repeat(60);

  // Prints a section heading: blank line, rule, title, rule.
  const section = (title: string): void => {
    console.log('\n' + rule);
    console.log(title);
    console.log(rule);
  };

  // Counts items per key and returns [key, count] pairs, most frequent first.
  const tally = <T>(items: readonly T[], keyOf: (item: T) => string): [string, number][] => {
    const counts = new Map<string, number>();
    for (const item of items) {
      const key = keyOf(item);
      counts.set(key, (counts.get(key) || 0) + 1);
    }
    return [...counts.entries()].sort((a, b) => b[1] - a[1]);
  };

  console.log(rule);
  console.log('TREEZ CLIENT TEST - CDP INTERCEPTION');
  console.log(rule);
  console.log(`URL: ${TEST_URL}`);
  console.log('Method: Puppeteer + Stealth + CDP response capture');
  console.log('');

  try {
    console.log('[Starting] Launching browser with Stealth plugin...\n');
    const result = await fetchProductsFromUrl(TEST_URL);

    // NOTE(review): percentages use totalCaptured as the denominator while the
    // counts come from result.normalized — confirm the two always match.
    const percentOfCaptured = (count: number): number =>
      Math.round(count / result.totalCaptured * 100);

    section('RESULTS');
    console.log(`Total products: ${result.totalCaptured}`);
    console.log(`Store ID: ${result.storeId || 'N/A (custom domain)'}`);
    console.log(`Source URL: ${result.sourceUrl}`);
    console.log(`Fetched at: ${result.fetchedAt.toISOString()}`);

    if (result.products.length === 0) {
      console.log('\n[WARNING] No products captured!');
      console.log('This could mean:');
      console.log(' - Stealth plugin is not bypassing detection');
      console.log(' - CDP is not intercepting the correct URLs');
      console.log(' - Page structure has changed');
      process.exit(1);
    }

    // Show sample raw product
    section('SAMPLE RAW PRODUCT (from Elasticsearch)');
    const raw = result.products[0];
    console.log(JSON.stringify({
      id: raw.id,
      name: raw.name,
      menuTitle: raw.menuTitle,
      brand: raw.brand,
      category: raw.category,
      subtype: raw.subtype,
      status: raw.status,
      availableUnits: raw.availableUnits,
      customMinPrice: raw.customMinPrice,
      customMaxPrice: raw.customMaxPrice,
      isActive: raw.isActive,
      isAboveThreshold: raw.isAboveThreshold,
    }, null, 2));

    // Show sample normalized product
    section('SAMPLE NORMALIZED PRODUCT');
    const normalized = result.normalized[0];
    console.log(JSON.stringify({
      id: normalized.id,
      name: normalized.name,
      brand: normalized.brand,
      category: normalized.category,
      subtype: normalized.subtype,
      price: normalized.price,
      priceMin: normalized.priceMin,
      priceMax: normalized.priceMax,
      discountedPrice: normalized.discountedPrice,
      discountPercent: normalized.discountPercent,
      availableUnits: normalized.availableUnits,
      inStock: normalized.inStock,
      thcPercent: normalized.thcPercent,
      cbdPercent: normalized.cbdPercent,
      strainType: normalized.strainType,
      effects: normalized.effects,
      flavors: normalized.flavors,
      imageUrl: normalized.imageUrl,
      images: normalized.images?.slice(0, 2),
    }, null, 2));

    // Brand breakdown
    section('BRANDS (top 15)');
    const brandsByCount = tally(result.normalized, (p) => p.brand || 'Unknown');
    console.log(`Total unique brands: ${brandsByCount.length}\n`);
    for (const [brand, count] of brandsByCount.slice(0, 15)) {
      console.log(` ${brand}: ${count} products`);
    }

    // Category breakdown
    section('CATEGORIES');
    for (const [cat, count] of tally(result.normalized, (p) => p.category || 'Unknown')) {
      console.log(` ${cat}: ${count} products`);
    }

    // Inventory stats
    section('INVENTORY STATS');
    const inStock = result.normalized.filter(p => p.inStock).length;
    const outOfStock = result.normalized.filter(p => !p.inStock).length;
    const withUnits = result.normalized.filter(p => p.availableUnits > 0);
    console.log(`In stock: ${inStock}`);
    console.log(`Out of stock: ${outOfStock}`);
    console.log(`With inventory levels: ${withUnits.length}`);

    // Show inventory examples
    if (withUnits.length > 0) {
      console.log('\nSample inventory levels:');
      for (const p of withUnits.slice(0, 5)) {
        console.log(` ${p.name}: ${p.availableUnits} units`);
      }
    }

    // Check for THC/CBD data
    const hasThc = result.normalized.filter(p => p.thcPercent !== null).length;
    const hasCbd = result.normalized.filter(p => p.cbdPercent !== null).length;
    console.log(`\nWith THC data: ${hasThc} (${percentOfCaptured(hasThc)}%)`);
    console.log(`With CBD data: ${hasCbd} (${percentOfCaptured(hasCbd)}%)`);

    // Check for images
    const hasImages = result.normalized.filter(p => p.imageUrl).length;
    console.log(`With images: ${hasImages} (${percentOfCaptured(hasImages)}%)`);

    section('TEST PASSED');
  } catch (error: any) {
    console.error('\n' + rule);
    console.error('TEST FAILED');
    console.error(rule);
    console.error(`Error: ${error.message}`);
    console.error(error.stack);
    process.exit(1);
  }
}
main().catch(console.error);

View File

@@ -1,160 +0,0 @@
/**
* Find the correct product card container selector
*/
import puppeteer, { Page } from 'puppeteer';
const STORE_ID = 'best';
/**
 * Pause the calling async flow for roughly `ms` milliseconds.
 *
 * @param ms - Delay in milliseconds, handed straight to `setTimeout`.
 * @returns A promise that resolves (with no value) once the timer fires.
 */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((wake) => {
    setTimeout(wake, ms);
  });
}
/**
 * Dismiss the age-verification modal if the page is currently showing one.
 *
 * Does nothing when no `age-gate-modal` element exists. When the modal is
 * present, the submit button is clicked (if it can be found) and the function
 * then waits two seconds, whether or not a click happened.
 *
 * @param page - Puppeteer page to inspect.
 */
async function bypassAgeGate(page: Page): Promise<void> {
  const modal = await page.$('[data-testid="age-gate-modal"]');
  if (!modal) {
    return;
  }

  const submit = await page.$('[data-testid="age-gate-submit-button"]');
  if (submit) {
    await submit.click();
  }
  await sleep(2000);
}
/**
 * Probe a Treez online menu to find which DOM elements wrap product cards.
 *
 * Launches headless Chromium, opens the brands page for STORE_ID, dismisses
 * the age gate, and prints three reports to stdout:
 *   1. ancestors of product-name headings whose class contains "ProductCard",
 *   2. match counts for a fixed list of candidate container selectors,
 *   3. every element that contains both a product-name heading and a price node.
 *
 * The browser is closed in a `finally` block, so a navigation timeout or a
 * failing `page.evaluate` no longer leaves a Chromium process behind.
 *
 * @returns A promise that resolves once the reports are printed and the
 *   browser is closed; it rejects if any Puppeteer call fails.
 */
async function main(): Promise<void> {
  console.log('Finding Treez product card containers...\n');

  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // Abort image/font/media requests; everything else passes through.
    // NOTE(review): presumably done to speed up loading, since only the DOM
    // structure is inspected below - confirm.
    await page.setRequestInterception(true);
    page.on('request', (req) => {
      if (['image', 'font', 'media'].includes(req.resourceType())) {
        req.abort();
      } else {
        req.continue();
      }
    });

    const url = `https://${STORE_ID}.treez.io/onlinemenu/brands?customerType=ADULT`;
    await page.goto(url, { waitUntil: 'networkidle2', timeout: 60000 });
    await sleep(3000);
    await bypassAgeGate(page);
    await sleep(2000);

    // Report 1: start from every product-name <h5> and walk up (at most 10
    // ancestors) to the first element whose class mentions "ProductCard".
    // Results are grouped by the first 100 characters of that class string.
    const analysis = await page.evaluate(() => {
      const nameElements = document.querySelectorAll('h5.product_product__name__JcEk0');
      const containers: Map<string, { count: number; sample: string }> = new Map();

      nameElements.forEach(nameEl => {
        let current = nameEl.parentElement;
        let depth = 0;
        while (current && depth < 10) {
          const className = current.className?.toString?.() || '';
          if (className.includes('ProductCard')) {
            const key = className.slice(0, 100);
            const existing = containers.get(key) ?? { count: 0, sample: '' };
            existing.count++;
            if (!existing.sample) {
              // Keep one truncated HTML snippet per class group for inspection.
              existing.sample = current.outerHTML.slice(0, 300);
            }
            containers.set(key, existing);
            break;
          }
          current = current.parentElement;
          depth++;
        }
      });

      return Array.from(containers.entries()).map(([cls, data]) => ({
        class: cls,
        count: data.count,
        sample: data.sample,
      }));
    });

    console.log('Product card containers found:');
    analysis.forEach(({ class: cls, count, sample }) => {
      console.log(`\n[${count}x] ${cls}`);
      console.log(`Sample: ${sample.slice(0, 200)}...`);
    });

    // Report 2: for each candidate selector, count the matches and how many
    // of them contain a name node and a price node.
    console.log('\n\n--- Testing container selectors ---');
    const selectorTests = await page.evaluate(() => {
      const tests: Record<string, { total: number; withName: number; withPrice: number }> = {};
      const selectors = [
        '[class*="ProductCardWithBtn"]',
        '[class*="ProductCard_product"]',
        '[class*="ProductCard__"]',
        'article[class*="product"]',
        'div[class*="ProductCard"]',
        'a[class*="ProductCard"]',
        '[class*="product_product__"][class*="link"]',
        'article',
      ];

      selectors.forEach(sel => {
        const elements = document.querySelectorAll(sel);
        let withName = 0;
        let withPrice = 0;
        elements.forEach(el => {
          if (el.querySelector('h5, [class*="product__name"]')) withName++;
          if (el.querySelector('[class*="price"]')) withPrice++;
        });
        tests[sel] = { total: elements.length, withName, withPrice };
      });

      return tests;
    });

    Object.entries(selectorTests).forEach(([sel, { total, withName, withPrice }]) => {
      console.log(`${sel}: ${total} total, ${withName} with name, ${withPrice} with price`);
    });

    // Report 3: scan every element and list the distinct class strings of
    // those that contain both a product-name heading and a price node.
    console.log('\n\n--- Finding exact product card class ---');
    const exactClasses = await page.evaluate(() => {
      const allElements = document.querySelectorAll('*');
      const matches: { tag: string; class: string }[] = [];
      // Deduplicate on the FULL class string. The reported value is truncated
      // to 150 characters, so comparing against the stored value would never
      // match for longer class names and would let duplicates through.
      const seenClassNames = new Set<string>();

      allElements.forEach(el => {
        const hasName = el.querySelector('h5.product_product__name__JcEk0');
        const hasPrice = el.querySelector('[class*="price__ins"], [class*="price__"]');
        if (hasName && hasPrice) {
          const className = el.className?.toString?.() || '';
          if (className && !seenClassNames.has(className)) {
            seenClassNames.add(className);
            matches.push({ tag: el.tagName, class: className.slice(0, 150) });
          }
        }
      });

      return matches;
    });

    console.log('Elements containing both name and price:');
    exactClasses.forEach(({ tag, class: cls }) => {
      console.log(` [${tag}] ${cls}`);
    });
  } finally {
    // Always release the Chromium process, including on failure paths.
    await browser.close();
  }
}
// Entry point. Log any failure from main() and mark the process as failed;
// logging alone would leave the exit code at 0 and hide the error from
// shells and CI.
main().catch((error: unknown) => {
  console.error(error);
  process.exitCode = 1;
});

Some files were not shown because too many files have changed in this diff Show More