Compare commits
1 Commits
master
...
fix/ci-fil
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
cdab44c757 |
7
.gitignore
vendored
7
.gitignore
vendored
@@ -51,10 +51,3 @@ coverage/
|
||||
*.tmp
|
||||
*.temp
|
||||
llm-scraper/
|
||||
|
||||
# Claude Code
|
||||
.claude/
|
||||
|
||||
# Test/debug scripts
|
||||
backend/scripts/test-*.ts
|
||||
backend/scripts/run-*.ts
|
||||
|
||||
189
.woodpecker.yml
189
.woodpecker.yml
@@ -1,189 +0,0 @@
|
||||
steps:
|
||||
# ===========================================
|
||||
# PR VALIDATION: Parallel type checks (PRs only)
|
||||
# ===========================================
|
||||
typecheck-backend:
|
||||
image: node:22
|
||||
commands:
|
||||
- cd backend
|
||||
- npm ci --prefer-offline
|
||||
- npx tsc --noEmit
|
||||
depends_on: []
|
||||
when:
|
||||
event: pull_request
|
||||
|
||||
typecheck-cannaiq:
|
||||
image: node:22
|
||||
commands:
|
||||
- cd cannaiq
|
||||
- npm ci --prefer-offline
|
||||
- npx tsc --noEmit
|
||||
depends_on: []
|
||||
when:
|
||||
event: pull_request
|
||||
|
||||
typecheck-findadispo:
|
||||
image: node:22
|
||||
commands:
|
||||
- cd findadispo/frontend
|
||||
- npm ci --prefer-offline
|
||||
- npx tsc --noEmit 2>/dev/null || true
|
||||
depends_on: []
|
||||
when:
|
||||
event: pull_request
|
||||
|
||||
typecheck-findagram:
|
||||
image: node:22
|
||||
commands:
|
||||
- cd findagram/frontend
|
||||
- npm ci --prefer-offline
|
||||
- npx tsc --noEmit 2>/dev/null || true
|
||||
depends_on: []
|
||||
when:
|
||||
event: pull_request
|
||||
|
||||
# ===========================================
|
||||
# AUTO-MERGE: Merge PR after all checks pass
|
||||
# ===========================================
|
||||
auto-merge:
|
||||
image: alpine:latest
|
||||
environment:
|
||||
GITEA_TOKEN:
|
||||
from_secret: gitea_token
|
||||
commands:
|
||||
- apk add --no-cache curl
|
||||
- |
|
||||
echo "Merging PR #${CI_COMMIT_PULL_REQUEST}..."
|
||||
curl -s -X POST \
|
||||
-H "Authorization: token $GITEA_TOKEN" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"Do":"merge"}' \
|
||||
"https://git.spdy.io/api/v1/repos/Creationshop/cannaiq/pulls/${CI_COMMIT_PULL_REQUEST}/merge"
|
||||
depends_on:
|
||||
- typecheck-backend
|
||||
- typecheck-cannaiq
|
||||
- typecheck-findadispo
|
||||
- typecheck-findagram
|
||||
when:
|
||||
event: pull_request
|
||||
|
||||
# ===========================================
|
||||
# DOCKER: Multi-stage builds with layer caching
|
||||
# ===========================================
|
||||
docker-backend:
|
||||
image: gcr.io/kaniko-project/executor:debug
|
||||
commands:
|
||||
- /kaniko/executor
|
||||
--context=/woodpecker/src/git.spdy.io/Creationshop/cannaiq/backend
|
||||
--dockerfile=/woodpecker/src/git.spdy.io/Creationshop/cannaiq/backend/Dockerfile
|
||||
--destination=registry.spdy.io/cannaiq/backend:latest
|
||||
--destination=registry.spdy.io/cannaiq/backend:sha-${CI_COMMIT_SHA:0:8}
|
||||
--build-arg=APP_BUILD_VERSION=sha-${CI_COMMIT_SHA:0:8}
|
||||
--build-arg=APP_GIT_SHA=${CI_COMMIT_SHA}
|
||||
--build-arg=APP_BUILD_TIME=${CI_PIPELINE_CREATED}
|
||||
--cache=true
|
||||
--cache-repo=registry.spdy.io/cannaiq/cache-backend
|
||||
--cache-ttl=168h
|
||||
depends_on: []
|
||||
when:
|
||||
branch: [master, develop]
|
||||
event: push
|
||||
|
||||
docker-cannaiq:
|
||||
image: gcr.io/kaniko-project/executor:debug
|
||||
commands:
|
||||
- /kaniko/executor
|
||||
--context=/woodpecker/src/git.spdy.io/Creationshop/cannaiq/cannaiq
|
||||
--dockerfile=/woodpecker/src/git.spdy.io/Creationshop/cannaiq/cannaiq/Dockerfile
|
||||
--destination=registry.spdy.io/cannaiq/frontend:latest
|
||||
--destination=registry.spdy.io/cannaiq/frontend:sha-${CI_COMMIT_SHA:0:8}
|
||||
--cache=true
|
||||
--cache-repo=registry.spdy.io/cannaiq/cache-cannaiq
|
||||
--cache-ttl=168h
|
||||
depends_on: []
|
||||
when:
|
||||
branch: [master, develop]
|
||||
event: push
|
||||
|
||||
docker-findadispo:
|
||||
image: gcr.io/kaniko-project/executor:debug
|
||||
commands:
|
||||
- /kaniko/executor
|
||||
--context=/woodpecker/src/git.spdy.io/Creationshop/cannaiq/findadispo/frontend
|
||||
--dockerfile=/woodpecker/src/git.spdy.io/Creationshop/cannaiq/findadispo/frontend/Dockerfile
|
||||
--destination=registry.spdy.io/cannaiq/findadispo:latest
|
||||
--destination=registry.spdy.io/cannaiq/findadispo:sha-${CI_COMMIT_SHA:0:8}
|
||||
--cache=true
|
||||
--cache-repo=registry.spdy.io/cannaiq/cache-findadispo
|
||||
--cache-ttl=168h
|
||||
depends_on: []
|
||||
when:
|
||||
branch: [master, develop]
|
||||
event: push
|
||||
|
||||
docker-findagram:
|
||||
image: gcr.io/kaniko-project/executor:debug
|
||||
commands:
|
||||
- /kaniko/executor
|
||||
--context=/woodpecker/src/git.spdy.io/Creationshop/cannaiq/findagram/frontend
|
||||
--dockerfile=/woodpecker/src/git.spdy.io/Creationshop/cannaiq/findagram/frontend/Dockerfile
|
||||
--destination=registry.spdy.io/cannaiq/findagram:latest
|
||||
--destination=registry.spdy.io/cannaiq/findagram:sha-${CI_COMMIT_SHA:0:8}
|
||||
--cache=true
|
||||
--cache-repo=registry.spdy.io/cannaiq/cache-findagram
|
||||
--cache-ttl=168h
|
||||
depends_on: []
|
||||
when:
|
||||
branch: [master, develop]
|
||||
event: push
|
||||
|
||||
# ===========================================
|
||||
# DEPLOY: Pull from local registry
|
||||
# ===========================================
|
||||
deploy:
|
||||
image: bitnami/kubectl:latest
|
||||
environment:
|
||||
K8S_TOKEN:
|
||||
from_secret: k8s_token
|
||||
commands:
|
||||
- mkdir -p ~/.kube
|
||||
- |
|
||||
cat > ~/.kube/config << KUBEEOF
|
||||
apiVersion: v1
|
||||
kind: Config
|
||||
clusters:
|
||||
- cluster:
|
||||
certificate-authority-data: LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUJkakNDQVIyZ0F3SUJBZ0lCQURBS0JnZ3Foa2pPUFFRREFqQWpNU0V3SHdZRFZRUUREQmhyTTNNdGMyVnkKZG1WeUxXTmhRREUzTmpVM05UUTNPRE13SGhjTk1qVXhNakUwTWpNeU5qSXpXaGNOTXpVeE1qRXlNak15TmpJegpXakFqTVNFd0h3WURWUVFEREJock0zTXRjMlZ5ZG1WeUxXTmhRREUzTmpVM05UUTNPRE13V1RBVEJnY3Foa2pPClBRSUJCZ2dxaGtqT1BRTUJCd05DQUFRWDRNdFJRTW5lWVJVV0s2cjZ3VEV2WjAxNnV4T3NUR3JJZ013TXVnNGwKajQ1bHZ6ZkM1WE1NY1pESnUxZ0t1dVJhVGxlb0xVOVJnSERIUUI4TUwzNTJvMEl3UURBT0JnTlZIUThCQWY4RQpCQU1DQXFRd0R3WURWUjBUQVFIL0JBVXdBd0VCL3pBZEJnTlZIUTRFRmdRVXIzNDZpNE42TFhzaEZsREhvSlU0CjJ1RjZseGN3Q2dZSUtvWkl6ajBFQXdJRFJ3QXdSQUlnVUtqdWRFQWJyS1JDVHROVXZTc1Rmb3FEaHFSeDM5MkYKTFFSVWlKK0hCVElDSUJqOFIxbG1zSnFSRkRHMEpwMGN4OG5ZZnFCaElRQzh6WWdRdTdBZmR4L3IKLS0tLS1FTkQgQ0VSVElGSUNBVEUtLS0tLQo=
|
||||
server: https://10.100.6.10:6443
|
||||
name: spdy-k3s
|
||||
contexts:
|
||||
- context:
|
||||
cluster: spdy-k3s
|
||||
namespace: cannaiq
|
||||
user: cannaiq-admin
|
||||
name: cannaiq
|
||||
current-context: cannaiq
|
||||
users:
|
||||
- name: cannaiq-admin
|
||||
user:
|
||||
token: $K8S_TOKEN
|
||||
KUBEEOF
|
||||
- chmod 600 ~/.kube/config
|
||||
# Apply manifests to ensure probes and resource limits are set
|
||||
- kubectl apply -f /woodpecker/src/git.spdy.io/Creationshop/cannaiq/k8s/scraper.yaml
|
||||
- kubectl apply -f /woodpecker/src/git.spdy.io/Creationshop/cannaiq/k8s/scraper-worker.yaml
|
||||
- kubectl set image deployment/scraper scraper=registry.spdy.io/cannaiq/backend:sha-${CI_COMMIT_SHA:0:8} -n cannaiq
|
||||
- kubectl rollout status deployment/scraper -n cannaiq --timeout=300s
|
||||
- kubectl set image deployment/scraper-worker worker=registry.spdy.io/cannaiq/backend:sha-${CI_COMMIT_SHA:0:8} -n cannaiq
|
||||
- kubectl set image deployment/cannaiq-frontend cannaiq-frontend=registry.spdy.io/cannaiq/frontend:sha-${CI_COMMIT_SHA:0:8} -n cannaiq
|
||||
- kubectl set image deployment/findadispo-frontend findadispo-frontend=registry.spdy.io/cannaiq/findadispo:sha-${CI_COMMIT_SHA:0:8} -n cannaiq
|
||||
- kubectl set image deployment/findagram-frontend findagram-frontend=registry.spdy.io/cannaiq/findagram:sha-${CI_COMMIT_SHA:0:8} -n cannaiq
|
||||
- kubectl rollout status deployment/cannaiq-frontend -n cannaiq --timeout=300s
|
||||
depends_on:
|
||||
- docker-backend
|
||||
- docker-cannaiq
|
||||
- docker-findadispo
|
||||
- docker-findagram
|
||||
when:
|
||||
branch: [master, develop]
|
||||
event: push
|
||||
213
.woodpecker/ci.yml
Normal file
213
.woodpecker/ci.yml
Normal file
@@ -0,0 +1,213 @@
|
||||
steps:
|
||||
# ===========================================
|
||||
# PR VALIDATION: Parallel type checks (PRs only)
|
||||
# ===========================================
|
||||
typecheck-backend:
|
||||
image: code.cannabrands.app/creationshop/node:20
|
||||
commands:
|
||||
- cd backend
|
||||
- npm ci --prefer-offline
|
||||
- npx tsc --noEmit
|
||||
depends_on: []
|
||||
when:
|
||||
event: pull_request
|
||||
|
||||
typecheck-cannaiq:
|
||||
image: code.cannabrands.app/creationshop/node:20
|
||||
commands:
|
||||
- cd cannaiq
|
||||
- npm ci --prefer-offline
|
||||
- npx tsc --noEmit
|
||||
depends_on: []
|
||||
when:
|
||||
event: pull_request
|
||||
|
||||
typecheck-findadispo:
|
||||
image: code.cannabrands.app/creationshop/node:20
|
||||
commands:
|
||||
- cd findadispo/frontend
|
||||
- npm ci --prefer-offline
|
||||
- npx tsc --noEmit 2>/dev/null || true
|
||||
depends_on: []
|
||||
when:
|
||||
event: pull_request
|
||||
|
||||
typecheck-findagram:
|
||||
image: code.cannabrands.app/creationshop/node:20
|
||||
commands:
|
||||
- cd findagram/frontend
|
||||
- npm ci --prefer-offline
|
||||
- npx tsc --noEmit 2>/dev/null || true
|
||||
depends_on: []
|
||||
when:
|
||||
event: pull_request
|
||||
|
||||
# ===========================================
|
||||
# AUTO-MERGE: Merge PR after all checks pass
|
||||
# ===========================================
|
||||
auto-merge:
|
||||
image: alpine:latest
|
||||
environment:
|
||||
GITEA_TOKEN:
|
||||
from_secret: gitea_token
|
||||
commands:
|
||||
- apk add --no-cache curl
|
||||
- |
|
||||
echo "Merging PR #${CI_COMMIT_PULL_REQUEST}..."
|
||||
curl -s -X POST \
|
||||
-H "Authorization: token $GITEA_TOKEN" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"Do":"merge"}' \
|
||||
"https://code.cannabrands.app/api/v1/repos/Creationshop/dispensary-scraper/pulls/${CI_COMMIT_PULL_REQUEST}/merge"
|
||||
depends_on:
|
||||
- typecheck-backend
|
||||
- typecheck-cannaiq
|
||||
- typecheck-findadispo
|
||||
- typecheck-findagram
|
||||
when:
|
||||
event: pull_request
|
||||
|
||||
# ===========================================
|
||||
# MASTER DEPLOY: Parallel Docker builds
|
||||
# ===========================================
|
||||
docker-backend:
|
||||
image: woodpeckerci/plugin-docker-buildx
|
||||
settings:
|
||||
registry: code.cannabrands.app
|
||||
repo: code.cannabrands.app/creationshop/dispensary-scraper
|
||||
tags:
|
||||
- latest
|
||||
- ${CI_COMMIT_SHA:0:8}
|
||||
dockerfile: backend/Dockerfile
|
||||
context: backend
|
||||
username:
|
||||
from_secret: registry_username
|
||||
password:
|
||||
from_secret: registry_password
|
||||
platforms: linux/amd64
|
||||
provenance: false
|
||||
build_args:
|
||||
APP_BUILD_VERSION: ${CI_COMMIT_SHA:0:8}
|
||||
APP_GIT_SHA: ${CI_COMMIT_SHA}
|
||||
APP_BUILD_TIME: ${CI_PIPELINE_CREATED}
|
||||
CONTAINER_IMAGE_TAG: ${CI_COMMIT_SHA:0:8}
|
||||
depends_on: []
|
||||
when:
|
||||
branch: master
|
||||
event: push
|
||||
|
||||
docker-cannaiq:
|
||||
image: woodpeckerci/plugin-docker-buildx
|
||||
settings:
|
||||
registry: code.cannabrands.app
|
||||
repo: code.cannabrands.app/creationshop/cannaiq-frontend
|
||||
tags:
|
||||
- latest
|
||||
- ${CI_COMMIT_SHA:0:8}
|
||||
dockerfile: cannaiq/Dockerfile
|
||||
context: cannaiq
|
||||
username:
|
||||
from_secret: registry_username
|
||||
password:
|
||||
from_secret: registry_password
|
||||
platforms: linux/amd64
|
||||
provenance: false
|
||||
depends_on: []
|
||||
when:
|
||||
branch: master
|
||||
event: push
|
||||
|
||||
docker-findadispo:
|
||||
image: woodpeckerci/plugin-docker-buildx
|
||||
settings:
|
||||
registry: code.cannabrands.app
|
||||
repo: code.cannabrands.app/creationshop/findadispo-frontend
|
||||
tags:
|
||||
- latest
|
||||
- ${CI_COMMIT_SHA:0:8}
|
||||
dockerfile: findadispo/frontend/Dockerfile
|
||||
context: findadispo/frontend
|
||||
username:
|
||||
from_secret: registry_username
|
||||
password:
|
||||
from_secret: registry_password
|
||||
platforms: linux/amd64
|
||||
provenance: false
|
||||
depends_on: []
|
||||
when:
|
||||
branch: master
|
||||
event: push
|
||||
|
||||
docker-findagram:
|
||||
image: woodpeckerci/plugin-docker-buildx
|
||||
settings:
|
||||
registry: code.cannabrands.app
|
||||
repo: code.cannabrands.app/creationshop/findagram-frontend
|
||||
tags:
|
||||
- latest
|
||||
- ${CI_COMMIT_SHA:0:8}
|
||||
dockerfile: findagram/frontend/Dockerfile
|
||||
context: findagram/frontend
|
||||
username:
|
||||
from_secret: registry_username
|
||||
password:
|
||||
from_secret: registry_password
|
||||
platforms: linux/amd64
|
||||
provenance: false
|
||||
depends_on: []
|
||||
when:
|
||||
branch: master
|
||||
event: push
|
||||
|
||||
# ===========================================
|
||||
# STAGE 3: Run Database Migrations (before deploy)
|
||||
# ===========================================
|
||||
migrate:
|
||||
image: code.cannabrands.app/creationshop/dispensary-scraper:${CI_COMMIT_SHA:0:8}
|
||||
environment:
|
||||
CANNAIQ_DB_HOST:
|
||||
from_secret: db_host
|
||||
CANNAIQ_DB_PORT:
|
||||
from_secret: db_port
|
||||
CANNAIQ_DB_NAME:
|
||||
from_secret: db_name
|
||||
CANNAIQ_DB_USER:
|
||||
from_secret: db_user
|
||||
CANNAIQ_DB_PASS:
|
||||
from_secret: db_pass
|
||||
commands:
|
||||
- cd /app
|
||||
- node dist/db/migrate.js
|
||||
depends_on:
|
||||
- docker-backend
|
||||
when:
|
||||
branch: master
|
||||
event: push
|
||||
|
||||
# ===========================================
|
||||
# STAGE 4: Deploy (after migrations)
|
||||
# ===========================================
|
||||
deploy:
|
||||
image: bitnami/kubectl:latest
|
||||
environment:
|
||||
KUBECONFIG_CONTENT:
|
||||
from_secret: kubeconfig_data
|
||||
commands:
|
||||
- mkdir -p ~/.kube
|
||||
- echo "$KUBECONFIG_CONTENT" | tr -d '[:space:]' | base64 -d > ~/.kube/config
|
||||
- chmod 600 ~/.kube/config
|
||||
- kubectl set image deployment/scraper scraper=code.cannabrands.app/creationshop/dispensary-scraper:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
|
||||
- kubectl set image deployment/scraper-worker worker=code.cannabrands.app/creationshop/dispensary-scraper:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
|
||||
- kubectl set image deployment/cannaiq-frontend cannaiq-frontend=code.cannabrands.app/creationshop/cannaiq-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
|
||||
- kubectl set image deployment/findadispo-frontend findadispo-frontend=code.cannabrands.app/creationshop/findadispo-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
|
||||
- kubectl set image deployment/findagram-frontend findagram-frontend=code.cannabrands.app/creationshop/findagram-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
|
||||
- kubectl rollout status deployment/scraper -n dispensary-scraper --timeout=300s
|
||||
- kubectl rollout status deployment/cannaiq-frontend -n dispensary-scraper --timeout=120s
|
||||
depends_on:
|
||||
- migrate
|
||||
- docker-cannaiq
|
||||
- docker-findadispo
|
||||
- docker-findagram
|
||||
when:
|
||||
branch: master
|
||||
event: push
|
||||
@@ -1,33 +1,17 @@
|
||||
# Build stage
|
||||
# Image: git.spdy.io/creationshop/dispensary-scraper
|
||||
FROM node:22-slim AS builder
|
||||
|
||||
# Install build tools for native modules (bcrypt, sharp)
|
||||
RUN apt-get update && apt-get install -y \
|
||||
python3 \
|
||||
build-essential \
|
||||
--no-install-recommends \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
# Image: code.cannabrands.app/creationshop/dispensary-scraper
|
||||
FROM code.cannabrands.app/creationshop/node:20-slim AS builder
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
COPY package*.json ./
|
||||
|
||||
# Install dependencies with retry and fallback registry
|
||||
RUN npm config set fetch-retries 3 && \
|
||||
npm config set fetch-retry-mintimeout 20000 && \
|
||||
npm config set fetch-retry-maxtimeout 120000 && \
|
||||
npm install || \
|
||||
(npm config set registry https://registry.npmmirror.com && npm install)
|
||||
RUN npm install
|
||||
|
||||
COPY . .
|
||||
RUN npm run build
|
||||
|
||||
# Prune dev dependencies for smaller production image
|
||||
RUN npm prune --production
|
||||
|
||||
# Production stage
|
||||
FROM node:22-slim
|
||||
FROM code.cannabrands.app/creationshop/node:20-slim
|
||||
|
||||
# Build arguments for version info
|
||||
ARG APP_BUILD_VERSION=dev
|
||||
@@ -41,10 +25,9 @@ ENV APP_GIT_SHA=${APP_GIT_SHA}
|
||||
ENV APP_BUILD_TIME=${APP_BUILD_TIME}
|
||||
ENV CONTAINER_IMAGE_TAG=${CONTAINER_IMAGE_TAG}
|
||||
|
||||
# Install Chromium dependencies, curl, and CA certificates for HTTPS
|
||||
# Install Chromium dependencies and curl for HTTP requests
|
||||
RUN apt-get update && apt-get install -y \
|
||||
curl \
|
||||
ca-certificates \
|
||||
chromium \
|
||||
fonts-liberation \
|
||||
libnss3 \
|
||||
@@ -61,7 +44,8 @@ ENV PUPPETEER_EXECUTABLE_PATH=/usr/bin/chromium
|
||||
WORKDIR /app
|
||||
|
||||
COPY package*.json ./
|
||||
COPY --from=builder /app/node_modules ./node_modules
|
||||
RUN npm install --omit=dev
|
||||
|
||||
COPY --from=builder /app/dist ./dist
|
||||
|
||||
# Copy migrations for auto-migrate on startup
|
||||
|
||||
@@ -1,268 +0,0 @@
|
||||
# CannaiQ Backend Codebase Map
|
||||
|
||||
**Last Updated:** 2025-12-12
|
||||
**Purpose:** Help Claude and developers understand which code is current vs deprecated
|
||||
|
||||
---
|
||||
|
||||
## Quick Reference: What to Use
|
||||
|
||||
### For Crawling/Scraping
|
||||
| Task | Use This | NOT This |
|
||||
|------|----------|----------|
|
||||
| Fetch products | `src/tasks/handlers/payload-fetch.ts` | `src/hydration/*` |
|
||||
| Process products | `src/tasks/handlers/product-refresh.ts` | `src/scraper-v2/*` |
|
||||
| GraphQL client | `src/platforms/dutchie/client.ts` | `src/dutchie-az/services/graphql-client.ts` |
|
||||
| Worker system | `src/tasks/task-worker.ts` | `src/dutchie-az/services/worker.ts` |
|
||||
|
||||
### For Database
|
||||
| Task | Use This | NOT This |
|
||||
|------|----------|----------|
|
||||
| Get DB pool | `src/db/pool.ts` | `src/dutchie-az/db/connection.ts` |
|
||||
| Run migrations | `src/db/migrate.ts` (CLI only) | Never import at runtime |
|
||||
| Query products | `store_products` table | `products`, `dutchie_products` |
|
||||
| Query stores | `dispensaries` table | `stores` table |
|
||||
|
||||
### For Discovery
|
||||
| Task | Use This |
|
||||
|------|----------|
|
||||
| Discover stores | `src/discovery/*.ts` |
|
||||
| Run discovery | `npx tsx src/scripts/run-discovery.ts` |
|
||||
|
||||
---
|
||||
|
||||
## Directory Status
|
||||
|
||||
### ACTIVE DIRECTORIES (Use These)
|
||||
|
||||
```
|
||||
src/
|
||||
├── auth/ # JWT/session auth, middleware
|
||||
├── db/ # Database pool, migrations
|
||||
├── discovery/ # Dutchie store discovery pipeline
|
||||
├── middleware/ # Express middleware
|
||||
├── multi-state/ # Multi-state query support
|
||||
├── platforms/ # Platform-specific clients (Dutchie, Jane, etc)
|
||||
│ └── dutchie/ # THE Dutchie client - use this one
|
||||
├── routes/ # Express API routes
|
||||
├── services/ # Core services (logger, scheduler, etc)
|
||||
├── tasks/ # Task system (workers, handlers, scheduler)
|
||||
│ └── handlers/ # Task handlers (payload_fetch, product_refresh, etc)
|
||||
├── types/ # TypeScript types
|
||||
└── utils/ # Utilities (storage, image processing)
|
||||
```
|
||||
|
||||
### DEPRECATED DIRECTORIES (DO NOT USE)
|
||||
|
||||
```
|
||||
src/
|
||||
├── hydration/ # DEPRECATED - Old pipeline approach
|
||||
├── scraper-v2/ # DEPRECATED - Old scraper engine
|
||||
├── canonical-hydration/# DEPRECATED - Merged into tasks/handlers
|
||||
├── dutchie-az/ # PARTIAL - Some parts deprecated, some active
|
||||
│ ├── db/ # DEPRECATED - Use src/db/pool.ts
|
||||
│ └── services/ # PARTIAL - worker.ts still runs, graphql-client.ts deprecated
|
||||
├── portals/ # FUTURE - Not yet implemented
|
||||
├── seo/ # PARTIAL - Settings work, templates WIP
|
||||
└── system/ # DEPRECATED - Old orchestration system
|
||||
```
|
||||
|
||||
### DEPRECATED FILES (DO NOT USE)
|
||||
|
||||
```
|
||||
src/dutchie-az/db/connection.ts # Use src/db/pool.ts instead
|
||||
src/dutchie-az/services/graphql-client.ts # Use src/platforms/dutchie/client.ts
|
||||
src/hydration/*.ts # Entire directory deprecated
|
||||
src/scraper-v2/*.ts # Entire directory deprecated
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Key Files Reference
|
||||
|
||||
### Entry Points
|
||||
| File | Purpose | Status |
|
||||
|------|---------|--------|
|
||||
| `src/index.ts` | Main Express server | ACTIVE |
|
||||
| `src/dutchie-az/services/worker.ts` | Worker process entry | ACTIVE |
|
||||
| `src/tasks/task-worker.ts` | Task worker (new system) | ACTIVE |
|
||||
|
||||
### Dutchie Integration
|
||||
| File | Purpose | Status |
|
||||
|------|---------|--------|
|
||||
| `src/platforms/dutchie/client.ts` | GraphQL client, hashes, curl | **PRIMARY** |
|
||||
| `src/platforms/dutchie/queries.ts` | High-level query functions | ACTIVE |
|
||||
| `src/platforms/dutchie/index.ts` | Re-exports | ACTIVE |
|
||||
|
||||
### Task Handlers
|
||||
| File | Purpose | Status |
|
||||
|------|---------|--------|
|
||||
| `src/tasks/handlers/payload-fetch.ts` | Fetch products from Dutchie | **PRIMARY** |
|
||||
| `src/tasks/handlers/product-refresh.ts` | Process payload into DB | **PRIMARY** |
|
||||
| `src/tasks/handlers/entry-point-discovery.ts` | Resolve platform IDs (auto-healing) | **PRIMARY** |
|
||||
| `src/tasks/handlers/menu-detection.ts` | Detect menu type | ACTIVE |
|
||||
| `src/tasks/handlers/id-resolution.ts` | Resolve platform IDs (legacy) | LEGACY |
|
||||
| `src/tasks/handlers/image-download.ts` | Download product images | ACTIVE |
|
||||
|
||||
---
|
||||
|
||||
## Transport Rules (CRITICAL)
|
||||
|
||||
**Browser-based (Puppeteer) is the DEFAULT transport. curl is ONLY allowed when explicitly specified.**
|
||||
|
||||
### Transport Selection
|
||||
| `task.method` | Transport Used | Notes |
|
||||
|---------------|----------------|-------|
|
||||
| `null` | Browser (Puppeteer) | DEFAULT - use this for most tasks |
|
||||
| `'http'` | Browser (Puppeteer) | Explicit browser request |
|
||||
| `'curl'` | curl-impersonate | ONLY when explicitly needed |
|
||||
|
||||
### Why Browser-First?
|
||||
1. **Anti-detection**: Puppeteer with StealthPlugin evades bot detection
|
||||
2. **Session cookies**: Browser maintains session state automatically
|
||||
3. **Fingerprinting**: Real browser fingerprint (TLS, headers, etc.)
|
||||
4. **Age gates**: Browser can click through age verification
|
||||
|
||||
### Entry Point Discovery Auto-Healing
|
||||
The `entry_point_discovery` handler uses a healing strategy:
|
||||
|
||||
```
|
||||
1. FIRST: Check dutchie_discovery_locations for existing platform_location_id
|
||||
- By linked dutchie_discovery_id
|
||||
- By slug match in discovery data
|
||||
→ If found, NO network call needed
|
||||
|
||||
2. SECOND: Browser-based GraphQL (Puppeteer)
|
||||
- 5x retries for network/proxy failures
|
||||
- On HTTP 403: rotate proxy and retry
|
||||
- On HTTP 404 after 2 attempts: mark as 'removed'
|
||||
|
||||
3. HARD FAILURE: After exhausting options → 'needs_investigation'
|
||||
```
|
||||
|
||||
### DO NOT Use curl Unless:
|
||||
- Task explicitly has `method = 'curl'`
|
||||
- You're testing curl-impersonate binaries
|
||||
- The API explicitly requires curl fingerprinting
|
||||
|
||||
### Files
|
||||
| File | Transport | Purpose |
|
||||
|------|-----------|---------|
|
||||
| `src/services/puppeteer-preflight.ts` | Browser | Preflight check |
|
||||
| `src/services/curl-preflight.ts` | curl | Preflight check |
|
||||
| `src/tasks/handlers/entry-point-discovery.ts` | Browser | Platform ID resolution |
|
||||
| `src/tasks/handlers/payload-fetch.ts` | Both | Product fetching |
|
||||
|
||||
### Database
|
||||
| File | Purpose | Status |
|
||||
|------|---------|--------|
|
||||
| `src/db/pool.ts` | Canonical DB pool | **PRIMARY** |
|
||||
| `src/db/migrate.ts` | Migration runner (CLI only) | CLI ONLY |
|
||||
| `src/db/auto-migrate.ts` | Auto-run migrations on startup | ACTIVE |
|
||||
|
||||
### Configuration
|
||||
| File | Purpose | Status |
|
||||
|------|---------|--------|
|
||||
| `.env` | Environment variables | ACTIVE |
|
||||
| `package.json` | Dependencies | ACTIVE |
|
||||
| `tsconfig.json` | TypeScript config | ACTIVE |
|
||||
|
||||
---
|
||||
|
||||
## GraphQL Hashes (CRITICAL)
|
||||
|
||||
The correct hashes are in `src/platforms/dutchie/client.ts`:
|
||||
|
||||
```typescript
|
||||
export const GRAPHQL_HASHES = {
|
||||
FilteredProducts: 'ee29c060826dc41c527e470e9ae502c9b2c169720faa0a9f5d25e1b9a530a4a0',
|
||||
GetAddressBasedDispensaryData: '13461f73abf7268770dfd05fe7e10c523084b2bb916a929c08efe3d87531977b',
|
||||
ConsumerDispensaries: '0a5bfa6ca1d64ae47bcccb7c8077c87147cbc4e6982c17ceec97a2a4948b311b',
|
||||
GetAllCitiesByState: 'ae547a0466ace5a48f91e55bf6699eacd87e3a42841560f0c0eabed5a0a920e6',
|
||||
};
|
||||
```
|
||||
|
||||
**ALWAYS** use `Status: 'Active'` for FilteredProducts (not `null` or `'All'`).
|
||||
|
||||
---
|
||||
|
||||
## Scripts Reference
|
||||
|
||||
### Useful Scripts (in `src/scripts/`)
|
||||
| Script | Purpose |
|
||||
|--------|---------|
|
||||
| `run-discovery.ts` | Run Dutchie discovery |
|
||||
| `crawl-single-store.ts` | Test crawl a single store |
|
||||
| `test-dutchie-graphql.ts` | Test GraphQL queries |
|
||||
|
||||
### One-Off Scripts (probably don't need)
|
||||
| Script | Purpose |
|
||||
|--------|---------|
|
||||
| `harmonize-az-dispensaries.ts` | One-time data cleanup |
|
||||
| `bootstrap-stores-for-dispensaries.ts` | One-time migration |
|
||||
| `backfill-*.ts` | Historical backfill scripts |
|
||||
|
||||
---
|
||||
|
||||
## API Routes
|
||||
|
||||
### Active Routes (in `src/routes/`)
|
||||
| Route File | Mount Point | Purpose |
|
||||
|------------|-------------|---------|
|
||||
| `auth.ts` | `/api/auth` | Login/logout/session |
|
||||
| `stores.ts` | `/api/stores` | Store CRUD |
|
||||
| `dashboard.ts` | `/api/dashboard` | Dashboard stats |
|
||||
| `workers.ts` | `/api/workers` | Worker monitoring |
|
||||
| `pipeline.ts` | `/api/pipeline` | Crawl triggers |
|
||||
| `discovery.ts` | `/api/discovery` | Discovery management |
|
||||
| `analytics.ts` | `/api/analytics` | Analytics queries |
|
||||
| `wordpress.ts` | `/api/v1/wordpress` | WordPress plugin API |
|
||||
|
||||
---
|
||||
|
||||
## Documentation Files
|
||||
|
||||
### Current Docs (in `backend/docs/`)
|
||||
| Doc | Purpose | Currency |
|
||||
|-----|---------|----------|
|
||||
| `TASK_WORKFLOW_2024-12-10.md` | Task system architecture | CURRENT |
|
||||
| `WORKER_TASK_ARCHITECTURE.md` | Worker/task design | CURRENT |
|
||||
| `CRAWL_PIPELINE.md` | Crawl pipeline overview | CURRENT |
|
||||
| `ORGANIC_SCRAPING_GUIDE.md` | Browser-based scraping | CURRENT |
|
||||
| `CODEBASE_MAP.md` | This file | CURRENT |
|
||||
| `ANALYTICS_V2_EXAMPLES.md` | Analytics API examples | CURRENT |
|
||||
| `BRAND_INTELLIGENCE_API.md` | Brand API docs | CURRENT |
|
||||
|
||||
### Root Docs
|
||||
| Doc | Purpose | Currency |
|
||||
|-----|---------|----------|
|
||||
| `CLAUDE.md` | Claude instructions | **PRIMARY** |
|
||||
| `README.md` | Project overview | NEEDS UPDATE |
|
||||
|
||||
---
|
||||
|
||||
## Common Mistakes to Avoid
|
||||
|
||||
1. **Don't use `src/hydration/`** - It's an old approach that was superseded by the task system
|
||||
|
||||
2. **Don't use `src/dutchie-az/db/connection.ts`** - Use `src/db/pool.ts` instead
|
||||
|
||||
3. **Don't import `src/db/migrate.ts` at runtime** - It will crash. Only use for CLI migrations.
|
||||
|
||||
4. **Don't query `stores` table** - It's empty. Use `dispensaries`.
|
||||
|
||||
5. **Don't query `products` table** - It's empty. Use `store_products`.
|
||||
|
||||
6. **Don't use wrong GraphQL hash** - Always get hash from `GRAPHQL_HASHES` in client.ts
|
||||
|
||||
7. **Don't use `Status: null`** - It returns 0 products. Use `Status: 'Active'`.
|
||||
|
||||
---
|
||||
|
||||
## When in Doubt
|
||||
|
||||
1. Check if the file is imported in `src/index.ts` - if not, it may be deprecated
|
||||
2. Check the last modified date - older files may be stale
|
||||
3. Look for `DEPRECATED` comments in the code
|
||||
4. Ask: "Is there a newer version of this in `src/tasks/` or `src/platforms/`?"
|
||||
5. Read the relevant doc in `docs/` before modifying code
|
||||
@@ -1,343 +0,0 @@
|
||||
# CannaiQ Query API
|
||||
|
||||
Query raw crawl payload data with flexible filters, sorting, and aggregation.
|
||||
|
||||
## Base URL
|
||||
|
||||
```
|
||||
https://cannaiq.co/api/payloads
|
||||
```
|
||||
|
||||
## Authentication
|
||||
|
||||
Include your API key in the header:
|
||||
```
|
||||
X-API-Key: your-api-key
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Endpoints
|
||||
|
||||
### 1. Query Products
|
||||
|
||||
Filter and search products from a store's latest crawl data.
|
||||
|
||||
```
|
||||
GET /api/payloads/store/{dispensaryId}/query
|
||||
```
|
||||
|
||||
#### Query Parameters
|
||||
|
||||
| Parameter | Type | Description |
|
||||
|-----------|------|-------------|
|
||||
| `brand` | string | Filter by brand name (partial match) |
|
||||
| `category` | string | Filter by category (flower, vape, edible, etc.) |
|
||||
| `subcategory` | string | Filter by subcategory |
|
||||
| `strain_type` | string | Filter by strain (indica, sativa, hybrid, cbd) |
|
||||
| `in_stock` | boolean | Filter by stock status (true/false) |
|
||||
| `price_min` | number | Minimum price |
|
||||
| `price_max` | number | Maximum price |
|
||||
| `thc_min` | number | Minimum THC percentage |
|
||||
| `thc_max` | number | Maximum THC percentage |
|
||||
| `search` | string | Search product name (partial match) |
|
||||
| `fields` | string | Comma-separated fields to return |
|
||||
| `limit` | number | Max results (default 100, max 1000) |
|
||||
| `offset` | number | Skip results for pagination |
|
||||
| `sort` | string | Sort by: name, price, thc, brand |
|
||||
| `order` | string | Sort order: asc, desc |
|
||||
|
||||
#### Available Fields
|
||||
|
||||
When using `fields` parameter, you can request:
|
||||
- `id` - Product ID
|
||||
- `name` - Product name
|
||||
- `brand` - Brand name
|
||||
- `category` - Product category
|
||||
- `subcategory` - Product subcategory
|
||||
- `strain_type` - Indica/Sativa/Hybrid/CBD
|
||||
- `price` - Current price
|
||||
- `price_med` - Medical price
|
||||
- `price_rec` - Recreational price
|
||||
- `thc` - THC percentage
|
||||
- `cbd` - CBD percentage
|
||||
- `weight` - Product weight/size
|
||||
- `status` - Stock status
|
||||
- `in_stock` - Boolean in-stock flag
|
||||
- `image_url` - Product image
|
||||
- `description` - Product description
|
||||
|
||||
#### Examples
|
||||
|
||||
**Get all flower products under $40:**
|
||||
```
|
||||
GET /api/payloads/store/112/query?category=flower&price_max=40
|
||||
```
|
||||
|
||||
**Search for "Blue Dream" with high THC:**
|
||||
```
|
||||
GET /api/payloads/store/112/query?search=blue+dream&thc_min=20
|
||||
```
|
||||
|
||||
**Get only name and price for Alien Labs products:**
|
||||
```
|
||||
GET /api/payloads/store/112/query?brand=Alien+Labs&fields=name,price,thc
|
||||
```
|
||||
|
||||
**Get top 10 highest THC products:**
|
||||
```
|
||||
GET /api/payloads/store/112/query?sort=thc&order=desc&limit=10
|
||||
```
|
||||
|
||||
**Paginate through in-stock products:**
|
||||
```
|
||||
GET /api/payloads/store/112/query?in_stock=true&limit=50&offset=0
|
||||
GET /api/payloads/store/112/query?in_stock=true&limit=50&offset=50
|
||||
```
|
||||
|
||||
#### Response
|
||||
|
||||
```json
|
||||
{
|
||||
"success": true,
|
||||
"dispensaryId": 112,
|
||||
"payloadId": 45,
|
||||
"fetchedAt": "2025-12-11T10:30:00Z",
|
||||
"query": {
|
||||
"filters": {
|
||||
"brand": "Alien Labs",
|
||||
"category": null,
|
||||
"price_max": null
|
||||
},
|
||||
"sort": "price",
|
||||
"order": "asc",
|
||||
"limit": 100,
|
||||
"offset": 0
|
||||
},
|
||||
"pagination": {
|
||||
"total": 15,
|
||||
"returned": 15,
|
||||
"limit": 100,
|
||||
"offset": 0,
|
||||
"has_more": false
|
||||
},
|
||||
"products": [
|
||||
{
|
||||
"id": "507f1f77bcf86cd799439011",
|
||||
"name": "Alien Labs - Baklava 3.5g",
|
||||
"brand": "Alien Labs",
|
||||
"category": "flower",
|
||||
"strain_type": "hybrid",
|
||||
"price": 55,
|
||||
"thc": "28.5",
|
||||
"in_stock": true
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 2. Aggregate Data
|
||||
|
||||
Group products and calculate metrics.
|
||||
|
||||
```
|
||||
GET /api/payloads/store/{dispensaryId}/aggregate
|
||||
```
|
||||
|
||||
#### Query Parameters
|
||||
|
||||
| Parameter | Type | Description |
|
||||
|-----------|------|-------------|
|
||||
| `group_by` | string | **Required.** Field to group by: brand, category, subcategory, strain_type |
|
||||
| `metrics` | string | Comma-separated metrics (default: count) |
|
||||
|
||||
#### Available Metrics
|
||||
|
||||
- `count` - Number of products
|
||||
- `avg_price` - Average price
|
||||
- `min_price` - Lowest price
|
||||
- `max_price` - Highest price
|
||||
- `avg_thc` - Average THC percentage
|
||||
- `in_stock_count` - Number of in-stock products
|
||||
|
||||
#### Examples
|
||||
|
||||
**Count products by brand:**
|
||||
```
|
||||
GET /api/payloads/store/112/aggregate?group_by=brand
|
||||
```
|
||||
|
||||
**Get price stats by category:**
|
||||
```
|
||||
GET /api/payloads/store/112/aggregate?group_by=category&metrics=count,avg_price,min_price,max_price
|
||||
```
|
||||
|
||||
**Get THC averages by strain type:**
|
||||
```
|
||||
GET /api/payloads/store/112/aggregate?group_by=strain_type&metrics=count,avg_thc
|
||||
```
|
||||
|
||||
**Brand analysis with stock info:**
|
||||
```
|
||||
GET /api/payloads/store/112/aggregate?group_by=brand&metrics=count,avg_price,in_stock_count
|
||||
```
|
||||
|
||||
#### Response
|
||||
|
||||
```json
|
||||
{
|
||||
"success": true,
|
||||
"dispensaryId": 112,
|
||||
"payloadId": 45,
|
||||
"fetchedAt": "2025-12-11T10:30:00Z",
|
||||
"groupBy": "brand",
|
||||
"metrics": ["count", "avg_price"],
|
||||
"totalProducts": 450,
|
||||
"groupCount": 85,
|
||||
"aggregations": [
|
||||
{
|
||||
"brand": "Alien Labs",
|
||||
"count": 15,
|
||||
"avg_price": 52.33
|
||||
},
|
||||
{
|
||||
"brand": "Connected",
|
||||
"count": 12,
|
||||
"avg_price": 48.50
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 3. Compare Stores (Price Comparison)
|
||||
|
||||
Query the same data from multiple stores and compare in your app:
|
||||
|
||||
```javascript
|
||||
// Get flower prices from Store A
|
||||
const storeA = await fetch('/api/payloads/store/112/query?category=flower&fields=name,brand,price');
|
||||
|
||||
// Get flower prices from Store B
|
||||
const storeB = await fetch('/api/payloads/store/115/query?category=flower&fields=name,brand,price');
|
||||
|
||||
// Compare in your app
|
||||
const dataA = await storeA.json();
|
||||
const dataB = await storeB.json();
|
||||
|
||||
// Find matching products and compare prices
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 4. Price History
|
||||
|
||||
For historical price data, use the snapshots endpoint:
|
||||
|
||||
```
|
||||
GET /api/v1/products/{productId}/history?days=30
|
||||
```
|
||||
|
||||
Or compare payloads over time:
|
||||
|
||||
```
|
||||
GET /api/payloads/store/{dispensaryId}/diff?from={payloadId1}&to={payloadId2}
|
||||
```
|
||||
|
||||
The diff endpoint shows:
|
||||
- Products added
|
||||
- Products removed
|
||||
- Price changes
|
||||
- Stock changes
|
||||
|
||||
---
|
||||
|
||||
### 5. List Stores
|
||||
|
||||
Get available dispensaries to query:
|
||||
|
||||
```
|
||||
GET /api/stores
|
||||
```
|
||||
|
||||
Returns all stores with their IDs, names, and locations.
|
||||
|
||||
---
|
||||
|
||||
## Use Cases
|
||||
|
||||
### Price Comparison App
|
||||
|
||||
```javascript
|
||||
// 1. Get stores in Arizona
|
||||
const stores = await fetch('/api/stores?state=AZ').then(r => r.json());
|
||||
|
||||
// 2. Query flower prices from each store
|
||||
const prices = await Promise.all(
|
||||
stores.map(store =>
|
||||
fetch(`/api/payloads/store/${store.id}/query?category=flower&fields=name,brand,price`)
|
||||
.then(r => r.json())
|
||||
)
|
||||
);
|
||||
|
||||
// 3. Build comparison matrix in your app
|
||||
```
|
||||
|
||||
### Brand Analytics Dashboard
|
||||
|
||||
```javascript
|
||||
// Get brand presence across stores
|
||||
const brandData = await Promise.all(
|
||||
storeIds.map(id =>
|
||||
fetch(`/api/payloads/store/${id}/aggregate?group_by=brand&metrics=count,avg_price`)
|
||||
.then(r => r.json())
|
||||
)
|
||||
);
|
||||
|
||||
// Aggregate brand presence across all stores
|
||||
```
|
||||
|
||||
### Deal Finder
|
||||
|
||||
```javascript
|
||||
// Find high-THC flower under $30
|
||||
const deals = await fetch(
|
||||
'/api/payloads/store/112/query?category=flower&price_max=30&thc_min=20&in_stock=true&sort=thc&order=desc'
|
||||
).then(r => r.json());
|
||||
```
|
||||
|
||||
### Inventory Tracker
|
||||
|
||||
```javascript
|
||||
// Get products that went out of stock
|
||||
const diff = await fetch('/api/payloads/store/112/diff').then(r => r.json());
|
||||
|
||||
const outOfStock = diff.details.stockChanges.filter(
|
||||
p => p.newStatus !== 'Active'
|
||||
);
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Rate Limits
|
||||
|
||||
- Default: 100 requests/minute per API key
|
||||
- Contact support for higher limits
|
||||
|
||||
## Error Responses
|
||||
|
||||
```json
|
||||
{
|
||||
"success": false,
|
||||
"error": "Error message here"
|
||||
}
|
||||
```
|
||||
|
||||
Common errors:
|
||||
- `404` - Store or payload not found
|
||||
- `400` - Missing required parameter
|
||||
- `401` - Invalid or missing API key
|
||||
- `429` - Rate limit exceeded
|
||||
@@ -362,245 +362,6 @@ SET status = 'pending', retry_count = retry_count + 1
|
||||
WHERE status = 'failed' AND retry_count < max_retries;
|
||||
```
|
||||
|
||||
## Concurrent Task Processing (Added 2024-12)
|
||||
|
||||
Workers can now process multiple tasks concurrently within a single worker instance. This improves throughput by utilizing async I/O efficiently.
|
||||
|
||||
### Architecture
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────────┐
|
||||
│ Pod (K8s) │
|
||||
│ │
|
||||
│ ┌─────────────────────────────────────────────────────┐ │
|
||||
│ │ TaskWorker │ │
|
||||
│ │ │ │
|
||||
│ │ ┌─────────┐ ┌─────────┐ ┌─────────┐ │ │
|
||||
│ │ │ Task 1 │ │ Task 2 │ │ Task 3 │ (concurrent)│ │
|
||||
│ │ └─────────┘ └─────────┘ └─────────┘ │ │
|
||||
│ │ │ │
|
||||
│ │ Resource Monitor │ │
|
||||
│ │ ├── Memory: 65% (threshold: 85%) │ │
|
||||
│ │ ├── CPU: 45% (threshold: 90%) │ │
|
||||
│ │ └── Status: Normal │ │
|
||||
│ └─────────────────────────────────────────────────────┘ │
|
||||
└─────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
### Environment Variables
|
||||
|
||||
| Variable | Default | Description |
|
||||
|----------|---------|-------------|
|
||||
| `MAX_CONCURRENT_TASKS` | 3 | Maximum tasks a worker will run concurrently |
|
||||
| `MEMORY_BACKOFF_THRESHOLD` | 0.85 | Back off when heap memory exceeds 85% |
|
||||
| `CPU_BACKOFF_THRESHOLD` | 0.90 | Back off when CPU exceeds 90% |
|
||||
| `BACKOFF_DURATION_MS` | 10000 | How long to wait when backing off (10s) |
|
||||
|
||||
### How It Works
|
||||
|
||||
1. **Main Loop**: Worker continuously tries to fill up to `MAX_CONCURRENT_TASKS`
|
||||
2. **Resource Monitoring**: Before claiming a new task, worker checks memory and CPU
|
||||
3. **Backoff**: If resources exceed thresholds, worker pauses and stops claiming new tasks
|
||||
4. **Concurrent Execution**: Tasks run in parallel using `Promise` - they don't block each other
|
||||
5. **Graceful Shutdown**: On SIGTERM/decommission, worker stops claiming but waits for active tasks
|
||||
|
||||
### Resource Monitoring
|
||||
|
||||
```typescript
|
||||
// ResourceStats interface
|
||||
interface ResourceStats {
|
||||
memoryPercent: number; // Current heap usage as decimal (0.0-1.0)
|
||||
memoryMb: number; // Current heap used in MB
|
||||
memoryTotalMb: number; // Total heap available in MB
|
||||
cpuPercent: number; // CPU usage as percentage (0-100)
|
||||
isBackingOff: boolean; // True if worker is in backoff state
|
||||
backoffReason: string; // Why the worker is backing off
|
||||
}
|
||||
```
|
||||
|
||||
### Heartbeat Data
|
||||
|
||||
Workers report the following in their heartbeat:
|
||||
|
||||
```json
|
||||
{
|
||||
"worker_id": "worker-abc123",
|
||||
"current_task_id": 456,
|
||||
"current_task_ids": [456, 457, 458],
|
||||
"active_task_count": 3,
|
||||
"max_concurrent_tasks": 3,
|
||||
"status": "active",
|
||||
"resources": {
|
||||
"memory_mb": 256,
|
||||
"memory_total_mb": 512,
|
||||
"memory_rss_mb": 320,
|
||||
"memory_percent": 50,
|
||||
"cpu_user_ms": 12500,
|
||||
"cpu_system_ms": 3200,
|
||||
"cpu_percent": 45,
|
||||
"is_backing_off": false,
|
||||
"backoff_reason": null
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Backoff Behavior
|
||||
|
||||
When resources exceed thresholds:
|
||||
|
||||
1. Worker logs the backoff reason:
|
||||
```
|
||||
[TaskWorker] MyWorker backing off: Memory at 87.3% (threshold: 85%)
|
||||
```
|
||||
|
||||
2. Worker stops claiming new tasks but continues existing tasks
|
||||
|
||||
3. After `BACKOFF_DURATION_MS`, worker rechecks resources
|
||||
|
||||
4. When resources return to normal:
|
||||
```
|
||||
[TaskWorker] MyWorker resuming normal operation
|
||||
```
|
||||
|
||||
### UI Display
|
||||
|
||||
The Workers Dashboard shows:
|
||||
|
||||
- **Tasks Column**: `2/3 tasks` (active/max concurrent)
|
||||
- **Resources Column**: Memory % and CPU % with color coding
|
||||
- Green: < 50%
|
||||
- Yellow: 50-74%
|
||||
- Amber: 75-89%
|
||||
- Red: 90%+
|
||||
- **Backing Off**: Orange warning badge when worker is in backoff state
|
||||
|
||||
### Task Count Badge Details
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────┐
|
||||
│ Worker: "MyWorker" │
|
||||
│ Tasks: 2/3 tasks #456, #457 │
|
||||
│ Resources: 🧠 65% 💻 45% │
|
||||
│ Status: ● Active │
|
||||
└─────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
### Best Practices
|
||||
|
||||
1. **Start Conservative**: Use `MAX_CONCURRENT_TASKS=3` initially
|
||||
2. **Monitor Resources**: Watch for frequent backoffs in logs
|
||||
3. **Tune Per Workload**: I/O-bound tasks benefit from higher concurrency
|
||||
4. **Scale Horizontally**: Add more pods rather than cranking concurrency too high
|
||||
|
||||
### Code References
|
||||
|
||||
| File | Purpose |
|
||||
|------|---------|
|
||||
| `src/tasks/task-worker.ts:68-71` | Concurrency environment variables |
|
||||
| `src/tasks/task-worker.ts:104-111` | ResourceStats interface |
|
||||
| `src/tasks/task-worker.ts:149-179` | getResourceStats() method |
|
||||
| `src/tasks/task-worker.ts:184-196` | shouldBackOff() method |
|
||||
| `src/tasks/task-worker.ts:462-516` | mainLoop() with concurrent claiming |
|
||||
| `src/routes/worker-registry.ts:148-195` | Heartbeat endpoint handling |
|
||||
| `cannaiq/src/pages/WorkersDashboard.tsx:233-305` | UI components for resources |
|
||||
|
||||
## Browser Task Memory Limits (Updated 2025-12)
|
||||
|
||||
Browser-based tasks (Puppeteer/Chrome) have strict memory constraints that limit concurrency.
|
||||
|
||||
### Why Browser Tasks Are Different
|
||||
|
||||
Each browser task launches a Chrome process. Unlike I/O-bound API calls, browsers consume significant RAM:
|
||||
|
||||
| Component | RAM Usage |
|
||||
|-----------|-----------|
|
||||
| Node.js runtime | ~150 MB |
|
||||
| Chrome browser (base) | ~200-250 MB |
|
||||
| Dutchie menu page (loaded) | ~100-150 MB |
|
||||
| **Per browser total** | **~350-450 MB** |
|
||||
|
||||
### Memory Math for Pod Limits
|
||||
|
||||
```
|
||||
Pod memory limit: 2 GB (2000 MB)
|
||||
Node.js runtime: -150 MB
|
||||
Safety buffer: -100 MB
|
||||
────────────────────────────────
|
||||
Available for browsers: 1750 MB
|
||||
|
||||
Per browser + page: ~400 MB
|
||||
|
||||
Max browsers: 1750 ÷ 400 = ~4 browsers
|
||||
|
||||
Recommended: 3 browsers (leaves headroom for spikes)
|
||||
```
|
||||
|
||||
### MAX_CONCURRENT_TASKS for Browser Tasks
|
||||
|
||||
| Browsers per Pod | RAM Used | Risk Level |
|
||||
|------------------|----------|------------|
|
||||
| 1 | ~500 MB | Very safe |
|
||||
| 2 | ~900 MB | Safe |
|
||||
| **3** | **~1.3 GB** | **Recommended** |
|
||||
| 4 | ~1.7 GB | Tight (may OOM) |
|
||||
| 5+ | >2 GB | Will OOM crash |
|
||||
|
||||
**CRITICAL**: `MAX_CONCURRENT_TASKS=3` is the maximum safe value for browser tasks with current pod limits.
|
||||
|
||||
### Scaling Strategy
|
||||
|
||||
Scale **horizontally** (more pods) rather than vertically (more concurrency per pod):
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────────────────────┐
|
||||
│ Cluster: 8 pods × 3 browsers = 24 concurrent tasks │
|
||||
│ │
|
||||
│ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │
|
||||
│ │ Pod 0 │ │ Pod 1 │ │ Pod 2 │ │ Pod 3 │ │
|
||||
│ │ 3 browsers │ │ 3 browsers │ │ 3 browsers │ │ 3 browsers │ │
|
||||
│ └─────────────┘ └─────────────┘ └─────────────┘ └─────────────┘ │
|
||||
│ │
|
||||
│ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │
|
||||
│ │ Pod 4 │ │ Pod 5 │ │ Pod 6 │ │ Pod 7 │ │
|
||||
│ │ 3 browsers │ │ 3 browsers │ │ 3 browsers │ │ 3 browsers │ │
|
||||
│ └─────────────┘ └─────────────┘ └─────────────┘ └─────────────┘ │
|
||||
└─────────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
### Browser Lifecycle Per Task
|
||||
|
||||
Each task gets a fresh browser with fresh IP/identity:
|
||||
|
||||
```
|
||||
1. Claim task from queue
|
||||
2. Get fresh proxy from pool
|
||||
3. Launch browser with proxy
|
||||
4. Run preflight (verify IP)
|
||||
5. Execute scrape
|
||||
6. Close browser
|
||||
7. Repeat
|
||||
```
|
||||
|
||||
This ensures:
|
||||
- Fresh IP per task (proxy rotation)
|
||||
- Fresh fingerprint per task (UA rotation)
|
||||
- No cookie/session bleed between tasks
|
||||
- Predictable memory usage
|
||||
|
||||
### Increasing Capacity
|
||||
|
||||
To handle more concurrent tasks:
|
||||
|
||||
1. **Add more pods** (up to 8 per CLAUDE.md limit)
|
||||
2. **Increase pod memory** (allows 4 browsers per pod):
|
||||
```yaml
|
||||
resources:
|
||||
limits:
|
||||
memory: "2.5Gi" # from 2Gi
|
||||
```
|
||||
|
||||
**DO NOT** simply increase `MAX_CONCURRENT_TASKS` without also increasing pod memory limits.
|
||||
|
||||
## Monitoring
|
||||
|
||||
### Logs
|
||||
@@ -1,297 +0,0 @@
|
||||
# Organic Browser-Based Scraping Guide
|
||||
|
||||
**Last Updated:** 2025-12-12
|
||||
**Status:** Production-ready proof of concept
|
||||
|
||||
---
|
||||
|
||||
## Overview
|
||||
|
||||
This document describes the "organic" browser-based approach to scraping Dutchie dispensary menus. Unlike direct curl/axios requests, this method uses a real browser session to make API calls, making requests appear natural and reducing detection risk.
|
||||
|
||||
---
|
||||
|
||||
## Why Organic Scraping?
|
||||
|
||||
| Approach | Detection Risk | Speed | Complexity |
|
||||
|----------|---------------|-------|------------|
|
||||
| Direct curl | Higher | Fast | Low |
|
||||
| curl-impersonate | Medium | Fast | Medium |
|
||||
| **Browser-based (organic)** | **Lowest** | Slower | Higher |
|
||||
|
||||
Direct curl requests can be fingerprinted via:
|
||||
- TLS fingerprint (cipher suites, extensions)
|
||||
- Header order and values
|
||||
- Missing cookies/session data
|
||||
- Request patterns
|
||||
|
||||
Browser-based requests inherit:
|
||||
- Real Chrome TLS fingerprint
|
||||
- Session cookies from page visit
|
||||
- Natural header order
|
||||
- JavaScript execution environment
|
||||
|
||||
---
|
||||
|
||||
## Implementation
|
||||
|
||||
### Dependencies
|
||||
|
||||
```bash
|
||||
npm install puppeteer puppeteer-extra puppeteer-extra-plugin-stealth
|
||||
```
|
||||
|
||||
### Core Script: `test-intercept.js`
|
||||
|
||||
Located at: `backend/test-intercept.js`
|
||||
|
||||
```javascript
|
||||
const puppeteer = require('puppeteer-extra');
|
||||
const StealthPlugin = require('puppeteer-extra-plugin-stealth');
|
||||
const fs = require('fs');
|
||||
|
||||
puppeteer.use(StealthPlugin());
|
||||
|
||||
async function capturePayload(config) {
|
||||
const { dispensaryId, platformId, cName, outputPath } = config;
|
||||
|
||||
const browser = await puppeteer.launch({
|
||||
headless: 'new',
|
||||
args: ['--no-sandbox', '--disable-setuid-sandbox']
|
||||
});
|
||||
|
||||
const page = await browser.newPage();
|
||||
|
||||
// STEP 1: Establish session by visiting the menu
|
||||
const embedUrl = `https://dutchie.com/embedded-menu/${cName}?menuType=rec`;
|
||||
await page.goto(embedUrl, { waitUntil: 'networkidle2', timeout: 60000 });
|
||||
|
||||
// STEP 2: Fetch ALL products using GraphQL from browser context
|
||||
const result = await page.evaluate(async (platformId) => {
|
||||
const allProducts = [];
|
||||
let pageNum = 0;
|
||||
const perPage = 100;
|
||||
let totalCount = 0;
|
||||
const sessionId = 'browser-session-' + Date.now();
|
||||
|
||||
while (pageNum < 30) {
|
||||
const variables = {
|
||||
includeEnterpriseSpecials: false,
|
||||
productsFilter: {
|
||||
dispensaryId: platformId,
|
||||
pricingType: 'rec',
|
||||
Status: 'Active', // CRITICAL: Must be 'Active', not null
|
||||
types: [],
|
||||
useCache: true,
|
||||
isDefaultSort: true,
|
||||
sortBy: 'popularSortIdx',
|
||||
sortDirection: 1,
|
||||
bypassOnlineThresholds: true,
|
||||
isKioskMenu: false,
|
||||
removeProductsBelowOptionThresholds: false,
|
||||
},
|
||||
page: pageNum,
|
||||
perPage: perPage,
|
||||
};
|
||||
|
||||
const extensions = {
|
||||
persistedQuery: {
|
||||
version: 1,
|
||||
sha256Hash: 'ee29c060826dc41c527e470e9ae502c9b2c169720faa0a9f5d25e1b9a530a4a0'
|
||||
}
|
||||
};
|
||||
|
||||
const qs = new URLSearchParams({
|
||||
operationName: 'FilteredProducts',
|
||||
variables: JSON.stringify(variables),
|
||||
extensions: JSON.stringify(extensions)
|
||||
});
|
||||
|
||||
const response = await fetch(`https://dutchie.com/api-3/graphql?${qs}`, {
|
||||
method: 'GET',
|
||||
headers: {
|
||||
'Accept': 'application/json',
|
||||
'content-type': 'application/json',
|
||||
'x-dutchie-session': sessionId,
|
||||
'apollographql-client-name': 'Marketplace (production)',
|
||||
},
|
||||
credentials: 'include'
|
||||
});
|
||||
|
||||
const json = await response.json();
|
||||
const data = json?.data?.filteredProducts;
|
||||
if (!data?.products) break;
|
||||
|
||||
allProducts.push(...data.products);
|
||||
if (pageNum === 0) totalCount = data.queryInfo?.totalCount || 0;
|
||||
if (allProducts.length >= totalCount) break;
|
||||
|
||||
pageNum++;
|
||||
await new Promise(r => setTimeout(r, 200)); // Polite delay
|
||||
}
|
||||
|
||||
return { products: allProducts, totalCount };
|
||||
}, platformId);
|
||||
|
||||
await browser.close();
|
||||
|
||||
// STEP 3: Save payload
|
||||
const payload = {
|
||||
dispensaryId,
|
||||
platformId,
|
||||
cName,
|
||||
fetchedAt: new Date().toISOString(),
|
||||
productCount: result.products.length,
|
||||
products: result.products,
|
||||
};
|
||||
|
||||
fs.writeFileSync(outputPath, JSON.stringify(payload, null, 2));
|
||||
return payload;
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Critical Parameters
|
||||
|
||||
### GraphQL Hash (FilteredProducts)
|
||||
|
||||
```
|
||||
ee29c060826dc41c527e470e9ae502c9b2c169720faa0a9f5d25e1b9a530a4a0
|
||||
```
|
||||
|
||||
**WARNING:** Using the wrong hash returns HTTP 400.
|
||||
|
||||
### Status Parameter
|
||||
|
||||
| Value | Result |
|
||||
|-------|--------|
|
||||
| `'Active'` | Returns in-stock products (1019 in test) |
|
||||
| `null` | Returns 0 products |
|
||||
| `'All'` | Returns HTTP 400 |
|
||||
|
||||
**ALWAYS use `Status: 'Active'`**
|
||||
|
||||
### Required Headers
|
||||
|
||||
```javascript
|
||||
{
|
||||
'Accept': 'application/json',
|
||||
'content-type': 'application/json',
|
||||
'x-dutchie-session': 'unique-session-id',
|
||||
'apollographql-client-name': 'Marketplace (production)',
|
||||
}
|
||||
```
|
||||
|
||||
### Endpoint
|
||||
|
||||
```
|
||||
https://dutchie.com/api-3/graphql
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Performance Benchmarks
|
||||
|
||||
Test store: AZ-Deeply-Rooted (1019 products)
|
||||
|
||||
| Metric | Value |
|
||||
|--------|-------|
|
||||
| Total products | 1019 |
|
||||
| Time | 18.5 seconds |
|
||||
| Payload size | 11.8 MB |
|
||||
| Pages fetched | 11 (100 per page) |
|
||||
| Success rate | 100% |
|
||||
|
||||
---
|
||||
|
||||
## Payload Format
|
||||
|
||||
The output matches the existing `payload-fetch.ts` handler format:
|
||||
|
||||
```json
|
||||
{
|
||||
"dispensaryId": 123,
|
||||
"platformId": "6405ef617056e8014d79101b",
|
||||
"cName": "AZ-Deeply-Rooted",
|
||||
"fetchedAt": "2025-12-12T05:05:19.837Z",
|
||||
"productCount": 1019,
|
||||
"products": [
|
||||
{
|
||||
"id": "6927508db4851262f629a869",
|
||||
"Name": "Product Name",
|
||||
"brand": { "name": "Brand Name", ... },
|
||||
"type": "Flower",
|
||||
"THC": "25%",
|
||||
"Prices": [...],
|
||||
"Options": [...],
|
||||
...
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Integration Points
|
||||
|
||||
### As a Task Handler
|
||||
|
||||
The organic approach can be integrated as an alternative to curl-based fetching:
|
||||
|
||||
```typescript
|
||||
// In src/tasks/handlers/organic-payload-fetch.ts
|
||||
export async function handleOrganicPayloadFetch(ctx: TaskContext): Promise<TaskResult> {
|
||||
// Use puppeteer-based capture
|
||||
// Save to same payload storage
|
||||
// Queue product_refresh task
|
||||
}
|
||||
```
|
||||
|
||||
### Worker Configuration
|
||||
|
||||
Add to job_schedules:
|
||||
```sql
|
||||
INSERT INTO job_schedules (name, role, cron_expression)
|
||||
VALUES ('organic_product_crawl', 'organic_payload_fetch', '0 */6 * * *');
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### HTTP 400 Bad Request
|
||||
- Check hash is correct: `ee29c060...`
|
||||
- Verify Status is `'Active'` (string, not null)
|
||||
|
||||
### 0 Products Returned
|
||||
- Status was likely `null` or `'All'` - use `'Active'`
|
||||
- Check platformId is valid MongoDB ObjectId
|
||||
|
||||
### Session Not Established
|
||||
- Increase timeout on initial page.goto()
|
||||
- Check cName is valid (matches embedded-menu URL)
|
||||
|
||||
### Detection/Blocking
|
||||
- StealthPlugin should handle most cases
|
||||
- Add random delays between pages
|
||||
- Use headless: 'new' (not true/false)
|
||||
|
||||
---
|
||||
|
||||
## Files Reference
|
||||
|
||||
| File | Purpose |
|
||||
|------|---------|
|
||||
| `backend/test-intercept.js` | Proof of concept script |
|
||||
| `backend/src/platforms/dutchie/client.ts` | GraphQL hashes, curl implementation |
|
||||
| `backend/src/tasks/handlers/payload-fetch.ts` | Current curl-based handler |
|
||||
| `backend/src/utils/payload-storage.ts` | Payload save/load utilities |
|
||||
|
||||
---
|
||||
|
||||
## See Also
|
||||
|
||||
- `DUTCHIE_CRAWL_WORKFLOW.md` - Full crawl pipeline documentation
|
||||
- `TASK_WORKFLOW_2024-12-10.md` - Task system architecture
|
||||
- `CLAUDE.md` - Project rules and constraints
|
||||
@@ -1,25 +0,0 @@
|
||||
# ARCHIVED DOCUMENTATION
|
||||
|
||||
**WARNING: These docs may be outdated or inaccurate.**
|
||||
|
||||
The code has evolved significantly. These docs are kept for historical reference only.
|
||||
|
||||
## What to Use Instead
|
||||
|
||||
**The single source of truth is:**
|
||||
- `CLAUDE.md` (root) - Essential rules and quick reference
|
||||
- `docs/CODEBASE_MAP.md` - Current file/directory reference
|
||||
|
||||
## Why Archive?
|
||||
|
||||
These docs were written during development iterations and may reference:
|
||||
- Old file paths that no longer exist
|
||||
- Deprecated approaches (hydration, scraper-v2)
|
||||
- APIs that have changed
|
||||
- Database schemas that evolved
|
||||
|
||||
## If You Need Details
|
||||
|
||||
1. First check CODEBASE_MAP.md for current file locations
|
||||
2. Then read the actual source code
|
||||
3. Only use archive docs as a last resort for historical context
|
||||
@@ -1,77 +0,0 @@
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: scraper-worker
|
||||
namespace: cannaiq
|
||||
labels:
|
||||
app: scraper-worker
|
||||
spec:
|
||||
clusterIP: None # Headless service required for StatefulSet
|
||||
selector:
|
||||
app: scraper-worker
|
||||
ports:
|
||||
- port: 3010
|
||||
name: http
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: StatefulSet
|
||||
metadata:
|
||||
name: scraper-worker
|
||||
namespace: cannaiq
|
||||
spec:
|
||||
serviceName: scraper-worker
|
||||
replicas: 8
|
||||
podManagementPolicy: Parallel # Start all pods at once
|
||||
updateStrategy:
|
||||
type: OnDelete # Pods only update when manually deleted - no automatic restarts
|
||||
selector:
|
||||
matchLabels:
|
||||
app: scraper-worker
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: scraper-worker
|
||||
spec:
|
||||
terminationGracePeriodSeconds: 60
|
||||
imagePullSecrets:
|
||||
- name: regcred
|
||||
containers:
|
||||
- name: worker
|
||||
image: git.spdy.io/creationshop/cannaiq:latest
|
||||
imagePullPolicy: Always
|
||||
command: ["node"]
|
||||
args: ["dist/tasks/task-worker.js"]
|
||||
env:
|
||||
- name: WORKER_MODE
|
||||
value: "true"
|
||||
- name: POD_NAME
|
||||
valueFrom:
|
||||
fieldRef:
|
||||
fieldPath: metadata.name
|
||||
- name: MAX_CONCURRENT_TASKS
|
||||
value: "50"
|
||||
- name: API_BASE_URL
|
||||
value: http://scraper
|
||||
- name: NODE_OPTIONS
|
||||
value: --max-old-space-size=1500
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: scraper-config
|
||||
- secretRef:
|
||||
name: scraper-secrets
|
||||
resources:
|
||||
requests:
|
||||
cpu: 100m
|
||||
memory: 1Gi
|
||||
limits:
|
||||
cpu: 500m
|
||||
memory: 2Gi
|
||||
livenessProbe:
|
||||
exec:
|
||||
command:
|
||||
- /bin/sh
|
||||
- -c
|
||||
- pgrep -f 'task-worker' > /dev/null
|
||||
initialDelaySeconds: 10
|
||||
periodSeconds: 30
|
||||
failureThreshold: 3
|
||||
@@ -1,27 +0,0 @@
|
||||
-- Migration: Worker Commands Table
|
||||
-- Purpose: Store commands for workers (decommission, etc.)
|
||||
-- Workers poll this table after each task to check for commands
|
||||
|
||||
CREATE TABLE IF NOT EXISTS worker_commands (
|
||||
id SERIAL PRIMARY KEY,
|
||||
worker_id TEXT NOT NULL,
|
||||
command TEXT NOT NULL, -- 'decommission', 'pause', 'resume'
|
||||
reason TEXT,
|
||||
issued_by TEXT,
|
||||
issued_at TIMESTAMPTZ DEFAULT NOW(),
|
||||
acknowledged_at TIMESTAMPTZ,
|
||||
executed_at TIMESTAMPTZ,
|
||||
status TEXT DEFAULT 'pending' -- 'pending', 'acknowledged', 'executed', 'cancelled'
|
||||
);
|
||||
|
||||
-- Index for worker lookups
|
||||
CREATE INDEX IF NOT EXISTS idx_worker_commands_worker_id ON worker_commands(worker_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_worker_commands_pending ON worker_commands(worker_id, status) WHERE status = 'pending';
|
||||
|
||||
-- Add decommission_requested column to worker_registry for quick checks
|
||||
ALTER TABLE worker_registry ADD COLUMN IF NOT EXISTS decommission_requested BOOLEAN DEFAULT FALSE;
|
||||
ALTER TABLE worker_registry ADD COLUMN IF NOT EXISTS decommission_reason TEXT;
|
||||
ALTER TABLE worker_registry ADD COLUMN IF NOT EXISTS decommission_requested_at TIMESTAMPTZ;
|
||||
|
||||
-- Comment
|
||||
COMMENT ON TABLE worker_commands IS 'Commands issued to workers (decommission after task, pause, etc.)';
|
||||
@@ -1,88 +0,0 @@
|
||||
-- Migration 083: Discovery Run Tracking
|
||||
-- Tracks progress of store discovery runs step-by-step
|
||||
|
||||
-- Main discovery runs table
|
||||
CREATE TABLE IF NOT EXISTS discovery_runs (
|
||||
id SERIAL PRIMARY KEY,
|
||||
platform VARCHAR(50) NOT NULL DEFAULT 'dutchie',
|
||||
status VARCHAR(20) NOT NULL DEFAULT 'running', -- running, completed, failed
|
||||
started_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
||||
finished_at TIMESTAMPTZ,
|
||||
task_id INTEGER REFERENCES worker_task_queue(id),
|
||||
|
||||
-- Totals
|
||||
states_total INTEGER DEFAULT 0,
|
||||
states_completed INTEGER DEFAULT 0,
|
||||
locations_discovered INTEGER DEFAULT 0,
|
||||
locations_promoted INTEGER DEFAULT 0,
|
||||
new_store_ids INTEGER[] DEFAULT '{}',
|
||||
|
||||
-- Error info
|
||||
error_message TEXT,
|
||||
|
||||
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
|
||||
);
|
||||
|
||||
-- Per-state progress within a run
|
||||
CREATE TABLE IF NOT EXISTS discovery_run_states (
|
||||
id SERIAL PRIMARY KEY,
|
||||
run_id INTEGER NOT NULL REFERENCES discovery_runs(id) ON DELETE CASCADE,
|
||||
state_code VARCHAR(2) NOT NULL,
|
||||
status VARCHAR(20) NOT NULL DEFAULT 'pending', -- pending, running, completed, failed
|
||||
started_at TIMESTAMPTZ,
|
||||
finished_at TIMESTAMPTZ,
|
||||
|
||||
-- Results
|
||||
cities_found INTEGER DEFAULT 0,
|
||||
locations_found INTEGER DEFAULT 0,
|
||||
locations_upserted INTEGER DEFAULT 0,
|
||||
new_dispensary_ids INTEGER[] DEFAULT '{}',
|
||||
|
||||
-- Error info
|
||||
error_message TEXT,
|
||||
|
||||
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
||||
|
||||
UNIQUE(run_id, state_code)
|
||||
);
|
||||
|
||||
-- Step-by-step log for detailed progress tracking
|
||||
CREATE TABLE IF NOT EXISTS discovery_run_steps (
|
||||
id SERIAL PRIMARY KEY,
|
||||
run_id INTEGER NOT NULL REFERENCES discovery_runs(id) ON DELETE CASCADE,
|
||||
state_code VARCHAR(2),
|
||||
step_name VARCHAR(100) NOT NULL,
|
||||
status VARCHAR(20) NOT NULL DEFAULT 'started', -- started, completed, failed
|
||||
started_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
||||
finished_at TIMESTAMPTZ,
|
||||
|
||||
-- Details (JSON for flexibility)
|
||||
details JSONB DEFAULT '{}',
|
||||
|
||||
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
|
||||
);
|
||||
|
||||
-- Indexes for querying
|
||||
CREATE INDEX IF NOT EXISTS idx_discovery_runs_status ON discovery_runs(status);
|
||||
CREATE INDEX IF NOT EXISTS idx_discovery_runs_platform ON discovery_runs(platform);
|
||||
CREATE INDEX IF NOT EXISTS idx_discovery_runs_started_at ON discovery_runs(started_at DESC);
|
||||
CREATE INDEX IF NOT EXISTS idx_discovery_run_states_run_id ON discovery_run_states(run_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_discovery_run_steps_run_id ON discovery_run_steps(run_id);
|
||||
|
||||
-- View for latest run status per platform
|
||||
CREATE OR REPLACE VIEW v_latest_discovery_runs AS
|
||||
SELECT DISTINCT ON (platform)
|
||||
id,
|
||||
platform,
|
||||
status,
|
||||
started_at,
|
||||
finished_at,
|
||||
states_total,
|
||||
states_completed,
|
||||
locations_discovered,
|
||||
locations_promoted,
|
||||
array_length(new_store_ids, 1) as new_stores_count,
|
||||
error_message,
|
||||
EXTRACT(EPOCH FROM (COALESCE(finished_at, NOW()) - started_at)) as duration_seconds
|
||||
FROM discovery_runs
|
||||
ORDER BY platform, started_at DESC;
|
||||
@@ -1,253 +0,0 @@
|
||||
-- Migration 084: Dual Transport Preflight System
|
||||
-- Workers run both curl and http (Puppeteer) preflights on startup
|
||||
-- Tasks can require a specific transport method
|
||||
|
||||
-- ===================================================================
|
||||
-- PART 1: Add preflight columns to worker_registry
|
||||
-- ===================================================================
|
||||
|
||||
-- Preflight status for curl/axios transport (proxy-based)
|
||||
ALTER TABLE worker_registry
|
||||
ADD COLUMN IF NOT EXISTS preflight_curl_status VARCHAR(20) DEFAULT 'pending';
|
||||
|
||||
-- Preflight status for http/Puppeteer transport (browser-based)
|
||||
ALTER TABLE worker_registry
|
||||
ADD COLUMN IF NOT EXISTS preflight_http_status VARCHAR(20) DEFAULT 'pending';
|
||||
|
||||
-- Timestamps for when each preflight completed
|
||||
ALTER TABLE worker_registry
|
||||
ADD COLUMN IF NOT EXISTS preflight_curl_at TIMESTAMPTZ;
|
||||
|
||||
ALTER TABLE worker_registry
|
||||
ADD COLUMN IF NOT EXISTS preflight_http_at TIMESTAMPTZ;
|
||||
|
||||
-- Error messages for failed preflights
|
||||
ALTER TABLE worker_registry
|
||||
ADD COLUMN IF NOT EXISTS preflight_curl_error TEXT;
|
||||
|
||||
ALTER TABLE worker_registry
|
||||
ADD COLUMN IF NOT EXISTS preflight_http_error TEXT;
|
||||
|
||||
-- Response time for successful preflights (ms)
|
||||
ALTER TABLE worker_registry
|
||||
ADD COLUMN IF NOT EXISTS preflight_curl_ms INTEGER;
|
||||
|
||||
ALTER TABLE worker_registry
|
||||
ADD COLUMN IF NOT EXISTS preflight_http_ms INTEGER;
|
||||
|
||||
-- Constraints for preflight status values
|
||||
ALTER TABLE worker_registry
|
||||
DROP CONSTRAINT IF EXISTS valid_preflight_curl_status;
|
||||
|
||||
ALTER TABLE worker_registry
|
||||
ADD CONSTRAINT valid_preflight_curl_status
|
||||
CHECK (preflight_curl_status IN ('pending', 'passed', 'failed', 'skipped'));
|
||||
|
||||
ALTER TABLE worker_registry
|
||||
DROP CONSTRAINT IF EXISTS valid_preflight_http_status;
|
||||
|
||||
ALTER TABLE worker_registry
|
||||
ADD CONSTRAINT valid_preflight_http_status
|
||||
CHECK (preflight_http_status IN ('pending', 'passed', 'failed', 'skipped'));
|
||||
|
||||
-- ===================================================================
|
||||
-- PART 2: Add method column to worker_tasks
|
||||
-- ===================================================================
|
||||
|
||||
-- Transport method requirement for the task
|
||||
-- NULL = no preference (any worker can claim)
|
||||
-- 'curl' = requires curl/axios transport (proxy-based, fast)
|
||||
-- 'http' = requires http/Puppeteer transport (browser-based, anti-detect)
|
||||
ALTER TABLE worker_tasks
|
||||
ADD COLUMN IF NOT EXISTS method VARCHAR(10);
|
||||
|
||||
-- Constraint for valid method values
|
||||
ALTER TABLE worker_tasks
|
||||
DROP CONSTRAINT IF EXISTS valid_task_method;
|
||||
|
||||
ALTER TABLE worker_tasks
|
||||
ADD CONSTRAINT valid_task_method
|
||||
CHECK (method IS NULL OR method IN ('curl', 'http'));
|
||||
|
||||
-- Index for method-based task claiming
|
||||
CREATE INDEX IF NOT EXISTS idx_worker_tasks_method
|
||||
ON worker_tasks(method)
|
||||
WHERE status = 'pending';
|
||||
|
||||
-- Set default method for all existing pending tasks to 'http'
|
||||
-- ALL current tasks require Puppeteer/browser-based transport
|
||||
UPDATE worker_tasks
|
||||
SET method = 'http'
|
||||
WHERE method IS NULL;
|
||||
|
||||
-- ===================================================================
|
||||
-- PART 3: Update claim_task function for method compatibility
|
||||
-- ===================================================================
|
||||
|
||||
CREATE OR REPLACE FUNCTION claim_task(
|
||||
p_role VARCHAR(50),
|
||||
p_worker_id VARCHAR(100),
|
||||
p_curl_passed BOOLEAN DEFAULT TRUE,
|
||||
p_http_passed BOOLEAN DEFAULT FALSE
|
||||
) RETURNS worker_tasks AS $$
|
||||
DECLARE
|
||||
claimed_task worker_tasks;
|
||||
BEGIN
|
||||
UPDATE worker_tasks
|
||||
SET
|
||||
status = 'claimed',
|
||||
worker_id = p_worker_id,
|
||||
claimed_at = NOW(),
|
||||
updated_at = NOW()
|
||||
WHERE id = (
|
||||
SELECT id FROM worker_tasks
|
||||
WHERE role = p_role
|
||||
AND status = 'pending'
|
||||
AND (scheduled_for IS NULL OR scheduled_for <= NOW())
|
||||
-- Method compatibility: worker must have passed the required preflight
|
||||
AND (
|
||||
method IS NULL -- No preference, any worker can claim
|
||||
OR (method = 'curl' AND p_curl_passed = TRUE)
|
||||
OR (method = 'http' AND p_http_passed = TRUE)
|
||||
)
|
||||
-- Exclude stores that already have an active task
|
||||
AND (dispensary_id IS NULL OR dispensary_id NOT IN (
|
||||
SELECT dispensary_id FROM worker_tasks
|
||||
WHERE status IN ('claimed', 'running')
|
||||
AND dispensary_id IS NOT NULL
|
||||
))
|
||||
ORDER BY priority DESC, created_at ASC
|
||||
LIMIT 1
|
||||
FOR UPDATE SKIP LOCKED
|
||||
)
|
||||
RETURNING * INTO claimed_task;
|
||||
|
||||
RETURN claimed_task;
|
||||
END;
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- ===================================================================
|
||||
-- PART 4: Update v_active_workers view
|
||||
-- ===================================================================
|
||||
|
||||
DROP VIEW IF EXISTS v_active_workers;
|
||||
|
||||
CREATE VIEW v_active_workers AS
|
||||
SELECT
|
||||
wr.id,
|
||||
wr.worker_id,
|
||||
wr.friendly_name,
|
||||
wr.role,
|
||||
wr.status,
|
||||
wr.pod_name,
|
||||
wr.hostname,
|
||||
wr.started_at,
|
||||
wr.last_heartbeat_at,
|
||||
wr.last_task_at,
|
||||
wr.tasks_completed,
|
||||
wr.tasks_failed,
|
||||
wr.current_task_id,
|
||||
-- Preflight status
|
||||
wr.preflight_curl_status,
|
||||
wr.preflight_http_status,
|
||||
wr.preflight_curl_at,
|
||||
wr.preflight_http_at,
|
||||
wr.preflight_curl_error,
|
||||
wr.preflight_http_error,
|
||||
wr.preflight_curl_ms,
|
||||
wr.preflight_http_ms,
|
||||
-- Computed fields
|
||||
EXTRACT(EPOCH FROM (NOW() - wr.last_heartbeat_at)) as seconds_since_heartbeat,
|
||||
CASE
|
||||
WHEN wr.status = 'offline' THEN 'offline'
|
||||
WHEN wr.last_heartbeat_at < NOW() - INTERVAL '2 minutes' THEN 'stale'
|
||||
WHEN wr.current_task_id IS NOT NULL THEN 'busy'
|
||||
ELSE 'ready'
|
||||
END as health_status,
|
||||
-- Capability flags (can this worker handle curl/http tasks?)
|
||||
(wr.preflight_curl_status = 'passed') as can_curl,
|
||||
(wr.preflight_http_status = 'passed') as can_http
|
||||
FROM worker_registry wr
|
||||
WHERE wr.status != 'terminated'
|
||||
ORDER BY wr.status = 'active' DESC, wr.last_heartbeat_at DESC;
|
||||
|
||||
-- ===================================================================
|
||||
-- PART 5: View for task queue with method info
|
||||
-- ===================================================================
|
||||
|
||||
DROP VIEW IF EXISTS v_task_history;
|
||||
|
||||
CREATE VIEW v_task_history AS
|
||||
SELECT
|
||||
t.id,
|
||||
t.role,
|
||||
t.dispensary_id,
|
||||
d.name as dispensary_name,
|
||||
t.platform,
|
||||
t.status,
|
||||
t.priority,
|
||||
t.method,
|
||||
t.worker_id,
|
||||
t.scheduled_for,
|
||||
t.claimed_at,
|
||||
t.started_at,
|
||||
t.completed_at,
|
||||
t.error_message,
|
||||
t.retry_count,
|
||||
t.created_at,
|
||||
EXTRACT(EPOCH FROM (t.completed_at - t.started_at)) as duration_sec
|
||||
FROM worker_tasks t
|
||||
LEFT JOIN dispensaries d ON d.id = t.dispensary_id
|
||||
ORDER BY t.created_at DESC;
|
||||
|
||||
-- ===================================================================
|
||||
-- PART 6: Helper function to update worker preflight status
|
||||
-- ===================================================================
|
||||
|
||||
CREATE OR REPLACE FUNCTION update_worker_preflight(
|
||||
p_worker_id VARCHAR(100),
|
||||
p_transport VARCHAR(10), -- 'curl' or 'http'
|
||||
p_status VARCHAR(20), -- 'passed', 'failed', 'skipped'
|
||||
p_response_ms INTEGER DEFAULT NULL,
|
||||
p_error TEXT DEFAULT NULL
|
||||
) RETURNS VOID AS $$
|
||||
BEGIN
|
||||
IF p_transport = 'curl' THEN
|
||||
UPDATE worker_registry
|
||||
SET
|
||||
preflight_curl_status = p_status,
|
||||
preflight_curl_at = NOW(),
|
||||
preflight_curl_ms = p_response_ms,
|
||||
preflight_curl_error = p_error,
|
||||
updated_at = NOW()
|
||||
WHERE worker_id = p_worker_id;
|
||||
ELSIF p_transport = 'http' THEN
|
||||
UPDATE worker_registry
|
||||
SET
|
||||
preflight_http_status = p_status,
|
||||
preflight_http_at = NOW(),
|
||||
preflight_http_ms = p_response_ms,
|
||||
preflight_http_error = p_error,
|
||||
updated_at = NOW()
|
||||
WHERE worker_id = p_worker_id;
|
||||
END IF;
|
||||
END;
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- ===================================================================
|
||||
-- Comments
|
||||
-- ===================================================================
|
||||
|
||||
COMMENT ON COLUMN worker_registry.preflight_curl_status IS 'Status of curl/axios preflight: pending, passed, failed, skipped';
|
||||
COMMENT ON COLUMN worker_registry.preflight_http_status IS 'Status of http/Puppeteer preflight: pending, passed, failed, skipped';
|
||||
COMMENT ON COLUMN worker_registry.preflight_curl_at IS 'When curl preflight completed';
|
||||
COMMENT ON COLUMN worker_registry.preflight_http_at IS 'When http preflight completed';
|
||||
COMMENT ON COLUMN worker_registry.preflight_curl_error IS 'Error message if curl preflight failed';
|
||||
COMMENT ON COLUMN worker_registry.preflight_http_error IS 'Error message if http preflight failed';
|
||||
COMMENT ON COLUMN worker_registry.preflight_curl_ms IS 'Response time of successful curl preflight (ms)';
|
||||
COMMENT ON COLUMN worker_registry.preflight_http_ms IS 'Response time of successful http preflight (ms)';
|
||||
|
||||
COMMENT ON COLUMN worker_tasks.method IS 'Transport method required: NULL=any, curl=proxy-based, http=browser-based';
|
||||
|
||||
COMMENT ON FUNCTION claim_task IS 'Atomically claim a task, respecting method requirements and per-store locking';
|
||||
COMMENT ON FUNCTION update_worker_preflight IS 'Update a workers preflight status for a given transport';
|
||||
@@ -1,168 +0,0 @@
|
||||
-- Migration 085: Add IP and fingerprint columns for preflight reporting
|
||||
-- These columns were missing from migration 084
|
||||
|
||||
-- ===================================================================
|
||||
-- PART 1: Add IP address columns to worker_registry
|
||||
-- ===================================================================
|
||||
|
||||
-- IP address detected during curl/axios preflight
|
||||
ALTER TABLE worker_registry
|
||||
ADD COLUMN IF NOT EXISTS curl_ip VARCHAR(45);
|
||||
|
||||
-- IP address detected during http/Puppeteer preflight
|
||||
ALTER TABLE worker_registry
|
||||
ADD COLUMN IF NOT EXISTS http_ip VARCHAR(45);
|
||||
|
||||
-- ===================================================================
|
||||
-- PART 2: Add fingerprint data column
|
||||
-- ===================================================================
|
||||
|
||||
-- Browser fingerprint data captured during Puppeteer preflight
|
||||
ALTER TABLE worker_registry
|
||||
ADD COLUMN IF NOT EXISTS fingerprint_data JSONB;
|
||||
|
||||
-- ===================================================================
|
||||
-- PART 3: Add combined preflight status/timestamp for convenience
|
||||
-- ===================================================================
|
||||
|
||||
-- Overall preflight status (computed from both transports)
|
||||
-- Values: 'pending', 'passed', 'partial', 'failed'
|
||||
-- - 'pending': neither transport tested
|
||||
-- - 'passed': both transports passed (or http passed for browser-only)
|
||||
-- - 'partial': at least one passed
|
||||
-- - 'failed': no transport passed
|
||||
ALTER TABLE worker_registry
|
||||
ADD COLUMN IF NOT EXISTS preflight_status VARCHAR(20) DEFAULT 'pending';
|
||||
|
||||
-- Most recent preflight completion timestamp
|
||||
ALTER TABLE worker_registry
|
||||
ADD COLUMN IF NOT EXISTS preflight_at TIMESTAMPTZ;
|
||||
|
||||
-- ===================================================================
|
||||
-- PART 4: Update function to set preflight status
|
||||
-- ===================================================================
|
||||
|
||||
CREATE OR REPLACE FUNCTION update_worker_preflight(
|
||||
p_worker_id VARCHAR(100),
|
||||
p_transport VARCHAR(10), -- 'curl' or 'http'
|
||||
p_status VARCHAR(20), -- 'passed', 'failed', 'skipped'
|
||||
p_ip VARCHAR(45) DEFAULT NULL,
|
||||
p_response_ms INTEGER DEFAULT NULL,
|
||||
p_error TEXT DEFAULT NULL,
|
||||
p_fingerprint JSONB DEFAULT NULL
|
||||
) RETURNS VOID AS $$
|
||||
DECLARE
|
||||
v_curl_status VARCHAR(20);
|
||||
v_http_status VARCHAR(20);
|
||||
v_overall_status VARCHAR(20);
|
||||
BEGIN
|
||||
IF p_transport = 'curl' THEN
|
||||
UPDATE worker_registry
|
||||
SET
|
||||
preflight_curl_status = p_status,
|
||||
preflight_curl_at = NOW(),
|
||||
preflight_curl_ms = p_response_ms,
|
||||
preflight_curl_error = p_error,
|
||||
curl_ip = p_ip,
|
||||
updated_at = NOW()
|
||||
WHERE worker_id = p_worker_id;
|
||||
ELSIF p_transport = 'http' THEN
|
||||
UPDATE worker_registry
|
||||
SET
|
||||
preflight_http_status = p_status,
|
||||
preflight_http_at = NOW(),
|
||||
preflight_http_ms = p_response_ms,
|
||||
preflight_http_error = p_error,
|
||||
http_ip = p_ip,
|
||||
fingerprint_data = COALESCE(p_fingerprint, fingerprint_data),
|
||||
updated_at = NOW()
|
||||
WHERE worker_id = p_worker_id;
|
||||
END IF;
|
||||
|
||||
-- Update overall preflight status
|
||||
SELECT preflight_curl_status, preflight_http_status
|
||||
INTO v_curl_status, v_http_status
|
||||
FROM worker_registry
|
||||
WHERE worker_id = p_worker_id;
|
||||
|
||||
-- Compute overall status
|
||||
IF v_curl_status = 'passed' AND v_http_status = 'passed' THEN
|
||||
v_overall_status := 'passed';
|
||||
ELSIF v_curl_status = 'passed' OR v_http_status = 'passed' THEN
|
||||
v_overall_status := 'partial';
|
||||
ELSIF v_curl_status = 'failed' OR v_http_status = 'failed' THEN
|
||||
v_overall_status := 'failed';
|
||||
ELSE
|
||||
v_overall_status := 'pending';
|
||||
END IF;
|
||||
|
||||
UPDATE worker_registry
|
||||
SET
|
||||
preflight_status = v_overall_status,
|
||||
preflight_at = NOW()
|
||||
WHERE worker_id = p_worker_id;
|
||||
END;
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- ===================================================================
|
||||
-- PART 5: Update v_active_workers view
|
||||
-- ===================================================================
|
||||
|
||||
DROP VIEW IF EXISTS v_active_workers;
|
||||
|
||||
CREATE VIEW v_active_workers AS
|
||||
SELECT
|
||||
wr.id,
|
||||
wr.worker_id,
|
||||
wr.friendly_name,
|
||||
wr.role,
|
||||
wr.status,
|
||||
wr.pod_name,
|
||||
wr.hostname,
|
||||
wr.started_at,
|
||||
wr.last_heartbeat_at,
|
||||
wr.last_task_at,
|
||||
wr.tasks_completed,
|
||||
wr.tasks_failed,
|
||||
wr.current_task_id,
|
||||
-- IP addresses from preflights
|
||||
wr.curl_ip,
|
||||
wr.http_ip,
|
||||
-- Combined preflight status
|
||||
wr.preflight_status,
|
||||
wr.preflight_at,
|
||||
-- Detailed preflight status per transport
|
||||
wr.preflight_curl_status,
|
||||
wr.preflight_http_status,
|
||||
wr.preflight_curl_at,
|
||||
wr.preflight_http_at,
|
||||
wr.preflight_curl_error,
|
||||
wr.preflight_http_error,
|
||||
wr.preflight_curl_ms,
|
||||
wr.preflight_http_ms,
|
||||
-- Fingerprint data
|
||||
wr.fingerprint_data,
|
||||
-- Computed fields
|
||||
EXTRACT(EPOCH FROM (NOW() - wr.last_heartbeat_at)) as seconds_since_heartbeat,
|
||||
CASE
|
||||
WHEN wr.status = 'offline' THEN 'offline'
|
||||
WHEN wr.last_heartbeat_at < NOW() - INTERVAL '2 minutes' THEN 'stale'
|
||||
WHEN wr.current_task_id IS NOT NULL THEN 'busy'
|
||||
ELSE 'ready'
|
||||
END as health_status,
|
||||
-- Capability flags (can this worker handle curl/http tasks?)
|
||||
(wr.preflight_curl_status = 'passed') as can_curl,
|
||||
(wr.preflight_http_status = 'passed') as can_http
|
||||
FROM worker_registry wr
|
||||
WHERE wr.status != 'terminated'
|
||||
ORDER BY wr.status = 'active' DESC, wr.last_heartbeat_at DESC;
|
||||
|
||||
-- ===================================================================
|
||||
-- Comments
|
||||
-- ===================================================================
|
||||
|
||||
COMMENT ON COLUMN worker_registry.curl_ip IS 'IP address detected during curl/axios preflight';
|
||||
COMMENT ON COLUMN worker_registry.http_ip IS 'IP address detected during Puppeteer preflight';
|
||||
COMMENT ON COLUMN worker_registry.fingerprint_data IS 'Browser fingerprint captured during Puppeteer preflight';
|
||||
COMMENT ON COLUMN worker_registry.preflight_status IS 'Overall preflight status: pending, passed, partial, failed';
|
||||
COMMENT ON COLUMN worker_registry.preflight_at IS 'Most recent preflight completion timestamp';
|
||||
@@ -1,59 +0,0 @@
|
||||
-- Migration 085: Trusted Origins Management
|
||||
-- Allows admin to manage trusted IPs and domains via UI instead of hardcoded values
|
||||
|
||||
-- Trusted origins table (IPs and domains that bypass API key auth)
|
||||
CREATE TABLE IF NOT EXISTS trusted_origins (
|
||||
id SERIAL PRIMARY KEY,
|
||||
|
||||
-- Origin type: 'ip', 'domain', 'pattern'
|
||||
origin_type VARCHAR(20) NOT NULL CHECK (origin_type IN ('ip', 'domain', 'pattern')),
|
||||
|
||||
-- The actual value
|
||||
-- For ip: '127.0.0.1', '::1', '192.168.1.0/24'
|
||||
-- For domain: 'cannaiq.co', 'findadispo.com'
|
||||
-- For pattern: '^https://.*\.cannabrands\.app$' (regex)
|
||||
origin_value VARCHAR(255) NOT NULL,
|
||||
|
||||
-- Description for admin reference
|
||||
description TEXT,
|
||||
|
||||
-- Active flag
|
||||
active BOOLEAN DEFAULT true,
|
||||
|
||||
-- Audit
|
||||
created_at TIMESTAMPTZ DEFAULT NOW(),
|
||||
created_by INTEGER REFERENCES users(id),
|
||||
updated_at TIMESTAMPTZ DEFAULT NOW(),
|
||||
|
||||
UNIQUE(origin_type, origin_value)
|
||||
);
|
||||
|
||||
-- Index for quick lookups
|
||||
CREATE INDEX IF NOT EXISTS idx_trusted_origins_active ON trusted_origins(active) WHERE active = true;
|
||||
CREATE INDEX IF NOT EXISTS idx_trusted_origins_type ON trusted_origins(origin_type, active);
|
||||
|
||||
-- Seed with current hardcoded values
|
||||
INSERT INTO trusted_origins (origin_type, origin_value, description) VALUES
|
||||
-- Trusted IPs (localhost)
|
||||
('ip', '127.0.0.1', 'Localhost IPv4'),
|
||||
('ip', '::1', 'Localhost IPv6'),
|
||||
('ip', '::ffff:127.0.0.1', 'Localhost IPv4-mapped IPv6'),
|
||||
|
||||
-- Trusted domains
|
||||
('domain', 'cannaiq.co', 'CannaiQ production'),
|
||||
('domain', 'www.cannaiq.co', 'CannaiQ production (www)'),
|
||||
('domain', 'findadispo.com', 'FindADispo production'),
|
||||
('domain', 'www.findadispo.com', 'FindADispo production (www)'),
|
||||
('domain', 'findagram.co', 'Findagram production'),
|
||||
('domain', 'www.findagram.co', 'Findagram production (www)'),
|
||||
('domain', 'localhost:3010', 'Local backend dev'),
|
||||
('domain', 'localhost:8080', 'Local admin dev'),
|
||||
('domain', 'localhost:5173', 'Local Vite dev'),
|
||||
|
||||
-- Pattern-based (regex)
|
||||
('pattern', '^https://.*\.cannabrands\.app$', 'All cannabrands.app subdomains'),
|
||||
('pattern', '^https://.*\.cannaiq\.co$', 'All cannaiq.co subdomains')
|
||||
ON CONFLICT (origin_type, origin_value) DO NOTHING;
|
||||
|
||||
-- Add comment
|
||||
COMMENT ON TABLE trusted_origins IS 'IPs and domains that bypass API key authentication. Managed via /admin.';
|
||||
@@ -1,10 +0,0 @@
|
||||
-- Migration 086: Add proxy_url column for alternative URL formats
|
||||
-- Some proxy providers use non-standard URL formats (e.g., host:port:user:pass)
|
||||
-- This column allows storing the raw URL directly
|
||||
|
||||
-- Add proxy_url column - if set, used directly instead of constructing from parts
|
||||
ALTER TABLE proxies
|
||||
ADD COLUMN IF NOT EXISTS proxy_url TEXT;
|
||||
|
||||
-- Add comment
|
||||
COMMENT ON COLUMN proxies.proxy_url IS 'Raw proxy URL (if provider uses non-standard format). Takes precedence over constructed URL from host/port/user/pass.';
|
||||
@@ -1,30 +0,0 @@
|
||||
-- Migration 088: Extend raw_crawl_payloads for discovery payloads
|
||||
--
|
||||
-- Enables saving raw store data from Dutchie discovery crawls.
|
||||
-- Store discovery returns raw dispensary objects - save them for historical analysis.
|
||||
|
||||
-- Add payload_type to distinguish product crawls from discovery crawls
|
||||
ALTER TABLE raw_crawl_payloads
|
||||
ADD COLUMN IF NOT EXISTS payload_type VARCHAR(32) NOT NULL DEFAULT 'product';
|
||||
|
||||
-- Add state_code for discovery payloads (null for product payloads)
|
||||
ALTER TABLE raw_crawl_payloads
|
||||
ADD COLUMN IF NOT EXISTS state_code VARCHAR(10);
|
||||
|
||||
-- Add store_count for discovery payloads (alternative to product_count)
|
||||
ALTER TABLE raw_crawl_payloads
|
||||
ADD COLUMN IF NOT EXISTS store_count INTEGER;
|
||||
|
||||
-- Make dispensary_id nullable for discovery payloads
|
||||
ALTER TABLE raw_crawl_payloads
|
||||
ALTER COLUMN dispensary_id DROP NOT NULL;
|
||||
|
||||
-- Add index for discovery payload queries
|
||||
CREATE INDEX IF NOT EXISTS idx_raw_crawl_payloads_type_state
|
||||
ON raw_crawl_payloads(payload_type, state_code)
|
||||
WHERE payload_type = 'store_discovery';
|
||||
|
||||
-- Comments
|
||||
COMMENT ON COLUMN raw_crawl_payloads.payload_type IS 'Type: product (default), store_discovery';
|
||||
COMMENT ON COLUMN raw_crawl_payloads.state_code IS 'State code for discovery payloads (e.g., AZ, MI)';
|
||||
COMMENT ON COLUMN raw_crawl_payloads.store_count IS 'Number of stores in discovery payload';
|
||||
@@ -1,105 +0,0 @@
|
||||
-- Migration 089: Immutable Schedules with Per-State Product Discovery
|
||||
--
|
||||
-- Key changes:
|
||||
-- 1. Add is_immutable column - schedules can be edited but not deleted
|
||||
-- 2. Add method column - all tasks use 'http' (Puppeteer transport)
|
||||
-- 3. Store discovery weekly (168h)
|
||||
-- 4. Per-state product_discovery schedules (4h default)
|
||||
-- 5. Remove old payload_fetch schedules
|
||||
|
||||
-- =====================================================
|
||||
-- 1) Add new columns to task_schedules
|
||||
-- =====================================================
|
||||
ALTER TABLE task_schedules
|
||||
ADD COLUMN IF NOT EXISTS is_immutable BOOLEAN DEFAULT FALSE;
|
||||
|
||||
ALTER TABLE task_schedules
|
||||
ADD COLUMN IF NOT EXISTS method VARCHAR(10) DEFAULT 'http';
|
||||
|
||||
-- =====================================================
|
||||
-- 2) Update store_discovery to weekly and immutable
|
||||
-- =====================================================
|
||||
UPDATE task_schedules
|
||||
SET interval_hours = 168, -- 7 days
|
||||
is_immutable = TRUE,
|
||||
method = 'http',
|
||||
description = 'Discover new Dutchie stores weekly (HTTP transport)'
|
||||
WHERE name IN ('store_discovery_dutchie', 'Store Discovery');
|
||||
|
||||
-- Insert if doesn't exist
|
||||
INSERT INTO task_schedules (name, role, interval_hours, priority, description, is_immutable, method, platform, next_run_at)
|
||||
VALUES ('Store Discovery', 'store_discovery', 168, 5, 'Discover new Dutchie stores weekly (HTTP transport)', TRUE, 'http', 'dutchie', NOW())
|
||||
ON CONFLICT (name) DO UPDATE SET
|
||||
interval_hours = 168,
|
||||
is_immutable = TRUE,
|
||||
method = 'http',
|
||||
description = 'Discover new Dutchie stores weekly (HTTP transport)';
|
||||
|
||||
-- =====================================================
|
||||
-- 3) Remove old payload_fetch and product_refresh_all schedules
|
||||
-- =====================================================
|
||||
DELETE FROM task_schedules WHERE name IN ('payload_fetch_all', 'product_refresh_all');
|
||||
|
||||
-- =====================================================
|
||||
-- 4) Create per-state product_discovery schedules
|
||||
-- =====================================================
|
||||
-- One schedule per state that has dispensaries with active cannabis programs
|
||||
INSERT INTO task_schedules (name, role, state_code, interval_hours, priority, description, is_immutable, method, enabled, next_run_at)
|
||||
SELECT
|
||||
'product_discovery_' || lower(s.code) AS name,
|
||||
'product_discovery' AS role,
|
||||
s.code AS state_code,
|
||||
4 AS interval_hours, -- 4 hours default, editable
|
||||
10 AS priority,
|
||||
'Product discovery for ' || s.name || ' dispensaries (HTTP transport)' AS description,
|
||||
TRUE AS is_immutable, -- Can edit but not delete
|
||||
'http' AS method,
|
||||
CASE WHEN s.is_active THEN TRUE ELSE FALSE END AS enabled,
|
||||
-- Stagger start times: each state starts 5 minutes after the previous
|
||||
NOW() + (ROW_NUMBER() OVER (ORDER BY s.code) * INTERVAL '5 minutes') AS next_run_at
|
||||
FROM states s
|
||||
WHERE EXISTS (
|
||||
SELECT 1 FROM dispensaries d
|
||||
WHERE d.state_id = s.id AND d.crawl_enabled = true
|
||||
)
|
||||
ON CONFLICT (name) DO UPDATE SET
|
||||
is_immutable = TRUE,
|
||||
method = 'http',
|
||||
description = EXCLUDED.description;
|
||||
|
||||
-- Also create schedules for states that might have stores discovered later
|
||||
INSERT INTO task_schedules (name, role, state_code, interval_hours, priority, description, is_immutable, method, enabled, next_run_at)
|
||||
SELECT
|
||||
'product_discovery_' || lower(s.code) AS name,
|
||||
'product_discovery' AS role,
|
||||
s.code AS state_code,
|
||||
4 AS interval_hours,
|
||||
10 AS priority,
|
||||
'Product discovery for ' || s.name || ' dispensaries (HTTP transport)' AS description,
|
||||
TRUE AS is_immutable,
|
||||
'http' AS method,
|
||||
FALSE AS enabled, -- Disabled until stores exist
|
||||
NOW() + INTERVAL '1 hour'
|
||||
FROM states s
|
||||
WHERE NOT EXISTS (
|
||||
SELECT 1 FROM task_schedules ts WHERE ts.name = 'product_discovery_' || lower(s.code)
|
||||
)
|
||||
ON CONFLICT (name) DO NOTHING;
|
||||
|
||||
-- =====================================================
|
||||
-- 5) Make analytics_refresh immutable
|
||||
-- =====================================================
|
||||
UPDATE task_schedules
|
||||
SET is_immutable = TRUE, method = 'http'
|
||||
WHERE name = 'analytics_refresh';
|
||||
|
||||
-- =====================================================
|
||||
-- 6) Add index for schedule lookups
|
||||
-- =====================================================
|
||||
CREATE INDEX IF NOT EXISTS idx_task_schedules_state_code
|
||||
ON task_schedules(state_code)
|
||||
WHERE state_code IS NOT NULL;
|
||||
|
||||
-- Comments
|
||||
COMMENT ON COLUMN task_schedules.is_immutable IS 'If TRUE, schedule cannot be deleted (only edited)';
|
||||
COMMENT ON COLUMN task_schedules.method IS 'Transport method: http (Puppeteer/browser) or curl (axios)';
|
||||
@@ -1,66 +0,0 @@
|
||||
-- Migration 090: Add modification tracking columns
|
||||
--
|
||||
-- Tracks when records were last modified and by which task.
|
||||
-- Enables debugging, auditing, and understanding data freshness.
|
||||
--
|
||||
-- Columns added:
|
||||
-- last_modified_at - When the record was last modified by a task
|
||||
-- last_modified_by_task - Which task role modified it (e.g., 'product_refresh')
|
||||
-- last_modified_task_id - The specific task ID that modified it
|
||||
|
||||
-- ============================================================
|
||||
-- dispensaries table
|
||||
-- ============================================================
|
||||
ALTER TABLE dispensaries
|
||||
ADD COLUMN IF NOT EXISTS last_modified_at TIMESTAMPTZ;
|
||||
|
||||
ALTER TABLE dispensaries
|
||||
ADD COLUMN IF NOT EXISTS last_modified_by_task VARCHAR(50);
|
||||
|
||||
ALTER TABLE dispensaries
|
||||
ADD COLUMN IF NOT EXISTS last_modified_task_id INTEGER;
|
||||
|
||||
-- Index for querying recently modified records
|
||||
CREATE INDEX IF NOT EXISTS idx_dispensaries_last_modified
|
||||
ON dispensaries(last_modified_at DESC)
|
||||
WHERE last_modified_at IS NOT NULL;
|
||||
|
||||
-- Index for querying by task type
|
||||
CREATE INDEX IF NOT EXISTS idx_dispensaries_modified_by_task
|
||||
ON dispensaries(last_modified_by_task)
|
||||
WHERE last_modified_by_task IS NOT NULL;
|
||||
|
||||
COMMENT ON COLUMN dispensaries.last_modified_at IS 'Timestamp when this record was last modified by a task';
|
||||
COMMENT ON COLUMN dispensaries.last_modified_by_task IS 'Task role that last modified this record (e.g., store_discovery_state, entry_point_discovery)';
|
||||
COMMENT ON COLUMN dispensaries.last_modified_task_id IS 'ID of the worker_tasks record that last modified this';
|
||||
|
||||
-- ============================================================
|
||||
-- store_products table
|
||||
-- ============================================================
|
||||
ALTER TABLE store_products
|
||||
ADD COLUMN IF NOT EXISTS last_modified_at TIMESTAMPTZ;
|
||||
|
||||
ALTER TABLE store_products
|
||||
ADD COLUMN IF NOT EXISTS last_modified_by_task VARCHAR(50);
|
||||
|
||||
ALTER TABLE store_products
|
||||
ADD COLUMN IF NOT EXISTS last_modified_task_id INTEGER;
|
||||
|
||||
-- Index for querying recently modified products
|
||||
CREATE INDEX IF NOT EXISTS idx_store_products_last_modified
|
||||
ON store_products(last_modified_at DESC)
|
||||
WHERE last_modified_at IS NOT NULL;
|
||||
|
||||
-- Index for querying by task type
|
||||
CREATE INDEX IF NOT EXISTS idx_store_products_modified_by_task
|
||||
ON store_products(last_modified_by_task)
|
||||
WHERE last_modified_by_task IS NOT NULL;
|
||||
|
||||
-- Composite index for finding products modified by a specific task
|
||||
CREATE INDEX IF NOT EXISTS idx_store_products_task_modified
|
||||
ON store_products(dispensary_id, last_modified_at DESC)
|
||||
WHERE last_modified_at IS NOT NULL;
|
||||
|
||||
COMMENT ON COLUMN store_products.last_modified_at IS 'Timestamp when this record was last modified by a task';
|
||||
COMMENT ON COLUMN store_products.last_modified_by_task IS 'Task role that last modified this record (e.g., product_refresh, product_discovery)';
|
||||
COMMENT ON COLUMN store_products.last_modified_task_id IS 'ID of the worker_tasks record that last modified this';
|
||||
@@ -1,26 +0,0 @@
|
||||
-- Migration 091: Add store discovery tracking columns
|
||||
-- Per auto-healing scheme (2025-12-12):
|
||||
-- Track when store_discovery last updated each dispensary
|
||||
-- Track when last payload was saved
|
||||
|
||||
-- Add last_store_discovery_at to track when store_discovery updated this record
|
||||
ALTER TABLE dispensaries
|
||||
ADD COLUMN IF NOT EXISTS last_store_discovery_at TIMESTAMPTZ;
|
||||
|
||||
-- Add last_payload_at to track when last product payload was saved
|
||||
-- (Complements last_fetch_at which tracks API fetch time)
|
||||
ALTER TABLE dispensaries
|
||||
ADD COLUMN IF NOT EXISTS last_payload_at TIMESTAMPTZ;
|
||||
|
||||
-- Add index for finding stale discovery data
|
||||
CREATE INDEX IF NOT EXISTS idx_dispensaries_store_discovery_at
|
||||
ON dispensaries (last_store_discovery_at DESC NULLS LAST)
|
||||
WHERE crawl_enabled = true;
|
||||
|
||||
-- Add index for finding dispensaries without recent payloads
|
||||
CREATE INDEX IF NOT EXISTS idx_dispensaries_payload_at
|
||||
ON dispensaries (last_payload_at DESC NULLS LAST)
|
||||
WHERE crawl_enabled = true;
|
||||
|
||||
COMMENT ON COLUMN dispensaries.last_store_discovery_at IS 'When store_discovery task last updated this record';
|
||||
COMMENT ON COLUMN dispensaries.last_payload_at IS 'When last product payload was saved for this dispensary';
|
||||
@@ -1,30 +0,0 @@
|
||||
-- Fix 3 Trulieve/Harvest stores with incorrect menu URLs
|
||||
-- These records have NULL or mismatched platform_dispensary_id so store_discovery
|
||||
-- ON CONFLICT can't update them automatically
|
||||
|
||||
UPDATE dispensaries
|
||||
SET
|
||||
menu_url = 'https://dutchie.com/dispensary/svaccha-llc-nirvana-center-apache-junction',
|
||||
updated_at = NOW()
|
||||
WHERE id = 224;
|
||||
|
||||
UPDATE dispensaries
|
||||
SET
|
||||
menu_url = 'https://dutchie.com/dispensary/trulieve-of-phoenix-tatum',
|
||||
updated_at = NOW()
|
||||
WHERE id = 76;
|
||||
|
||||
UPDATE dispensaries
|
||||
SET
|
||||
menu_url = 'https://dutchie.com/dispensary/harvest-of-havasu',
|
||||
updated_at = NOW()
|
||||
WHERE id = 403;
|
||||
|
||||
-- Queue entry_point_discovery tasks to resolve their platform_dispensary_id
|
||||
-- method='http' ensures only workers that passed http preflight can claim these
|
||||
INSERT INTO worker_tasks (role, dispensary_id, priority, scheduled_for, method)
|
||||
VALUES
|
||||
('entry_point_discovery', 224, 5, NOW(), 'http'),
|
||||
('entry_point_discovery', 76, 5, NOW(), 'http'),
|
||||
('entry_point_discovery', 403, 5, NOW(), 'http')
|
||||
ON CONFLICT DO NOTHING;
|
||||
@@ -1,35 +0,0 @@
|
||||
-- Migration 092: Store Intelligence Cache
|
||||
-- Pre-computed store intelligence data refreshed by analytics_refresh task
|
||||
-- Eliminates costly aggregation queries on /intelligence/stores endpoint
|
||||
|
||||
CREATE TABLE IF NOT EXISTS store_intelligence_cache (
|
||||
dispensary_id INTEGER PRIMARY KEY REFERENCES dispensaries(id) ON DELETE CASCADE,
|
||||
|
||||
-- Basic counts
|
||||
sku_count INTEGER NOT NULL DEFAULT 0,
|
||||
brand_count INTEGER NOT NULL DEFAULT 0,
|
||||
snapshot_count INTEGER NOT NULL DEFAULT 0,
|
||||
|
||||
-- Pricing
|
||||
avg_price_rec NUMERIC(10,2),
|
||||
avg_price_med NUMERIC(10,2),
|
||||
min_price NUMERIC(10,2),
|
||||
max_price NUMERIC(10,2),
|
||||
|
||||
-- Category breakdown (JSONB for flexibility)
|
||||
category_counts JSONB DEFAULT '{}',
|
||||
|
||||
-- Timestamps
|
||||
last_crawl_at TIMESTAMPTZ,
|
||||
last_refresh_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
||||
|
||||
-- Metadata
|
||||
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
|
||||
);
|
||||
|
||||
-- Index for fast lookups
|
||||
CREATE INDEX IF NOT EXISTS idx_store_intelligence_cache_refresh
|
||||
ON store_intelligence_cache (last_refresh_at DESC);
|
||||
|
||||
COMMENT ON TABLE store_intelligence_cache IS 'Pre-computed store intelligence metrics, refreshed by analytics_refresh task';
|
||||
COMMENT ON COLUMN store_intelligence_cache.category_counts IS 'JSON object mapping category_raw to product count';
|
||||
@@ -1,43 +0,0 @@
|
||||
-- Migration: 093_fix_mv_state_metrics.sql
|
||||
-- Purpose: Fix mv_state_metrics to use brand_name_raw and show correct store counts
|
||||
-- Issues fixed:
|
||||
-- 1. unique_brands used brand_id (often NULL), now uses brand_name_raw
|
||||
-- 2. Added out_of_stock_products column
|
||||
-- 3. dispensary_count now correctly named
|
||||
|
||||
-- Drop and recreate the materialized view with correct definition
|
||||
DROP MATERIALIZED VIEW IF EXISTS mv_state_metrics;
|
||||
|
||||
CREATE MATERIALIZED VIEW mv_state_metrics AS
|
||||
SELECT
|
||||
d.state,
|
||||
s.name AS state_name,
|
||||
COUNT(DISTINCT d.id) AS dispensary_count,
|
||||
COUNT(DISTINCT CASE WHEN d.menu_type = 'dutchie' THEN d.id END) AS dutchie_stores,
|
||||
COUNT(DISTINCT CASE WHEN d.crawl_enabled = true THEN d.id END) AS active_stores,
|
||||
COUNT(sp.id) AS total_products,
|
||||
COUNT(CASE WHEN COALESCE(sp.is_in_stock, true) THEN sp.id END) AS in_stock_products,
|
||||
COUNT(CASE WHEN sp.is_in_stock = false THEN sp.id END) AS out_of_stock_products,
|
||||
COUNT(CASE WHEN sp.is_on_special THEN sp.id END) AS on_special_products,
|
||||
COUNT(DISTINCT sp.brand_name_raw) FILTER (WHERE sp.brand_name_raw IS NOT NULL AND sp.brand_name_raw != '') AS unique_brands,
|
||||
COUNT(DISTINCT sp.category_raw) FILTER (WHERE sp.category_raw IS NOT NULL) AS unique_categories,
|
||||
ROUND(AVG(sp.price_rec) FILTER (WHERE sp.price_rec > 0)::NUMERIC, 2) AS avg_price_rec,
|
||||
MIN(sp.price_rec) FILTER (WHERE sp.price_rec > 0) AS min_price_rec,
|
||||
MAX(sp.price_rec) FILTER (WHERE sp.price_rec > 0) AS max_price_rec,
|
||||
NOW() AS refreshed_at
|
||||
FROM dispensaries d
|
||||
LEFT JOIN states s ON d.state = s.code
|
||||
LEFT JOIN store_products sp ON d.id = sp.dispensary_id
|
||||
WHERE d.state IS NOT NULL
|
||||
GROUP BY d.state, s.name;
|
||||
|
||||
-- Create unique index for CONCURRENTLY refresh support
|
||||
CREATE UNIQUE INDEX idx_mv_state_metrics_state ON mv_state_metrics(state);
|
||||
|
||||
-- Update refresh function
|
||||
CREATE OR REPLACE FUNCTION refresh_state_metrics()
|
||||
RETURNS void AS $$
|
||||
BEGIN
|
||||
REFRESH MATERIALIZED VIEW CONCURRENTLY mv_state_metrics;
|
||||
END;
|
||||
$$ LANGUAGE plpgsql;
|
||||
@@ -1,516 +0,0 @@
|
||||
-- Migration: Import 500 Evomi residential proxies
|
||||
-- These are sticky-session rotating proxies where password contains session ID
|
||||
-- Active is set to false - run Test All to verify and activate
|
||||
|
||||
-- First, drop the old unique constraint that doesn't account for username/password
|
||||
ALTER TABLE proxies DROP CONSTRAINT IF EXISTS proxies_host_port_protocol_key;
|
||||
|
||||
-- Add new unique constraint that includes username and password
|
||||
-- This allows multiple entries for the same host:port with different credentials (sessions)
|
||||
ALTER TABLE proxies ADD CONSTRAINT proxies_host_port_protocol_username_password_key
|
||||
UNIQUE(host, port, protocol, username, password);
|
||||
|
||||
-- Now insert all 500 proxies
|
||||
INSERT INTO proxies (host, port, protocol, username, password, active, max_connections)
|
||||
VALUES
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-4XRRPF1UQ', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-5UNGX7N7K', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-9PSKYP1GU', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-GZBKKYL2S', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-YHJHM0XZU', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ESDYQ34CJ', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-GAXUMFKQI', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-2FF66K4CI', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-SUYM0R49B', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-A8VHZMEFP', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-WNRLH6NXR', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-SPSB3IUX6', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-85N76UU5Q', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-189P3LH2F', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-47DQOAGWY', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-IBT0QO7M2', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-UPXOUOH8X', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-BFQ1PH75D', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-KNTFKRY1J', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-5L8IG6DZX', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-9YE13X0BA', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-6KBHCHF0I', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-CETHHFHZ6', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-A06J8ST3I', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-YFS93P1YR', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-RB74B3R6C', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-2JW27O3EU', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-KCUX84BL0', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-1A2KSG6HO', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-4QW8ILV0E', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-0Q09GH2VL', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-16BRXBCYC', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-9W02B3R4L', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-CVAEH76YT', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-CATOG0Q5I', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-F81625L74', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-DO4AVTPK4', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-SBZPXORD5', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-JA1AWOX03', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-0FUJTRSYT', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-CM1R2RSTB', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-EHPJZCK1S', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ZYLKORNAF', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-05A8BUD25', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-RHM1Q6O4M', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ES5VPCE6Z', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-P0JEGLP4O', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-OC4AX88D0', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-3BN54IEBV', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ABSC7S550', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-LNIJU6R2V', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-OYGQPPCOV', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-32YBOHQWR', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-7KGEMK4SL', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-FAW8T2EBW', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-GPV69KI9T', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-JPBHSN8M2', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-VZ1JQOF15', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-7DJXXPK1E', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-JXKQ7JVZ1', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-88Q5UQX3B', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-HAI5K0JFO', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-65SUKG0QH', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-1XFJETX1F', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-7ZNUCVCBW', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-O1DCK15LA', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-WLTEA65WB', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-KCHAFNK2P', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-6ODSZ6CUT', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-SZ8R2EFH4', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-9EPPYQREC', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-MPCBES7UI', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-FCCPL0XWZ', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-GJ23UYEGI', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-RQT80689I', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-TDQO2AP5E', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-D5Q5SEUEO', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-DZN4ZTENM', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-4HVQ33VK9', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-F1HJ7GPHA', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-RM708QD2Y', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-K36N27GM5', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-O73TS0DAE', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-54QXRWEA8', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-1P6LP0365', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-WMZ2ST34E', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-175UYF58T', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-W0HTK6F28', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-D5275CTIM', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-IH2IWVZOH', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-C4VFW7GSA', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-O9XGULSNA', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-PJ1W1P5L9', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-MQQU30KPC', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-BNPIBZTYV', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-7BNRCH922', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-5AZLU117B', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-3PPJ49VJC', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-FMC8CQO74', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-VCHW23CXJ', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-1S4749PCB', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-0T9DJFZPK', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-L0RMV65W3', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-FZ1ZZUQNA', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-6IFJD23DI', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ZKUEP5XM0', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-Z8KU62CLT', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-LO77J78X1', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-27FBKYRJ4', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-0TDQTESGW', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-IMKI89WQ1', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ANS65MIJS', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-O3T2OTT0Y', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-MWW6Z1QVM', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-TT47MX0BB', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-59CFKTM14', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-DOD61TVZN', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-RH9Y298WS', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-X98AATJ7B', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-C3UMES1W8', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-8O3J7G3PT', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-3K4OH78OJ', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-N4A3JMVL1', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-HK1SRLAC9', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-Y9VLJJXVU', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-KTTH7R0EC', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-JKVX01E8T', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-HW2VPAHJO', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-7WZ9UHBH8', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-JTKFK0CP7', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-G3F27NXG5', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-K7I2JWYSP', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-CTUU8UQ0T', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ISHMAP6RQ', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-LVWNZ1LHP', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-N5CQ1YG2Z', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-XL2XY2SLZ', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-UCRZVFIV1', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-VLGQFYNEL', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-YPCDM9O5Y', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-R6VA2S25E', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-4W8X8BBUL', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-5INDC8M80', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-Q8RKKOF29', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-B5ED3EFBC', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-8IC5ZXAX1', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-KCGM25D75', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-1MO06IRID', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-4QWGUGN6W', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-5T9M5KEHT', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-9KG7W7NZF', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-NYGN5R2CL', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-H61OXFCJ2', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-30WSQ4EFH', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-J36NG6MY2', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-TZU34ZA7A', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-LPWNYL74G', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-DDJTXOS4Z', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-HFOS4S185', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-2MLGIFL1M', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-CI5AHX0TC', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-WSXVCH1WN', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-8F0C3D06T', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-3YZR0664F', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-1L2VMWTM0', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-KPMCB57O7', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-N6QXQDZV3', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-35FAYFWDP', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-TVZWE2JR8', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-0WK86IKLF', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-8WBU6ESHJ', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-XGU6UNM01', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-86CXNEQZC', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-NZ4LFCHE3', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ZKB6D72RF', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-BKXNG77NS', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-3MJ332POD', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-SL9VEYNJ0', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-LY8KO43Z8', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-8KGF1XR1L', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-WT6FB54HW', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-7UQ9JMG5E', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-KX3L2040U', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-HL809F9WU', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-T9GU40ERH', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-I5O2NX3G9', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-RVOUYU3NO', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-2T3ETNUKS', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-SW0B93DZZ', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-PQ55UF3K6', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-VNRWWHHJB', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-8Q26FZ7EP', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ZWD9FA90J', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-QSGMQX3RZ', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-83NZ9MEAC', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-Q9QQ4AL37', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-QBE9KD60Z', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-NRNUXUO44', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-8F0XKQ9P8', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-095JV1CJN', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-WRRSIRUTZ', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-DTUD7IDQI', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ASCEAI9LD', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-YOUM7BJZH', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-PEG2ZH9J3', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-WAUW31F78', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-GIBZ6U7AQ', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-63TD9LFBG', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-0MH1N9MJB', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-YFP9RNQIK', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-SW4N5162D', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-53MWFB2MP', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-QWLUKBMIN', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-JHS6QIX9G', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-6R04HZ5UD', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-OUJLT31VN', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-6BMKW933S', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-R4GG84E4Q', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-00XAP630X', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-AK97MC2A0', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-NBS2GKGO5', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-NVFEWK4S5', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-MTV3WSYS1', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-JS8RM4JGW', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-6NL4QR1XN', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-4BUUQVSN6', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-56WEAAU3M', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-WCA56PFTF', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-TK1QAZP0B', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-SYZ5ADFXP', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-S3VLOUW6G', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-V2K1V1JWJ', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-MZ6VHV5PQ', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-DRZDQDPN3', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-231VVRYYA', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-06G3MC88G', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-WS52I2ZVD', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-3QTNQD55U', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-EX7ALECU3', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-DQN8TVQY6', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-FJT54OQFI', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-BLTYUF7QR', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-8DL2JXDSO', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-KBAOXIJ4Y', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ZYL28R5UW', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-NCRDA8LYB', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-BQYKXQLXU', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-PSHCS65MR', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-90Y1WFVYZ', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-4GG33NUPW', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-5Y0A79GED', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-RMZHTAD6J', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-XBSOJ5I36', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-AAJW53VNE', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-9NYSPSEL6', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-94WMY337S', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-35Y3BJQFW', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-R7WY3TMRC', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-RXAQVH0F3', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-EFQ2AVFSB', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-XPOUJSAVD', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-RSHPF5NTT', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-Z9402336V', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-OI36C5WOJ', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-XEOGV1LVS', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-QIQDXG9NC', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-9IY242GGT', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-PQTEUT52E', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-18NKI3WPS', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-34U3QAA49', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-S05TYKBBF', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-B4J8WCWDD', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-HR377WC28', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-PNRR7S1T2', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-UNR0N0KJ9', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-NARQQANBE', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-8PUL1MYUU', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-KJPCT1FP3', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-XGC80N0AM', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-8Y1JN8DH3', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-Y56M31T07', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-NHYHXQSV1', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-V30RZVG7L', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-CR6V2GSOU', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-VSAF5O0LJ', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-4F4BF2LFH', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ERSMQHXNX', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-Q0TFLZQWS', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ZXCS6SMHD', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-JHXYAUGRA', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-IT2XYWES2', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-22UCD94OG', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-VGDLQ3K35', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-O8AFL8RGX', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-9RBIZ8G9X', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-9JIU0SVBV', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-PWRBG0GWU', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ZME1MX12T', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-A7LWRKSJP', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-5XISX0HD4', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-5T6EXKD3Z', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-10ILV351B', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-FDULBZDIY', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-SFVR6I980', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-FKV8DCZGT', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ECRK3M3IZ', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-WMKSLOF39', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-HGE60O6AL', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-RGCWDJOT8', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-DESWK5KVN', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-RD593HJ92', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-XWNCAO39B', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-AQ4XGDLX8', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-2ZOVEA1PL', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-JF4FUX83X', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-CQ228GK3B', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-XCTMU9I7U', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-M3F37T22W', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ASZUXM9M9', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-CJVHX24WW', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-KZT4T898V', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-RI128R5TE', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-HCAG6X9MJ', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-XOQENWBP7', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-1LTQGM497', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ZLVZT4O1G', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-FTIXTXCIA', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-O2YE6QNHY', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-0JPDDBF47', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-H1FP1IFJI', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-FYBPBMY5B', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-F7BWDVC97', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-MLENB1LQ4', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-FT9YNU8UP', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-5W21Q2O5L', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-YM61QWPR3', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-XXFQJJHZM', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-H52YKCM9X', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-NT56ZNZ54', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-DRJY7BMB5', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-P6886RPXX', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-PBXW2EY5K', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-5VQCJTM36', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-NMM3GGM1J', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-1JQQ0CDSA', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-R89YI91K4', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-7L7L9MXOT', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-50Z7MXKZS', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-EGADRZTIB', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-1DR7H46H6', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-O28QZL994', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-EYTRWVERM', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-HAJZAUWJV', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-AGYO3AB89', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-V224329ZM', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-4YTMSFWYK', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-QP40RL1N1', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-CB1BVAMAH', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-9VGXUY02O', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-BCPVVKCZ3', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-VDC3CWZX7', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-7HWLI21FA', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-5QWIUJEFM', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-4C3PBMAIZ', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-3QC7DM7PH', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-A6R5G3FWV', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-3A6WDE12Z', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-0F2LZA9RU', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-XGBJXMXRX', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-5YOGR8PQ1', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-LPBFBUF3N', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-TUSPGR2AY', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-G05I8M2FQ', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-H5NDXJIAQ', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-X8FJL8WQZ', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-KIB2FQRUP', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-VNV0OYWR7', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-GKBPM3PB2', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-XVPI30KE7', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-Y3PRMJP51', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-KEPP5SBML', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-0PDUZ6QEQ', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-1GHWWFLLE', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-149S2TO8O', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-1ZB6FSIGE', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-VCRQTXDZL', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-645JVC3XL', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-2HJ00JBSR', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-7FZDG2W65', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-HD6ANE3LN', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-2HS1B1J8V', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-IHOHYMDF5', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ZYZMAFEKF', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-JO85WX5JE', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-RURJDCURW', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-FZC3BLXPJ', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-B0YR2LOZ1', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-6ZFP58ZRK', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-UMZDLHQ78', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-8A2IHDXY3', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-EDYEPWUMT', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-X3TM99R12', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-DLV0UTQ72', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-SFU0ZYIM0', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-YAJ6A66NH', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-X8CFU41AU', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-CJ3Z4WP32', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-UJBLRQKXA', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-T78R8EBGH', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-DDIH55GNZ', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-F1SSD4NWF', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-4BE55FKRD', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-BG2DFBL46', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-MKVMNR7W4', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-C3Z4JUGU5', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-NVP8EEEGQ', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-MQFWP2LU7', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-BH873JG6H', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-3D76651SM', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-KZ7V6KWMP', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-CD8NEJFJN', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-PWXE9L30H', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-1RT95F5LR', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-Q7CEEROE5', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-Q08APOAEG', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-NNKREGLXE', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-YQEG33MKX', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-VRD9G7H5K', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-68R86GQ1G', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-BXZUKQL2M', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-QM13UD73C', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-I7OOGJLNS', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-GXDBO1IQJ', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-JJZPRFMWN', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-DBTDFITGW', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-VYHL6ASIJ', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-F61NNU332', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-6Z9H72KMC', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-WVOONDMA9', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-CXTSTBXN3', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-CSMZLC921', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-3FTBSARZJ', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ESHGKBXLY', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-E0YLXW5H4', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-3QFI6UMWE', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-23VOWHO88', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-02Q9U5QCH', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-3POMNSMB0', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-NTT8OWUFQ', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-MT5XEHJWX', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ILDOY0PCQ', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-MN9HU4DGO', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-1YOPU7GLL', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ZC5BM5MYB', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-UD3FXK3I9', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-LMDJOV52Y', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-N45X16BSL', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-1CBY3Z7QC', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-F0D3AO9E6', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-YQA8GUOD1', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-2EE999233', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-D6GD5WT2Y', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-7DFBMLTMY', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-J6TJKC6VJ', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-2AWQ3ZRF4', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-4KOVIF5W3', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-3489SXI1U', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-F37VKUHVE', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-GHBMAVCE4', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-W64U46547', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-1GUJV1MGQ', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-M13IOZVI9', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-TX7EVZN1Z', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-2PTS2ML8J', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-VTG83RVX7', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-2IOE6BR66', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-I68XZMR23', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-Q940UN6MU', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-8Y9NFR0N0', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-MYP341DZ8', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-WJ68VGKAZ', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-819MSDR9H', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-27CGND4VG', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-YYDOD47BF', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-YU7F6J8G5', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-HMY16WTCA', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-FPWEBRLG2', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-FGE79X0DE', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-551LMZ84R', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-UWMBDCTX4', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-BNHQXW9HY', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-WB0P5LCN6', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-Z4P9E1SVG', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-UVW2G9IRN', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-OO93WVLB0', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-NTRIK82TG', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-8TXV42S74', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-Z74LKL50G', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-QQEXNIPTR', false, 1),
|
||||
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-WGK2VD34L', false, 1)
|
||||
ON CONFLICT DO NOTHING;
|
||||
@@ -1,81 +0,0 @@
|
||||
-- Migration: Auto-retry failed proxies after cooldown period
|
||||
-- Proxies that fail will be retried after a configurable interval
|
||||
|
||||
-- Add last_failed_at column to track when proxy last failed
|
||||
ALTER TABLE proxies ADD COLUMN IF NOT EXISTS last_failed_at TIMESTAMP;
|
||||
|
||||
-- Add retry settings
|
||||
INSERT INTO settings (key, value, description)
|
||||
VALUES
|
||||
('proxy_retry_interval_hours', '4', 'Hours to wait before retrying a failed proxy'),
|
||||
('proxy_max_failures_before_permanent', '10', 'Max failures before proxy is permanently disabled')
|
||||
ON CONFLICT (key) DO NOTHING;
|
||||
|
||||
-- Create function to get eligible proxies (active OR failed but past retry interval)
|
||||
CREATE OR REPLACE FUNCTION get_eligible_proxy_ids()
|
||||
RETURNS TABLE(proxy_id INT) AS $$
|
||||
DECLARE
|
||||
retry_hours INT;
|
||||
BEGIN
|
||||
-- Get retry interval from settings (default 4 hours)
|
||||
SELECT COALESCE(value::int, 4) INTO retry_hours
|
||||
FROM settings WHERE key = 'proxy_retry_interval_hours';
|
||||
|
||||
RETURN QUERY
|
||||
SELECT p.id
|
||||
FROM proxies p
|
||||
WHERE p.active = true
|
||||
OR (
|
||||
p.active = false
|
||||
AND p.last_failed_at IS NOT NULL
|
||||
AND p.last_failed_at < NOW() - (retry_hours || ' hours')::interval
|
||||
AND p.failure_count < 10 -- Don't retry if too many failures
|
||||
)
|
||||
ORDER BY
|
||||
p.active DESC, -- Prefer active proxies
|
||||
p.failure_count ASC, -- Then prefer proxies with fewer failures
|
||||
RANDOM();
|
||||
END;
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- Create scheduled job to periodically re-enable proxies past their retry window
|
||||
-- This runs every hour and marks proxies as active if they're past retry interval
|
||||
CREATE OR REPLACE FUNCTION auto_reenable_proxies()
|
||||
RETURNS INT AS $$
|
||||
DECLARE
|
||||
retry_hours INT;
|
||||
max_failures INT;
|
||||
reenabled_count INT;
|
||||
BEGIN
|
||||
-- Get settings
|
||||
SELECT COALESCE(value::int, 4) INTO retry_hours
|
||||
FROM settings WHERE key = 'proxy_retry_interval_hours';
|
||||
|
||||
SELECT COALESCE(value::int, 10) INTO max_failures
|
||||
FROM settings WHERE key = 'proxy_max_failures_before_permanent';
|
||||
|
||||
-- Re-enable proxies that have cooled down
|
||||
UPDATE proxies
|
||||
SET active = true,
|
||||
updated_at = NOW()
|
||||
WHERE active = false
|
||||
AND last_failed_at IS NOT NULL
|
||||
AND last_failed_at < NOW() - (retry_hours || ' hours')::interval
|
||||
AND failure_count < max_failures;
|
||||
|
||||
GET DIAGNOSTICS reenabled_count = ROW_COUNT;
|
||||
|
||||
IF reenabled_count > 0 THEN
|
||||
RAISE NOTICE 'Auto-reenabled % proxies after % hour cooldown', reenabled_count, retry_hours;
|
||||
END IF;
|
||||
|
||||
RETURN reenabled_count;
|
||||
END;
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- Add index for efficient querying
|
||||
CREATE INDEX IF NOT EXISTS idx_proxies_retry
|
||||
ON proxies(active, last_failed_at, failure_count);
|
||||
|
||||
COMMENT ON COLUMN proxies.last_failed_at IS 'Timestamp of last failure - used for auto-retry logic';
|
||||
COMMENT ON FUNCTION auto_reenable_proxies() IS 'Call periodically to re-enable failed proxies that have cooled down';
|
||||
@@ -1,20 +0,0 @@
|
||||
-- Migration: Add trigram indexes for fast ILIKE product searches
|
||||
-- Enables fast searches on name_raw, brand_name_raw, and description
|
||||
|
||||
-- Enable pg_trgm extension if not already enabled
|
||||
CREATE EXTENSION IF NOT EXISTS pg_trgm;
|
||||
|
||||
-- Create GIN trigram indexes for fast ILIKE searches
|
||||
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_store_products_name_trgm
|
||||
ON store_products USING gin (name_raw gin_trgm_ops);
|
||||
|
||||
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_store_products_brand_name_trgm
|
||||
ON store_products USING gin (brand_name_raw gin_trgm_ops);
|
||||
|
||||
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_store_products_description_trgm
|
||||
ON store_products USING gin (description gin_trgm_ops);
|
||||
|
||||
-- Add comment
|
||||
COMMENT ON INDEX idx_store_products_name_trgm IS 'Trigram index for fast ILIKE searches on product name';
|
||||
COMMENT ON INDEX idx_store_products_brand_name_trgm IS 'Trigram index for fast ILIKE searches on brand name';
|
||||
COMMENT ON INDEX idx_store_products_description_trgm IS 'Trigram index for fast ILIKE searches on description';
|
||||
@@ -1,11 +0,0 @@
|
||||
-- Migration: Add indexes for dashboard performance
|
||||
-- Speeds up the tasks listing query with ORDER BY and JOIN
|
||||
|
||||
-- Index for JOIN with worker_registry
|
||||
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_worker_tasks_worker_id
|
||||
ON worker_tasks(worker_id)
|
||||
WHERE worker_id IS NOT NULL;
|
||||
|
||||
-- Index for ORDER BY created_at DESC (dashboard listing)
|
||||
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_worker_tasks_created_at_desc
|
||||
ON worker_tasks(created_at DESC);
|
||||
@@ -1,13 +0,0 @@
|
||||
-- Migration: Add stage tracking columns to dispensaries table
|
||||
-- Required for stage checkpoint feature in task handlers
|
||||
|
||||
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS consecutive_successes INTEGER DEFAULT 0;
|
||||
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS consecutive_failures INTEGER DEFAULT 0;
|
||||
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS last_successful_crawl_at TIMESTAMPTZ;
|
||||
|
||||
-- Indexes for finding stores by status
|
||||
CREATE INDEX IF NOT EXISTS idx_dispensaries_consecutive_successes
|
||||
ON dispensaries(consecutive_successes) WHERE consecutive_successes > 0;
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_dispensaries_consecutive_failures
|
||||
ON dispensaries(consecutive_failures) WHERE consecutive_failures > 0;
|
||||
@@ -1,68 +0,0 @@
|
||||
-- Migration: 099_working_hours.sql
|
||||
-- Description: Working hours profiles for natural traffic pattern simulation
|
||||
-- Created: 2024-12-13
|
||||
|
||||
-- Working hours table: defines hourly activity weights to mimic natural traffic
|
||||
CREATE TABLE IF NOT EXISTS working_hours (
|
||||
id SERIAL PRIMARY KEY,
|
||||
name VARCHAR(50) UNIQUE NOT NULL,
|
||||
description TEXT,
|
||||
|
||||
-- Hour weights: {"0": 15, "1": 5, ..., "18": 100, ...}
|
||||
-- Value = percent chance to trigger activity that hour (0-100)
|
||||
hour_weights JSONB NOT NULL,
|
||||
|
||||
-- Day-of-week multipliers (0=Sunday, 6=Saturday)
|
||||
-- Optional adjustment for weekend vs weekday patterns
|
||||
dow_weights JSONB DEFAULT '{"0": 90, "1": 100, "2": 100, "3": 100, "4": 100, "5": 110, "6": 95}',
|
||||
|
||||
timezone VARCHAR(50) DEFAULT 'America/Phoenix',
|
||||
enabled BOOLEAN DEFAULT true,
|
||||
created_at TIMESTAMPTZ DEFAULT NOW(),
|
||||
updated_at TIMESTAMPTZ DEFAULT NOW()
|
||||
);
|
||||
|
||||
-- Seed: Natural traffic pattern based on internet usage research
|
||||
-- Optimized for cannabis dispensary browsing (lunch + after-work peaks)
|
||||
INSERT INTO working_hours (name, description, timezone, hour_weights) VALUES (
|
||||
'natural_traffic',
|
||||
'Mimics natural user browsing patterns - peaks at lunch and 5-7 PM',
|
||||
'America/Phoenix',
|
||||
'{
|
||||
"0": 15,
|
||||
"1": 5,
|
||||
"2": 5,
|
||||
"3": 5,
|
||||
"4": 5,
|
||||
"5": 10,
|
||||
"6": 20,
|
||||
"7": 30,
|
||||
"8": 35,
|
||||
"9": 45,
|
||||
"10": 50,
|
||||
"11": 60,
|
||||
"12": 75,
|
||||
"13": 65,
|
||||
"14": 60,
|
||||
"15": 70,
|
||||
"16": 80,
|
||||
"17": 95,
|
||||
"18": 100,
|
||||
"19": 100,
|
||||
"20": 90,
|
||||
"21": 70,
|
||||
"22": 45,
|
||||
"23": 25
|
||||
}'::jsonb
|
||||
) ON CONFLICT (name) DO UPDATE SET
|
||||
hour_weights = EXCLUDED.hour_weights,
|
||||
description = EXCLUDED.description,
|
||||
updated_at = NOW();
|
||||
|
||||
-- Index for quick lookups
|
||||
CREATE INDEX IF NOT EXISTS idx_working_hours_name ON working_hours(name);
|
||||
CREATE INDEX IF NOT EXISTS idx_working_hours_enabled ON working_hours(enabled);
|
||||
|
||||
COMMENT ON TABLE working_hours IS 'Activity profiles for natural traffic simulation. Hour weights are percent chance (0-100) to trigger activity.';
|
||||
COMMENT ON COLUMN working_hours.hour_weights IS 'JSON object mapping hour (0-23) to percent chance (0-100). 100 = always run, 0 = never run.';
|
||||
COMMENT ON COLUMN working_hours.dow_weights IS 'Optional day-of-week multipliers. 0=Sunday. Applied as (hour_weight * dow_weight / 100).';
|
||||
@@ -1,19 +0,0 @@
|
||||
-- Migration: 100_worker_timezone.sql
|
||||
-- Description: Add timezone column to worker_registry for working hours support
|
||||
-- Created: 2024-12-13
|
||||
|
||||
-- Add timezone column to worker_registry
|
||||
-- Populated from preflight IP geolocation (e.g., 'America/New_York')
|
||||
ALTER TABLE worker_registry
|
||||
ADD COLUMN IF NOT EXISTS timezone VARCHAR(50);
|
||||
|
||||
-- Add working_hours_id to link worker to a specific working hours profile
|
||||
-- NULL means use default 'natural_traffic' profile
|
||||
ALTER TABLE worker_registry
|
||||
ADD COLUMN IF NOT EXISTS working_hours_id INTEGER REFERENCES working_hours(id);
|
||||
|
||||
-- Index for workers by timezone (useful for capacity planning)
|
||||
CREATE INDEX IF NOT EXISTS idx_worker_registry_timezone ON worker_registry(timezone);
|
||||
|
||||
COMMENT ON COLUMN worker_registry.timezone IS 'IANA timezone from preflight IP geolocation (e.g., America/New_York)';
|
||||
COMMENT ON COLUMN worker_registry.working_hours_id IS 'Reference to working_hours profile. NULL uses default natural_traffic.';
|
||||
@@ -1,78 +0,0 @@
|
||||
-- Migration: 101_worker_preflight_timezone.sql
|
||||
-- Description: Update update_worker_preflight to extract timezone from fingerprint
|
||||
-- Created: 2024-12-13
|
||||
|
||||
CREATE OR REPLACE FUNCTION public.update_worker_preflight(
|
||||
p_worker_id character varying,
|
||||
p_transport character varying,
|
||||
p_status character varying,
|
||||
p_ip character varying DEFAULT NULL,
|
||||
p_response_ms integer DEFAULT NULL,
|
||||
p_error text DEFAULT NULL,
|
||||
p_fingerprint jsonb DEFAULT NULL
|
||||
)
|
||||
RETURNS void
|
||||
LANGUAGE plpgsql
|
||||
AS $function$
|
||||
DECLARE
|
||||
v_curl_status VARCHAR(20);
|
||||
v_http_status VARCHAR(20);
|
||||
v_overall_status VARCHAR(20);
|
||||
v_timezone VARCHAR(50);
|
||||
BEGIN
|
||||
IF p_transport = 'curl' THEN
|
||||
UPDATE worker_registry
|
||||
SET
|
||||
preflight_curl_status = p_status,
|
||||
preflight_curl_at = NOW(),
|
||||
preflight_curl_ms = p_response_ms,
|
||||
preflight_curl_error = p_error,
|
||||
curl_ip = p_ip,
|
||||
updated_at = NOW()
|
||||
WHERE worker_id = p_worker_id;
|
||||
|
||||
ELSIF p_transport = 'http' THEN
|
||||
-- Extract timezone from fingerprint JSON if present
|
||||
v_timezone := p_fingerprint->>'detectedTimezone';
|
||||
|
||||
UPDATE worker_registry
|
||||
SET
|
||||
preflight_http_status = p_status,
|
||||
preflight_http_at = NOW(),
|
||||
preflight_http_ms = p_response_ms,
|
||||
preflight_http_error = p_error,
|
||||
http_ip = p_ip,
|
||||
fingerprint_data = COALESCE(p_fingerprint, fingerprint_data),
|
||||
-- Save extracted timezone
|
||||
timezone = COALESCE(v_timezone, timezone),
|
||||
updated_at = NOW()
|
||||
WHERE worker_id = p_worker_id;
|
||||
END IF;
|
||||
|
||||
-- Update overall preflight status
|
||||
SELECT preflight_curl_status, preflight_http_status
|
||||
INTO v_curl_status, v_http_status
|
||||
FROM worker_registry
|
||||
WHERE worker_id = p_worker_id;
|
||||
|
||||
-- Compute overall status
|
||||
IF v_curl_status = 'passed' AND v_http_status = 'passed' THEN
|
||||
v_overall_status := 'passed';
|
||||
ELSIF v_curl_status = 'passed' OR v_http_status = 'passed' THEN
|
||||
v_overall_status := 'partial';
|
||||
ELSIF v_curl_status = 'failed' OR v_http_status = 'failed' THEN
|
||||
v_overall_status := 'failed';
|
||||
ELSE
|
||||
v_overall_status := 'pending';
|
||||
END IF;
|
||||
|
||||
UPDATE worker_registry
|
||||
SET
|
||||
preflight_status = v_overall_status,
|
||||
preflight_at = NOW()
|
||||
WHERE worker_id = p_worker_id;
|
||||
END;
|
||||
$function$;
|
||||
|
||||
COMMENT ON FUNCTION update_worker_preflight(varchar, varchar, varchar, varchar, integer, text, jsonb)
|
||||
IS 'Updates worker preflight status and extracts timezone from fingerprint for working hours';
|
||||
@@ -1,114 +0,0 @@
|
||||
-- Migration: 102_check_working_hours.sql
|
||||
-- Description: Function to check if worker should be available based on working hours
|
||||
-- Created: 2024-12-13
|
||||
|
||||
-- Function to check if a worker should be available for work
|
||||
-- Returns TRUE if worker passes the probability check for current hour
|
||||
-- Returns FALSE if worker should sleep/skip this cycle
|
||||
CREATE OR REPLACE FUNCTION check_working_hours(
|
||||
p_worker_id VARCHAR,
|
||||
p_profile_name VARCHAR DEFAULT 'natural_traffic'
|
||||
)
|
||||
RETURNS TABLE (
|
||||
is_available BOOLEAN,
|
||||
current_hour INTEGER,
|
||||
hour_weight INTEGER,
|
||||
worker_timezone VARCHAR,
|
||||
roll INTEGER,
|
||||
reason TEXT
|
||||
)
|
||||
LANGUAGE plpgsql
|
||||
AS $function$
|
||||
DECLARE
|
||||
v_timezone VARCHAR(50);
|
||||
v_hour INTEGER;
|
||||
v_weight INTEGER;
|
||||
v_dow INTEGER;
|
||||
v_dow_weight INTEGER;
|
||||
v_final_weight INTEGER;
|
||||
v_roll INTEGER;
|
||||
v_hour_weights JSONB;
|
||||
v_dow_weights JSONB;
|
||||
v_profile_enabled BOOLEAN;
|
||||
BEGIN
|
||||
-- Get worker's timezone (from preflight)
|
||||
SELECT wr.timezone INTO v_timezone
|
||||
FROM worker_registry wr
|
||||
WHERE wr.worker_id = p_worker_id;
|
||||
|
||||
-- Default to America/Phoenix if no timezone set
|
||||
v_timezone := COALESCE(v_timezone, 'America/Phoenix');
|
||||
|
||||
-- Get current hour in worker's timezone
|
||||
v_hour := EXTRACT(HOUR FROM NOW() AT TIME ZONE v_timezone)::INTEGER;
|
||||
|
||||
-- Get day of week (0=Sunday)
|
||||
v_dow := EXTRACT(DOW FROM NOW() AT TIME ZONE v_timezone)::INTEGER;
|
||||
|
||||
-- Get working hours profile
|
||||
SELECT wh.hour_weights, wh.dow_weights, wh.enabled
|
||||
INTO v_hour_weights, v_dow_weights, v_profile_enabled
|
||||
FROM working_hours wh
|
||||
WHERE wh.name = p_profile_name AND wh.enabled = true;
|
||||
|
||||
-- If profile not found or disabled, always available
|
||||
IF v_hour_weights IS NULL THEN
|
||||
RETURN QUERY SELECT
|
||||
TRUE::BOOLEAN,
|
||||
v_hour,
|
||||
100::INTEGER,
|
||||
v_timezone,
|
||||
0::INTEGER,
|
||||
'Profile not found or disabled - defaulting to available'::TEXT;
|
||||
RETURN;
|
||||
END IF;
|
||||
|
||||
-- Get hour weight (default to 50 if hour not specified)
|
||||
v_weight := COALESCE((v_hour_weights->>v_hour::TEXT)::INTEGER, 50);
|
||||
|
||||
-- Get day-of-week weight (default to 100)
|
||||
v_dow_weight := COALESCE((v_dow_weights->>v_dow::TEXT)::INTEGER, 100);
|
||||
|
||||
-- Calculate final weight (hour_weight * dow_weight / 100)
|
||||
v_final_weight := (v_weight * v_dow_weight / 100);
|
||||
|
||||
-- Roll the dice (0-99)
|
||||
v_roll := floor(random() * 100)::INTEGER;
|
||||
|
||||
-- Return result
|
||||
RETURN QUERY SELECT
|
||||
(v_roll < v_final_weight)::BOOLEAN AS is_available,
|
||||
v_hour AS current_hour,
|
||||
v_final_weight AS hour_weight,
|
||||
v_timezone AS worker_timezone,
|
||||
v_roll AS roll,
|
||||
CASE
|
||||
WHEN v_roll < v_final_weight THEN
|
||||
format('Available: rolled %s < %s%% threshold', v_roll, v_final_weight)
|
||||
ELSE
|
||||
format('Sleeping: rolled %s >= %s%% threshold', v_roll, v_final_weight)
|
||||
END AS reason;
|
||||
END;
|
||||
$function$;
|
||||
|
||||
-- Simplified version that just returns boolean
|
||||
CREATE OR REPLACE FUNCTION is_worker_available(
|
||||
p_worker_id VARCHAR,
|
||||
p_profile_name VARCHAR DEFAULT 'natural_traffic'
|
||||
)
|
||||
RETURNS BOOLEAN
|
||||
LANGUAGE plpgsql
|
||||
AS $function$
|
||||
DECLARE
|
||||
v_result BOOLEAN;
|
||||
BEGIN
|
||||
SELECT is_available INTO v_result
|
||||
FROM check_working_hours(p_worker_id, p_profile_name);
|
||||
RETURN COALESCE(v_result, TRUE);
|
||||
END;
|
||||
$function$;
|
||||
|
||||
COMMENT ON FUNCTION check_working_hours(VARCHAR, VARCHAR) IS
|
||||
'Check if worker should be available based on working hours profile. Returns detailed info.';
|
||||
COMMENT ON FUNCTION is_worker_available(VARCHAR, VARCHAR) IS
|
||||
'Simple boolean check if worker passes working hours probability roll.';
|
||||
@@ -1,12 +0,0 @@
|
||||
-- Migration: 103_schedule_dispensary_id.sql
|
||||
-- Description: Add dispensary_id to task_schedules for per-store schedules
|
||||
-- Created: 2025-12-13
|
||||
|
||||
-- Add dispensary_id column for single-store schedules
|
||||
ALTER TABLE task_schedules
|
||||
ADD COLUMN IF NOT EXISTS dispensary_id INTEGER REFERENCES dispensaries(id);
|
||||
|
||||
-- Index for quick lookups
|
||||
CREATE INDEX IF NOT EXISTS idx_task_schedules_dispensary_id ON task_schedules(dispensary_id);
|
||||
|
||||
COMMENT ON COLUMN task_schedules.dispensary_id IS 'For single-store schedules. If set, only this store is refreshed. If NULL, uses state_code for all stores in state.';
|
||||
@@ -1,25 +0,0 @@
|
||||
-- Migration 104: Add source tracking to worker_tasks
|
||||
-- Purpose: Track WHERE tasks are created from (schedule vs API endpoint)
|
||||
--
|
||||
-- All automated task creation should be visible in task_schedules.
|
||||
-- This column helps identify "phantom" tasks created outside the schedule system.
|
||||
|
||||
-- Add source column to worker_tasks
|
||||
ALTER TABLE worker_tasks
|
||||
ADD COLUMN IF NOT EXISTS source VARCHAR(100);
|
||||
|
||||
-- Add source_id column (references schedule_id if from a schedule)
|
||||
ALTER TABLE worker_tasks
|
||||
ADD COLUMN IF NOT EXISTS source_schedule_id INTEGER REFERENCES task_schedules(id);
|
||||
|
||||
-- Add request metadata (IP, user agent) for debugging
|
||||
ALTER TABLE worker_tasks
|
||||
ADD COLUMN IF NOT EXISTS source_metadata JSONB;
|
||||
|
||||
-- Create index for querying by source
|
||||
CREATE INDEX IF NOT EXISTS idx_worker_tasks_source ON worker_tasks(source);
|
||||
|
||||
-- Comment explaining source values
|
||||
COMMENT ON COLUMN worker_tasks.source IS 'Task creation source: schedule, api_run_now, api_crawl_state, api_batch_staggered, api_batch_az_stores, task_chain, manual';
|
||||
COMMENT ON COLUMN worker_tasks.source_schedule_id IS 'ID of the schedule that created this task (if source=schedule or source=api_run_now)';
|
||||
COMMENT ON COLUMN worker_tasks.source_metadata IS 'Request metadata: {ip, user_agent, endpoint, timestamp}';
|
||||
@@ -1,25 +0,0 @@
|
||||
-- Migration 105: Add indexes for dashboard performance
|
||||
-- Purpose: Speed up the /dashboard and /national/summary endpoints
|
||||
--
|
||||
-- These queries were identified as slow:
|
||||
-- 1. COUNT(*) FROM store_product_snapshots WHERE captured_at >= NOW() - INTERVAL '24 hours'
|
||||
-- 2. National summary aggregate queries
|
||||
|
||||
-- Index for snapshot counts by time (used in dashboard)
|
||||
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_store_product_snapshots_captured_at
|
||||
ON store_product_snapshots(captured_at DESC);
|
||||
|
||||
-- Index for crawl traces by time and success (used in dashboard)
|
||||
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_crawl_traces_started_success
|
||||
ON crawl_orchestration_traces(started_at DESC, success);
|
||||
|
||||
-- Partial index for recent failed crawls (faster for dashboard alerts)
|
||||
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_crawl_traces_recent_failures
|
||||
ON crawl_orchestration_traces(started_at DESC)
|
||||
WHERE success = false;
|
||||
|
||||
-- Composite index for store_products aggregations by dispensary
|
||||
-- Helps with national summary state metrics query
|
||||
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_store_products_dispensary_brand
|
||||
ON store_products(dispensary_id, brand_name_raw)
|
||||
WHERE brand_name_raw IS NOT NULL;
|
||||
@@ -1,10 +0,0 @@
|
||||
-- Migration: 106_rename_store_discovery_schedule.sql
|
||||
-- Description: Rename store_discovery_dutchie to 'Store Discovery'
|
||||
-- Created: 2025-12-13
|
||||
|
||||
-- Update the schedule name for better display
|
||||
-- The platform='dutchie' field is preserved for badge display in UI
|
||||
UPDATE task_schedules
|
||||
SET name = 'Store Discovery',
|
||||
updated_at = NOW()
|
||||
WHERE name = 'store_discovery_dutchie';
|
||||
@@ -1,23 +0,0 @@
|
||||
-- Migration: 107_proxy_tracking.sql
|
||||
-- Description: Add proxy tracking columns to worker_tasks for geo-targeting visibility
|
||||
-- Created: 2025-12-13
|
||||
|
||||
-- Add proxy tracking columns to worker_tasks
|
||||
ALTER TABLE worker_tasks
|
||||
ADD COLUMN IF NOT EXISTS proxy_ip VARCHAR(45);
|
||||
|
||||
ALTER TABLE worker_tasks
|
||||
ADD COLUMN IF NOT EXISTS proxy_geo VARCHAR(100);
|
||||
|
||||
ALTER TABLE worker_tasks
|
||||
ADD COLUMN IF NOT EXISTS proxy_source VARCHAR(10);
|
||||
|
||||
-- Comments
|
||||
COMMENT ON COLUMN worker_tasks.proxy_ip IS 'IP address of proxy used for this task';
|
||||
COMMENT ON COLUMN worker_tasks.proxy_geo IS 'Geo target used (e.g., "arizona", "phoenix, arizona")';
|
||||
COMMENT ON COLUMN worker_tasks.proxy_source IS 'Source of proxy: "api" (Evomi dynamic) or "static" (fallback table)';
|
||||
|
||||
-- Index for proxy analysis
|
||||
CREATE INDEX IF NOT EXISTS idx_worker_tasks_proxy_ip
|
||||
ON worker_tasks(proxy_ip)
|
||||
WHERE proxy_ip IS NOT NULL;
|
||||
@@ -1,231 +0,0 @@
|
||||
-- Migration: 108_worker_geo_sessions.sql
|
||||
-- Description: Add geo session tracking to worker_registry for state-based task assignment
|
||||
-- Created: 2025-12-13
|
||||
|
||||
-- Worker geo session columns
|
||||
-- Worker qualifies with a geo (state/city), then only claims tasks matching that geo
|
||||
ALTER TABLE worker_registry
|
||||
ADD COLUMN IF NOT EXISTS current_state VARCHAR(2);
|
||||
|
||||
ALTER TABLE worker_registry
|
||||
ADD COLUMN IF NOT EXISTS current_city VARCHAR(100);
|
||||
|
||||
ALTER TABLE worker_registry
|
||||
ADD COLUMN IF NOT EXISTS geo_session_started_at TIMESTAMPTZ;
|
||||
|
||||
ALTER TABLE worker_registry
|
||||
ADD COLUMN IF NOT EXISTS session_task_count INT DEFAULT 0;
|
||||
|
||||
ALTER TABLE worker_registry
|
||||
ADD COLUMN IF NOT EXISTS session_max_tasks INT DEFAULT 7;
|
||||
|
||||
ALTER TABLE worker_registry
|
||||
ADD COLUMN IF NOT EXISTS proxy_geo VARCHAR(100);
|
||||
|
||||
-- Comments
|
||||
COMMENT ON COLUMN worker_registry.current_state IS 'Worker''s current geo assignment (US state code, e.g., AZ)';
|
||||
COMMENT ON COLUMN worker_registry.current_city IS 'Worker''s current city assignment (optional, e.g., phoenix)';
|
||||
COMMENT ON COLUMN worker_registry.geo_session_started_at IS 'When worker''s current geo session started';
|
||||
COMMENT ON COLUMN worker_registry.session_task_count IS 'Number of tasks completed in current geo session';
|
||||
COMMENT ON COLUMN worker_registry.session_max_tasks IS 'Max tasks per geo session before re-qualification (default 7)';
|
||||
COMMENT ON COLUMN worker_registry.proxy_geo IS 'Geo target string used for proxy (e.g., "arizona" or "phoenix, arizona")';
|
||||
|
||||
-- Index for finding workers by state
|
||||
CREATE INDEX IF NOT EXISTS idx_worker_registry_current_state
|
||||
ON worker_registry(current_state)
|
||||
WHERE current_state IS NOT NULL;
|
||||
|
||||
-- ============================================================
|
||||
-- UPDATED claim_task FUNCTION
|
||||
-- Now filters by worker's geo session state
|
||||
-- ============================================================
|
||||
CREATE OR REPLACE FUNCTION claim_task(
|
||||
p_role VARCHAR(50),
|
||||
p_worker_id VARCHAR(100),
|
||||
p_curl_passed BOOLEAN DEFAULT TRUE,
|
||||
p_http_passed BOOLEAN DEFAULT FALSE
|
||||
) RETURNS worker_tasks AS $$
|
||||
DECLARE
|
||||
claimed_task worker_tasks;
|
||||
worker_state VARCHAR(2);
|
||||
session_valid BOOLEAN;
|
||||
session_tasks INT;
|
||||
max_tasks INT;
|
||||
BEGIN
|
||||
-- Get worker's current geo session info
|
||||
SELECT
|
||||
current_state,
|
||||
session_task_count,
|
||||
session_max_tasks,
|
||||
(geo_session_started_at IS NOT NULL AND geo_session_started_at > NOW() - INTERVAL '60 minutes')
|
||||
INTO worker_state, session_tasks, max_tasks, session_valid
|
||||
FROM worker_registry
|
||||
WHERE worker_id = p_worker_id;
|
||||
|
||||
-- If no valid geo session, or session exhausted, worker can't claim tasks
|
||||
-- Worker must re-qualify first
|
||||
IF worker_state IS NULL OR NOT session_valid OR session_tasks >= COALESCE(max_tasks, 7) THEN
|
||||
RETURN NULL;
|
||||
END IF;
|
||||
|
||||
-- Claim task matching worker's state
|
||||
UPDATE worker_tasks
|
||||
SET
|
||||
status = 'claimed',
|
||||
worker_id = p_worker_id,
|
||||
claimed_at = NOW(),
|
||||
updated_at = NOW()
|
||||
WHERE id = (
|
||||
SELECT wt.id FROM worker_tasks wt
|
||||
JOIN dispensaries d ON wt.dispensary_id = d.id
|
||||
WHERE wt.role = p_role
|
||||
AND wt.status = 'pending'
|
||||
AND (wt.scheduled_for IS NULL OR wt.scheduled_for <= NOW())
|
||||
-- GEO FILTER: Task's dispensary must match worker's state
|
||||
AND d.state = worker_state
|
||||
-- Method compatibility: worker must have passed the required preflight
|
||||
AND (
|
||||
wt.method IS NULL -- No preference, any worker can claim
|
||||
OR (wt.method = 'curl' AND p_curl_passed = TRUE)
|
||||
OR (wt.method = 'http' AND p_http_passed = TRUE)
|
||||
)
|
||||
-- Exclude stores that already have an active task
|
||||
AND (wt.dispensary_id IS NULL OR wt.dispensary_id NOT IN (
|
||||
SELECT dispensary_id FROM worker_tasks
|
||||
WHERE status IN ('claimed', 'running')
|
||||
AND dispensary_id IS NOT NULL
|
||||
))
|
||||
ORDER BY wt.priority DESC, wt.created_at ASC
|
||||
LIMIT 1
|
||||
FOR UPDATE SKIP LOCKED
|
||||
)
|
||||
RETURNING * INTO claimed_task;
|
||||
|
||||
-- If task claimed, increment session task count
|
||||
-- Note: Use claimed_task.id IS NOT NULL (not claimed_task IS NOT NULL)
|
||||
-- PostgreSQL composite type NULL check quirk
|
||||
IF claimed_task.id IS NOT NULL THEN
|
||||
UPDATE worker_registry
|
||||
SET session_task_count = session_task_count + 1
|
||||
WHERE worker_id = p_worker_id;
|
||||
END IF;
|
||||
|
||||
RETURN claimed_task;
|
||||
END;
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- ============================================================
|
||||
-- FUNCTION: assign_worker_geo
|
||||
-- Assigns a geo session to a worker based on demand
|
||||
-- Returns the assigned state, or NULL if no tasks available
|
||||
-- ============================================================
|
||||
CREATE OR REPLACE FUNCTION assign_worker_geo(
|
||||
p_worker_id VARCHAR(100)
|
||||
) RETURNS VARCHAR(2) AS $$
|
||||
DECLARE
|
||||
assigned_state VARCHAR(2);
|
||||
BEGIN
|
||||
-- Find state with highest demand (pending tasks) and lowest coverage (workers)
|
||||
SELECT d.state INTO assigned_state
|
||||
FROM dispensaries d
|
||||
JOIN worker_tasks wt ON wt.dispensary_id = d.id
|
||||
LEFT JOIN worker_registry wr ON wr.current_state = d.state
|
||||
AND wr.status = 'active'
|
||||
AND wr.geo_session_started_at > NOW() - INTERVAL '60 minutes'
|
||||
WHERE wt.status = 'pending'
|
||||
AND d.platform_dispensary_id IS NOT NULL
|
||||
GROUP BY d.state
|
||||
ORDER BY
|
||||
COUNT(wt.id) DESC, -- Most pending tasks first
|
||||
COUNT(DISTINCT wr.worker_id) ASC -- Fewest workers second
|
||||
LIMIT 1;
|
||||
|
||||
-- If no pending tasks anywhere, return NULL
|
||||
IF assigned_state IS NULL THEN
|
||||
RETURN NULL;
|
||||
END IF;
|
||||
|
||||
-- Assign the state to this worker
|
||||
UPDATE worker_registry
|
||||
SET
|
||||
current_state = assigned_state,
|
||||
current_city = NULL, -- City assigned later if available
|
||||
geo_session_started_at = NOW(),
|
||||
session_task_count = 0
|
||||
WHERE worker_id = p_worker_id;
|
||||
|
||||
RETURN assigned_state;
|
||||
END;
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- ============================================================
|
||||
-- FUNCTION: check_worker_geo_session
|
||||
-- Returns info about worker's current geo session
|
||||
-- ============================================================
|
||||
CREATE OR REPLACE FUNCTION check_worker_geo_session(
|
||||
p_worker_id VARCHAR(100)
|
||||
) RETURNS TABLE (
|
||||
current_state VARCHAR(2),
|
||||
current_city VARCHAR(100),
|
||||
session_valid BOOLEAN,
|
||||
session_tasks_remaining INT,
|
||||
session_minutes_remaining INT
|
||||
) AS $$
|
||||
BEGIN
|
||||
RETURN QUERY
|
||||
SELECT
|
||||
wr.current_state,
|
||||
wr.current_city,
|
||||
(wr.geo_session_started_at IS NOT NULL AND wr.geo_session_started_at > NOW() - INTERVAL '60 minutes') as session_valid,
|
||||
GREATEST(0, wr.session_max_tasks - wr.session_task_count) as session_tasks_remaining,
|
||||
GREATEST(0, EXTRACT(EPOCH FROM (wr.geo_session_started_at + INTERVAL '60 minutes' - NOW())) / 60)::INT as session_minutes_remaining
|
||||
FROM worker_registry wr
|
||||
WHERE wr.worker_id = p_worker_id;
|
||||
END;
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- View for worker thinness per state
|
||||
-- Derives states from dispensaries table - no external states table dependency
|
||||
CREATE OR REPLACE VIEW worker_state_capacity AS
|
||||
WITH active_states AS (
|
||||
-- Get unique states from dispensaries with valid platform IDs
|
||||
SELECT DISTINCT state as code
|
||||
FROM dispensaries
|
||||
WHERE state IS NOT NULL
|
||||
AND platform_dispensary_id IS NOT NULL
|
||||
),
|
||||
pending_by_state AS (
|
||||
SELECT d.state, COUNT(*) as count
|
||||
FROM worker_tasks t
|
||||
JOIN dispensaries d ON t.dispensary_id = d.id
|
||||
WHERE t.status = 'pending'
|
||||
AND d.state IS NOT NULL
|
||||
GROUP BY d.state
|
||||
),
|
||||
workers_by_state AS (
|
||||
SELECT
|
||||
current_state,
|
||||
COUNT(*) as count,
|
||||
SUM(GREATEST(0, session_max_tasks - session_task_count)) as remaining_capacity
|
||||
FROM worker_registry
|
||||
WHERE status IN ('active', 'idle') -- Include both active and idle workers
|
||||
AND preflight_http_status = 'passed'
|
||||
AND current_state IS NOT NULL
|
||||
AND geo_session_started_at > NOW() - INTERVAL '60 minutes'
|
||||
GROUP BY current_state
|
||||
)
|
||||
SELECT
|
||||
s.code as state,
|
||||
s.code as state_name, -- Use code as name since we don't have a states lookup table
|
||||
COALESCE(p.count, 0) as pending_tasks,
|
||||
COALESCE(w.count, 0) as workers_on_state,
|
||||
COALESCE(w.remaining_capacity, 0) as remaining_capacity,
|
||||
CASE
|
||||
WHEN COALESCE(w.remaining_capacity, 0) = 0 AND COALESCE(p.count, 0) > 0 THEN 'no_coverage'
|
||||
WHEN COALESCE(w.remaining_capacity, 0) < COALESCE(p.count, 0) THEN 'thin'
|
||||
ELSE 'ok'
|
||||
END as status
|
||||
FROM active_states s
|
||||
LEFT JOIN pending_by_state p ON p.state = s.code
|
||||
LEFT JOIN workers_by_state w ON w.current_state = s.code
|
||||
ORDER BY COALESCE(p.count, 0) DESC;
|
||||
@@ -1,354 +0,0 @@
|
||||
-- Migration: 109_worker_identity_pool.sql
|
||||
-- Description: Identity pool for diverse IP/fingerprint rotation
|
||||
-- Created: 2025-12-14
|
||||
--
|
||||
-- Workers claim identities (IP + fingerprint) from pool.
|
||||
-- Each identity used for 3-5 tasks, then cools down 2-3 hours.
|
||||
-- This creates natural browsing patterns - same person doesn't hit 20 stores.
|
||||
|
||||
-- ============================================================
|
||||
-- IDENTITY POOL TABLE
|
||||
-- ============================================================
|
||||
CREATE TABLE IF NOT EXISTS worker_identities (
|
||||
id SERIAL PRIMARY KEY,
|
||||
|
||||
-- Evomi session controls the IP
|
||||
session_id VARCHAR(100) UNIQUE NOT NULL,
|
||||
|
||||
-- Detected IP from this session
|
||||
ip_address INET,
|
||||
|
||||
-- Geo targeting
|
||||
state_code VARCHAR(2) NOT NULL,
|
||||
city VARCHAR(100), -- City-level targeting for diversity
|
||||
|
||||
-- Fingerprint data (UA, timezone, locale, device, etc.)
|
||||
fingerprint JSONB NOT NULL,
|
||||
|
||||
-- Timestamps
|
||||
created_at TIMESTAMPTZ DEFAULT NOW(),
|
||||
last_used_at TIMESTAMPTZ,
|
||||
cooldown_until TIMESTAMPTZ, -- Can't reuse until this time
|
||||
|
||||
-- Usage stats
|
||||
total_tasks_completed INT DEFAULT 0,
|
||||
total_sessions INT DEFAULT 1, -- How many times this identity has been used
|
||||
|
||||
-- Current state
|
||||
is_active BOOLEAN DEFAULT FALSE, -- Currently claimed by a worker
|
||||
active_worker_id VARCHAR(100), -- Which worker has it
|
||||
|
||||
-- Health tracking
|
||||
consecutive_failures INT DEFAULT 0,
|
||||
is_healthy BOOLEAN DEFAULT TRUE -- Set false if IP gets blocked
|
||||
);
|
||||
|
||||
-- Indexes for efficient lookups
|
||||
CREATE INDEX IF NOT EXISTS idx_worker_identities_state_city
|
||||
ON worker_identities(state_code, city);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_worker_identities_available
|
||||
ON worker_identities(state_code, is_active, cooldown_until)
|
||||
WHERE is_healthy = TRUE;
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_worker_identities_cooldown
|
||||
ON worker_identities(cooldown_until)
|
||||
WHERE is_healthy = TRUE AND is_active = FALSE;
|
||||
|
||||
-- ============================================================
|
||||
-- METRO AREA MAPPING
|
||||
-- For fallback when exact city not available
|
||||
-- ============================================================
|
||||
CREATE TABLE IF NOT EXISTS metro_areas (
|
||||
id SERIAL PRIMARY KEY,
|
||||
metro_name VARCHAR(100) NOT NULL,
|
||||
state_code VARCHAR(2) NOT NULL,
|
||||
city VARCHAR(100) NOT NULL,
|
||||
is_primary BOOLEAN DEFAULT FALSE, -- Primary city of the metro
|
||||
UNIQUE(state_code, city)
|
||||
);
|
||||
|
||||
-- Phoenix Metro Area
|
||||
INSERT INTO metro_areas (metro_name, state_code, city, is_primary) VALUES
|
||||
('Phoenix Metro', 'AZ', 'Phoenix', TRUE),
|
||||
('Phoenix Metro', 'AZ', 'Mesa', FALSE),
|
||||
('Phoenix Metro', 'AZ', 'Glendale', FALSE),
|
||||
('Phoenix Metro', 'AZ', 'Tempe', FALSE),
|
||||
('Phoenix Metro', 'AZ', 'Scottsdale', FALSE),
|
||||
('Phoenix Metro', 'AZ', 'Chandler', FALSE),
|
||||
('Phoenix Metro', 'AZ', 'Peoria', FALSE),
|
||||
('Phoenix Metro', 'AZ', 'El Mirage', FALSE),
|
||||
('Phoenix Metro', 'AZ', 'Tolleson', FALSE),
|
||||
('Phoenix Metro', 'AZ', 'Sun City', FALSE),
|
||||
('Phoenix Metro', 'AZ', 'Apache Junction', FALSE),
|
||||
('Phoenix Metro', 'AZ', 'Cave Creek', FALSE),
|
||||
('Phoenix Metro', 'AZ', 'Gilbert', FALSE),
|
||||
('Phoenix Metro', 'AZ', 'Surprise', FALSE),
|
||||
('Phoenix Metro', 'AZ', 'Avondale', FALSE),
|
||||
('Phoenix Metro', 'AZ', 'Goodyear', FALSE),
|
||||
('Phoenix Metro', 'AZ', 'Buckeye', FALSE),
|
||||
('Phoenix Metro', 'AZ', 'Queen Creek', FALSE)
|
||||
ON CONFLICT (state_code, city) DO NOTHING;
|
||||
|
||||
-- Tucson Metro Area
|
||||
INSERT INTO metro_areas (metro_name, state_code, city, is_primary) VALUES
|
||||
('Tucson Metro', 'AZ', 'Tucson', TRUE),
|
||||
('Tucson Metro', 'AZ', 'Oro Valley', FALSE),
|
||||
('Tucson Metro', 'AZ', 'Marana', FALSE),
|
||||
('Tucson Metro', 'AZ', 'Sahuarita', FALSE),
|
||||
('Tucson Metro', 'AZ', 'South Tucson', FALSE)
|
||||
ON CONFLICT (state_code, city) DO NOTHING;
|
||||
|
||||
-- Flagstaff Area
|
||||
INSERT INTO metro_areas (metro_name, state_code, city, is_primary) VALUES
|
||||
('Flagstaff Area', 'AZ', 'Flagstaff', TRUE),
|
||||
('Flagstaff Area', 'AZ', 'Sedona', FALSE)
|
||||
ON CONFLICT (state_code, city) DO NOTHING;
|
||||
|
||||
-- Prescott Area
|
||||
INSERT INTO metro_areas (metro_name, state_code, city, is_primary) VALUES
|
||||
('Prescott Area', 'AZ', 'Prescott', TRUE),
|
||||
('Prescott Area', 'AZ', 'Prescott Valley', FALSE)
|
||||
ON CONFLICT (state_code, city) DO NOTHING;
|
||||
|
||||
-- ============================================================
|
||||
-- FUNCTION: claim_identity
|
||||
-- Claims an available identity for a worker
|
||||
-- Tries: exact city -> metro area -> any in state -> create new
|
||||
-- ============================================================
|
||||
CREATE OR REPLACE FUNCTION claim_identity(
|
||||
p_worker_id VARCHAR(100),
|
||||
p_state_code VARCHAR(2),
|
||||
p_city VARCHAR(100) DEFAULT NULL
|
||||
) RETURNS worker_identities AS $$
|
||||
DECLARE
|
||||
claimed_identity worker_identities;
|
||||
metro_name_val VARCHAR(100);
|
||||
primary_city VARCHAR(100);
|
||||
BEGIN
|
||||
-- 1. Try exact city match (if city provided)
|
||||
IF p_city IS NOT NULL THEN
|
||||
UPDATE worker_identities
|
||||
SET is_active = TRUE,
|
||||
active_worker_id = p_worker_id,
|
||||
last_used_at = NOW()
|
||||
WHERE id = (
|
||||
SELECT id FROM worker_identities
|
||||
WHERE state_code = p_state_code
|
||||
AND city = p_city
|
||||
AND is_active = FALSE
|
||||
AND is_healthy = TRUE
|
||||
AND (cooldown_until IS NULL OR cooldown_until < NOW())
|
||||
ORDER BY last_used_at ASC NULLS FIRST
|
||||
LIMIT 1
|
||||
FOR UPDATE SKIP LOCKED
|
||||
)
|
||||
RETURNING * INTO claimed_identity;
|
||||
|
||||
IF claimed_identity.id IS NOT NULL THEN
|
||||
RETURN claimed_identity;
|
||||
END IF;
|
||||
END IF;
|
||||
|
||||
-- 2. Try metro area fallback
|
||||
IF p_city IS NOT NULL THEN
|
||||
-- Find the metro area for this city
|
||||
SELECT ma.metro_name INTO metro_name_val
|
||||
FROM metro_areas ma
|
||||
WHERE ma.state_code = p_state_code AND ma.city = p_city;
|
||||
|
||||
IF metro_name_val IS NOT NULL THEN
|
||||
-- Get primary city of metro
|
||||
SELECT ma.city INTO primary_city
|
||||
FROM metro_areas ma
|
||||
WHERE ma.metro_name = metro_name_val AND ma.is_primary = TRUE;
|
||||
|
||||
-- Try any city in same metro
|
||||
UPDATE worker_identities wi
|
||||
SET is_active = TRUE,
|
||||
active_worker_id = p_worker_id,
|
||||
last_used_at = NOW()
|
||||
WHERE wi.id = (
|
||||
SELECT wi2.id FROM worker_identities wi2
|
||||
JOIN metro_areas ma ON wi2.city = ma.city AND wi2.state_code = ma.state_code
|
||||
WHERE ma.metro_name = metro_name_val
|
||||
AND wi2.is_active = FALSE
|
||||
AND wi2.is_healthy = TRUE
|
||||
AND (wi2.cooldown_until IS NULL OR wi2.cooldown_until < NOW())
|
||||
ORDER BY wi2.last_used_at ASC NULLS FIRST
|
||||
LIMIT 1
|
||||
FOR UPDATE SKIP LOCKED
|
||||
)
|
||||
RETURNING * INTO claimed_identity;
|
||||
|
||||
IF claimed_identity.id IS NOT NULL THEN
|
||||
RETURN claimed_identity;
|
||||
END IF;
|
||||
END IF;
|
||||
END IF;
|
||||
|
||||
-- 3. Try any identity in state
|
||||
UPDATE worker_identities
|
||||
SET is_active = TRUE,
|
||||
active_worker_id = p_worker_id,
|
||||
last_used_at = NOW()
|
||||
WHERE id = (
|
||||
SELECT id FROM worker_identities
|
||||
WHERE state_code = p_state_code
|
||||
AND is_active = FALSE
|
||||
AND is_healthy = TRUE
|
||||
AND (cooldown_until IS NULL OR cooldown_until < NOW())
|
||||
ORDER BY last_used_at ASC NULLS FIRST
|
||||
LIMIT 1
|
||||
FOR UPDATE SKIP LOCKED
|
||||
)
|
||||
RETURNING * INTO claimed_identity;
|
||||
|
||||
-- Return whatever we got (NULL if nothing available - caller should create new)
|
||||
RETURN claimed_identity;
|
||||
END;
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- ============================================================
|
||||
-- FUNCTION: release_identity
|
||||
-- Releases an identity back to pool with cooldown
|
||||
-- ============================================================
|
||||
CREATE OR REPLACE FUNCTION release_identity(
|
||||
p_identity_id INT,
|
||||
p_tasks_completed INT DEFAULT 0,
|
||||
p_failed BOOLEAN DEFAULT FALSE
|
||||
) RETURNS VOID AS $$
|
||||
DECLARE
|
||||
cooldown_hours FLOAT;
|
||||
BEGIN
|
||||
-- Random cooldown between 2-3 hours for diversity
|
||||
cooldown_hours := 2 + random(); -- 2.0 to 3.0 hours
|
||||
|
||||
UPDATE worker_identities
|
||||
SET is_active = FALSE,
|
||||
active_worker_id = NULL,
|
||||
total_tasks_completed = total_tasks_completed + p_tasks_completed,
|
||||
total_sessions = total_sessions + 1,
|
||||
cooldown_until = NOW() + (cooldown_hours || ' hours')::INTERVAL,
|
||||
consecutive_failures = CASE WHEN p_failed THEN consecutive_failures + 1 ELSE 0 END,
|
||||
is_healthy = CASE WHEN consecutive_failures >= 3 THEN FALSE ELSE TRUE END
|
||||
WHERE id = p_identity_id;
|
||||
END;
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- ============================================================
|
||||
-- FUNCTION: get_pending_tasks_by_geo
|
||||
-- Gets pending tasks grouped by state/city for identity assignment
|
||||
-- ============================================================
|
||||
CREATE OR REPLACE FUNCTION get_pending_tasks_by_geo(
|
||||
p_limit INT DEFAULT 10
|
||||
) RETURNS TABLE (
|
||||
state_code VARCHAR(2),
|
||||
city VARCHAR(100),
|
||||
pending_count BIGINT,
|
||||
available_identities BIGINT
|
||||
) AS $$
|
||||
BEGIN
|
||||
RETURN QUERY
|
||||
SELECT
|
||||
d.state as state_code,
|
||||
d.city,
|
||||
COUNT(t.id) as pending_count,
|
||||
(
|
||||
SELECT COUNT(*) FROM worker_identities wi
|
||||
WHERE wi.state_code = d.state
|
||||
AND (wi.city = d.city OR wi.city IS NULL)
|
||||
AND wi.is_active = FALSE
|
||||
AND wi.is_healthy = TRUE
|
||||
AND (wi.cooldown_until IS NULL OR wi.cooldown_until < NOW())
|
||||
) as available_identities
|
||||
FROM worker_tasks t
|
||||
JOIN dispensaries d ON t.dispensary_id = d.id
|
||||
WHERE t.status = 'pending'
|
||||
AND d.state IS NOT NULL
|
||||
GROUP BY d.state, d.city
|
||||
ORDER BY COUNT(t.id) DESC
|
||||
LIMIT p_limit;
|
||||
END;
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- ============================================================
|
||||
-- FUNCTION: get_tasks_for_identity
|
||||
-- Gets tasks matching an identity's geo (same city or metro)
|
||||
-- ============================================================
|
||||
CREATE OR REPLACE FUNCTION get_tasks_for_identity(
|
||||
p_state_code VARCHAR(2),
|
||||
p_city VARCHAR(100),
|
||||
p_limit INT DEFAULT 5
|
||||
) RETURNS TABLE (
|
||||
task_id INT,
|
||||
dispensary_id INT,
|
||||
dispensary_name VARCHAR(255),
|
||||
dispensary_city VARCHAR(100),
|
||||
role VARCHAR(50)
|
||||
) AS $$
|
||||
DECLARE
|
||||
metro_name_val VARCHAR(100);
|
||||
BEGIN
|
||||
-- Find metro area for this city
|
||||
SELECT ma.metro_name INTO metro_name_val
|
||||
FROM metro_areas ma
|
||||
WHERE ma.state_code = p_state_code AND ma.city = p_city;
|
||||
|
||||
RETURN QUERY
|
||||
SELECT
|
||||
t.id as task_id,
|
||||
d.id as dispensary_id,
|
||||
d.name as dispensary_name,
|
||||
d.city as dispensary_city,
|
||||
t.role
|
||||
FROM worker_tasks t
|
||||
JOIN dispensaries d ON t.dispensary_id = d.id
|
||||
WHERE t.status = 'pending'
|
||||
AND d.state = p_state_code
|
||||
AND (
|
||||
-- Exact city match
|
||||
d.city = p_city
|
||||
-- Or same metro area
|
||||
OR (metro_name_val IS NOT NULL AND d.city IN (
|
||||
SELECT ma.city FROM metro_areas ma WHERE ma.metro_name = metro_name_val
|
||||
))
|
||||
-- Or any in state if no metro
|
||||
OR (metro_name_val IS NULL)
|
||||
)
|
||||
ORDER BY
|
||||
CASE WHEN d.city = p_city THEN 0 ELSE 1 END, -- Prefer exact city
|
||||
t.priority DESC,
|
||||
t.created_at ASC
|
||||
LIMIT p_limit;
|
||||
END;
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- ============================================================
|
||||
-- VIEW: identity_pool_status
|
||||
-- Overview of identity pool health and availability
|
||||
-- ============================================================
|
||||
CREATE OR REPLACE VIEW identity_pool_status AS
|
||||
SELECT
|
||||
state_code,
|
||||
city,
|
||||
COUNT(*) as total_identities,
|
||||
COUNT(*) FILTER (WHERE is_active) as active,
|
||||
COUNT(*) FILTER (WHERE NOT is_active AND is_healthy AND (cooldown_until IS NULL OR cooldown_until < NOW())) as available,
|
||||
COUNT(*) FILTER (WHERE NOT is_active AND cooldown_until > NOW()) as cooling_down,
|
||||
COUNT(*) FILTER (WHERE NOT is_healthy) as unhealthy,
|
||||
SUM(total_tasks_completed) as total_tasks,
|
||||
AVG(total_tasks_completed)::INT as avg_tasks_per_identity
|
||||
FROM worker_identities
|
||||
GROUP BY state_code, city
|
||||
ORDER BY state_code, city;
|
||||
|
||||
-- ============================================================
|
||||
-- Comments
|
||||
-- ============================================================
|
||||
COMMENT ON TABLE worker_identities IS 'Pool of IP/fingerprint identities for worker rotation';
|
||||
COMMENT ON TABLE metro_areas IS 'City groupings for geographic fallback matching';
|
||||
COMMENT ON FUNCTION claim_identity IS 'Claim an available identity: exact city -> metro -> state -> NULL (create new)';
|
||||
COMMENT ON FUNCTION release_identity IS 'Release identity with 2-3 hour random cooldown';
|
||||
COMMENT ON FUNCTION get_pending_tasks_by_geo IS 'Get pending task counts by state/city';
|
||||
COMMENT ON FUNCTION get_tasks_for_identity IS 'Get tasks matching identity geo (city or metro area)';
|
||||
@@ -1,92 +0,0 @@
|
||||
-- Migration: 110_trusted_origins.sql
|
||||
-- Description: Trusted origins for API access without token
|
||||
-- Created: 2024-12-14
|
||||
--
|
||||
-- Manages which domains, IPs, and patterns can access the API without a Bearer token.
|
||||
-- Used by auth middleware to grant 'internal' role to trusted requests.
|
||||
|
||||
-- ============================================================
|
||||
-- TRUSTED ORIGINS TABLE
|
||||
-- ============================================================
|
||||
CREATE TABLE IF NOT EXISTS trusted_origins (
|
||||
id SERIAL PRIMARY KEY,
|
||||
|
||||
-- Origin identification
|
||||
name VARCHAR(100) NOT NULL, -- Friendly name (e.g., "CannaIQ Production")
|
||||
origin_type VARCHAR(20) NOT NULL, -- 'domain', 'ip', or 'pattern'
|
||||
origin_value VARCHAR(255) NOT NULL, -- The actual value to match
|
||||
|
||||
-- Metadata
|
||||
description TEXT, -- Optional notes
|
||||
active BOOLEAN DEFAULT TRUE,
|
||||
|
||||
-- Tracking
|
||||
created_at TIMESTAMPTZ DEFAULT NOW(),
|
||||
updated_at TIMESTAMPTZ DEFAULT NOW(),
|
||||
created_by INTEGER REFERENCES users(id),
|
||||
|
||||
-- Constraints
|
||||
CONSTRAINT valid_origin_type CHECK (origin_type IN ('domain', 'ip', 'pattern')),
|
||||
UNIQUE(origin_type, origin_value)
|
||||
);
|
||||
|
||||
-- Index for active lookups (used by auth middleware)
|
||||
CREATE INDEX IF NOT EXISTS idx_trusted_origins_active
|
||||
ON trusted_origins(active) WHERE active = TRUE;
|
||||
|
||||
-- Updated at trigger
|
||||
CREATE OR REPLACE FUNCTION update_trusted_origins_updated_at()
|
||||
RETURNS TRIGGER AS $$
|
||||
BEGIN
|
||||
NEW.updated_at = NOW();
|
||||
RETURN NEW;
|
||||
END;
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
DROP TRIGGER IF EXISTS trusted_origins_updated_at ON trusted_origins;
|
||||
CREATE TRIGGER trusted_origins_updated_at
|
||||
BEFORE UPDATE ON trusted_origins
|
||||
FOR EACH ROW
|
||||
EXECUTE FUNCTION update_trusted_origins_updated_at();
|
||||
|
||||
-- ============================================================
|
||||
-- SEED DEFAULT TRUSTED ORIGINS
|
||||
-- These match the hardcoded fallbacks in middleware.ts
|
||||
-- ============================================================
|
||||
|
||||
-- Production domains
|
||||
INSERT INTO trusted_origins (name, origin_type, origin_value, description) VALUES
|
||||
('CannaIQ Production', 'domain', 'https://cannaiq.co', 'Main CannaIQ dashboard'),
|
||||
('CannaIQ Production (www)', 'domain', 'https://www.cannaiq.co', 'Main CannaIQ dashboard with www'),
|
||||
('FindADispo Production', 'domain', 'https://findadispo.com', 'Consumer dispensary finder'),
|
||||
('FindADispo Production (www)', 'domain', 'https://www.findadispo.com', 'Consumer dispensary finder with www'),
|
||||
('Findagram Production', 'domain', 'https://findagram.co', 'Instagram-style cannabis discovery'),
|
||||
('Findagram Production (www)', 'domain', 'https://www.findagram.co', 'Instagram-style cannabis discovery with www')
|
||||
ON CONFLICT (origin_type, origin_value) DO NOTHING;
|
||||
|
||||
-- Wildcard patterns
|
||||
INSERT INTO trusted_origins (name, origin_type, origin_value, description) VALUES
|
||||
('CannaBrands Subdomains', 'pattern', '^https://.*\\.cannabrands\\.app$', 'All *.cannabrands.app subdomains'),
|
||||
('CannaIQ Subdomains', 'pattern', '^https://.*\\.cannaiq\\.co$', 'All *.cannaiq.co subdomains')
|
||||
ON CONFLICT (origin_type, origin_value) DO NOTHING;
|
||||
|
||||
-- Local development
|
||||
INSERT INTO trusted_origins (name, origin_type, origin_value, description) VALUES
|
||||
('Local API', 'domain', 'http://localhost:3010', 'Local backend API'),
|
||||
('Local Admin', 'domain', 'http://localhost:8080', 'Local admin dashboard'),
|
||||
('Local Vite Dev', 'domain', 'http://localhost:5173', 'Vite dev server')
|
||||
ON CONFLICT (origin_type, origin_value) DO NOTHING;
|
||||
|
||||
-- Trusted IPs (localhost)
|
||||
INSERT INTO trusted_origins (name, origin_type, origin_value, description) VALUES
|
||||
('Localhost IPv4', 'ip', '127.0.0.1', 'Local machine'),
|
||||
('Localhost IPv6', 'ip', '::1', 'Local machine IPv6'),
|
||||
('Localhost IPv6 Mapped', 'ip', '::ffff:127.0.0.1', 'IPv6-mapped IPv4 localhost')
|
||||
ON CONFLICT (origin_type, origin_value) DO NOTHING;
|
||||
|
||||
-- ============================================================
|
||||
-- COMMENTS
|
||||
-- ============================================================
|
||||
COMMENT ON TABLE trusted_origins IS 'Domains, IPs, and patterns that can access API without token';
|
||||
COMMENT ON COLUMN trusted_origins.origin_type IS 'domain = exact URL match, ip = IP address, pattern = regex pattern';
|
||||
COMMENT ON COLUMN trusted_origins.origin_value IS 'For domain: full URL. For ip: IP address. For pattern: regex string';
|
||||
@@ -1,35 +0,0 @@
|
||||
-- Migration: 111_system_settings.sql
|
||||
-- Description: System settings table for runtime configuration
|
||||
-- Created: 2024-12-14
|
||||
|
||||
CREATE TABLE IF NOT EXISTS system_settings (
|
||||
key VARCHAR(100) PRIMARY KEY,
|
||||
value TEXT NOT NULL,
|
||||
description TEXT,
|
||||
updated_at TIMESTAMPTZ DEFAULT NOW(),
|
||||
updated_by INTEGER REFERENCES users(id)
|
||||
);
|
||||
|
||||
-- Task pool gate - controls whether workers can claim tasks
|
||||
INSERT INTO system_settings (key, value, description) VALUES
|
||||
('task_pool_open', 'true', 'When false, workers cannot claim new tasks from the pool')
|
||||
ON CONFLICT (key) DO NOTHING;
|
||||
|
||||
-- Updated at trigger
|
||||
CREATE OR REPLACE FUNCTION update_system_settings_updated_at()
|
||||
RETURNS TRIGGER AS $$
|
||||
BEGIN
|
||||
NEW.updated_at = NOW();
|
||||
RETURN NEW;
|
||||
END;
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
DROP TRIGGER IF EXISTS system_settings_updated_at ON system_settings;
|
||||
CREATE TRIGGER system_settings_updated_at
|
||||
BEFORE UPDATE ON system_settings
|
||||
FOR EACH ROW
|
||||
EXECUTE FUNCTION update_system_settings_updated_at();
|
||||
|
||||
COMMENT ON TABLE system_settings IS 'Runtime configuration settings';
|
||||
COMMENT ON COLUMN system_settings.key IS 'Setting name (e.g., task_pool_open)';
|
||||
COMMENT ON COLUMN system_settings.value IS 'Setting value as string';
|
||||
@@ -1,390 +0,0 @@
|
||||
-- Migration 112: Worker Session Pool
|
||||
-- Tracks IP/fingerprint sessions with exclusive locks and cooldowns
|
||||
-- Each worker claims up to 6 tasks, uses one IP/fingerprint for those tasks,
|
||||
-- then retires the session (8hr cooldown before IP can be reused)
|
||||
|
||||
-- Drop old identity pool tables if they exist (replacing with simpler session model)
|
||||
DROP TABLE IF EXISTS worker_identity_claims CASCADE;
|
||||
DROP TABLE IF EXISTS worker_identities CASCADE;
|
||||
|
||||
-- Worker sessions: tracks active and cooling down IP/fingerprint pairs
|
||||
CREATE TABLE IF NOT EXISTS worker_sessions (
|
||||
id SERIAL PRIMARY KEY,
|
||||
|
||||
-- IP and fingerprint for this session
|
||||
ip_address VARCHAR(45) NOT NULL,
|
||||
fingerprint_hash VARCHAR(64) NOT NULL,
|
||||
fingerprint_data JSONB,
|
||||
|
||||
-- Geo this session is locked to
|
||||
state_code VARCHAR(2) NOT NULL,
|
||||
city VARCHAR(100),
|
||||
|
||||
-- Ownership
|
||||
worker_id VARCHAR(255), -- NULL if in cooldown
|
||||
|
||||
-- Status: 'active' (locked to worker), 'cooldown' (8hr wait), 'available'
|
||||
status VARCHAR(20) NOT NULL DEFAULT 'available',
|
||||
|
||||
-- Task tracking
|
||||
tasks_claimed INTEGER NOT NULL DEFAULT 0,
|
||||
tasks_completed INTEGER NOT NULL DEFAULT 0,
|
||||
tasks_failed INTEGER NOT NULL DEFAULT 0,
|
||||
max_tasks INTEGER NOT NULL DEFAULT 6,
|
||||
|
||||
-- Timestamps
|
||||
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
||||
locked_at TIMESTAMPTZ, -- When worker locked this session
|
||||
retired_at TIMESTAMPTZ, -- When session was retired (cooldown starts)
|
||||
cooldown_until TIMESTAMPTZ, -- When session becomes available again
|
||||
|
||||
-- Constraints
|
||||
CONSTRAINT valid_status CHECK (status IN ('active', 'cooldown', 'available'))
|
||||
);
|
||||
|
||||
-- Indexes for fast lookups
|
||||
CREATE INDEX IF NOT EXISTS idx_worker_sessions_ip ON worker_sessions(ip_address);
|
||||
CREATE INDEX IF NOT EXISTS idx_worker_sessions_status ON worker_sessions(status);
|
||||
CREATE INDEX IF NOT EXISTS idx_worker_sessions_worker ON worker_sessions(worker_id) WHERE worker_id IS NOT NULL;
|
||||
CREATE INDEX IF NOT EXISTS idx_worker_sessions_geo ON worker_sessions(state_code, city);
|
||||
CREATE INDEX IF NOT EXISTS idx_worker_sessions_cooldown ON worker_sessions(cooldown_until) WHERE status = 'cooldown';
|
||||
|
||||
-- Unique constraint: only one active session per IP
|
||||
CREATE UNIQUE INDEX IF NOT EXISTS idx_worker_sessions_active_ip
|
||||
ON worker_sessions(ip_address)
|
||||
WHERE status = 'active';
|
||||
|
||||
-- Function: Check if IP is available (not active, not in cooldown)
|
||||
CREATE OR REPLACE FUNCTION is_ip_available(check_ip VARCHAR(45))
|
||||
RETURNS BOOLEAN AS $$
|
||||
BEGIN
|
||||
-- Check if any session has this IP and is either active or in cooldown
|
||||
RETURN NOT EXISTS (
|
||||
SELECT 1 FROM worker_sessions
|
||||
WHERE ip_address = check_ip
|
||||
AND (status = 'active' OR (status = 'cooldown' AND cooldown_until > NOW()))
|
||||
);
|
||||
END;
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- Function: Lock a session to a worker
|
||||
-- Returns the session if successful, NULL if IP not available
|
||||
CREATE OR REPLACE FUNCTION lock_worker_session(
|
||||
p_worker_id VARCHAR(255),
|
||||
p_ip_address VARCHAR(45),
|
||||
p_state_code VARCHAR(2),
|
||||
p_city VARCHAR(100) DEFAULT NULL,
|
||||
p_fingerprint_hash VARCHAR(64) DEFAULT NULL,
|
||||
p_fingerprint_data JSONB DEFAULT NULL
|
||||
) RETURNS worker_sessions AS $$
|
||||
DECLARE
|
||||
v_session worker_sessions;
|
||||
BEGIN
|
||||
-- First check if IP is available
|
||||
IF NOT is_ip_available(p_ip_address) THEN
|
||||
RETURN NULL;
|
||||
END IF;
|
||||
|
||||
-- Try to find an existing available session for this IP
|
||||
SELECT * INTO v_session
|
||||
FROM worker_sessions
|
||||
WHERE ip_address = p_ip_address
|
||||
AND status = 'available'
|
||||
FOR UPDATE SKIP LOCKED
|
||||
LIMIT 1;
|
||||
|
||||
IF v_session.id IS NOT NULL THEN
|
||||
-- Reuse existing session
|
||||
UPDATE worker_sessions SET
|
||||
worker_id = p_worker_id,
|
||||
status = 'active',
|
||||
state_code = p_state_code,
|
||||
city = p_city,
|
||||
fingerprint_hash = COALESCE(p_fingerprint_hash, fingerprint_hash),
|
||||
fingerprint_data = COALESCE(p_fingerprint_data, fingerprint_data),
|
||||
tasks_claimed = 0,
|
||||
tasks_completed = 0,
|
||||
tasks_failed = 0,
|
||||
locked_at = NOW(),
|
||||
retired_at = NULL,
|
||||
cooldown_until = NULL
|
||||
WHERE id = v_session.id
|
||||
RETURNING * INTO v_session;
|
||||
ELSE
|
||||
-- Create new session
|
||||
INSERT INTO worker_sessions (
|
||||
ip_address, fingerprint_hash, fingerprint_data,
|
||||
state_code, city, worker_id, status, locked_at
|
||||
) VALUES (
|
||||
p_ip_address, COALESCE(p_fingerprint_hash, md5(random()::text)),
|
||||
p_fingerprint_data, p_state_code, p_city, p_worker_id, 'active', NOW()
|
||||
)
|
||||
RETURNING * INTO v_session;
|
||||
END IF;
|
||||
|
||||
RETURN v_session;
|
||||
END;
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- Function: Retire a session (start 8hr cooldown)
|
||||
CREATE OR REPLACE FUNCTION retire_worker_session(p_worker_id VARCHAR(255))
|
||||
RETURNS BOOLEAN AS $$
|
||||
DECLARE
|
||||
v_updated INTEGER;
|
||||
BEGIN
|
||||
UPDATE worker_sessions SET
|
||||
status = 'cooldown',
|
||||
worker_id = NULL,
|
||||
retired_at = NOW(),
|
||||
cooldown_until = NOW() + INTERVAL '8 hours'
|
||||
WHERE worker_id = p_worker_id
|
||||
AND status = 'active';
|
||||
|
||||
GET DIAGNOSTICS v_updated = ROW_COUNT;
|
||||
RETURN v_updated > 0;
|
||||
END;
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- Function: Release expired cooldowns
|
||||
CREATE OR REPLACE FUNCTION release_expired_sessions()
|
||||
RETURNS INTEGER AS $$
|
||||
DECLARE
|
||||
v_released INTEGER;
|
||||
BEGIN
|
||||
UPDATE worker_sessions SET
|
||||
status = 'available'
|
||||
WHERE status = 'cooldown'
|
||||
AND cooldown_until <= NOW();
|
||||
|
||||
GET DIAGNOSTICS v_released = ROW_COUNT;
|
||||
RETURN v_released;
|
||||
END;
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- Function: Get session for worker
|
||||
CREATE OR REPLACE FUNCTION get_worker_session(p_worker_id VARCHAR(255))
|
||||
RETURNS worker_sessions AS $$
|
||||
SELECT * FROM worker_sessions
|
||||
WHERE worker_id = p_worker_id AND status = 'active'
|
||||
LIMIT 1;
|
||||
$$ LANGUAGE sql;
|
||||
|
||||
-- Function: Increment task counters
|
||||
CREATE OR REPLACE FUNCTION session_task_completed(p_worker_id VARCHAR(255))
|
||||
RETURNS BOOLEAN AS $$
|
||||
BEGIN
|
||||
UPDATE worker_sessions SET
|
||||
tasks_completed = tasks_completed + 1
|
||||
WHERE worker_id = p_worker_id AND status = 'active';
|
||||
RETURN FOUND;
|
||||
END;
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
CREATE OR REPLACE FUNCTION session_task_failed(p_worker_id VARCHAR(255))
|
||||
RETURNS BOOLEAN AS $$
|
||||
BEGIN
|
||||
UPDATE worker_sessions SET
|
||||
tasks_failed = tasks_failed + 1
|
||||
WHERE worker_id = p_worker_id AND status = 'active';
|
||||
RETURN FOUND;
|
||||
END;
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
CREATE OR REPLACE FUNCTION session_task_claimed(p_worker_id VARCHAR(255), p_count INTEGER DEFAULT 1)
|
||||
RETURNS BOOLEAN AS $$
|
||||
BEGIN
|
||||
UPDATE worker_sessions SET
|
||||
tasks_claimed = tasks_claimed + p_count
|
||||
WHERE worker_id = p_worker_id AND status = 'active';
|
||||
RETURN FOUND;
|
||||
END;
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- Scheduled job hint: Run release_expired_sessions() every 5 minutes
|
||||
COMMENT ON FUNCTION release_expired_sessions() IS
|
||||
'Run periodically to release sessions from cooldown. Suggest: every 5 minutes.';
|
||||
|
||||
-- =============================================================================
|
||||
-- ATOMIC TASK CLAIMING
|
||||
-- Worker claims up to 6 tasks for same geo in one transaction
|
||||
-- =============================================================================
|
||||
|
||||
-- Function: Claim up to N tasks for same geo
|
||||
-- Returns claimed tasks with dispensary geo info
|
||||
CREATE OR REPLACE FUNCTION claim_tasks_batch(
|
||||
p_worker_id VARCHAR(255),
|
||||
p_max_tasks INTEGER DEFAULT 6,
|
||||
p_role VARCHAR(50) DEFAULT NULL -- Optional role filter
|
||||
) RETURNS TABLE (
|
||||
task_id INTEGER,
|
||||
role VARCHAR(50),
|
||||
dispensary_id INTEGER,
|
||||
dispensary_name VARCHAR(255),
|
||||
city VARCHAR(100),
|
||||
state_code VARCHAR(2),
|
||||
platform VARCHAR(50),
|
||||
method VARCHAR(20)
|
||||
) AS $$
|
||||
DECLARE
|
||||
v_target_state VARCHAR(2);
|
||||
v_target_city VARCHAR(100);
|
||||
v_claimed_count INTEGER := 0;
|
||||
BEGIN
|
||||
-- First, find the geo with most pending tasks to target
|
||||
SELECT d.state, d.city INTO v_target_state, v_target_city
|
||||
FROM worker_tasks t
|
||||
JOIN dispensaries d ON t.dispensary_id = d.id
|
||||
WHERE t.status = 'pending'
|
||||
AND (p_role IS NULL OR t.role = p_role)
|
||||
GROUP BY d.state, d.city
|
||||
ORDER BY COUNT(*) DESC
|
||||
LIMIT 1;
|
||||
|
||||
-- No pending tasks
|
||||
IF v_target_state IS NULL THEN
|
||||
RETURN;
|
||||
END IF;
|
||||
|
||||
-- Claim up to p_max_tasks for this geo
|
||||
RETURN QUERY
|
||||
WITH claimed AS (
|
||||
UPDATE worker_tasks t SET
|
||||
status = 'claimed',
|
||||
worker_id = p_worker_id,
|
||||
claimed_at = NOW()
|
||||
FROM (
|
||||
SELECT t2.id
|
||||
FROM worker_tasks t2
|
||||
JOIN dispensaries d ON t2.dispensary_id = d.id
|
||||
WHERE t2.status = 'pending'
|
||||
AND d.state = v_target_state
|
||||
AND (v_target_city IS NULL OR d.city = v_target_city)
|
||||
AND (p_role IS NULL OR t2.role = p_role)
|
||||
ORDER BY t2.priority DESC, t2.created_at ASC
|
||||
FOR UPDATE SKIP LOCKED
|
||||
LIMIT p_max_tasks
|
||||
) sub
|
||||
WHERE t.id = sub.id
|
||||
RETURNING t.id, t.role, t.dispensary_id, t.method
|
||||
)
|
||||
SELECT
|
||||
c.id as task_id,
|
||||
c.role,
|
||||
c.dispensary_id,
|
||||
d.name as dispensary_name,
|
||||
d.city,
|
||||
d.state as state_code,
|
||||
d.platform,
|
||||
c.method
|
||||
FROM claimed c
|
||||
JOIN dispensaries d ON c.dispensary_id = d.id;
|
||||
END;
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- Function: Release claimed tasks back to pending (for failed worker or cleanup)
|
||||
CREATE OR REPLACE FUNCTION release_claimed_tasks(p_worker_id VARCHAR(255))
|
||||
RETURNS INTEGER AS $$
|
||||
DECLARE
|
||||
v_released INTEGER;
|
||||
BEGIN
|
||||
UPDATE worker_tasks SET
|
||||
status = 'pending',
|
||||
worker_id = NULL,
|
||||
claimed_at = NULL
|
||||
WHERE worker_id = p_worker_id
|
||||
AND status IN ('claimed', 'running');
|
||||
|
||||
GET DIAGNOSTICS v_released = ROW_COUNT;
|
||||
RETURN v_released;
|
||||
END;
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- Function: Mark task as running
|
||||
CREATE OR REPLACE FUNCTION start_task(p_task_id INTEGER, p_worker_id VARCHAR(255))
|
||||
RETURNS BOOLEAN AS $$
|
||||
BEGIN
|
||||
UPDATE worker_tasks SET
|
||||
status = 'running',
|
||||
started_at = NOW()
|
||||
WHERE id = p_task_id
|
||||
AND worker_id = p_worker_id
|
||||
AND status = 'claimed';
|
||||
RETURN FOUND;
|
||||
END;
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- Function: Mark task as completed (leaves pool)
|
||||
CREATE OR REPLACE FUNCTION complete_task(
|
||||
p_task_id INTEGER,
|
||||
p_worker_id VARCHAR(255),
|
||||
p_result JSONB DEFAULT NULL
|
||||
) RETURNS BOOLEAN AS $$
|
||||
BEGIN
|
||||
UPDATE worker_tasks SET
|
||||
status = 'completed',
|
||||
completed_at = NOW(),
|
||||
result = p_result
|
||||
WHERE id = p_task_id
|
||||
AND worker_id = p_worker_id
|
||||
AND status = 'running';
|
||||
RETURN FOUND;
|
||||
END;
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- Function: Mark task as failed (returns to pending for retry)
|
||||
CREATE OR REPLACE FUNCTION fail_task(
|
||||
p_task_id INTEGER,
|
||||
p_worker_id VARCHAR(255),
|
||||
p_error TEXT DEFAULT NULL,
|
||||
p_max_retries INTEGER DEFAULT 3
|
||||
) RETURNS BOOLEAN AS $$
|
||||
DECLARE
|
||||
v_retry_count INTEGER;
|
||||
BEGIN
|
||||
-- Get current retry count
|
||||
SELECT COALESCE(retry_count, 0) INTO v_retry_count
|
||||
FROM worker_tasks WHERE id = p_task_id;
|
||||
|
||||
IF v_retry_count >= p_max_retries THEN
|
||||
-- Max retries exceeded - mark as permanently failed
|
||||
UPDATE worker_tasks SET
|
||||
status = 'failed',
|
||||
completed_at = NOW(),
|
||||
error_message = p_error,
|
||||
retry_count = v_retry_count + 1
|
||||
WHERE id = p_task_id
|
||||
AND worker_id = p_worker_id;
|
||||
ELSE
|
||||
-- Return to pending for retry
|
||||
UPDATE worker_tasks SET
|
||||
status = 'pending',
|
||||
worker_id = NULL,
|
||||
claimed_at = NULL,
|
||||
started_at = NULL,
|
||||
error_message = p_error,
|
||||
retry_count = v_retry_count + 1
|
||||
WHERE id = p_task_id
|
||||
AND worker_id = p_worker_id;
|
||||
END IF;
|
||||
|
||||
RETURN FOUND;
|
||||
END;
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- Add retry_count column if not exists
|
||||
DO $$
|
||||
BEGIN
|
||||
IF NOT EXISTS (
|
||||
SELECT 1 FROM information_schema.columns
|
||||
WHERE table_name = 'worker_tasks' AND column_name = 'retry_count'
|
||||
) THEN
|
||||
ALTER TABLE worker_tasks ADD COLUMN retry_count INTEGER NOT NULL DEFAULT 0;
|
||||
END IF;
|
||||
|
||||
IF NOT EXISTS (
|
||||
SELECT 1 FROM information_schema.columns
|
||||
WHERE table_name = 'worker_tasks' AND column_name = 'claimed_at'
|
||||
) THEN
|
||||
ALTER TABLE worker_tasks ADD COLUMN claimed_at TIMESTAMPTZ;
|
||||
END IF;
|
||||
END $$;
|
||||
@@ -1,381 +0,0 @@
|
||||
-- Task Pools: Group tasks by geo area for worker assignment
|
||||
-- Workers claim a pool, get proxy for that geo, then pull tasks from pool
|
||||
|
||||
-- ============================================================================
|
||||
-- TASK POOLS TABLE
|
||||
-- ============================================================================
|
||||
-- Each pool represents a metro area (e.g., Phoenix AZ = 100mi radius)
|
||||
-- Dispensaries are assigned to pools based on location
|
||||
-- Workers claim a pool, not individual tasks
|
||||
|
||||
CREATE TABLE IF NOT EXISTS task_pools (
|
||||
id SERIAL PRIMARY KEY,
|
||||
name VARCHAR(100) NOT NULL UNIQUE, -- e.g., 'phoenix_az'
|
||||
display_name VARCHAR(100) NOT NULL, -- e.g., 'Phoenix, AZ'
|
||||
state_code VARCHAR(2) NOT NULL, -- e.g., 'AZ'
|
||||
city VARCHAR(100) NOT NULL, -- e.g., 'Phoenix'
|
||||
latitude DECIMAL(10, 6) NOT NULL, -- pool center lat
|
||||
longitude DECIMAL(10, 6) NOT NULL, -- pool center lng
|
||||
radius_miles INTEGER DEFAULT 100, -- pool radius (100mi default)
|
||||
timezone VARCHAR(50) NOT NULL, -- e.g., 'America/Phoenix'
|
||||
is_active BOOLEAN DEFAULT true,
|
||||
created_at TIMESTAMPTZ DEFAULT NOW(),
|
||||
updated_at TIMESTAMPTZ DEFAULT NOW()
|
||||
);
|
||||
|
||||
-- Index for active pools
|
||||
CREATE INDEX IF NOT EXISTS idx_task_pools_active ON task_pools(is_active) WHERE is_active = true;
|
||||
|
||||
-- ============================================================================
|
||||
-- LINK DISPENSARIES TO POOLS
|
||||
-- ============================================================================
|
||||
-- Add pool_id to dispensaries table
|
||||
|
||||
ALTER TABLE dispensaries
|
||||
ADD COLUMN IF NOT EXISTS pool_id INTEGER REFERENCES task_pools(id);
|
||||
|
||||
-- Index for pool membership
|
||||
CREATE INDEX IF NOT EXISTS idx_dispensaries_pool ON dispensaries(pool_id) WHERE pool_id IS NOT NULL;
|
||||
|
||||
-- ============================================================================
|
||||
-- WORKER POOL ASSIGNMENT
|
||||
-- ============================================================================
|
||||
-- Track which pool a worker is currently assigned to
|
||||
|
||||
ALTER TABLE worker_registry
|
||||
ADD COLUMN IF NOT EXISTS current_pool_id INTEGER REFERENCES task_pools(id),
|
||||
ADD COLUMN IF NOT EXISTS pool_claimed_at TIMESTAMPTZ,
|
||||
ADD COLUMN IF NOT EXISTS pool_stores_visited INTEGER DEFAULT 0,
|
||||
ADD COLUMN IF NOT EXISTS pool_max_stores INTEGER DEFAULT 6;
|
||||
|
||||
-- ============================================================================
|
||||
-- SEED INITIAL POOLS
|
||||
-- ============================================================================
|
||||
-- Major cannabis markets with approximate center coordinates
|
||||
|
||||
INSERT INTO task_pools (name, display_name, state_code, city, latitude, longitude, timezone, radius_miles) VALUES
|
||||
-- Arizona
|
||||
('phoenix_az', 'Phoenix, AZ', 'AZ', 'Phoenix', 33.4484, -112.0740, 'America/Phoenix', 100),
|
||||
('tucson_az', 'Tucson, AZ', 'AZ', 'Tucson', 32.2226, -110.9747, 'America/Phoenix', 75),
|
||||
|
||||
-- California
|
||||
('los_angeles_ca', 'Los Angeles, CA', 'CA', 'Los Angeles', 34.0522, -118.2437, 'America/Los_Angeles', 100),
|
||||
('san_francisco_ca', 'San Francisco, CA', 'CA', 'San Francisco', 37.7749, -122.4194, 'America/Los_Angeles', 75),
|
||||
('san_diego_ca', 'San Diego, CA', 'CA', 'San Diego', 32.7157, -117.1611, 'America/Los_Angeles', 75),
|
||||
('sacramento_ca', 'Sacramento, CA', 'CA', 'Sacramento', 38.5816, -121.4944, 'America/Los_Angeles', 75),
|
||||
|
||||
-- Colorado
|
||||
('denver_co', 'Denver, CO', 'CO', 'Denver', 39.7392, -104.9903, 'America/Denver', 100),
|
||||
|
||||
-- Illinois
|
||||
('chicago_il', 'Chicago, IL', 'IL', 'Chicago', 41.8781, -87.6298, 'America/Chicago', 100),
|
||||
|
||||
-- Massachusetts
|
||||
('boston_ma', 'Boston, MA', 'MA', 'Boston', 42.3601, -71.0589, 'America/New_York', 75),
|
||||
|
||||
-- Michigan
|
||||
('detroit_mi', 'Detroit, MI', 'MI', 'Detroit', 42.3314, -83.0458, 'America/Detroit', 100),
|
||||
|
||||
-- Nevada
|
||||
('las_vegas_nv', 'Las Vegas, NV', 'NV', 'Las Vegas', 36.1699, -115.1398, 'America/Los_Angeles', 75),
|
||||
('reno_nv', 'Reno, NV', 'NV', 'Reno', 39.5296, -119.8138, 'America/Los_Angeles', 50),
|
||||
|
||||
-- New Jersey
|
||||
('newark_nj', 'Newark, NJ', 'NJ', 'Newark', 40.7357, -74.1724, 'America/New_York', 75),
|
||||
|
||||
-- New York
|
||||
('new_york_ny', 'New York, NY', 'NY', 'New York', 40.7128, -74.0060, 'America/New_York', 75),
|
||||
|
||||
-- Oklahoma
|
||||
('oklahoma_city_ok', 'Oklahoma City, OK', 'OK', 'Oklahoma City', 35.4676, -97.5164, 'America/Chicago', 100),
|
||||
('tulsa_ok', 'Tulsa, OK', 'OK', 'Tulsa', 36.1540, -95.9928, 'America/Chicago', 75),
|
||||
|
||||
-- Oregon
|
||||
('portland_or', 'Portland, OR', 'OR', 'Portland', 45.5152, -122.6784, 'America/Los_Angeles', 75),
|
||||
|
||||
-- Washington
|
||||
('seattle_wa', 'Seattle, WA', 'WA', 'Seattle', 47.6062, -122.3321, 'America/Los_Angeles', 100)
|
||||
|
||||
ON CONFLICT (name) DO NOTHING;
|
||||
|
||||
-- ============================================================================
|
||||
-- FUNCTION: Assign dispensary to nearest pool
|
||||
-- ============================================================================
|
||||
CREATE OR REPLACE FUNCTION assign_dispensary_to_pool(disp_id INTEGER)
|
||||
RETURNS INTEGER AS $$
|
||||
DECLARE
|
||||
disp_lat DECIMAL(10,6);
|
||||
disp_lng DECIMAL(10,6);
|
||||
nearest_pool_id INTEGER;
|
||||
BEGIN
|
||||
-- Get dispensary coordinates
|
||||
SELECT latitude, longitude INTO disp_lat, disp_lng
|
||||
FROM dispensaries WHERE id = disp_id;
|
||||
|
||||
IF disp_lat IS NULL OR disp_lng IS NULL THEN
|
||||
RETURN NULL;
|
||||
END IF;
|
||||
|
||||
-- Find nearest active pool within radius
|
||||
-- Using Haversine approximation (accurate enough for 100mi)
|
||||
SELECT id INTO nearest_pool_id
|
||||
FROM task_pools
|
||||
WHERE is_active = true
|
||||
AND (
|
||||
3959 * acos(
|
||||
cos(radians(latitude)) * cos(radians(disp_lat)) *
|
||||
cos(radians(disp_lng) - radians(longitude)) +
|
||||
sin(radians(latitude)) * sin(radians(disp_lat))
|
||||
)
|
||||
) <= radius_miles
|
||||
ORDER BY (
|
||||
3959 * acos(
|
||||
cos(radians(latitude)) * cos(radians(disp_lat)) *
|
||||
cos(radians(disp_lng) - radians(longitude)) +
|
||||
sin(radians(latitude)) * sin(radians(disp_lat))
|
||||
)
|
||||
)
|
||||
LIMIT 1;
|
||||
|
||||
-- Update dispensary
|
||||
IF nearest_pool_id IS NOT NULL THEN
|
||||
UPDATE dispensaries SET pool_id = nearest_pool_id WHERE id = disp_id;
|
||||
END IF;
|
||||
|
||||
RETURN nearest_pool_id;
|
||||
END;
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- ============================================================================
|
||||
-- FUNCTION: Assign all dispensaries to pools (batch)
|
||||
-- ============================================================================
|
||||
CREATE OR REPLACE FUNCTION assign_all_dispensaries_to_pools()
|
||||
RETURNS TABLE(assigned INTEGER, unassigned INTEGER) AS $$
|
||||
DECLARE
|
||||
assigned_count INTEGER := 0;
|
||||
unassigned_count INTEGER := 0;
|
||||
disp RECORD;
|
||||
pool_id INTEGER;
|
||||
BEGIN
|
||||
FOR disp IN SELECT id FROM dispensaries WHERE pool_id IS NULL AND latitude IS NOT NULL LOOP
|
||||
pool_id := assign_dispensary_to_pool(disp.id);
|
||||
IF pool_id IS NOT NULL THEN
|
||||
assigned_count := assigned_count + 1;
|
||||
ELSE
|
||||
unassigned_count := unassigned_count + 1;
|
||||
END IF;
|
||||
END LOOP;
|
||||
|
||||
RETURN QUERY SELECT assigned_count, unassigned_count;
|
||||
END;
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- ============================================================================
|
||||
-- FUNCTION: Get pools with pending tasks
|
||||
-- ============================================================================
|
||||
CREATE OR REPLACE FUNCTION get_pools_with_pending_tasks()
|
||||
RETURNS TABLE(
|
||||
pool_id INTEGER,
|
||||
pool_name VARCHAR(100),
|
||||
display_name VARCHAR(100),
|
||||
state_code VARCHAR(2),
|
||||
city VARCHAR(100),
|
||||
timezone VARCHAR(50),
|
||||
pending_count BIGINT,
|
||||
store_count BIGINT
|
||||
) AS $$
|
||||
BEGIN
|
||||
RETURN QUERY
|
||||
SELECT
|
||||
tp.id as pool_id,
|
||||
tp.name as pool_name,
|
||||
tp.display_name,
|
||||
tp.state_code,
|
||||
tp.city,
|
||||
tp.timezone,
|
||||
COUNT(DISTINCT t.id) as pending_count,
|
||||
COUNT(DISTINCT d.id) as store_count
|
||||
FROM task_pools tp
|
||||
JOIN dispensaries d ON d.pool_id = tp.id
|
||||
JOIN tasks t ON t.dispensary_id = d.id AND t.status = 'pending'
|
||||
WHERE tp.is_active = true
|
||||
GROUP BY tp.id, tp.name, tp.display_name, tp.state_code, tp.city, tp.timezone
|
||||
HAVING COUNT(DISTINCT t.id) > 0
|
||||
ORDER BY COUNT(DISTINCT t.id) DESC;
|
||||
END;
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- ============================================================================
|
||||
-- FUNCTION: Worker claims a pool
|
||||
-- ============================================================================
|
||||
CREATE OR REPLACE FUNCTION worker_claim_pool(
|
||||
p_worker_id VARCHAR(100),
|
||||
p_pool_id INTEGER DEFAULT NULL
|
||||
)
|
||||
RETURNS TABLE(
|
||||
pool_id INTEGER,
|
||||
pool_name VARCHAR(100),
|
||||
display_name VARCHAR(100),
|
||||
state_code VARCHAR(2),
|
||||
city VARCHAR(100),
|
||||
latitude DECIMAL(10,6),
|
||||
longitude DECIMAL(10,6),
|
||||
timezone VARCHAR(50)
|
||||
) AS $$
|
||||
DECLARE
|
||||
claimed_pool_id INTEGER;
|
||||
BEGIN
|
||||
-- If no pool specified, pick the one with most pending tasks
|
||||
IF p_pool_id IS NULL THEN
|
||||
SELECT tp.id INTO claimed_pool_id
|
||||
FROM task_pools tp
|
||||
JOIN dispensaries d ON d.pool_id = tp.id
|
||||
JOIN tasks t ON t.dispensary_id = d.id AND t.status = 'pending'
|
||||
WHERE tp.is_active = true
|
||||
GROUP BY tp.id
|
||||
ORDER BY COUNT(DISTINCT t.id) DESC
|
||||
LIMIT 1;
|
||||
ELSE
|
||||
claimed_pool_id := p_pool_id;
|
||||
END IF;
|
||||
|
||||
IF claimed_pool_id IS NULL THEN
|
||||
RETURN;
|
||||
END IF;
|
||||
|
||||
-- Update worker registry with pool assignment
|
||||
UPDATE worker_registry
|
||||
SET
|
||||
current_pool_id = claimed_pool_id,
|
||||
pool_claimed_at = NOW(),
|
||||
pool_stores_visited = 0,
|
||||
pool_max_stores = 6,
|
||||
updated_at = NOW()
|
||||
WHERE worker_id = p_worker_id;
|
||||
|
||||
-- Return pool info
|
||||
RETURN QUERY
|
||||
SELECT
|
||||
tp.id,
|
||||
tp.name,
|
||||
tp.display_name,
|
||||
tp.state_code,
|
||||
tp.city,
|
||||
tp.latitude,
|
||||
tp.longitude,
|
||||
tp.timezone
|
||||
FROM task_pools tp
|
||||
WHERE tp.id = claimed_pool_id;
|
||||
END;
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- ============================================================================
|
||||
-- FUNCTION: Pull tasks from worker's pool (up to 6 stores)
|
||||
-- ============================================================================
|
||||
CREATE OR REPLACE FUNCTION pull_tasks_from_pool(
|
||||
p_worker_id VARCHAR(100),
|
||||
p_max_stores INTEGER DEFAULT 6
|
||||
)
|
||||
RETURNS TABLE(
|
||||
task_id INTEGER,
|
||||
dispensary_id INTEGER,
|
||||
dispensary_name VARCHAR(255),
|
||||
role VARCHAR(50),
|
||||
platform VARCHAR(50),
|
||||
method VARCHAR(20)
|
||||
) AS $$
|
||||
DECLARE
|
||||
worker_pool_id INTEGER;
|
||||
stores_visited INTEGER;
|
||||
max_stores INTEGER;
|
||||
stores_remaining INTEGER;
|
||||
BEGIN
|
||||
-- Get worker's current pool and store count
|
||||
SELECT current_pool_id, pool_stores_visited, pool_max_stores
|
||||
INTO worker_pool_id, stores_visited, max_stores
|
||||
FROM worker_registry
|
||||
WHERE worker_id = p_worker_id;
|
||||
|
||||
IF worker_pool_id IS NULL THEN
|
||||
RAISE EXCEPTION 'Worker % has no pool assigned', p_worker_id;
|
||||
END IF;
|
||||
|
||||
stores_remaining := max_stores - stores_visited;
|
||||
IF stores_remaining <= 0 THEN
|
||||
RETURN; -- Worker exhausted
|
||||
END IF;
|
||||
|
||||
-- Claim tasks from pool (one task per store, up to remaining capacity)
|
||||
RETURN QUERY
|
||||
WITH available_stores AS (
|
||||
SELECT DISTINCT ON (d.id)
|
||||
t.id as task_id,
|
||||
d.id as dispensary_id,
|
||||
d.name as dispensary_name,
|
||||
t.role,
|
||||
t.platform,
|
||||
t.method
|
||||
FROM tasks t
|
||||
JOIN dispensaries d ON d.id = t.dispensary_id
|
||||
WHERE d.pool_id = worker_pool_id
|
||||
AND t.status = 'pending'
|
||||
AND t.scheduled_for <= NOW()
|
||||
ORDER BY d.id, t.priority DESC, t.created_at ASC
|
||||
LIMIT stores_remaining
|
||||
),
|
||||
claimed AS (
|
||||
UPDATE tasks
|
||||
SET
|
||||
status = 'claimed',
|
||||
claimed_by = p_worker_id,
|
||||
claimed_at = NOW()
|
||||
WHERE id IN (SELECT task_id FROM available_stores)
|
||||
RETURNING id
|
||||
)
|
||||
SELECT
|
||||
av.task_id,
|
||||
av.dispensary_id,
|
||||
av.dispensary_name,
|
||||
av.role,
|
||||
av.platform,
|
||||
av.method
|
||||
FROM available_stores av
|
||||
WHERE av.task_id IN (SELECT id FROM claimed);
|
||||
|
||||
-- Update worker store count
|
||||
UPDATE worker_registry
|
||||
SET
|
||||
pool_stores_visited = pool_stores_visited + (
|
||||
SELECT COUNT(DISTINCT dispensary_id)
|
||||
FROM tasks
|
||||
WHERE claimed_by = p_worker_id AND status = 'claimed'
|
||||
),
|
||||
updated_at = NOW()
|
||||
WHERE worker_id = p_worker_id;
|
||||
END;
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- ============================================================================
|
||||
-- FUNCTION: Worker releases pool (exhausted or done)
|
||||
-- ============================================================================
|
||||
CREATE OR REPLACE FUNCTION worker_release_pool(p_worker_id VARCHAR(100))
|
||||
RETURNS BOOLEAN AS $$
|
||||
BEGIN
|
||||
UPDATE worker_registry
|
||||
SET
|
||||
current_pool_id = NULL,
|
||||
pool_claimed_at = NULL,
|
||||
pool_stores_visited = 0,
|
||||
current_state = NULL,
|
||||
current_city = NULL,
|
||||
updated_at = NOW()
|
||||
WHERE worker_id = p_worker_id;
|
||||
|
||||
RETURN true;
|
||||
END;
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- ============================================================================
|
||||
-- RUN: Assign existing dispensaries to pools
|
||||
-- ============================================================================
|
||||
SELECT * FROM assign_all_dispensaries_to_pools();
|
||||
@@ -1,10 +0,0 @@
|
||||
-- Migration 114: Add pool_id to task_schedules
|
||||
-- Allows schedules to target specific geo pools
|
||||
|
||||
ALTER TABLE task_schedules
|
||||
ADD COLUMN IF NOT EXISTS pool_id INTEGER REFERENCES task_pools(id);
|
||||
|
||||
-- Index for pool-based schedule queries
|
||||
CREATE INDEX IF NOT EXISTS idx_task_schedules_pool ON task_schedules(pool_id) WHERE pool_id IS NOT NULL;
|
||||
|
||||
COMMENT ON COLUMN task_schedules.pool_id IS 'Optional geo pool filter. NULL = all pools/dispensaries matching state_code';
|
||||
@@ -1,17 +0,0 @@
|
||||
-- Migration: Add proxy_ip tracking to worker_tasks
|
||||
-- Purpose: Prevent same IP from hitting multiple stores on same platform simultaneously
|
||||
--
|
||||
-- Anti-detection measure: Dutchie/Jane may flag if same IP makes requests
|
||||
-- for multiple different stores. This column lets us track and prevent that.
|
||||
|
||||
-- Add proxy_ip column to track which proxy IP is being used for each task
|
||||
ALTER TABLE worker_tasks ADD COLUMN IF NOT EXISTS proxy_ip VARCHAR(45);
|
||||
|
||||
-- Index for quick lookup of active tasks by proxy IP
|
||||
-- Used to check: "Is this IP already hitting another store?"
|
||||
CREATE INDEX IF NOT EXISTS idx_worker_tasks_proxy_ip_active
|
||||
ON worker_tasks (proxy_ip, platform)
|
||||
WHERE status IN ('claimed', 'running') AND proxy_ip IS NOT NULL;
|
||||
|
||||
-- Comment
|
||||
COMMENT ON COLUMN worker_tasks.proxy_ip IS 'Proxy IP assigned to this task. Used to prevent same IP hitting multiple stores on same platform.';
|
||||
@@ -1,16 +0,0 @@
|
||||
-- Migration: Add source tracking columns to worker_tasks
|
||||
-- Purpose: Track where tasks originated from (schedule, API, manual)
|
||||
|
||||
-- Add source tracking columns
|
||||
ALTER TABLE worker_tasks ADD COLUMN IF NOT EXISTS source VARCHAR(50);
|
||||
ALTER TABLE worker_tasks ADD COLUMN IF NOT EXISTS source_schedule_id INTEGER REFERENCES task_schedules(id);
|
||||
ALTER TABLE worker_tasks ADD COLUMN IF NOT EXISTS source_metadata JSONB;
|
||||
|
||||
-- Index for tracking tasks by schedule
|
||||
CREATE INDEX IF NOT EXISTS idx_worker_tasks_source_schedule
|
||||
ON worker_tasks (source_schedule_id) WHERE source_schedule_id IS NOT NULL;
|
||||
|
||||
-- Comments
|
||||
COMMENT ON COLUMN worker_tasks.source IS 'Origin of task: schedule, api, manual, chain';
|
||||
COMMENT ON COLUMN worker_tasks.source_schedule_id IS 'ID of schedule that created this task';
|
||||
COMMENT ON COLUMN worker_tasks.source_metadata IS 'Additional metadata about task origin';
|
||||
@@ -1,32 +0,0 @@
|
||||
-- Migration 117: Per-store crawl interval scheduling
|
||||
-- Adds columns for configurable per-store crawl intervals
|
||||
-- Part of Real-Time Inventory Tracking feature
|
||||
|
||||
-- Per-store crawl interval (NULL = use state schedule default 4h)
|
||||
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS crawl_interval_minutes INT DEFAULT NULL;
|
||||
|
||||
-- When this store should next be crawled (used by high-frequency scheduler)
|
||||
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS next_crawl_at TIMESTAMPTZ DEFAULT NULL;
|
||||
|
||||
-- Track last request time to enforce minimum spacing
|
||||
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS last_crawl_started_at TIMESTAMPTZ DEFAULT NULL;
|
||||
|
||||
-- Change tracking for optimization
|
||||
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS last_inventory_hash TEXT DEFAULT NULL;
|
||||
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS last_price_hash TEXT DEFAULT NULL;
|
||||
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS inventory_changes_24h INT DEFAULT 0;
|
||||
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS price_changes_24h INT DEFAULT 0;
|
||||
|
||||
-- Index for scheduler query: find stores due for high-frequency crawl
|
||||
CREATE INDEX IF NOT EXISTS idx_dispensaries_next_crawl
|
||||
ON dispensaries(next_crawl_at)
|
||||
WHERE crawl_interval_minutes IS NOT NULL AND crawl_enabled = TRUE;
|
||||
|
||||
-- Comment for documentation
|
||||
COMMENT ON COLUMN dispensaries.crawl_interval_minutes IS 'Custom crawl interval in minutes. NULL = use state schedule (4h default). Set to 15/30/60 for high-frequency tracking.';
|
||||
COMMENT ON COLUMN dispensaries.next_crawl_at IS 'When this store should next be crawled. Updated after each crawl with interval + jitter.';
|
||||
COMMENT ON COLUMN dispensaries.last_crawl_started_at IS 'When the last crawl task was created. Used to enforce minimum spacing.';
|
||||
COMMENT ON COLUMN dispensaries.last_inventory_hash IS 'Hash of inventory state from last crawl. Used to detect changes and skip unchanged payloads.';
|
||||
COMMENT ON COLUMN dispensaries.last_price_hash IS 'Hash of price state from last crawl. Used to detect price changes.';
|
||||
COMMENT ON COLUMN dispensaries.inventory_changes_24h IS 'Number of inventory changes detected in last 24h. Indicates store volatility.';
|
||||
COMMENT ON COLUMN dispensaries.price_changes_24h IS 'Number of price changes detected in last 24h.';
|
||||
@@ -1,48 +0,0 @@
|
||||
-- Migration 118: Inventory snapshots table
|
||||
-- Lightweight per-product tracking for sales velocity estimation
|
||||
-- Part of Real-Time Inventory Tracking feature
|
||||
|
||||
CREATE TABLE IF NOT EXISTS inventory_snapshots (
|
||||
id BIGSERIAL PRIMARY KEY,
|
||||
dispensary_id INT NOT NULL REFERENCES dispensaries(id) ON DELETE CASCADE,
|
||||
product_id TEXT NOT NULL, -- provider_product_id (normalized across platforms)
|
||||
captured_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
||||
|
||||
-- Platform (for debugging/filtering)
|
||||
platform TEXT NOT NULL, -- 'dutchie' | 'jane' | 'treez'
|
||||
|
||||
-- Inventory fields (normalized from all platforms)
|
||||
quantity_available INT, -- Dutchie: quantityAvailable, Jane: quantity, Treez: quantityAvailable
|
||||
is_below_threshold BOOLEAN, -- Dutchie: isBelowThreshold, Jane: computed, Treez: lowInventory
|
||||
status TEXT, -- Active/Inactive/available
|
||||
|
||||
-- Price fields (normalized)
|
||||
price_rec NUMERIC(10,2), -- recreational price
|
||||
price_med NUMERIC(10,2), -- medical price (if different)
|
||||
|
||||
-- Denormalized for fast queries
|
||||
brand_name TEXT,
|
||||
category TEXT,
|
||||
product_name TEXT
|
||||
);
|
||||
|
||||
-- Primary query: get snapshots for a store over time
|
||||
CREATE INDEX idx_inv_snap_store_time ON inventory_snapshots(dispensary_id, captured_at DESC);
|
||||
|
||||
-- Delta calculation: get consecutive snapshots for a product
|
||||
CREATE INDEX idx_inv_snap_product_time ON inventory_snapshots(dispensary_id, product_id, captured_at DESC);
|
||||
|
||||
-- Brand-level analytics
|
||||
CREATE INDEX idx_inv_snap_brand_time ON inventory_snapshots(brand_name, captured_at DESC) WHERE brand_name IS NOT NULL;
|
||||
|
||||
-- Platform filtering
|
||||
CREATE INDEX idx_inv_snap_platform ON inventory_snapshots(platform, captured_at DESC);
|
||||
|
||||
-- Retention cleanup (30 days) - simple index, cleanup job handles the WHERE
|
||||
CREATE INDEX IF NOT EXISTS idx_inv_snap_cleanup ON inventory_snapshots(captured_at);
|
||||
|
||||
-- Comments
|
||||
COMMENT ON TABLE inventory_snapshots IS 'Lightweight inventory snapshots for sales velocity tracking. Retained 30 days.';
|
||||
COMMENT ON COLUMN inventory_snapshots.product_id IS 'Provider product ID, normalized across platforms';
|
||||
COMMENT ON COLUMN inventory_snapshots.platform IS 'Menu platform: dutchie, jane, or treez';
|
||||
COMMENT ON COLUMN inventory_snapshots.quantity_available IS 'Current quantity in stock (Dutchie: quantityAvailable, Jane: quantity)';
|
||||
@@ -1,53 +0,0 @@
|
||||
-- Migration 119: Product visibility events table
|
||||
-- Tracks OOS, brand drops, and other notable events for alerts
|
||||
-- Part of Real-Time Inventory Tracking feature
|
||||
|
||||
CREATE TABLE IF NOT EXISTS product_visibility_events (
|
||||
id SERIAL PRIMARY KEY,
|
||||
dispensary_id INT NOT NULL REFERENCES dispensaries(id) ON DELETE CASCADE,
|
||||
|
||||
-- Product identification (null for brand-level events)
|
||||
product_id TEXT, -- provider_product_id
|
||||
product_name TEXT, -- For display in alerts
|
||||
|
||||
-- Brand (always populated)
|
||||
brand_name TEXT,
|
||||
|
||||
-- Event details
|
||||
event_type TEXT NOT NULL, -- 'oos', 'back_in_stock', 'brand_dropped', 'brand_added', 'price_change'
|
||||
detected_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
||||
|
||||
-- Context
|
||||
previous_quantity INT, -- For OOS events: what quantity was before
|
||||
previous_price NUMERIC(10,2), -- For price change events
|
||||
new_price NUMERIC(10,2), -- For price change events
|
||||
price_change_pct NUMERIC(5,2), -- Percentage change (e.g., -15.5 for 15.5% decrease)
|
||||
|
||||
-- Platform
|
||||
platform TEXT, -- 'dutchie' | 'jane' | 'treez'
|
||||
|
||||
-- Alert status
|
||||
notified BOOLEAN DEFAULT FALSE, -- Has external system been notified?
|
||||
acknowledged_at TIMESTAMPTZ, -- When user acknowledged the alert
|
||||
acknowledged_by TEXT -- User who acknowledged
|
||||
);
|
||||
|
||||
-- Primary query: recent events by store
|
||||
CREATE INDEX idx_vis_events_store_time ON product_visibility_events(dispensary_id, detected_at DESC);
|
||||
|
||||
-- Alert queries: unnotified events
|
||||
CREATE INDEX idx_vis_events_unnotified ON product_visibility_events(notified, detected_at DESC) WHERE notified = FALSE;
|
||||
|
||||
-- Event type filtering
|
||||
CREATE INDEX idx_vis_events_type ON product_visibility_events(event_type, detected_at DESC);
|
||||
|
||||
-- Brand-level queries
|
||||
CREATE INDEX idx_vis_events_brand ON product_visibility_events(brand_name, event_type, detected_at DESC) WHERE brand_name IS NOT NULL;
|
||||
|
||||
-- Cleanup (90 days retention) - simple index, cleanup job handles the WHERE
|
||||
CREATE INDEX IF NOT EXISTS idx_vis_events_cleanup ON product_visibility_events(detected_at);
|
||||
|
||||
-- Comments
|
||||
COMMENT ON TABLE product_visibility_events IS 'Notable inventory events for alerting. OOS, brand drops, significant price changes. Retained 90 days.';
|
||||
COMMENT ON COLUMN product_visibility_events.event_type IS 'Event type: oos (out of stock), back_in_stock, brand_dropped, brand_added, price_change';
|
||||
COMMENT ON COLUMN product_visibility_events.notified IS 'Whether external systems (other apps) have been notified of this event';
|
||||
@@ -1,13 +0,0 @@
|
||||
-- Migration 120: Daily baseline tracking
|
||||
-- Track when each store's daily baseline payload was last saved
|
||||
-- Part of Real-Time Inventory Tracking feature
|
||||
|
||||
-- Add column to track last baseline save time
|
||||
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS last_baseline_at TIMESTAMPTZ DEFAULT NULL;
|
||||
|
||||
-- Index for finding stores that need baselines
|
||||
CREATE INDEX IF NOT EXISTS idx_dispensaries_baseline ON dispensaries(last_baseline_at)
|
||||
WHERE crawl_enabled = TRUE;
|
||||
|
||||
-- Comment
|
||||
COMMENT ON COLUMN dispensaries.last_baseline_at IS 'Timestamp of last daily baseline payload save. Baselines saved once per day between 12:01 AM - 3:00 AM.';
|
||||
-- Migration 121: Sales Analytics Materialized Views
-- Pre-computed views for sales velocity, brand market share, and store performance

-- ============================================================
-- VIEW 1: Daily Sales Estimates (per product/store)
-- Calculates delta between consecutive snapshots
-- ============================================================
CREATE MATERIALIZED VIEW IF NOT EXISTS mv_daily_sales_estimates AS
WITH qty_deltas AS (
    SELECT
        dispensary_id,
        product_id,
        brand_name,
        category,
        DATE(captured_at) AS sale_date,
        price_rec,
        quantity_available,
        -- Previous snapshot's quantity for the same product/store; NULL for the
        -- first snapshot in the 30-day window (filtered out below).
        LAG(quantity_available) OVER (
            PARTITION BY dispensary_id, product_id
            ORDER BY captured_at
        ) AS prev_quantity
    FROM inventory_snapshots
    WHERE quantity_available IS NOT NULL
      AND captured_at >= NOW() - INTERVAL '30 days'
)
SELECT
    dispensary_id,
    product_id,
    brand_name,
    category,
    sale_date,
    AVG(price_rec) AS avg_price,
    -- Drops in quantity count as sales; increases count as restocks.
    SUM(GREATEST(0, COALESCE(prev_quantity, 0) - quantity_available)) AS units_sold,
    SUM(GREATEST(0, quantity_available - COALESCE(prev_quantity, 0))) AS units_restocked,
    SUM(GREATEST(0, COALESCE(prev_quantity, 0) - quantity_available) * COALESCE(price_rec, 0)) AS revenue_estimate,
    COUNT(*) AS snapshot_count
FROM qty_deltas
WHERE prev_quantity IS NOT NULL
GROUP BY dispensary_id, product_id, brand_name, category, sale_date;

-- Unique index required for REFRESH ... CONCURRENTLY.
CREATE UNIQUE INDEX IF NOT EXISTS idx_mv_daily_sales_pk
    ON mv_daily_sales_estimates(dispensary_id, product_id, sale_date);
CREATE INDEX IF NOT EXISTS idx_mv_daily_sales_brand
    ON mv_daily_sales_estimates(brand_name, sale_date);
CREATE INDEX IF NOT EXISTS idx_mv_daily_sales_category
    ON mv_daily_sales_estimates(category, sale_date);
CREATE INDEX IF NOT EXISTS idx_mv_daily_sales_date
    ON mv_daily_sales_estimates(sale_date DESC);
-- ============================================================
-- VIEW 2: Brand Market Share by State
-- Weighted distribution across stores
-- ============================================================
CREATE MATERIALIZED VIEW IF NOT EXISTS mv_brand_market_share AS
WITH brand_presence AS (
    SELECT
        sp.brand AS brand_name,
        d.state AS state_code,
        COUNT(DISTINCT sp.dispensary_id) AS stores_carrying,
        COUNT(*) AS sku_count,
        SUM(CASE WHEN sp.is_in_stock THEN 1 ELSE 0 END) AS in_stock_skus,
        AVG(sp.price_rec) AS avg_price
    FROM store_products sp
    JOIN dispensaries d ON d.id = sp.dispensary_id
    WHERE sp.brand IS NOT NULL
      AND d.state IS NOT NULL
      -- FIX: restrict the numerator to the same population as state_totals
      -- (crawl-enabled stores). Previously stores_carrying counted ALL stores,
      -- so penetration_pct could exceed 100%.
      AND d.crawl_enabled = TRUE
    GROUP BY sp.brand, d.state
),
state_totals AS (
    SELECT
        d.state AS state_code,
        COUNT(DISTINCT d.id) FILTER (WHERE d.crawl_enabled) AS total_stores
    FROM dispensaries d
    WHERE d.state IS NOT NULL
    GROUP BY d.state
)
SELECT
    bp.brand_name,
    bp.state_code,
    bp.stores_carrying,
    st.total_stores,
    ROUND(bp.stores_carrying::NUMERIC * 100 / NULLIF(st.total_stores, 0), 2) AS penetration_pct,
    bp.sku_count,
    bp.in_stock_skus,
    bp.avg_price,
    NOW() AS calculated_at
FROM brand_presence bp
JOIN state_totals st ON st.state_code = bp.state_code;

-- Unique index required for REFRESH ... CONCURRENTLY.
CREATE UNIQUE INDEX IF NOT EXISTS idx_mv_brand_market_pk
    ON mv_brand_market_share(brand_name, state_code);
CREATE INDEX IF NOT EXISTS idx_mv_brand_market_state
    ON mv_brand_market_share(state_code);
CREATE INDEX IF NOT EXISTS idx_mv_brand_market_penetration
    ON mv_brand_market_share(penetration_pct DESC);
-- ============================================================
-- VIEW 3: SKU Velocity (30-day rolling)
-- Average daily units sold per SKU
-- ============================================================
CREATE MATERIALIZED VIEW IF NOT EXISTS mv_sku_velocity AS
SELECT
    dse.product_id,
    dse.brand_name,
    dse.category,
    dse.dispensary_id,
    d.name AS dispensary_name,
    d.state AS state_code,
    SUM(dse.units_sold) AS total_units_30d,
    SUM(dse.revenue_estimate) AS total_revenue_30d,
    COUNT(DISTINCT dse.sale_date) AS days_with_sales,
    ROUND(SUM(dse.units_sold)::NUMERIC / NULLIF(COUNT(DISTINCT dse.sale_date), 0), 2) AS avg_daily_units,
    AVG(dse.avg_price) AS avg_price,
    -- Tier thresholds on unrounded avg daily units: >=5 hot, >=1 steady, >=0.1 slow.
    CASE
        WHEN SUM(dse.units_sold)::NUMERIC / NULLIF(COUNT(DISTINCT dse.sale_date), 0) >= 5 THEN 'hot'
        WHEN SUM(dse.units_sold)::NUMERIC / NULLIF(COUNT(DISTINCT dse.sale_date), 0) >= 1 THEN 'steady'
        WHEN SUM(dse.units_sold)::NUMERIC / NULLIF(COUNT(DISTINCT dse.sale_date), 0) >= 0.1 THEN 'slow'
        ELSE 'stale'
    END AS velocity_tier,
    NOW() AS calculated_at
FROM mv_daily_sales_estimates dse
JOIN dispensaries d ON d.id = dse.dispensary_id
WHERE dse.sale_date >= CURRENT_DATE - INTERVAL '30 days'
GROUP BY dse.product_id, dse.brand_name, dse.category, dse.dispensary_id, d.name, d.state;

-- Unique index required for REFRESH ... CONCURRENTLY.
CREATE UNIQUE INDEX IF NOT EXISTS idx_mv_sku_velocity_pk
    ON mv_sku_velocity(dispensary_id, product_id);
CREATE INDEX IF NOT EXISTS idx_mv_sku_velocity_brand
    ON mv_sku_velocity(brand_name);
CREATE INDEX IF NOT EXISTS idx_mv_sku_velocity_tier
    ON mv_sku_velocity(velocity_tier);
CREATE INDEX IF NOT EXISTS idx_mv_sku_velocity_state
    ON mv_sku_velocity(state_code);
CREATE INDEX IF NOT EXISTS idx_mv_sku_velocity_units
    ON mv_sku_velocity(total_units_30d DESC);
-- ============================================================
-- VIEW 4: Store Performance Rankings
-- Revenue estimates and brand diversity per store
-- ============================================================
CREATE MATERIALIZED VIEW IF NOT EXISTS mv_store_performance AS
SELECT
    d.id AS dispensary_id,
    d.name AS dispensary_name,
    d.city,
    d.state AS state_code,
    -- Revenue metrics from sales estimates
    COALESCE(sales.total_revenue_30d, 0) AS total_revenue_30d,
    COALESCE(sales.total_units_30d, 0) AS total_units_30d,
    -- Inventory metrics
    COUNT(DISTINCT sp.id) AS total_skus,
    COUNT(DISTINCT sp.id) FILTER (WHERE sp.is_in_stock) AS in_stock_skus,
    -- Brand diversity
    COUNT(DISTINCT sp.brand) AS unique_brands,
    COUNT(DISTINCT sp.category) AS unique_categories,
    -- Pricing
    AVG(sp.price_rec) AS avg_price,
    -- Activity
    MAX(sp.updated_at) AS last_updated,
    NOW() AS calculated_at
FROM dispensaries d
LEFT JOIN store_products sp ON sp.dispensary_id = d.id
-- Pre-aggregated 30-day revenue/units per store (LEFT JOIN: stores with no
-- sales estimates still get a row, zeroed via COALESCE above).
LEFT JOIN (
    SELECT
        dispensary_id,
        SUM(revenue_estimate) AS total_revenue_30d,
        SUM(units_sold) AS total_units_30d
    FROM mv_daily_sales_estimates
    WHERE sale_date >= CURRENT_DATE - INTERVAL '30 days'
    GROUP BY dispensary_id
) sales ON sales.dispensary_id = d.id
WHERE d.crawl_enabled = TRUE
GROUP BY d.id, d.name, d.city, d.state, sales.total_revenue_30d, sales.total_units_30d;

-- Unique index required for REFRESH ... CONCURRENTLY.
CREATE UNIQUE INDEX IF NOT EXISTS idx_mv_store_perf_pk
    ON mv_store_performance(dispensary_id);
CREATE INDEX IF NOT EXISTS idx_mv_store_perf_state
    ON mv_store_performance(state_code);
CREATE INDEX IF NOT EXISTS idx_mv_store_perf_revenue
    ON mv_store_performance(total_revenue_30d DESC);
-- ============================================================
-- VIEW 5: Weekly Category Trends
-- Category performance over time (90-day window, bucketed by ISO week)
-- ============================================================
CREATE MATERIALIZED VIEW IF NOT EXISTS mv_category_weekly_trends AS
SELECT
    dse.category,
    d.state AS state_code,
    DATE_TRUNC('week', dse.sale_date)::DATE AS week_start,
    COUNT(DISTINCT dse.product_id) AS sku_count,
    COUNT(DISTINCT dse.dispensary_id) AS store_count,
    SUM(dse.units_sold) AS total_units,
    SUM(dse.revenue_estimate) AS total_revenue,
    AVG(dse.avg_price) AS avg_price,
    NOW() AS calculated_at
FROM mv_daily_sales_estimates dse
JOIN dispensaries d ON d.id = dse.dispensary_id
WHERE dse.category IS NOT NULL
  AND dse.sale_date >= CURRENT_DATE - INTERVAL '90 days'
GROUP BY dse.category, d.state, DATE_TRUNC('week', dse.sale_date);

-- Unique index required for REFRESH ... CONCURRENTLY.
CREATE UNIQUE INDEX IF NOT EXISTS idx_mv_cat_weekly_pk
    ON mv_category_weekly_trends(category, state_code, week_start);
CREATE INDEX IF NOT EXISTS idx_mv_cat_weekly_state
    ON mv_category_weekly_trends(state_code, week_start);
CREATE INDEX IF NOT EXISTS idx_mv_cat_weekly_date
    ON mv_category_weekly_trends(week_start DESC);
-- ============================================================
-- VIEW 6: Product Intelligence (Hoodie-style per-product metrics)
-- Includes stock diff, days since OOS, days until stockout
-- NOTE(review): this view reads sp.brand_name_raw / sp.name_raw /
-- sp.category_raw while VIEW 2 reads sp.brand — confirm both column sets
-- exist on store_products.
-- ============================================================
CREATE MATERIALIZED VIEW IF NOT EXISTS mv_product_intelligence AS
WITH
-- Calculate stock diff over 120 days
stock_diff AS (
    SELECT
        dispensary_id,
        product_id,
        -- Get oldest and newest quantity in last 120 days
        FIRST_VALUE(quantity_available) OVER (
            PARTITION BY dispensary_id, product_id
            ORDER BY captured_at ASC
            ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
        ) AS qty_120d_ago,
        LAST_VALUE(quantity_available) OVER (
            PARTITION BY dispensary_id, product_id
            ORDER BY captured_at ASC
            ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
        ) AS qty_current
    FROM inventory_snapshots
    WHERE captured_at >= NOW() - INTERVAL '120 days'
),
stock_diff_calc AS (
    -- Collapse to one row per product/store (window functions above emit one
    -- identical row per snapshot).
    SELECT DISTINCT
        dispensary_id,
        product_id,
        qty_current - COALESCE(qty_120d_ago, qty_current) AS stock_diff_120
    FROM stock_diff
),
-- Get days since last OOS event
last_oos AS (
    SELECT
        dispensary_id,
        product_id,
        MAX(detected_at) AS last_oos_date
    FROM product_visibility_events
    WHERE event_type = 'oos'
    GROUP BY dispensary_id, product_id
),
-- Calculate avg daily units sold (from velocity view)
velocity AS (
    SELECT
        dispensary_id,
        product_id,
        avg_daily_units
    FROM mv_sku_velocity
)
SELECT
    sp.dispensary_id,
    d.name AS dispensary_name,
    d.state AS state_code,
    d.city,
    sp.provider_product_id AS sku,
    sp.name_raw AS product_name,
    sp.brand_name_raw AS brand,
    sp.category_raw AS category,
    sp.is_in_stock,
    sp.stock_status,
    sp.stock_quantity,
    sp.price_rec AS price,
    sp.first_seen_at AS first_seen,
    sp.last_seen_at AS last_seen,
    -- Calculated fields
    COALESCE(sd.stock_diff_120, 0) AS stock_diff_120,
    CASE
        WHEN lo.last_oos_date IS NOT NULL
        THEN EXTRACT(DAY FROM NOW() - lo.last_oos_date)::INT
        ELSE NULL
    END AS days_since_oos,
    -- Days until stockout = current stock / daily burn rate
    CASE
        WHEN v.avg_daily_units > 0 AND sp.stock_quantity > 0
        THEN ROUND(sp.stock_quantity::NUMERIC / v.avg_daily_units)::INT
        ELSE NULL
    END AS days_until_stock_out,
    v.avg_daily_units,
    NOW() AS calculated_at
FROM store_products sp
JOIN dispensaries d ON d.id = sp.dispensary_id
LEFT JOIN stock_diff_calc sd ON sd.dispensary_id = sp.dispensary_id
    AND sd.product_id = sp.provider_product_id
LEFT JOIN last_oos lo ON lo.dispensary_id = sp.dispensary_id
    AND lo.product_id = sp.provider_product_id
LEFT JOIN velocity v ON v.dispensary_id = sp.dispensary_id
    AND v.product_id = sp.provider_product_id
WHERE d.crawl_enabled = TRUE;

-- Unique index required for REFRESH ... CONCURRENTLY.
CREATE UNIQUE INDEX IF NOT EXISTS idx_mv_prod_intel_pk
    ON mv_product_intelligence(dispensary_id, sku);
CREATE INDEX IF NOT EXISTS idx_mv_prod_intel_brand
    ON mv_product_intelligence(brand);
CREATE INDEX IF NOT EXISTS idx_mv_prod_intel_state
    ON mv_product_intelligence(state_code);
CREATE INDEX IF NOT EXISTS idx_mv_prod_intel_stock_out
    ON mv_product_intelligence(days_until_stock_out ASC NULLS LAST);
CREATE INDEX IF NOT EXISTS idx_mv_prod_intel_oos
    ON mv_product_intelligence(days_since_oos DESC NULLS LAST);
-- ============================================================
-- REFRESH FUNCTION
-- ============================================================
CREATE OR REPLACE FUNCTION refresh_sales_analytics_views()
RETURNS TABLE(view_name TEXT, rows_affected BIGINT) AS $$
DECLARE
    row_count BIGINT;
BEGIN
    -- Must refresh in dependency order:
    -- 1. daily_sales (base view)
    -- 2. sku_velocity (depends on daily_sales)
    -- 3. product_intelligence (depends on sku_velocity)
    -- 4. others (independent)

    REFRESH MATERIALIZED VIEW CONCURRENTLY mv_daily_sales_estimates;
    SELECT COUNT(*) INTO row_count FROM mv_daily_sales_estimates;
    view_name := 'mv_daily_sales_estimates';
    rows_affected := row_count;
    RETURN NEXT;

    REFRESH MATERIALIZED VIEW CONCURRENTLY mv_brand_market_share;
    SELECT COUNT(*) INTO row_count FROM mv_brand_market_share;
    view_name := 'mv_brand_market_share';
    rows_affected := row_count;
    RETURN NEXT;

    REFRESH MATERIALIZED VIEW CONCURRENTLY mv_sku_velocity;
    SELECT COUNT(*) INTO row_count FROM mv_sku_velocity;
    view_name := 'mv_sku_velocity';
    rows_affected := row_count;
    RETURN NEXT;

    REFRESH MATERIALIZED VIEW CONCURRENTLY mv_store_performance;
    SELECT COUNT(*) INTO row_count FROM mv_store_performance;
    view_name := 'mv_store_performance';
    rows_affected := row_count;
    RETURN NEXT;

    REFRESH MATERIALIZED VIEW CONCURRENTLY mv_category_weekly_trends;
    SELECT COUNT(*) INTO row_count FROM mv_category_weekly_trends;
    view_name := 'mv_category_weekly_trends';
    rows_affected := row_count;
    RETURN NEXT;

    -- Product intelligence depends on sku_velocity, so refresh last
    REFRESH MATERIALIZED VIEW CONCURRENTLY mv_product_intelligence;
    SELECT COUNT(*) INTO row_count FROM mv_product_intelligence;
    view_name := 'mv_product_intelligence';
    rows_affected := row_count;
    RETURN NEXT;
END;
$$ LANGUAGE plpgsql;

COMMENT ON FUNCTION refresh_sales_analytics_views IS
'Refresh all sales analytics materialized views. Call hourly via scheduler.';

-- ============================================================
-- INITIAL REFRESH (populate views)
-- ============================================================
-- Note: Initial refresh must be non-concurrent (CONCURRENTLY fails until the
-- view has been populated at least once).
-- Run these manually after migration:
-- REFRESH MATERIALIZED VIEW mv_daily_sales_estimates;
-- REFRESH MATERIALIZED VIEW mv_brand_market_share;
-- REFRESH MATERIALIZED VIEW mv_sku_velocity;
-- REFRESH MATERIALIZED VIEW mv_store_performance;
-- REFRESH MATERIALIZED VIEW mv_category_weekly_trends;
-- Migration 122: Market Intelligence Schema
-- Separate schema for external market data ingestion
-- Supports product, brand, and dispensary data from third-party sources

-- Create dedicated schema
CREATE SCHEMA IF NOT EXISTS market_intel;

-- ============================================================
-- BRANDS: Brand/Company Intelligence
-- ============================================================
CREATE TABLE IF NOT EXISTS market_intel.brands (
    id SERIAL PRIMARY KEY,

    -- Identity
    brand_name VARCHAR(255) NOT NULL,
    parent_brand VARCHAR(255),
    parent_company VARCHAR(255),
    slug VARCHAR(255),
    external_id VARCHAR(255) UNIQUE, -- objectID from source

    -- Details
    brand_description TEXT,
    brand_logo_url TEXT,
    brand_url TEXT,
    linkedin_url TEXT,

    -- Presence
    states JSONB DEFAULT '[]', -- Array of state names
    active_variants INTEGER DEFAULT 0,
    all_variants INTEGER DEFAULT 0,

    -- Metadata
    source VARCHAR(50) DEFAULT 'external',
    fetched_at TIMESTAMPTZ DEFAULT NOW(),
    created_at TIMESTAMPTZ DEFAULT NOW(),
    updated_at TIMESTAMPTZ DEFAULT NOW()
);

CREATE INDEX IF NOT EXISTS idx_brands_name ON market_intel.brands(brand_name);
CREATE INDEX IF NOT EXISTS idx_brands_parent ON market_intel.brands(parent_brand);
CREATE INDEX IF NOT EXISTS idx_brands_external ON market_intel.brands(external_id);
CREATE INDEX IF NOT EXISTS idx_brands_states ON market_intel.brands USING GIN(states);
-- ============================================================
-- DISPENSARIES: Dispensary/Store Intelligence
-- ============================================================
CREATE TABLE IF NOT EXISTS market_intel.dispensaries (
    id SERIAL PRIMARY KEY,

    -- Identity
    dispensary_name VARCHAR(255) NOT NULL,
    dispensary_company_name VARCHAR(255),
    dispensary_company_id VARCHAR(255),
    slug VARCHAR(255),
    external_id VARCHAR(255) UNIQUE, -- objectID from source

    -- Location
    street_address VARCHAR(255),
    city VARCHAR(100),
    state VARCHAR(100),
    postal_code VARCHAR(20),
    county_name VARCHAR(100),
    country_code VARCHAR(10) DEFAULT 'USA',
    full_address TEXT,
    latitude DECIMAL(10, 7),
    longitude DECIMAL(10, 7),
    timezone VARCHAR(50),
    urbanicity VARCHAR(50), -- Urban, Suburban, Rural

    -- Contact
    phone VARCHAR(50),
    email VARCHAR(255),
    website TEXT,
    linkedin_url TEXT,

    -- License
    license_number VARCHAR(100),
    license_type VARCHAR(100),

    -- Store Type
    is_medical BOOLEAN DEFAULT FALSE,
    is_recreational BOOLEAN DEFAULT FALSE,
    delivery_enabled BOOLEAN DEFAULT FALSE,
    curbside_pickup BOOLEAN DEFAULT FALSE,
    instore_pickup BOOLEAN DEFAULT FALSE,
    location_type VARCHAR(50), -- RETAIL, DELIVERY, etc.

    -- Sales Estimates
    estimated_daily_sales DECIMAL(12, 2),
    estimated_sales DECIMAL(12, 2),
    avg_daily_sales DECIMAL(12, 2),
    state_sales_bucket INTEGER,

    -- Customer Demographics
    affluency JSONB DEFAULT '[]', -- Array of affluency segments
    age_skew JSONB DEFAULT '[]', -- Array of age brackets
    customer_segments JSONB DEFAULT '[]', -- Array of segment names

    -- Inventory Stats
    menus_count INTEGER DEFAULT 0,
    menus_count_med INTEGER DEFAULT 0,
    menus_count_rec INTEGER DEFAULT 0,
    parent_brands JSONB DEFAULT '[]',
    brand_company_names JSONB DEFAULT '[]',

    -- Business Info
    banner VARCHAR(255), -- Chain/banner name
    business_type VARCHAR(50), -- MSO, Independent, etc.
    pos_system VARCHAR(100),
    atm_presence BOOLEAN DEFAULT FALSE,
    tax_included BOOLEAN DEFAULT FALSE,

    -- Ratings
    rating DECIMAL(3, 2),
    reviews_count INTEGER DEFAULT 0,

    -- Status
    is_closed BOOLEAN DEFAULT FALSE,
    open_date TIMESTAMPTZ,
    last_updated_at TIMESTAMPTZ,

    -- Media
    logo_url TEXT,
    cover_url TEXT,

    -- Metadata
    source VARCHAR(50) DEFAULT 'external',
    fetched_at TIMESTAMPTZ DEFAULT NOW(),
    created_at TIMESTAMPTZ DEFAULT NOW(),
    updated_at TIMESTAMPTZ DEFAULT NOW()
);

CREATE INDEX IF NOT EXISTS idx_dispensaries_name ON market_intel.dispensaries(dispensary_name);
CREATE INDEX IF NOT EXISTS idx_dispensaries_state ON market_intel.dispensaries(state);
CREATE INDEX IF NOT EXISTS idx_dispensaries_city ON market_intel.dispensaries(city);
CREATE INDEX IF NOT EXISTS idx_dispensaries_external ON market_intel.dispensaries(external_id);
CREATE INDEX IF NOT EXISTS idx_dispensaries_banner ON market_intel.dispensaries(banner);
CREATE INDEX IF NOT EXISTS idx_dispensaries_business_type ON market_intel.dispensaries(business_type);
CREATE INDEX IF NOT EXISTS idx_dispensaries_geo ON market_intel.dispensaries(latitude, longitude);
CREATE INDEX IF NOT EXISTS idx_dispensaries_segments ON market_intel.dispensaries USING GIN(customer_segments);
-- ============================================================
-- PRODUCTS: Product/SKU Intelligence
-- ============================================================
CREATE TABLE IF NOT EXISTS market_intel.products (
    id SERIAL PRIMARY KEY,

    -- Identity
    name VARCHAR(500) NOT NULL,
    brand VARCHAR(255),
    brand_id VARCHAR(255),
    brand_company_name VARCHAR(255),
    parent_brand VARCHAR(255),
    external_id VARCHAR(255) UNIQUE, -- objectID from source
    cm_id VARCHAR(100), -- Canonical menu ID

    -- Category Hierarchy
    category_0 VARCHAR(100), -- Top level: Flower, Edibles, Vapes
    category_1 VARCHAR(255), -- Mid level: Flower > Pre-Rolls
    category_2 VARCHAR(500), -- Detailed: Flower > Pre-Rolls > Singles

    -- Cannabis Classification
    cannabis_type VARCHAR(50), -- SATIVA, INDICA, HYBRID
    strain VARCHAR(255),
    flavor VARCHAR(255),
    pack_size VARCHAR(100),
    description TEXT,

    -- Cannabinoids
    thc_mg DECIMAL(10, 2),
    cbd_mg DECIMAL(10, 2),
    percent_thc DECIMAL(5, 2),
    percent_cbd DECIMAL(5, 2),

    -- Dispensary Context (denormalized for query performance)
    master_dispensary_name VARCHAR(255),
    master_dispensary_id VARCHAR(255),
    dispensary_count INTEGER DEFAULT 0, -- How many stores carry this
    d_state VARCHAR(100),
    d_city VARCHAR(100),
    d_banner VARCHAR(255),
    d_business_type VARCHAR(50),
    d_medical BOOLEAN,
    d_recreational BOOLEAN,

    -- Customer Demographics (from dispensary)
    d_customer_segments JSONB DEFAULT '[]',
    d_age_skew JSONB DEFAULT '[]',
    d_affluency JSONB DEFAULT '[]',
    d_urbanicity VARCHAR(50),

    -- Stock Status
    in_stock BOOLEAN DEFAULT TRUE,
    last_seen_at DATE,
    last_seen_at_ts BIGINT,

    -- Media
    img_url TEXT,
    product_url TEXT,
    menu_slug VARCHAR(500),

    -- Geo
    latitude DECIMAL(10, 7),
    longitude DECIMAL(10, 7),

    -- Metadata
    source VARCHAR(50) DEFAULT 'external',
    fetched_at TIMESTAMPTZ DEFAULT NOW(),
    created_at TIMESTAMPTZ DEFAULT NOW(),
    updated_at TIMESTAMPTZ DEFAULT NOW()
);

CREATE INDEX IF NOT EXISTS idx_products_name ON market_intel.products(name);
CREATE INDEX IF NOT EXISTS idx_products_brand ON market_intel.products(brand);
CREATE INDEX IF NOT EXISTS idx_products_external ON market_intel.products(external_id);
CREATE INDEX IF NOT EXISTS idx_products_category ON market_intel.products(category_0, category_1);
CREATE INDEX IF NOT EXISTS idx_products_cannabis_type ON market_intel.products(cannabis_type);
CREATE INDEX IF NOT EXISTS idx_products_strain ON market_intel.products(strain);
CREATE INDEX IF NOT EXISTS idx_products_state ON market_intel.products(d_state);
CREATE INDEX IF NOT EXISTS idx_products_in_stock ON market_intel.products(in_stock);
CREATE INDEX IF NOT EXISTS idx_products_dispensary_count ON market_intel.products(dispensary_count DESC);
CREATE INDEX IF NOT EXISTS idx_products_segments ON market_intel.products USING GIN(d_customer_segments);
-- ============================================================
-- PRODUCT_VARIANTS: Variant-Level Data (Pricing, Stock)
-- ============================================================
CREATE TABLE IF NOT EXISTS market_intel.product_variants (
    id SERIAL PRIMARY KEY,
    product_id INTEGER REFERENCES market_intel.products(id) ON DELETE CASCADE,

    -- Identity
    variant_id VARCHAR(255) NOT NULL,
    pos_sku VARCHAR(255),
    pos_product_id VARCHAR(255),
    pos_system VARCHAR(100),

    -- Pricing
    actual_price DECIMAL(10, 2),
    original_price DECIMAL(10, 2),
    discounted_price DECIMAL(10, 2),

    -- Presentation
    product_presentation VARCHAR(255), -- "100.00 mg", "3.5g", etc.
    quantity DECIMAL(10, 2),
    unit VARCHAR(50), -- mg, g, oz, each

    -- Availability
    is_medical BOOLEAN DEFAULT FALSE,
    is_recreational BOOLEAN DEFAULT FALSE,
    is_active BOOLEAN DEFAULT TRUE,

    -- Stock Intelligence
    stock_status VARCHAR(50), -- In Stock, Low Stock, Out of Stock
    stock_diff_120 DECIMAL(10, 2), -- 120-day stock change
    days_since_oos INTEGER,
    days_until_stock_out INTEGER,

    -- Timestamps
    first_seen_at_ts BIGINT,
    first_seen_at TIMESTAMPTZ,
    last_seen_at DATE,

    -- Metadata
    fetched_at TIMESTAMPTZ DEFAULT NOW(),
    created_at TIMESTAMPTZ DEFAULT NOW(),
    updated_at TIMESTAMPTZ DEFAULT NOW(),

    UNIQUE(product_id, variant_id)
);

CREATE INDEX IF NOT EXISTS idx_variants_product ON market_intel.product_variants(product_id);
CREATE INDEX IF NOT EXISTS idx_variants_sku ON market_intel.product_variants(pos_sku);
CREATE INDEX IF NOT EXISTS idx_variants_stock_status ON market_intel.product_variants(stock_status);
CREATE INDEX IF NOT EXISTS idx_variants_price ON market_intel.product_variants(actual_price);
CREATE INDEX IF NOT EXISTS idx_variants_days_out ON market_intel.product_variants(days_until_stock_out);
-- ============================================================
-- FETCH_LOG: Track data fetches
-- ============================================================
CREATE TABLE IF NOT EXISTS market_intel.fetch_log (
    id SERIAL PRIMARY KEY,
    fetch_type VARCHAR(50) NOT NULL, -- brands, dispensaries, products
    state_code VARCHAR(10),
    query_params JSONB,
    records_fetched INTEGER DEFAULT 0,
    records_inserted INTEGER DEFAULT 0,
    records_updated INTEGER DEFAULT 0,
    duration_ms INTEGER,
    error_message TEXT,
    started_at TIMESTAMPTZ DEFAULT NOW(),
    completed_at TIMESTAMPTZ
);

CREATE INDEX IF NOT EXISTS idx_fetch_log_type ON market_intel.fetch_log(fetch_type);
CREATE INDEX IF NOT EXISTS idx_fetch_log_state ON market_intel.fetch_log(state_code);
CREATE INDEX IF NOT EXISTS idx_fetch_log_started ON market_intel.fetch_log(started_at DESC);
-- ============================================================
-- HELPER VIEWS
-- ============================================================

-- Brand market presence summary
CREATE OR REPLACE VIEW market_intel.v_brand_presence AS
SELECT
    b.brand_name,
    b.parent_company,
    b.active_variants,
    b.all_variants,
    jsonb_array_length(b.states) as state_count, -- states defaults to '[]', so always an array
    b.states,
    b.fetched_at
FROM market_intel.brands b
ORDER BY b.active_variants DESC;

-- Dispensary sales rankings by state
CREATE OR REPLACE VIEW market_intel.v_dispensary_rankings AS
SELECT
    d.dispensary_name,
    d.city,
    d.state,
    d.banner,
    d.business_type,
    d.estimated_daily_sales,
    d.menus_count,
    d.is_medical,
    d.is_recreational,
    d.customer_segments,
    RANK() OVER (PARTITION BY d.state ORDER BY d.estimated_daily_sales DESC NULLS LAST) as state_rank
FROM market_intel.dispensaries d
WHERE d.is_closed = FALSE;

-- Product distribution by brand and state
CREATE OR REPLACE VIEW market_intel.v_product_distribution AS
SELECT
    p.brand,
    p.d_state as state,
    p.category_0 as category,
    COUNT(*) as product_count,
    COUNT(*) FILTER (WHERE p.in_stock) as in_stock_count,
    AVG(p.dispensary_count) as avg_store_count,
    COUNT(DISTINCT p.master_dispensary_id) as unique_stores
FROM market_intel.products p
GROUP BY p.brand, p.d_state, p.category_0;
-- ============================================================
-- COMMENTS
-- ============================================================
COMMENT ON SCHEMA market_intel IS 'Market intelligence data from external sources';
COMMENT ON TABLE market_intel.brands IS 'Brand/company data with multi-state presence';
COMMENT ON TABLE market_intel.dispensaries IS 'Dispensary data with sales estimates and demographics';
COMMENT ON TABLE market_intel.products IS 'Product/SKU data with cannabinoid and category info';
COMMENT ON TABLE market_intel.product_variants IS 'Variant-level pricing and stock data';
COMMENT ON TABLE market_intel.fetch_log IS 'Log of data fetches for monitoring';
@@ -1,159 +0,0 @@
|
||||
-- Migration 123: Extract unmapped fields from provider_data
-- These fields exist in our crawl payloads but weren't being stored in columns.
-- All additions use IF NOT EXISTS so the migration is safe to re-run.

-- ============================================================
-- ADD NEW COLUMNS TO store_products
-- ============================================================

-- Cannabis classification (SATIVA, INDICA, HYBRID, CBD); normalized by the
-- backfill statements below.
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS cannabis_type VARCHAR(50);

-- Canonical IDs from POS systems (used for cross-store product matching).
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS canonical_strain_id VARCHAR(100);
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS canonical_vendor_id VARCHAR(100);
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS canonical_brand_id VARCHAR(100);
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS canonical_category_id VARCHAR(100);

-- Lab results (Certificate of Analysis link).
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS lab_result_url TEXT;

-- Flavors (extracted from JSONB to text array for easier querying).
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS flavors_list TEXT[];
|
||||
|
||||
-- ============================================================
|
||||
-- BACKFILL FROM provider_data
|
||||
-- ============================================================
|
||||
|
||||
-- Backfill cannabis_type from provider_data->'classification'.
-- Maps short codes (H/I/S, I/S, S/I) to the normalized vocabulary;
-- any unrecognized value is copied through verbatim (ELSE branch).
-- Only touches rows where cannabis_type is still NULL, so re-running is safe.
UPDATE store_products
SET cannabis_type = CASE
    WHEN provider_data->>'classification' IN ('HYBRID', 'H') THEN 'HYBRID'
    WHEN provider_data->>'classification' IN ('INDICA', 'I') THEN 'INDICA'
    WHEN provider_data->>'classification' IN ('SATIVA', 'S') THEN 'SATIVA'
    WHEN provider_data->>'classification' = 'I/S' THEN 'INDICA_DOMINANT'
    WHEN provider_data->>'classification' = 'S/I' THEN 'SATIVA_DOMINANT'
    WHEN provider_data->>'classification' = 'CBD' THEN 'CBD'
    ELSE provider_data->>'classification'
END
WHERE provider_data->>'classification' IS NOT NULL
  AND cannabis_type IS NULL;
|
||||
|
||||
-- Also backfill from strain_type if cannabis_type is still NULL after the
-- classification pass. The compound patterns (indica+hybrid / sativa+hybrid)
-- are checked BEFORE the single-word patterns — order is significant, since
-- 'indica hybrid' would otherwise match the plain '%indica%' branch first.
UPDATE store_products
SET cannabis_type = CASE
    WHEN strain_type ILIKE '%indica%hybrid%' OR strain_type ILIKE '%hybrid%indica%' THEN 'INDICA_DOMINANT'
    WHEN strain_type ILIKE '%sativa%hybrid%' OR strain_type ILIKE '%hybrid%sativa%' THEN 'SATIVA_DOMINANT'
    WHEN strain_type ILIKE '%indica%' THEN 'INDICA'
    WHEN strain_type ILIKE '%sativa%' THEN 'SATIVA'
    WHEN strain_type ILIKE '%hybrid%' THEN 'HYBRID'
    WHEN strain_type ILIKE '%cbd%' THEN 'CBD'
    ELSE NULL
END
WHERE strain_type IS NOT NULL
  AND cannabis_type IS NULL;
|
||||
|
||||
-- Backfill canonical IDs from the POSMetaData sub-object of provider_data.
-- canonical_strain_id IS NULL is used as the "not yet backfilled" sentinel
-- for the whole group of four columns.
UPDATE store_products
SET
    canonical_strain_id = provider_data->'POSMetaData'->>'canonicalStrainId',
    canonical_vendor_id = provider_data->'POSMetaData'->>'canonicalVendorId',
    canonical_brand_id = provider_data->'POSMetaData'->>'canonicalBrandId',
    canonical_category_id = provider_data->'POSMetaData'->>'canonicalCategoryId'
WHERE provider_data->'POSMetaData' IS NOT NULL
  AND canonical_strain_id IS NULL;

-- Backfill lab result URLs (Certificate of Analysis links) from POSMetaData.
UPDATE store_products
SET lab_result_url = provider_data->'POSMetaData'->>'canonicalLabResultUrl'
WHERE provider_data->'POSMetaData'->>'canonicalLabResultUrl' IS NOT NULL
  AND lab_result_url IS NULL;
|
||||
|
||||
-- ============================================================
-- INDEXES
-- ============================================================
-- Support lookups/filters on the newly added columns.
CREATE INDEX IF NOT EXISTS idx_store_products_cannabis_type ON store_products(cannabis_type);
CREATE INDEX IF NOT EXISTS idx_store_products_vendor_id ON store_products(canonical_vendor_id);
CREATE INDEX IF NOT EXISTS idx_store_products_strain_id ON store_products(canonical_strain_id);
|
||||
|
||||
-- ============================================================
-- ADD MSO FLAG TO DISPENSARIES
-- ============================================================

-- Multi-State Operator flag (calculated from chain presence in multiple states).
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS is_mso BOOLEAN DEFAULT FALSE;

-- One-time backfill: mark every store whose chain operates in 2+ states.
-- NOTE(review): this is a snapshot at migration time; chains that expand into
-- a second state later will not be flagged until this is re-run or maintained
-- by application logic — confirm who owns keeping is_mso current.
WITH mso_chains AS (
    SELECT chain_id
    FROM dispensaries
    WHERE chain_id IS NOT NULL
    GROUP BY chain_id
    HAVING COUNT(DISTINCT state) > 1
)
UPDATE dispensaries d
SET is_mso = TRUE
WHERE d.chain_id IN (SELECT chain_id FROM mso_chains);

-- Partial index: only TRUE rows are indexed, keeping the index small.
CREATE INDEX IF NOT EXISTS idx_dispensaries_is_mso ON dispensaries(is_mso) WHERE is_mso = TRUE;
|
||||
|
||||
-- ============================================================
-- PRODUCT DISTRIBUTION VIEW
-- ============================================================
-- How many stores carry each product, keyed by brand + canonical name.
-- Only in-stock products carried by more than one store are included.
CREATE OR REPLACE VIEW v_product_distribution AS
SELECT prod.brand_name_raw                    AS brand,
       prod.c_name                            AS product_canonical_name,
       COUNT(DISTINCT prod.dispensary_id)     AS store_count,
       COUNT(DISTINCT disp.state)             AS state_count,
       ARRAY_AGG(DISTINCT disp.state)         AS states,
       AVG(prod.price_rec)                    AS avg_price,
       MIN(prod.price_rec)                    AS min_price,
       MAX(prod.price_rec)                    AS max_price
FROM store_products prod
JOIN dispensaries disp ON disp.id = prod.dispensary_id
WHERE prod.is_in_stock = TRUE
  AND prod.brand_name_raw IS NOT NULL
  AND prod.c_name IS NOT NULL
GROUP BY prod.brand_name_raw, prod.c_name
HAVING COUNT(DISTINCT prod.dispensary_id) > 1
ORDER BY store_count DESC;
|
||||
|
||||
-- ============================================================
-- MSO SUMMARY VIEW
-- ============================================================
-- One row per multi-state-operator chain (present in 2+ states), with
-- store/state counts, the state list, and total product volume.
CREATE OR REPLACE VIEW v_mso_summary AS
SELECT chain.name                                        AS chain_name,
       COUNT(DISTINCT store.id)                          AS store_count,
       COUNT(DISTINCT store.state)                       AS state_count,
       ARRAY_AGG(DISTINCT store.state ORDER BY store.state) AS states,
       SUM(store.product_count)                          AS total_products,
       TRUE                                              AS is_mso
FROM dispensaries store
JOIN chains chain ON chain.id = store.chain_id
WHERE store.chain_id IN (
          -- Chains operating in more than one state qualify as MSOs.
          SELECT chain_id
          FROM dispensaries
          WHERE chain_id IS NOT NULL
          GROUP BY chain_id
          HAVING COUNT(DISTINCT state) > 1
      )
GROUP BY chain.id, chain.name
ORDER BY state_count DESC, store_count DESC;
|
||||
|
||||
-- ============================================================
-- COMMENTS
-- ============================================================
-- Catalog documentation for the columns and views added by this migration.
COMMENT ON COLUMN store_products.cannabis_type IS 'Normalized cannabis classification: SATIVA, INDICA, HYBRID, INDICA_DOMINANT, SATIVA_DOMINANT, CBD';
COMMENT ON COLUMN store_products.canonical_strain_id IS 'POS system strain identifier for cross-store matching';
COMMENT ON COLUMN store_products.canonical_vendor_id IS 'POS system vendor/supplier identifier';
COMMENT ON COLUMN store_products.lab_result_url IS 'Link to Certificate of Analysis / lab test results';
COMMENT ON COLUMN dispensaries.is_mso IS 'Multi-State Operator: chain operates in 2+ states';
COMMENT ON VIEW v_product_distribution IS 'Shows how many stores carry each product for distribution analysis';
COMMENT ON VIEW v_mso_summary IS 'Summary of multi-state operator chains';
|
||||
@@ -1,73 +0,0 @@
|
||||
-- Migration 124: Convert inventory_snapshots to TimescaleDB hypertable
-- Requires: CREATE EXTENSION timescaledb; (run after installing TimescaleDB)

-- ============================================================
-- STEP 1: Enable TimescaleDB extension
-- ============================================================
CREATE EXTENSION IF NOT EXISTS timescaledb;

-- ============================================================
-- STEP 2: Convert to hypertable
-- ============================================================
-- Note: Table must have a time column and no foreign key constraints.

-- First, drop any foreign keys if they exist (hypertables cannot keep them).
ALTER TABLE inventory_snapshots DROP CONSTRAINT IF EXISTS inventory_snapshots_dispensary_id_fkey;

-- Convert to hypertable, partitioned by captured_at (1 day chunks).
-- if_not_exists makes re-runs safe; migrate_data moves existing rows into chunks.
SELECT create_hypertable(
    'inventory_snapshots',
    'captured_at',
    chunk_time_interval => INTERVAL '1 day',
    if_not_exists => TRUE,
    migrate_data => TRUE
);
|
||||
|
||||
-- ============================================================
-- STEP 3: Enable compression
-- ============================================================
-- Segment by dispensary_id and product_id (the common query dimensions) and
-- order compressed rows by captured_at DESC to match time-range scans.
ALTER TABLE inventory_snapshots SET (
    timescaledb.compress,
    timescaledb.compress_segmentby = 'dispensary_id, product_id',
    timescaledb.compress_orderby = 'captured_at DESC'
);

-- ============================================================
-- STEP 4: Compression policy (compress chunks older than 1 day)
-- ============================================================
SELECT add_compression_policy('inventory_snapshots', INTERVAL '1 day');
|
||||
|
||||
-- ============================================================
-- STEP 5: Retention policy (optional - drop chunks older than 90 days)
-- ============================================================
-- Uncomment if you want automatic cleanup:
-- SELECT add_retention_policy('inventory_snapshots', INTERVAL '90 days');

-- ============================================================
-- STEP 6: Optimize indexes for time-series queries
-- ============================================================
-- TimescaleDB automatically creates time-based indexes.
-- Add a composite index for the common "history of one product at one store"
-- query pattern.
CREATE INDEX IF NOT EXISTS idx_snapshots_disp_prod_time
    ON inventory_snapshots (dispensary_id, product_id, captured_at DESC);
|
||||
|
||||
-- ============================================================
-- VERIFICATION QUERIES (run manually after migration)
-- ============================================================
-- Check hypertable status:
-- SELECT * FROM timescaledb_information.hypertables WHERE hypertable_name = 'inventory_snapshots';

-- Check compression status:
-- SELECT * FROM timescaledb_information.compression_settings WHERE hypertable_name = 'inventory_snapshots';

-- Check chunk sizes:
-- SELECT chunk_name, pg_size_pretty(before_compression_total_bytes) as before,
--        pg_size_pretty(after_compression_total_bytes) as after,
--        round(100 - (after_compression_total_bytes::numeric / before_compression_total_bytes * 100), 1) as compression_pct
-- FROM chunk_compression_stats('inventory_snapshots');

-- ============================================================
-- COMMENTS
-- ============================================================
COMMENT ON TABLE inventory_snapshots IS 'TimescaleDB hypertable for inventory time-series data. Compressed after 1 day.';
|
||||
@@ -1,402 +0,0 @@
|
||||
-- Migration 125: Delta-only inventory snapshots
-- Only store a row when something meaningful changes.
-- Revenue calculated as: effective_price x qty_sold.

-- ============================================================
-- ADD DELTA TRACKING COLUMNS
-- ============================================================

-- Previous values (to show what changed relative to the prior snapshot).
ALTER TABLE inventory_snapshots ADD COLUMN IF NOT EXISTS prev_quantity INTEGER;
ALTER TABLE inventory_snapshots ADD COLUMN IF NOT EXISTS prev_price_rec DECIMAL(10,2);
ALTER TABLE inventory_snapshots ADD COLUMN IF NOT EXISTS prev_price_med DECIMAL(10,2);
ALTER TABLE inventory_snapshots ADD COLUMN IF NOT EXISTS prev_status VARCHAR(50);

-- Calculated deltas.
ALTER TABLE inventory_snapshots ADD COLUMN IF NOT EXISTS qty_delta INTEGER; -- negative = sold, positive = restocked
ALTER TABLE inventory_snapshots ADD COLUMN IF NOT EXISTS price_delta DECIMAL(10,2);

-- Change type flags.
ALTER TABLE inventory_snapshots ADD COLUMN IF NOT EXISTS change_type VARCHAR(50); -- 'sale', 'restock', 'price_change', 'oos', 'back_in_stock'

-- ============================================================
-- INDEX FOR CHANGE TYPE QUERIES
-- ============================================================
CREATE INDEX IF NOT EXISTS idx_snapshots_change_type ON inventory_snapshots(change_type);
-- Partial index: only rows where the quantity actually moved.
CREATE INDEX IF NOT EXISTS idx_snapshots_qty_delta ON inventory_snapshots(qty_delta) WHERE qty_delta != 0;
|
||||
|
||||
-- ============================================================
-- VIEW: Latest product state (for delta comparison)
-- ============================================================
-- Most recent snapshot per (dispensary, product), selected with
-- DISTINCT ON + captured_at DESC ordering.
CREATE OR REPLACE VIEW v_product_latest_state AS
SELECT DISTINCT ON (snap.dispensary_id, snap.product_id)
       snap.dispensary_id,
       snap.product_id,
       snap.quantity_available,
       snap.price_rec,
       snap.price_med,
       snap.status,
       snap.captured_at
FROM inventory_snapshots snap
ORDER BY snap.dispensary_id, snap.product_id, snap.captured_at DESC;
|
||||
|
||||
-- ============================================================
-- FUNCTION: Check if product state changed
-- ============================================================
-- Compares the incoming (quantity, prices, status) against the latest stored
-- snapshot for (p_dispensary_id, p_product_id) and returns:
--   should_capture : TRUE when a new snapshot row should be written
--   prev_*         : the previous values (NULL for a new product / no change)
--   qty_delta      : new - old quantity (negative = sold, positive = restocked)
--   price_delta    : new - old rec price
--   change_type    : 'new_product', 'sale', 'restock', 'oos', 'back_in_stock',
--                    'price_change', or 'status_change'
--
-- FIX: the original CASE tested the generic < / > quantity comparisons before
-- the zero-crossing checks, so a drop to 0 was always tagged 'sale' and a rise
-- from 0 always 'restock' — the 'oos' and 'back_in_stock' branches were
-- unreachable, which broke v_days_since_oos (filters change_type = 'oos') and
-- contradicted the documented change_type vocabulary. The zero-crossing
-- transitions are now classified first.
CREATE OR REPLACE FUNCTION should_capture_snapshot(
    p_dispensary_id INTEGER,
    p_product_id TEXT,
    p_quantity INTEGER,
    p_price_rec DECIMAL,
    p_price_med DECIMAL,
    p_status VARCHAR
) RETURNS TABLE (
    should_capture BOOLEAN,
    prev_quantity INTEGER,
    prev_price_rec DECIMAL,
    prev_price_med DECIMAL,
    prev_status VARCHAR,
    qty_delta INTEGER,
    price_delta DECIMAL,
    change_type VARCHAR
) AS $$
DECLARE
    v_prev RECORD;
BEGIN
    -- Get the previous state from the latest-snapshot view.
    SELECT
        ls.quantity_available,
        ls.price_rec,
        ls.price_med,
        ls.status
    INTO v_prev
    FROM v_product_latest_state ls
    WHERE ls.dispensary_id = p_dispensary_id
      AND ls.product_id = p_product_id;

    -- First time seeing this product: capture, with no previous values.
    IF NOT FOUND THEN
        RETURN QUERY SELECT
            TRUE,
            NULL::INTEGER,
            NULL::DECIMAL,
            NULL::DECIMAL,
            NULL::VARCHAR,
            NULL::INTEGER,
            NULL::DECIMAL,
            'new_product'::VARCHAR;
        RETURN;
    END IF;

    -- Capture only when at least one tracked field differs (NULL-safe).
    IF v_prev.quantity_available IS DISTINCT FROM p_quantity
       OR v_prev.price_rec IS DISTINCT FROM p_price_rec
       OR v_prev.price_med IS DISTINCT FROM p_price_med
       OR v_prev.status IS DISTINCT FROM p_status THEN

        RETURN QUERY SELECT
            TRUE,
            v_prev.quantity_available,
            v_prev.price_rec,
            v_prev.price_med,
            v_prev.status,
            COALESCE(p_quantity, 0) - COALESCE(v_prev.quantity_available, 0),
            COALESCE(p_price_rec, 0) - COALESCE(v_prev.price_rec, 0),
            CASE
                -- Zero-crossing transitions first, so they are not shadowed by
                -- the generic sale/restock comparisons below.
                WHEN p_quantity = 0 AND v_prev.quantity_available > 0 THEN 'oos'
                WHEN p_quantity > 0 AND v_prev.quantity_available = 0 THEN 'back_in_stock'
                WHEN COALESCE(p_quantity, 0) < COALESCE(v_prev.quantity_available, 0) THEN 'sale'
                WHEN COALESCE(p_quantity, 0) > COALESCE(v_prev.quantity_available, 0) THEN 'restock'
                WHEN p_price_rec IS DISTINCT FROM v_prev.price_rec THEN 'price_change'
                ELSE 'status_change'
            END;
        RETURN;
    END IF;

    -- No change: signal the caller to skip writing a snapshot row.
    RETURN QUERY SELECT
        FALSE,
        NULL::INTEGER,
        NULL::DECIMAL,
        NULL::DECIMAL,
        NULL::VARCHAR,
        NULL::INTEGER,
        NULL::DECIMAL,
        NULL::VARCHAR;
END;
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- ============================================================
-- REVENUE CALCULATION COLUMNS
-- ============================================================
-- Effective prices (sale price if on special, otherwise regular).
ALTER TABLE inventory_snapshots ADD COLUMN IF NOT EXISTS effective_price_rec DECIMAL(10,2);
ALTER TABLE inventory_snapshots ADD COLUMN IF NOT EXISTS effective_price_med DECIMAL(10,2);
ALTER TABLE inventory_snapshots ADD COLUMN IF NOT EXISTS is_on_special BOOLEAN DEFAULT FALSE;

-- Revenue by market type.
ALTER TABLE inventory_snapshots ADD COLUMN IF NOT EXISTS revenue_rec DECIMAL(10,2);
ALTER TABLE inventory_snapshots ADD COLUMN IF NOT EXISTS revenue_med DECIMAL(10,2);

-- FIX: combined revenue estimate. This column is referenced by v_hourly_sales,
-- v_daily_store_sales, v_daily_brand_sales, v_product_velocity and
-- v_promotion_effectiveness (and documented by the COMMENT later in this
-- migration), but was never created — those CREATE VIEW statements would fail.
ALTER TABLE inventory_snapshots ADD COLUMN IF NOT EXISTS revenue_estimate DECIMAL(10,2);

-- Time between snapshots (for velocity calculations).
ALTER TABLE inventory_snapshots ADD COLUMN IF NOT EXISTS time_since_last_snapshot INTERVAL;
ALTER TABLE inventory_snapshots ADD COLUMN IF NOT EXISTS hours_since_last DECIMAL(10,2);
|
||||
|
||||
-- ============================================================
-- VIEW: Hourly Sales Velocity
-- ============================================================
-- Sale events bucketed by store, date, and hour of day. The FILTER clauses
-- restrict each aggregate to rows where quantity actually decreased.
CREATE OR REPLACE VIEW v_hourly_sales AS
SELECT snap.dispensary_id,
       DATE(snap.captured_at)                AS sale_date,
       EXTRACT(HOUR FROM snap.captured_at)   AS sale_hour,
       COUNT(*)                        FILTER (WHERE snap.qty_delta < 0) AS transactions,
       SUM(ABS(snap.qty_delta))        FILTER (WHERE snap.qty_delta < 0) AS units_sold,
       SUM(snap.revenue_estimate)      FILTER (WHERE snap.qty_delta < 0) AS revenue,
       COUNT(DISTINCT snap.product_id) FILTER (WHERE snap.qty_delta < 0) AS unique_products_sold
FROM inventory_snapshots snap
WHERE snap.change_type = 'sale'
GROUP BY snap.dispensary_id, DATE(snap.captured_at), EXTRACT(HOUR FROM snap.captured_at);
|
||||
|
||||
-- ============================================================
-- VIEW: Daily Sales by Store
-- ============================================================
-- Daily units, revenue, and SKU breadth per dispensary, from sale events only.
CREATE OR REPLACE VIEW v_daily_store_sales AS
SELECT snap.dispensary_id,
       store.name                       AS store_name,
       store.state,
       DATE(snap.captured_at)           AS sale_date,
       SUM(ABS(snap.qty_delta))         AS units_sold,
       SUM(snap.revenue_estimate)       AS revenue,
       COUNT(*)                         AS sale_events,
       COUNT(DISTINCT snap.product_id)  AS unique_products
FROM inventory_snapshots snap
JOIN dispensaries store ON store.id = snap.dispensary_id
WHERE snap.change_type = 'sale'
GROUP BY snap.dispensary_id, store.name, store.state, DATE(snap.captured_at);
|
||||
|
||||
-- ============================================================
-- VIEW: Daily Sales by Brand
-- ============================================================
-- Daily brand performance per state: units, revenue, and store/SKU reach.
-- Rows without a brand are excluded.
CREATE OR REPLACE VIEW v_daily_brand_sales AS
SELECT snap.brand_name,
       store.state,
       DATE(snap.captured_at)              AS sale_date,
       SUM(ABS(snap.qty_delta))            AS units_sold,
       SUM(snap.revenue_estimate)          AS revenue,
       COUNT(DISTINCT snap.dispensary_id)  AS stores_with_sales,
       COUNT(DISTINCT snap.product_id)     AS unique_skus_sold
FROM inventory_snapshots snap
JOIN dispensaries store ON store.id = snap.dispensary_id
WHERE snap.change_type = 'sale'
  AND snap.brand_name IS NOT NULL
GROUP BY snap.brand_name, store.state, DATE(snap.captured_at);
|
||||
|
||||
-- ============================================================
-- VIEW: Product Velocity Rankings
-- ============================================================
-- Per (product, store) sales velocity over the trailing 30 days, bucketed into
-- hot/steady/slow/stale tiers by average daily units sold.
CREATE OR REPLACE VIEW v_product_velocity AS
SELECT
    s.product_id,
    s.brand_name,
    s.category,
    s.dispensary_id,
    d.name as store_name,
    d.state,
    SUM(ABS(s.qty_delta)) as units_sold_30d,
    SUM(s.revenue_estimate) as revenue_30d,
    COUNT(*) as sale_events,
    -- NULLIF guards against division by zero when there are no distinct sale dates.
    ROUND(SUM(ABS(s.qty_delta))::NUMERIC / NULLIF(COUNT(DISTINCT DATE(s.captured_at)), 0), 2) as avg_daily_units,
    ROUND(SUM(s.revenue_estimate) / NULLIF(COUNT(DISTINCT DATE(s.captured_at)), 0), 2) as avg_daily_revenue,
    -- Tier thresholds (avg units/day): >=10 hot, >=3 steady, >=1 slow, else stale.
    -- Branch order matters: the first matching threshold wins.
    CASE
        WHEN SUM(ABS(s.qty_delta)) / NULLIF(COUNT(DISTINCT DATE(s.captured_at)), 0) >= 10 THEN 'hot'
        WHEN SUM(ABS(s.qty_delta)) / NULLIF(COUNT(DISTINCT DATE(s.captured_at)), 0) >= 3 THEN 'steady'
        WHEN SUM(ABS(s.qty_delta)) / NULLIF(COUNT(DISTINCT DATE(s.captured_at)), 0) >= 1 THEN 'slow'
        ELSE 'stale'
    END as velocity_tier
FROM inventory_snapshots s
JOIN dispensaries d ON d.id = s.dispensary_id
WHERE s.change_type = 'sale'
  AND s.captured_at >= NOW() - INTERVAL '30 days'
GROUP BY s.product_id, s.brand_name, s.category, s.dispensary_id, d.name, d.state;
|
||||
|
||||
-- ============================================================
-- VIEW: Busiest Hours by Store
-- ============================================================
-- Aggregates v_hourly_sales across days and ranks each hour-of-day within
-- each store by average revenue (rank 1 = busiest hour).
CREATE OR REPLACE VIEW v_busiest_hours AS
SELECT hs.dispensary_id,
       hs.sale_hour,
       AVG(hs.units_sold)  AS avg_units_per_hour,
       AVG(hs.revenue)     AS avg_revenue_per_hour,
       SUM(hs.units_sold)  AS total_units,
       SUM(hs.revenue)     AS total_revenue,
       COUNT(*)            AS days_with_data,
       RANK() OVER (
           PARTITION BY hs.dispensary_id
           ORDER BY AVG(hs.revenue) DESC
       ) AS hour_rank
FROM v_hourly_sales hs
GROUP BY hs.dispensary_id, hs.sale_hour;
|
||||
|
||||
-- ============================================================
-- VIEW: Promotion Effectiveness (compare sale vs non-sale prices)
-- ============================================================
-- Splits each day's sold units/revenue into "discounted" (current rec price
-- below the previous price) vs "full price". The COALESCE in the full-price
-- FILTER treats rows with no previous price as full-price sales; note that a
-- NULL prev_price_rec makes the discounted FILTER predicate NULL (excluded).
CREATE OR REPLACE VIEW v_promotion_effectiveness AS
SELECT
    s.dispensary_id,
    d.name as store_name,
    s.product_id,
    s.brand_name,
    DATE(s.captured_at) as sale_date,
    SUM(ABS(s.qty_delta)) FILTER (WHERE s.price_rec < s.prev_price_rec) as units_on_discount,
    SUM(ABS(s.qty_delta)) FILTER (WHERE s.price_rec >= COALESCE(s.prev_price_rec, s.price_rec)) as units_full_price,
    SUM(s.revenue_estimate) FILTER (WHERE s.price_rec < s.prev_price_rec) as revenue_discounted,
    SUM(s.revenue_estimate) FILTER (WHERE s.price_rec >= COALESCE(s.prev_price_rec, s.price_rec)) as revenue_full_price
FROM inventory_snapshots s
JOIN dispensaries d ON d.id = s.dispensary_id
WHERE s.change_type = 'sale'
GROUP BY s.dispensary_id, d.name, s.product_id, s.brand_name, DATE(s.captured_at);
|
||||
|
||||
-- ============================================================
-- COMMENTS
-- ============================================================
-- Catalog documentation for the delta-tracking columns, function, and views.
COMMENT ON COLUMN inventory_snapshots.qty_delta IS 'Quantity change: negative=sold, positive=restocked';
COMMENT ON COLUMN inventory_snapshots.revenue_estimate IS 'Estimated revenue: ABS(qty_delta) * price_rec when qty_delta < 0';
COMMENT ON COLUMN inventory_snapshots.change_type IS 'Type of change: sale, restock, price_change, oos, back_in_stock, new_product';
COMMENT ON FUNCTION should_capture_snapshot IS 'Returns whether a snapshot should be captured and delta values';
COMMENT ON VIEW v_hourly_sales IS 'Sales aggregated by hour - find busiest times';
COMMENT ON VIEW v_daily_store_sales IS 'Daily revenue by store';
COMMENT ON VIEW v_daily_brand_sales IS 'Daily brand performance by state';
COMMENT ON VIEW v_product_velocity IS 'Product sales velocity rankings (hot/steady/slow/stale)';
COMMENT ON VIEW v_busiest_hours IS 'Rank hours by sales volume per store';
|
||||
|
||||
-- ============================================================
-- VIEW: Days Until Stock Out (Predictive)
-- ============================================================
-- Projects days of stock remaining as current_qty / daily_velocity, where
-- velocity is average units sold per sale-day over the last 7 days.
-- Only products currently in stock with nonzero velocity are listed.
CREATE OR REPLACE VIEW v_stock_out_prediction AS
WITH velocity AS (
    -- Per (store, product): average units sold per distinct sale date.
    SELECT
        dispensary_id,
        product_id,
        brand_name,
        -- Average units sold per day (last 7 days); NULLIF guards div-by-zero.
        ROUND(SUM(ABS(qty_delta))::NUMERIC / NULLIF(COUNT(DISTINCT DATE(captured_at)), 0), 2) as daily_velocity,
        -- Hours between sales (currently unused in the outer query).
        AVG(hours_since_last) FILTER (WHERE qty_delta < 0) as avg_hours_between_sales
    FROM inventory_snapshots
    WHERE change_type = 'sale'
      AND captured_at >= NOW() - INTERVAL '7 days'
    GROUP BY dispensary_id, product_id, brand_name
),
current_stock AS (
    -- Latest known quantity per (store, product).
    SELECT DISTINCT ON (dispensary_id, product_id)
        dispensary_id,
        product_id,
        quantity_available as current_qty,
        captured_at as last_seen
    FROM inventory_snapshots
    ORDER BY dispensary_id, product_id, captured_at DESC
)
SELECT
    cs.dispensary_id,
    d.name as store_name,
    cs.product_id,
    v.brand_name,
    cs.current_qty,
    v.daily_velocity,
    CASE
        WHEN v.daily_velocity > 0 THEN ROUND(cs.current_qty / v.daily_velocity, 1)
        ELSE NULL
    END as days_until_stock_out,
    -- Health buckets by projected days of cover: <=3 critical, <=7 low,
    -- <=14 moderate, else healthy. Branch order matters.
    CASE
        WHEN v.daily_velocity > 0 AND cs.current_qty / v.daily_velocity <= 3 THEN 'critical'
        WHEN v.daily_velocity > 0 AND cs.current_qty / v.daily_velocity <= 7 THEN 'low'
        WHEN v.daily_velocity > 0 AND cs.current_qty / v.daily_velocity <= 14 THEN 'moderate'
        ELSE 'healthy'
    END as stock_health,
    cs.last_seen
FROM current_stock cs
JOIN velocity v ON v.dispensary_id = cs.dispensary_id AND v.product_id = cs.product_id
JOIN dispensaries d ON d.id = cs.dispensary_id
WHERE cs.current_qty > 0
  AND v.daily_velocity > 0;
|
||||
|
||||
-- ============================================================
-- VIEW: Days Since OOS (for products currently out of stock)
-- ============================================================
-- Every 'oos' event that has not been followed by a 'back_in_stock' event for
-- the same (store, product) — i.e. products still out of stock — together with
-- how many days they have been out.
CREATE OR REPLACE VIEW v_days_since_oos AS
SELECT oos.dispensary_id,
       store.name                                              AS store_name,
       oos.product_id,
       oos.brand_name,
       oos.captured_at                                         AS went_oos_at,
       EXTRACT(EPOCH FROM (NOW() - oos.captured_at)) / 86400   AS days_since_oos,
       oos.prev_quantity                                       AS last_known_qty
FROM inventory_snapshots oos
JOIN dispensaries store ON store.id = oos.dispensary_id
WHERE oos.change_type = 'oos'
  AND NOT EXISTS (
      -- Still OOS: no later back_in_stock event for this store/product.
      SELECT 1
      FROM inventory_snapshots restock
      WHERE restock.dispensary_id = oos.dispensary_id
        AND restock.product_id = oos.product_id
        AND restock.change_type = 'back_in_stock'
        AND restock.captured_at > oos.captured_at
  );
|
||||
|
||||
-- ============================================================
-- VIEW: Brand Variant Counts (track brand growth)
-- ============================================================
-- Per (brand, state): active vs inactive SKU counts, store reach, category
-- breadth, and first/last sighting timestamps.
CREATE OR REPLACE VIEW v_brand_variants AS
SELECT prod.brand_name_raw                    AS brand_name,
       disp.state,
       COUNT(DISTINCT prod.id)                AS total_variants,
       COUNT(DISTINCT prod.id) FILTER (WHERE prod.is_in_stock = TRUE)  AS active_variants,
       COUNT(DISTINCT prod.id) FILTER (WHERE prod.is_in_stock = FALSE) AS inactive_variants,
       COUNT(DISTINCT prod.dispensary_id)     AS stores_carrying,
       COUNT(DISTINCT prod.category_raw)      AS categories,
       MIN(prod.first_seen_at)                AS brand_first_seen,
       MAX(prod.last_seen_at)                 AS brand_last_seen
FROM store_products prod
JOIN dispensaries disp ON disp.id = prod.dispensary_id
WHERE prod.brand_name_raw IS NOT NULL
GROUP BY prod.brand_name_raw, disp.state;
|
||||
|
||||
-- ============================================================
-- VIEW: Brand Growth (compare variant counts over time)
-- ============================================================
-- Week-over-week change in distinct variant counts per brand over the last
-- 90 days. Each week is self-joined to the previous week; a brand with no
-- prior-week row is tagged 'new'.
CREATE OR REPLACE VIEW v_brand_growth AS
WITH weekly_counts AS (
    -- Distinct variants seen per brand per calendar week.
    SELECT
        brand_name_raw as brand_name,
        DATE_TRUNC('week', last_seen_at) as week,
        COUNT(DISTINCT id) as variant_count
    FROM store_products
    WHERE brand_name_raw IS NOT NULL
      AND last_seen_at >= NOW() - INTERVAL '90 days'
    GROUP BY brand_name_raw, DATE_TRUNC('week', last_seen_at)
)
SELECT
    w1.brand_name,
    w1.week as current_week,
    w1.variant_count as current_variants,
    w2.variant_count as prev_week_variants,
    -- COALESCE: a brand with no prior week counts its full total as growth.
    w1.variant_count - COALESCE(w2.variant_count, 0) as variant_change,
    CASE
        WHEN w2.variant_count IS NULL THEN 'new'
        WHEN w1.variant_count > w2.variant_count THEN 'growing'
        WHEN w1.variant_count < w2.variant_count THEN 'declining'
        ELSE 'stable'
    END as growth_status
FROM weekly_counts w1
LEFT JOIN weekly_counts w2
    ON w2.brand_name = w1.brand_name
    AND w2.week = w1.week - INTERVAL '1 week'
ORDER BY w1.brand_name, w1.week DESC;

COMMENT ON VIEW v_stock_out_prediction IS 'Predict days until stock out based on velocity';
COMMENT ON VIEW v_days_since_oos IS 'Products currently OOS and how long they have been out';
COMMENT ON VIEW v_brand_variants IS 'Active vs inactive SKU counts per brand per state';
COMMENT ON VIEW v_brand_growth IS 'Week-over-week brand variant growth tracking';
||||
@@ -1,53 +0,0 @@
|
||||
-- Migration 126: Set AZ stores to 5-minute high-frequency crawls
-- Other states default to 60-minute (1 hour) intervals.

-- ============================================================
-- SET AZ STORES TO 5-MINUTE INTERVALS
-- ============================================================
-- Base interval: 5 minutes.
-- NOTE(review): the original comments claimed "+/- 3 minutes jitter (2-8
-- minute effective range)", but the code only staggers the INITIAL
-- next_crawl_at uniformly over [0, interval); any per-crawl jitter would have
-- to come from the scheduler — confirm against the crawler code.
UPDATE dispensaries
SET
    crawl_interval_minutes = 5,
    next_crawl_at = NOW() + (RANDOM() * INTERVAL '5 minutes') -- Stagger initial crawls
WHERE state = 'AZ'
  AND crawl_enabled = TRUE;

-- ============================================================
-- SET OTHER STATES TO 60-MINUTE INTERVALS
-- ============================================================
-- Only fills in stores with no interval yet, so previously tuned stores
-- outside AZ keep their configured cadence.
UPDATE dispensaries
SET
    crawl_interval_minutes = 60,
    next_crawl_at = NOW() + (RANDOM() * INTERVAL '60 minutes') -- Stagger initial crawls
WHERE state != 'AZ'
  AND crawl_enabled = TRUE
  AND crawl_interval_minutes IS NULL;
|
||||
|
||||
-- ============================================================
-- VERIFY RESULTS (run manually)
-- ============================================================
-- SELECT state, crawl_interval_minutes, COUNT(*)
-- FROM dispensaries
-- WHERE crawl_enabled = TRUE
-- GROUP BY state, crawl_interval_minutes
-- ORDER BY state;

-- ============================================================
-- CREATE VIEW FOR MONITORING CRAWL LOAD
-- ============================================================
-- Estimates crawls/hour and worker capacity per (state, interval) bucket.
-- COALESCE treats a NULL interval as the 60-minute default.
CREATE OR REPLACE VIEW v_crawl_load AS
SELECT
    state,
    crawl_interval_minutes,
    COUNT(*) as store_count,
    -- Crawls per hour = stores * (60 / interval)
    ROUND(COUNT(*) * (60.0 / COALESCE(crawl_interval_minutes, 60))) as crawls_per_hour,
    -- Assuming 30 sec per crawl, one worker handles 120 crawls/hour
    ROUND(COUNT(*) * (60.0 / COALESCE(crawl_interval_minutes, 60)) / 120, 1) as workers_needed
FROM dispensaries
WHERE crawl_enabled = TRUE
GROUP BY state, crawl_interval_minutes
ORDER BY crawls_per_hour DESC;

COMMENT ON VIEW v_crawl_load IS 'Monitor crawl load by state and interval';
|
||||
@@ -1,164 +0,0 @@
|
||||
-- Migration 127: Fix worker task concurrency limit
|
||||
-- Problem: claim_task function checks session_task_count but never increments it
|
||||
-- Solution: Increment on claim, decrement on complete/fail/release
|
||||
|
||||
-- =============================================================================
|
||||
-- STEP 1: Set max tasks to 5 for all workers
|
||||
-- =============================================================================
|
||||
UPDATE worker_registry SET session_max_tasks = 5;
|
||||
|
||||
-- Set default to 5 for new workers
|
||||
ALTER TABLE worker_registry ALTER COLUMN session_max_tasks SET DEFAULT 5;
|
||||
|
||||
-- =============================================================================
|
||||
-- STEP 2: Reset all session_task_count to match actual active tasks
|
||||
-- =============================================================================
|
||||
UPDATE worker_registry wr SET session_task_count = (
|
||||
SELECT COUNT(*) FROM worker_tasks wt
|
||||
WHERE wt.worker_id = wr.worker_id
|
||||
AND wt.status IN ('claimed', 'running')
|
||||
);
|
||||
|
||||
-- =============================================================================
|
||||
-- STEP 3: Update claim_task function to increment session_task_count
|
||||
-- =============================================================================
|
||||
CREATE OR REPLACE FUNCTION claim_task(
|
||||
p_role VARCHAR(50),
|
||||
p_worker_id VARCHAR(100),
|
||||
p_curl_passed BOOLEAN DEFAULT TRUE,
|
||||
p_http_passed BOOLEAN DEFAULT FALSE
|
||||
) RETURNS worker_tasks AS $$
|
||||
DECLARE
|
||||
claimed_task worker_tasks;
|
||||
worker_state VARCHAR(2);
|
||||
session_valid BOOLEAN;
|
||||
session_tasks INT;
|
||||
max_tasks INT;
|
||||
BEGIN
|
||||
-- Get worker's current geo session info
|
||||
SELECT
|
||||
current_state,
|
||||
session_task_count,
|
||||
session_max_tasks,
|
||||
(geo_session_started_at IS NOT NULL AND geo_session_started_at > NOW() - INTERVAL '60 minutes')
|
||||
INTO worker_state, session_tasks, max_tasks, session_valid
|
||||
FROM worker_registry
|
||||
WHERE worker_id = p_worker_id;
|
||||
|
||||
-- Check if worker has reached max concurrent tasks (default 5)
|
||||
IF session_tasks >= COALESCE(max_tasks, 5) THEN
|
||||
RETURN NULL;
|
||||
END IF;
|
||||
|
||||
-- If no valid geo session, or session expired, worker can't claim tasks
|
||||
-- Worker must re-qualify first
|
||||
IF worker_state IS NULL OR NOT session_valid THEN
|
||||
RETURN NULL;
|
||||
END IF;
|
||||
|
||||
-- Claim task matching worker's state
|
||||
UPDATE worker_tasks
|
||||
SET
|
||||
status = 'claimed',
|
||||
worker_id = p_worker_id,
|
||||
claimed_at = NOW(),
|
||||
updated_at = NOW()
|
||||
WHERE id = (
|
||||
SELECT wt.id FROM worker_tasks wt
|
||||
JOIN dispensaries d ON wt.dispensary_id = d.id
|
||||
WHERE wt.role = p_role
|
||||
AND wt.status = 'pending'
|
||||
AND (wt.scheduled_for IS NULL OR wt.scheduled_for <= NOW())
|
||||
-- GEO FILTER: Task's dispensary must match worker's state
|
||||
AND d.state = worker_state
|
||||
-- Method compatibility: worker must have passed the required preflight
|
||||
AND (
|
||||
wt.method IS NULL -- No preference, any worker can claim
|
||||
OR (wt.method = 'curl' AND p_curl_passed = TRUE)
|
||||
OR (wt.method = 'http' AND p_http_passed = TRUE)
|
||||
)
|
||||
-- Exclude stores that already have an active task
|
||||
AND (wt.dispensary_id IS NULL OR wt.dispensary_id NOT IN (
|
||||
SELECT dispensary_id FROM worker_tasks
|
||||
WHERE status IN ('claimed', 'running')
|
||||
AND dispensary_id IS NOT NULL
|
||||
AND dispensary_id != wt.dispensary_id
|
||||
))
|
||||
ORDER BY wt.priority DESC, wt.created_at ASC
|
||||
LIMIT 1
|
||||
FOR UPDATE SKIP LOCKED
|
||||
)
|
||||
RETURNING * INTO claimed_task;
|
||||
|
||||
-- INCREMENT session_task_count if we claimed a task
|
||||
IF claimed_task.id IS NOT NULL THEN
|
||||
UPDATE worker_registry
|
||||
SET session_task_count = session_task_count + 1
|
||||
WHERE worker_id = p_worker_id;
|
||||
END IF;
|
||||
|
||||
RETURN claimed_task;
|
||||
END;
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- =============================================================================
|
||||
-- STEP 4: Create trigger to decrement on task completion/failure/release
|
||||
-- =============================================================================
|
||||
CREATE OR REPLACE FUNCTION decrement_worker_task_count()
|
||||
RETURNS TRIGGER AS $$
|
||||
BEGIN
|
||||
-- Only decrement when task was assigned to a worker and is now complete/released
|
||||
IF OLD.worker_id IS NOT NULL AND OLD.status IN ('claimed', 'running') THEN
|
||||
-- Task completed/failed/released - decrement count
|
||||
IF NEW.status IN ('pending', 'completed', 'failed') OR NEW.worker_id IS NULL THEN
|
||||
UPDATE worker_registry
|
||||
SET session_task_count = GREATEST(0, session_task_count - 1)
|
||||
WHERE worker_id = OLD.worker_id;
|
||||
END IF;
|
||||
END IF;
|
||||
|
||||
RETURN NEW;
|
||||
END;
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- Drop existing trigger if any
|
||||
DROP TRIGGER IF EXISTS trg_decrement_worker_task_count ON worker_tasks;
|
||||
|
||||
-- Create trigger on UPDATE (status change or worker_id cleared)
|
||||
CREATE TRIGGER trg_decrement_worker_task_count
|
||||
AFTER UPDATE ON worker_tasks
|
||||
FOR EACH ROW
|
||||
EXECUTE FUNCTION decrement_worker_task_count();
|
||||
|
||||
-- Also handle DELETE (completed tasks are deleted from pool)
|
||||
CREATE OR REPLACE FUNCTION decrement_worker_task_count_delete()
|
||||
RETURNS TRIGGER AS $$
|
||||
BEGIN
|
||||
IF OLD.worker_id IS NOT NULL AND OLD.status IN ('claimed', 'running') THEN
|
||||
UPDATE worker_registry
|
||||
SET session_task_count = GREATEST(0, session_task_count - 1)
|
||||
WHERE worker_id = OLD.worker_id;
|
||||
END IF;
|
||||
RETURN OLD;
|
||||
END;
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
DROP TRIGGER IF EXISTS trg_decrement_worker_task_count_delete ON worker_tasks;
|
||||
|
||||
CREATE TRIGGER trg_decrement_worker_task_count_delete
|
||||
AFTER DELETE ON worker_tasks
|
||||
FOR EACH ROW
|
||||
EXECUTE FUNCTION decrement_worker_task_count_delete();
|
||||
|
||||
-- =============================================================================
|
||||
-- STEP 5: Verify current state
|
||||
-- =============================================================================
|
||||
SELECT
|
||||
wr.worker_id,
|
||||
wr.friendly_name,
|
||||
wr.session_task_count,
|
||||
wr.session_max_tasks,
|
||||
(SELECT COUNT(*) FROM worker_tasks wt WHERE wt.worker_id = wr.worker_id AND wt.status IN ('claimed', 'running')) as actual_count
|
||||
FROM worker_registry wr
|
||||
WHERE wr.status = 'active'
|
||||
ORDER BY wr.friendly_name;
|
||||
@@ -1,109 +0,0 @@
|
||||
-- Migration 128: Pool configuration table
|
||||
-- Controls whether workers can claim tasks from the pool
|
||||
|
||||
CREATE TABLE IF NOT EXISTS pool_config (
|
||||
id SERIAL PRIMARY KEY,
|
||||
pool_open BOOLEAN NOT NULL DEFAULT true,
|
||||
closed_reason TEXT,
|
||||
closed_at TIMESTAMPTZ,
|
||||
closed_by VARCHAR(100),
|
||||
opened_at TIMESTAMPTZ DEFAULT NOW(),
|
||||
created_at TIMESTAMPTZ DEFAULT NOW(),
|
||||
updated_at TIMESTAMPTZ DEFAULT NOW()
|
||||
);
|
||||
|
||||
-- Insert default config (pool open)
|
||||
INSERT INTO pool_config (pool_open, opened_at)
|
||||
VALUES (true, NOW())
|
||||
ON CONFLICT DO NOTHING;
|
||||
|
||||
-- Update claim_task function to check pool status
|
||||
CREATE OR REPLACE FUNCTION claim_task(
|
||||
p_role VARCHAR(50),
|
||||
p_worker_id VARCHAR(100),
|
||||
p_curl_passed BOOLEAN DEFAULT TRUE,
|
||||
p_http_passed BOOLEAN DEFAULT FALSE
|
||||
) RETURNS worker_tasks AS $$
|
||||
DECLARE
|
||||
claimed_task worker_tasks;
|
||||
worker_state VARCHAR(2);
|
||||
session_valid BOOLEAN;
|
||||
session_tasks INT;
|
||||
max_tasks INT;
|
||||
is_pool_open BOOLEAN;
|
||||
BEGIN
|
||||
-- Check if pool is open
|
||||
SELECT pool_open INTO is_pool_open FROM pool_config LIMIT 1;
|
||||
IF NOT COALESCE(is_pool_open, true) THEN
|
||||
RETURN NULL; -- Pool is closed, no claiming allowed
|
||||
END IF;
|
||||
|
||||
-- Get worker's current geo session info
|
||||
SELECT
|
||||
current_state,
|
||||
session_task_count,
|
||||
session_max_tasks,
|
||||
(geo_session_started_at IS NOT NULL AND geo_session_started_at > NOW() - INTERVAL '60 minutes')
|
||||
INTO worker_state, session_tasks, max_tasks, session_valid
|
||||
FROM worker_registry
|
||||
WHERE worker_id = p_worker_id;
|
||||
|
||||
-- Check if worker has reached max concurrent tasks (default 5)
|
||||
IF session_tasks >= COALESCE(max_tasks, 5) THEN
|
||||
RETURN NULL;
|
||||
END IF;
|
||||
|
||||
-- If no valid geo session, or session expired, worker can't claim tasks
|
||||
-- Worker must re-qualify first
|
||||
IF worker_state IS NULL OR NOT session_valid THEN
|
||||
RETURN NULL;
|
||||
END IF;
|
||||
|
||||
-- Claim task matching worker's state
|
||||
UPDATE worker_tasks
|
||||
SET
|
||||
status = 'claimed',
|
||||
worker_id = p_worker_id,
|
||||
claimed_at = NOW(),
|
||||
updated_at = NOW()
|
||||
WHERE id = (
|
||||
SELECT wt.id FROM worker_tasks wt
|
||||
JOIN dispensaries d ON wt.dispensary_id = d.id
|
||||
WHERE wt.role = p_role
|
||||
AND wt.status = 'pending'
|
||||
AND (wt.scheduled_for IS NULL OR wt.scheduled_for <= NOW())
|
||||
-- GEO FILTER: Task's dispensary must match worker's state
|
||||
AND d.state = worker_state
|
||||
-- Method compatibility: worker must have passed the required preflight
|
||||
AND (
|
||||
wt.method IS NULL -- No preference, any worker can claim
|
||||
OR (wt.method = 'curl' AND p_curl_passed = TRUE)
|
||||
OR (wt.method = 'http' AND p_http_passed = TRUE)
|
||||
)
|
||||
-- Exclude stores that already have an active task
|
||||
AND (wt.dispensary_id IS NULL OR wt.dispensary_id NOT IN (
|
||||
SELECT dispensary_id FROM worker_tasks
|
||||
WHERE status IN ('claimed', 'running')
|
||||
AND dispensary_id IS NOT NULL
|
||||
AND dispensary_id != wt.dispensary_id
|
||||
))
|
||||
ORDER BY wt.priority DESC, wt.created_at ASC
|
||||
LIMIT 1
|
||||
FOR UPDATE SKIP LOCKED
|
||||
)
|
||||
RETURNING * INTO claimed_task;
|
||||
|
||||
-- INCREMENT session_task_count if we claimed a task
|
||||
IF claimed_task.id IS NOT NULL THEN
|
||||
UPDATE worker_registry
|
||||
SET session_task_count = session_task_count + 1
|
||||
WHERE worker_id = p_worker_id;
|
||||
END IF;
|
||||
|
||||
RETURN claimed_task;
|
||||
END;
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- Verify
|
||||
SELECT 'pool_config table created' as status;
|
||||
SELECT * FROM pool_config;
|
||||
@@ -1,60 +0,0 @@
|
||||
-- Migration 129: Claim tasks for specific geo
|
||||
-- Used after worker gets IP to claim more tasks for same geo
|
||||
|
||||
-- Function: Claim up to N tasks for a SPECIFIC geo (state/city)
|
||||
-- Different from claim_tasks_batch which picks the geo with most tasks
|
||||
CREATE OR REPLACE FUNCTION claim_tasks_batch_for_geo(
|
||||
p_worker_id VARCHAR(255),
|
||||
p_max_tasks INTEGER DEFAULT 4,
|
||||
p_state_code VARCHAR(2),
|
||||
p_city VARCHAR(100) DEFAULT NULL,
|
||||
p_role VARCHAR(50) DEFAULT NULL
|
||||
) RETURNS TABLE (
|
||||
task_id INTEGER,
|
||||
role VARCHAR(50),
|
||||
dispensary_id INTEGER,
|
||||
dispensary_name VARCHAR(255),
|
||||
city VARCHAR(100),
|
||||
state_code VARCHAR(2),
|
||||
platform VARCHAR(50),
|
||||
method VARCHAR(20)
|
||||
) AS $$
|
||||
BEGIN
|
||||
-- Claim up to p_max_tasks for the specified geo
|
||||
RETURN QUERY
|
||||
WITH claimed AS (
|
||||
UPDATE worker_tasks t SET
|
||||
status = 'claimed',
|
||||
worker_id = p_worker_id,
|
||||
claimed_at = NOW()
|
||||
FROM (
|
||||
SELECT t2.id
|
||||
FROM worker_tasks t2
|
||||
JOIN dispensaries d ON t2.dispensary_id = d.id
|
||||
WHERE t2.status = 'pending'
|
||||
AND d.state = p_state_code
|
||||
AND (p_city IS NULL OR d.city = p_city)
|
||||
AND (p_role IS NULL OR t2.role = p_role)
|
||||
ORDER BY t2.priority DESC, t2.created_at ASC
|
||||
FOR UPDATE SKIP LOCKED
|
||||
LIMIT p_max_tasks
|
||||
) sub
|
||||
WHERE t.id = sub.id
|
||||
RETURNING t.id, t.role, t.dispensary_id, t.method
|
||||
)
|
||||
SELECT
|
||||
c.id as task_id,
|
||||
c.role,
|
||||
c.dispensary_id,
|
||||
d.name as dispensary_name,
|
||||
d.city,
|
||||
d.state as state_code,
|
||||
d.platform,
|
||||
c.method
|
||||
FROM claimed c
|
||||
JOIN dispensaries d ON c.dispensary_id = d.id;
|
||||
END;
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- Verify
|
||||
SELECT 'claim_tasks_batch_for_geo function created' as status;
|
||||
@@ -1,49 +0,0 @@
|
||||
-- Hoodie Comparison Reports
|
||||
-- Stores delta results from comparing Hoodie data against CannaIQ
|
||||
-- Raw Hoodie data stays remote (proxy only) - we only store comparison results
|
||||
|
||||
CREATE TABLE IF NOT EXISTS hoodie_comparison_reports (
|
||||
id SERIAL PRIMARY KEY,
|
||||
report_type VARCHAR(50) NOT NULL, -- 'dispensaries', 'brands', 'products'
|
||||
state VARCHAR(50) NOT NULL,
|
||||
|
||||
-- Counts
|
||||
hoodie_total INT NOT NULL DEFAULT 0,
|
||||
cannaiq_total INT NOT NULL DEFAULT 0,
|
||||
in_both INT NOT NULL DEFAULT 0,
|
||||
hoodie_only INT NOT NULL DEFAULT 0,
|
||||
cannaiq_only INT NOT NULL DEFAULT 0,
|
||||
|
||||
-- Delta details (JSONB for flexibility)
|
||||
hoodie_only_items JSONB DEFAULT '[]', -- Items in Hoodie but not CannaIQ
|
||||
cannaiq_only_items JSONB DEFAULT '[]', -- Items in CannaIQ but not Hoodie
|
||||
matched_items JSONB DEFAULT '[]', -- Items in both (with any differences)
|
||||
|
||||
-- Metadata
|
||||
created_at TIMESTAMPTZ DEFAULT NOW(),
|
||||
duration_ms INT, -- How long the comparison took
|
||||
error TEXT -- Any errors during comparison
|
||||
);
|
||||
|
||||
-- Index for querying latest reports
|
||||
CREATE INDEX idx_hoodie_reports_type_state ON hoodie_comparison_reports(report_type, state, created_at DESC);
|
||||
CREATE INDEX idx_hoodie_reports_created ON hoodie_comparison_reports(created_at DESC);
|
||||
|
||||
-- View for latest report per type/state
|
||||
CREATE OR REPLACE VIEW v_hoodie_latest_reports AS
|
||||
SELECT DISTINCT ON (report_type, state)
|
||||
id,
|
||||
report_type,
|
||||
state,
|
||||
hoodie_total,
|
||||
cannaiq_total,
|
||||
in_both,
|
||||
hoodie_only,
|
||||
cannaiq_only,
|
||||
created_at,
|
||||
duration_ms
|
||||
FROM hoodie_comparison_reports
|
||||
WHERE error IS NULL
|
||||
ORDER BY report_type, state, created_at DESC;
|
||||
|
||||
COMMENT ON TABLE hoodie_comparison_reports IS 'Stores comparison results between Hoodie and CannaIQ data. Raw Hoodie data stays remote.';
|
||||
@@ -1,53 +0,0 @@
|
||||
-- Migration 130: Worker qualification badge
|
||||
-- Session-scoped badge showing worker qualification status
|
||||
|
||||
-- Add badge column to worker_registry
|
||||
ALTER TABLE worker_registry
|
||||
ADD COLUMN IF NOT EXISTS badge VARCHAR(20) DEFAULT NULL;
|
||||
|
||||
-- Add qualified_at timestamp
|
||||
ALTER TABLE worker_registry
|
||||
ADD COLUMN IF NOT EXISTS qualified_at TIMESTAMPTZ DEFAULT NULL;
|
||||
|
||||
-- Add current_session_id to link worker to their active session
|
||||
ALTER TABLE worker_registry
|
||||
ADD COLUMN IF NOT EXISTS current_session_id INTEGER DEFAULT NULL;
|
||||
|
||||
-- Badge values:
|
||||
-- 'gold' = preflight passed, actively qualified with valid session
|
||||
-- NULL = not qualified (no active session or session expired)
|
||||
|
||||
-- Function: Set worker badge to gold when qualified
|
||||
CREATE OR REPLACE FUNCTION set_worker_qualified(
|
||||
p_worker_id VARCHAR(255),
|
||||
p_session_id INTEGER
|
||||
) RETURNS BOOLEAN AS $$
|
||||
BEGIN
|
||||
UPDATE worker_registry
|
||||
SET badge = 'gold',
|
||||
qualified_at = NOW(),
|
||||
current_session_id = p_session_id
|
||||
WHERE worker_id = p_worker_id;
|
||||
RETURN FOUND;
|
||||
END;
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- Function: Clear worker badge when session ends
|
||||
CREATE OR REPLACE FUNCTION clear_worker_badge(p_worker_id VARCHAR(255))
|
||||
RETURNS BOOLEAN AS $$
|
||||
BEGIN
|
||||
UPDATE worker_registry
|
||||
SET badge = NULL,
|
||||
qualified_at = NULL,
|
||||
current_session_id = NULL
|
||||
WHERE worker_id = p_worker_id;
|
||||
RETURN FOUND;
|
||||
END;
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- Index for finding qualified workers
|
||||
CREATE INDEX IF NOT EXISTS idx_worker_registry_badge
|
||||
ON worker_registry(badge) WHERE badge IS NOT NULL;
|
||||
|
||||
-- Verify
|
||||
SELECT 'worker_registry badge column added' as status;
|
||||
@@ -1,21 +0,0 @@
|
||||
-- Migration: 131_normalize_brand
|
||||
-- Purpose: Add normalize_brand() function for fuzzy brand matching across dispensaries
|
||||
-- Used by Cannabrands integration to match brand names regardless of spelling variations
|
||||
|
||||
-- Function to normalize brand names for matching
|
||||
-- "Aloha TymeMachine" → "alohatymemachine"
|
||||
-- "ALOHA TYME MACHINE" → "alohatymemachine"
|
||||
-- "Aloha Tyme Machine" → "alohatymemachine"
|
||||
CREATE OR REPLACE FUNCTION normalize_brand(name TEXT)
|
||||
RETURNS TEXT AS $$
|
||||
SELECT LOWER(REGEXP_REPLACE(COALESCE(name, ''), '[^a-zA-Z0-9]', '', 'g'))
|
||||
$$ LANGUAGE SQL IMMUTABLE PARALLEL SAFE;
|
||||
|
||||
-- Create functional index for efficient lookups
|
||||
-- This allows queries like: WHERE normalize_brand(brand_name_raw) = 'alohatymemachine'
|
||||
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_store_products_brand_normalized
|
||||
ON store_products (normalize_brand(brand_name_raw));
|
||||
|
||||
-- Also index on snapshots table for historical queries
|
||||
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_store_product_snapshots_brand_normalized
|
||||
ON store_product_snapshots (normalize_brand(brand_name_raw));
|
||||
1989
backend/node_modules/.package-lock.json
generated
vendored
1989
backend/node_modules/.package-lock.json
generated
vendored
File diff suppressed because it is too large
Load Diff
1995
backend/package-lock.json
generated
1995
backend/package-lock.json
generated
File diff suppressed because it is too large
Load Diff
@@ -22,10 +22,8 @@
|
||||
"seed:dt:cities:bulk": "tsx src/scripts/seed-dt-cities-bulk.ts"
|
||||
},
|
||||
"dependencies": {
|
||||
"@aws-sdk/client-s3": "^3.953.0",
|
||||
"@kubernetes/client-node": "^1.4.0",
|
||||
"@types/bcryptjs": "^3.0.0",
|
||||
"algoliasearch": "^5.46.1",
|
||||
"axios": "^1.6.2",
|
||||
"bcrypt": "^5.1.1",
|
||||
"bcryptjs": "^3.0.3",
|
||||
@@ -51,8 +49,6 @@
|
||||
"puppeteer-extra-plugin-stealth": "^2.11.2",
|
||||
"sharp": "^0.32.0",
|
||||
"socks-proxy-agent": "^8.0.2",
|
||||
"swagger-jsdoc": "^6.2.8",
|
||||
"swagger-ui-express": "^5.0.1",
|
||||
"user-agents": "^1.1.669",
|
||||
"uuid": "^9.0.1",
|
||||
"zod": "^3.22.4"
|
||||
@@ -65,8 +61,6 @@
|
||||
"@types/node": "^20.10.5",
|
||||
"@types/node-cron": "^3.0.11",
|
||||
"@types/pg": "^8.15.6",
|
||||
"@types/swagger-jsdoc": "^6.0.4",
|
||||
"@types/swagger-ui-express": "^4.1.8",
|
||||
"@types/uuid": "^9.0.7",
|
||||
"tsx": "^4.7.0",
|
||||
"typescript": "^5.3.3"
|
||||
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -1 +1 @@
|
||||
cannaiq-menus-2.3.0.zip
|
||||
cannaiq-menus-1.6.0.zip
|
||||
@@ -1,130 +0,0 @@
|
||||
/**
|
||||
* Count Jane stores - v2: Try Algolia store search
|
||||
* Usage: npx ts-node scripts/count-jane-stores-v2.ts
|
||||
*/
|
||||
|
||||
import puppeteer from 'puppeteer-extra';
|
||||
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
|
||||
|
||||
puppeteer.use(StealthPlugin());
|
||||
|
||||
const STATES = [
|
||||
'AZ', 'CA', 'CO', 'FL', 'IL', 'MA', 'MI', 'NV', 'NJ', 'NY', 'OH', 'PA', 'WA', 'OR'
|
||||
];
|
||||
|
||||
async function main() {
|
||||
console.log('Counting Jane stores by exploring state pages...\n');
|
||||
|
||||
const browser = await puppeteer.launch({
|
||||
headless: true,
|
||||
args: ['--no-sandbox', '--disable-setuid-sandbox'],
|
||||
});
|
||||
|
||||
const page = await browser.newPage();
|
||||
const allStores: Map<number, any> = new Map();
|
||||
|
||||
await page.setRequestInterception(true);
|
||||
page.on('request', (req) => {
|
||||
const type = req.resourceType();
|
||||
if (['image', 'font', 'media', 'stylesheet'].includes(type)) {
|
||||
req.abort();
|
||||
} else {
|
||||
req.continue();
|
||||
}
|
||||
});
|
||||
|
||||
page.on('response', async (response) => {
|
||||
const url = response.url();
|
||||
const contentType = response.headers()['content-type'] || '';
|
||||
if (url.includes('iheartjane.com') && contentType.includes('json')) {
|
||||
try {
|
||||
const json = await response.json();
|
||||
// Look for stores in any response
|
||||
if (json.stores && Array.isArray(json.stores)) {
|
||||
for (const s of json.stores) {
|
||||
if (s.id) allStores.set(s.id, s);
|
||||
}
|
||||
}
|
||||
// Also check hits (Algolia format)
|
||||
if (json.hits && Array.isArray(json.hits)) {
|
||||
for (const s of json.hits) {
|
||||
if (s.id) allStores.set(s.id, s);
|
||||
}
|
||||
}
|
||||
} catch {}
|
||||
}
|
||||
});
|
||||
|
||||
// First visit the main stores page
|
||||
console.log('Visiting main stores page...');
|
||||
await page.goto('https://www.iheartjane.com/stores', {
|
||||
waitUntil: 'networkidle0',
|
||||
timeout: 60000,
|
||||
});
|
||||
await new Promise(r => setTimeout(r, 3000));
|
||||
|
||||
// Try to scroll to load more stores
|
||||
console.log('Scrolling to load more...');
|
||||
for (let i = 0; i < 5; i++) {
|
||||
await page.evaluate(() => window.scrollBy(0, 1000));
|
||||
await new Promise(r => setTimeout(r, 1000));
|
||||
}
|
||||
|
||||
// Try clicking "Load More" if it exists
|
||||
try {
|
||||
const loadMore = await page.$('button:has-text("Load More"), [class*="load-more"]');
|
||||
if (loadMore) {
|
||||
console.log('Clicking Load More...');
|
||||
await loadMore.click();
|
||||
await new Promise(r => setTimeout(r, 3000));
|
||||
}
|
||||
} catch {}
|
||||
|
||||
// Extract stores from DOM as fallback
|
||||
const domStores = await page.evaluate(() => {
|
||||
const storeElements = document.querySelectorAll('[data-store-id], [class*="StoreCard"], [class*="store-card"]');
|
||||
return storeElements.length;
|
||||
});
|
||||
|
||||
console.log(`\nStores from DOM elements: ${domStores}`);
|
||||
|
||||
await browser.close();
|
||||
|
||||
// Count by state
|
||||
const byState: Record<string, number> = {};
|
||||
for (const store of allStores.values()) {
|
||||
const state = store.state || 'Unknown';
|
||||
byState[state] = (byState[state] || 0) + 1;
|
||||
}
|
||||
|
||||
console.log('\n=== JANE STORE COUNTS ===\n');
|
||||
console.log(`Unique stores captured: ${allStores.size}`);
|
||||
|
||||
if (allStores.size > 0) {
|
||||
console.log('\nBy State:');
|
||||
const sorted = Object.entries(byState).sort((a, b) => b[1] - a[1]);
|
||||
for (const [state, count] of sorted.slice(0, 20)) {
|
||||
console.log(` ${state}: ${count}`);
|
||||
}
|
||||
|
||||
// Check Arizona specifically
|
||||
const azStores = Array.from(allStores.values()).filter(s =>
|
||||
s.state === 'Arizona' || s.state === 'AZ'
|
||||
);
|
||||
console.log(`\nArizona stores: ${azStores.length}`);
|
||||
if (azStores.length > 0) {
|
||||
console.log('AZ stores:');
|
||||
for (const s of azStores.slice(0, 10)) {
|
||||
console.log(` - ${s.name} (ID: ${s.id}) - ${s.city}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Note about total
|
||||
console.log('\n--- Note ---');
|
||||
console.log('Jane uses server-side rendering. To get full store count,');
|
||||
console.log('you may need to check their public marketing materials or');
|
||||
console.log('iterate through known store IDs.');
|
||||
}
|
||||
|
||||
main().catch(console.error);
|
||||
@@ -1,98 +0,0 @@
|
||||
/**
|
||||
* Count Jane stores by state
|
||||
* Usage: npx ts-node scripts/count-jane-stores.ts
|
||||
*/
|
||||
|
||||
import puppeteer from 'puppeteer-extra';
|
||||
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
|
||||
|
||||
puppeteer.use(StealthPlugin());
|
||||
|
||||
async function main() {
|
||||
console.log('Counting Jane stores...\n');
|
||||
|
||||
const browser = await puppeteer.launch({
|
||||
headless: true,
|
||||
args: ['--no-sandbox', '--disable-setuid-sandbox'],
|
||||
});
|
||||
|
||||
const page = await browser.newPage();
|
||||
|
||||
// Capture store data from API
|
||||
const stores: any[] = [];
|
||||
|
||||
await page.setRequestInterception(true);
|
||||
page.on('request', (req) => {
|
||||
const type = req.resourceType();
|
||||
if (['image', 'font', 'media', 'stylesheet'].includes(type)) {
|
||||
req.abort();
|
||||
} else {
|
||||
req.continue();
|
||||
}
|
||||
});
|
||||
|
||||
page.on('response', async (response) => {
|
||||
const url = response.url();
|
||||
if (url.includes('iheartjane.com') && url.includes('stores')) {
|
||||
try {
|
||||
const json = await response.json();
|
||||
if (json.stores && Array.isArray(json.stores)) {
|
||||
stores.push(...json.stores);
|
||||
}
|
||||
} catch {}
|
||||
}
|
||||
});
|
||||
|
||||
// Visit the store directory
|
||||
console.log('Loading Jane store directory...');
|
||||
await page.goto('https://www.iheartjane.com/stores', {
|
||||
waitUntil: 'networkidle2',
|
||||
timeout: 60000,
|
||||
});
|
||||
|
||||
// Wait for stores to load
|
||||
await new Promise(r => setTimeout(r, 5000));
|
||||
|
||||
// Also try to get store count from page content
|
||||
const pageStoreCount = await page.evaluate(() => {
|
||||
// Look for store count in page text
|
||||
const text = document.body.innerText;
|
||||
const match = text.match(/(\d+)\s*stores?/i);
|
||||
return match ? parseInt(match[1]) : null;
|
||||
});
|
||||
|
||||
await browser.close();
|
||||
|
||||
// Count by state
|
||||
const byState: Record<string, number> = {};
|
||||
for (const store of stores) {
|
||||
const state = store.state || 'Unknown';
|
||||
byState[state] = (byState[state] || 0) + 1;
|
||||
}
|
||||
|
||||
console.log('\n=== JANE STORE COUNTS ===\n');
|
||||
console.log(`Total stores captured from API: ${stores.length}`);
|
||||
if (pageStoreCount) {
|
||||
console.log(`Page claims: ${pageStoreCount} stores`);
|
||||
}
|
||||
|
||||
console.log('\nBy State:');
|
||||
const sorted = Object.entries(byState).sort((a, b) => b[1] - a[1]);
|
||||
for (const [state, count] of sorted) {
|
||||
console.log(` ${state}: ${count}`);
|
||||
}
|
||||
|
||||
// Check Arizona specifically
|
||||
const azStores = stores.filter(s =>
|
||||
s.state === 'Arizona' || s.state === 'AZ'
|
||||
);
|
||||
console.log(`\nArizona stores: ${azStores.length}`);
|
||||
if (azStores.length > 0) {
|
||||
console.log('Sample AZ stores:');
|
||||
for (const s of azStores.slice(0, 5)) {
|
||||
console.log(` - ${s.name} (ID: ${s.id}) - ${s.city}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
main().catch(console.error);
|
||||
@@ -1,184 +0,0 @@
|
||||
/**
|
||||
* Explore all Treez page URLs to find the full product catalog
|
||||
*/
|
||||
|
||||
import puppeteer, { Page } from 'puppeteer';
|
||||
|
||||
const STORE_ID = 'best';
|
||||
|
||||
async function sleep(ms: number): Promise<void> {
|
||||
return new Promise(resolve => setTimeout(resolve, ms));
|
||||
}
|
||||
|
||||
async function bypassAgeGate(page: Page): Promise<void> {
|
||||
const ageGate = await page.$('[data-testid="age-gate-modal"]');
|
||||
if (ageGate) {
|
||||
console.log(' Age gate detected, bypassing...');
|
||||
const btn = await page.$('[data-testid="age-gate-submit-button"]');
|
||||
if (btn) await btn.click();
|
||||
await sleep(2000);
|
||||
}
|
||||
}
|
||||
|
||||
async function countProducts(page: Page): Promise<number> {
|
||||
return page.evaluate(() =>
|
||||
document.querySelectorAll('[class*="product_product__"]').length
|
||||
);
|
||||
}
|
||||
|
||||
async function scrollAndCount(page: Page, maxScrolls: number = 30): Promise<{ products: number; scrolls: number }> {
|
||||
let previousHeight = 0;
|
||||
let scrollCount = 0;
|
||||
let sameHeightCount = 0;
|
||||
|
||||
while (scrollCount < maxScrolls) {
|
||||
const currentHeight = await page.evaluate(() => document.body.scrollHeight);
|
||||
|
||||
if (currentHeight === previousHeight) {
|
||||
sameHeightCount++;
|
||||
if (sameHeightCount >= 3) break;
|
||||
} else {
|
||||
sameHeightCount = 0;
|
||||
}
|
||||
|
||||
await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
|
||||
await sleep(1500);
|
||||
|
||||
previousHeight = currentHeight;
|
||||
scrollCount++;
|
||||
}
|
||||
|
||||
const products = await countProducts(page);
|
||||
return { products, scrolls: scrollCount };
|
||||
}
|
||||
|
||||
async function testUrl(page: Page, path: string): Promise<{ products: number; scrolls: number; error?: string }> {
|
||||
const url = `https://${STORE_ID}.treez.io${path}`;
|
||||
console.log(`\nTesting: ${url}`);
|
||||
|
||||
try {
|
||||
await page.goto(url, { waitUntil: 'networkidle2', timeout: 30000 });
|
||||
await sleep(2000);
|
||||
await bypassAgeGate(page);
|
||||
await sleep(1000);
|
||||
|
||||
const initialCount = await countProducts(page);
|
||||
console.log(` Initial products: ${initialCount}`);
|
||||
|
||||
if (initialCount > 0) {
|
||||
const result = await scrollAndCount(page);
|
||||
console.log(` After scroll: ${result.products} products (${result.scrolls} scrolls)`);
|
||||
return result;
|
||||
}
|
||||
|
||||
// Check for brand/category cards instead
|
||||
const cardCount = await page.evaluate(() => {
|
||||
const selectors = [
|
||||
'[class*="brand"]',
|
||||
'[class*="Brand"]',
|
||||
'[class*="category"]',
|
||||
'[class*="Category"]',
|
||||
'[class*="card"]',
|
||||
'a[href*="/brand/"]',
|
||||
'a[href*="/category/"]',
|
||||
];
|
||||
let count = 0;
|
||||
selectors.forEach(sel => {
|
||||
count += document.querySelectorAll(sel).length;
|
||||
});
|
||||
return count;
|
||||
});
|
||||
console.log(` Cards/links found: ${cardCount}`);
|
||||
|
||||
return { products: initialCount, scrolls: 0 };
|
||||
} catch (error: any) {
|
||||
console.log(` Error: ${error.message}`);
|
||||
return { products: 0, scrolls: 0, error: error.message };
|
||||
}
|
||||
}
|
||||
|
||||
async function main() {
|
||||
console.log('='.repeat(60));
|
||||
console.log('Exploring Treez Page URLs');
|
||||
console.log('='.repeat(60));
|
||||
|
||||
const browser = await puppeteer.launch({
|
||||
headless: true,
|
||||
args: ['--no-sandbox', '--disable-setuid-sandbox'],
|
||||
});
|
||||
|
||||
const page = await browser.newPage();
|
||||
await page.setViewport({ width: 1920, height: 1080 });
|
||||
|
||||
// Block images to speed up
|
||||
await page.setRequestInterception(true);
|
||||
page.on('request', (req) => {
|
||||
if (['image', 'font', 'media', 'stylesheet'].includes(req.resourceType())) {
|
||||
req.abort();
|
||||
} else {
|
||||
req.continue();
|
||||
}
|
||||
});
|
||||
|
||||
const urlsToTest = [
|
||||
'/onlinemenu/?customerType=ADULT', // Homepage
|
||||
'/onlinemenu/brands?customerType=ADULT', // Brands page
|
||||
'/onlinemenu/shop?customerType=ADULT', // Shop page?
|
||||
'/onlinemenu/products?customerType=ADULT', // Products page?
|
||||
'/onlinemenu/menu?customerType=ADULT', // Menu page?
|
||||
'/onlinemenu/all?customerType=ADULT', // All products?
|
||||
'/onlinemenu/flower?customerType=ADULT', // Flower category
|
||||
'/onlinemenu/vapes?customerType=ADULT', // Vapes category
|
||||
'/onlinemenu/edibles?customerType=ADULT', // Edibles category
|
||||
'/onlinemenu/concentrates?customerType=ADULT', // Concentrates category
|
||||
];
|
||||
|
||||
const results: { path: string; products: number; scrolls: number }[] = [];
|
||||
|
||||
for (const path of urlsToTest) {
|
||||
const result = await testUrl(page, path);
|
||||
results.push({ path, ...result });
|
||||
}
|
||||
|
||||
// Look for navigation links on the main page
|
||||
console.log('\n' + '='.repeat(60));
|
||||
console.log('Checking navigation structure on homepage...');
|
||||
console.log('='.repeat(60));
|
||||
|
||||
await page.goto(`https://${STORE_ID}.treez.io/onlinemenu/?customerType=ADULT`, {
|
||||
waitUntil: 'networkidle2',
|
||||
timeout: 30000,
|
||||
});
|
||||
await sleep(2000);
|
||||
await bypassAgeGate(page);
|
||||
await sleep(1000);
|
||||
|
||||
const navLinks = await page.evaluate(() => {
|
||||
const links: { text: string; href: string }[] = [];
|
||||
document.querySelectorAll('a[href*="/onlinemenu/"]').forEach(el => {
|
||||
const text = el.textContent?.trim() || '';
|
||||
const href = el.getAttribute('href') || '';
|
||||
if (text && !links.some(l => l.href === href)) {
|
||||
links.push({ text: text.slice(0, 50), href });
|
||||
}
|
||||
});
|
||||
return links;
|
||||
});
|
||||
|
||||
console.log('\nNavigation links found:');
|
||||
navLinks.forEach(l => console.log(` "${l.text}" → ${l.href}`));
|
||||
|
||||
// Summary
|
||||
console.log('\n' + '='.repeat(60));
|
||||
console.log('Summary');
|
||||
console.log('='.repeat(60));
|
||||
|
||||
results.sort((a, b) => b.products - a.products);
|
||||
results.forEach(r => {
|
||||
console.log(`${r.products.toString().padStart(4)} products | ${r.path}`);
|
||||
});
|
||||
|
||||
await browser.close();
|
||||
}
|
||||
|
||||
main().catch(console.error);
|
||||
@@ -1,247 +0,0 @@
|
||||
/**
|
||||
* Explore Treez site structure to find full product catalog
|
||||
*
|
||||
* Usage: npx ts-node scripts/explore-treez-structure.ts
|
||||
*/
|
||||
|
||||
import puppeteer from 'puppeteer';
|
||||
|
||||
const STORE_ID = 'best';
|
||||
|
||||
async function sleep(ms: number): Promise<void> {
|
||||
return new Promise(resolve => setTimeout(resolve, ms));
|
||||
}
|
||||
|
||||
async function main() {
|
||||
console.log('='.repeat(60));
|
||||
console.log('Exploring Treez Site Structure');
|
||||
console.log('='.repeat(60));
|
||||
|
||||
const browser = await puppeteer.launch({
|
||||
headless: true,
|
||||
args: ['--no-sandbox', '--disable-setuid-sandbox'],
|
||||
});
|
||||
|
||||
const page = await browser.newPage();
|
||||
await page.setViewport({ width: 1920, height: 1080 });
|
||||
|
||||
try {
|
||||
// Navigate to base menu URL
|
||||
const baseUrl = `https://${STORE_ID}.treez.io/onlinemenu/?customerType=ADULT`;
|
||||
console.log(`\n[1] Navigating to: ${baseUrl}`);
|
||||
await page.goto(baseUrl, { waitUntil: 'networkidle2', timeout: 60000 });
|
||||
await sleep(3000);
|
||||
|
||||
// Bypass age gate if present
|
||||
const ageGate = await page.$('[data-testid="age-gate-modal"]');
|
||||
if (ageGate) {
|
||||
console.log('[1] Age gate detected, bypassing...');
|
||||
const btn = await page.$('[data-testid="age-gate-submit-button"]');
|
||||
if (btn) await btn.click();
|
||||
await sleep(2000);
|
||||
}
|
||||
|
||||
// Get all navigation links
|
||||
console.log('\n[2] Extracting navigation structure...');
|
||||
const navInfo = await page.evaluate(() => {
|
||||
const links: { text: string; href: string }[] = [];
|
||||
|
||||
// Look for nav links
|
||||
document.querySelectorAll('nav a, [class*="nav"] a, [class*="menu"] a, header a').forEach(el => {
|
||||
const text = el.textContent?.trim() || '';
|
||||
const href = el.getAttribute('href') || '';
|
||||
if (text && href && !links.some(l => l.href === href)) {
|
||||
links.push({ text, href });
|
||||
}
|
||||
});
|
||||
|
||||
// Look for category tabs/buttons
|
||||
document.querySelectorAll('[class*="category"], [class*="tab"], [role="tab"]').forEach(el => {
|
||||
const text = el.textContent?.trim() || '';
|
||||
const href = el.getAttribute('href') || el.getAttribute('data-href') || '';
|
||||
if (text && !links.some(l => l.text === text)) {
|
||||
links.push({ text, href: href || `(click: ${el.className})` });
|
||||
}
|
||||
});
|
||||
|
||||
// Get current URL
|
||||
const currentUrl = window.location.href;
|
||||
|
||||
// Count products on page
|
||||
const productCount = document.querySelectorAll('[class*="product_product__"]').length;
|
||||
|
||||
return { links, currentUrl, productCount };
|
||||
});
|
||||
|
||||
console.log(`Current URL: ${navInfo.currentUrl}`);
|
||||
console.log(`Products on homepage: ${navInfo.productCount}`);
|
||||
console.log('\nNavigation links found:');
|
||||
navInfo.links.forEach(l => {
|
||||
console.log(` "${l.text}" → ${l.href}`);
|
||||
});
|
||||
|
||||
// Look for category buttons/tabs specifically
|
||||
console.log('\n[3] Looking for category navigation...');
|
||||
const categories = await page.evaluate(() => {
|
||||
const cats: { text: string; className: string; tagName: string }[] = [];
|
||||
|
||||
// Find all clickable elements that might be categories
|
||||
const selectors = [
|
||||
'[class*="CategoryNav"]',
|
||||
'[class*="category"]',
|
||||
'[class*="Category"]',
|
||||
'[class*="nav"] button',
|
||||
'[class*="tab"]',
|
||||
'[role="tablist"] *',
|
||||
'.MuiTab-root',
|
||||
'[class*="filter"]',
|
||||
];
|
||||
|
||||
selectors.forEach(sel => {
|
||||
document.querySelectorAll(sel).forEach(el => {
|
||||
const text = el.textContent?.trim() || '';
|
||||
if (text && text.length < 50 && !cats.some(c => c.text === text)) {
|
||||
cats.push({
|
||||
text,
|
||||
className: el.className?.toString().slice(0, 80) || '',
|
||||
tagName: el.tagName,
|
||||
});
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
return cats;
|
||||
});
|
||||
|
||||
console.log('Category-like elements:');
|
||||
categories.forEach(c => {
|
||||
console.log(` [${c.tagName}] "${c.text}" (class: ${c.className})`);
|
||||
});
|
||||
|
||||
// Try clicking on "Flower" or "All" if found
|
||||
console.log('\n[4] Looking for "Flower" or "All Products" link...');
|
||||
const clickTargets = ['Flower', 'All', 'All Products', 'Shop All', 'View All'];
|
||||
|
||||
for (const target of clickTargets) {
|
||||
const element = await page.evaluate((targetText) => {
|
||||
const els = Array.from(document.querySelectorAll('a, button, [role="tab"], [class*="category"]'));
|
||||
const match = els.find(el =>
|
||||
el.textContent?.trim().toLowerCase() === targetText.toLowerCase()
|
||||
);
|
||||
if (match) {
|
||||
return {
|
||||
found: true,
|
||||
text: match.textContent?.trim(),
|
||||
tag: match.tagName,
|
||||
};
|
||||
}
|
||||
return { found: false };
|
||||
}, target);
|
||||
|
||||
if (element.found) {
|
||||
console.log(`Found "${element.text}" (${element.tag}), clicking...`);
|
||||
|
||||
await page.evaluate((targetText) => {
|
||||
const els = Array.from(document.querySelectorAll('a, button, [role="tab"], [class*="category"]'));
|
||||
const match = els.find(el =>
|
||||
el.textContent?.trim().toLowerCase() === targetText.toLowerCase()
|
||||
);
|
||||
if (match) (match as HTMLElement).click();
|
||||
}, target);
|
||||
|
||||
await sleep(3000);
|
||||
|
||||
const newUrl = page.url();
|
||||
const newCount = await page.evaluate(() =>
|
||||
document.querySelectorAll('[class*="product_product__"]').length
|
||||
);
|
||||
|
||||
console.log(` New URL: ${newUrl}`);
|
||||
console.log(` Products after click: ${newCount}`);
|
||||
|
||||
if (newCount > navInfo.productCount) {
|
||||
console.log(` ✓ Found more products! (${navInfo.productCount} → ${newCount})`);
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Check page height and scroll behavior
|
||||
console.log('\n[5] Checking scroll behavior on current page...');
|
||||
let previousHeight = 0;
|
||||
let scrollCount = 0;
|
||||
let previousProductCount = await page.evaluate(() =>
|
||||
document.querySelectorAll('[class*="product_product__"]').length
|
||||
);
|
||||
|
||||
while (scrollCount < 10) {
|
||||
const currentHeight = await page.evaluate(() => document.body.scrollHeight);
|
||||
|
||||
if (currentHeight === previousHeight) {
|
||||
console.log(` Scroll ${scrollCount + 1}: No height change, stopping`);
|
||||
break;
|
||||
}
|
||||
|
||||
await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
|
||||
await sleep(1500);
|
||||
|
||||
const currentProductCount = await page.evaluate(() =>
|
||||
document.querySelectorAll('[class*="product_product__"]').length
|
||||
);
|
||||
|
||||
console.log(` Scroll ${scrollCount + 1}: height=${currentHeight}, products=${currentProductCount}`);
|
||||
|
||||
if (currentProductCount === previousProductCount && scrollCount > 2) {
|
||||
console.log(' No new products loading, stopping');
|
||||
break;
|
||||
}
|
||||
|
||||
previousHeight = currentHeight;
|
||||
previousProductCount = currentProductCount;
|
||||
scrollCount++;
|
||||
}
|
||||
|
||||
// Try direct URL patterns
|
||||
console.log('\n[6] Testing URL patterns...');
|
||||
const urlPatterns = [
|
||||
'/onlinemenu/flower?customerType=ADULT',
|
||||
'/onlinemenu/all?customerType=ADULT',
|
||||
'/onlinemenu?category=flower&customerType=ADULT',
|
||||
'/onlinemenu?view=all&customerType=ADULT',
|
||||
];
|
||||
|
||||
for (const pattern of urlPatterns) {
|
||||
const testUrl = `https://${STORE_ID}.treez.io${pattern}`;
|
||||
console.log(`\nTrying: ${testUrl}`);
|
||||
|
||||
await page.goto(testUrl, { waitUntil: 'networkidle2', timeout: 30000 });
|
||||
await sleep(2000);
|
||||
|
||||
// Bypass age gate again if needed
|
||||
const gate = await page.$('[data-testid="age-gate-modal"]');
|
||||
if (gate) {
|
||||
const btn = await page.$('[data-testid="age-gate-submit-button"]');
|
||||
if (btn) await btn.click();
|
||||
await sleep(2000);
|
||||
}
|
||||
|
||||
const productCount = await page.evaluate(() =>
|
||||
document.querySelectorAll('[class*="product_product__"]').length
|
||||
);
|
||||
|
||||
console.log(` Products found: ${productCount}`);
|
||||
}
|
||||
|
||||
// Screenshot the final state
|
||||
await page.screenshot({ path: '/tmp/treez-explore.png', fullPage: true });
|
||||
console.log('\n[7] Screenshot saved to /tmp/treez-explore.png');
|
||||
|
||||
} catch (error: any) {
|
||||
console.error('Error:', error.message);
|
||||
} finally {
|
||||
await browser.close();
|
||||
}
|
||||
}
|
||||
|
||||
main().catch(console.error);
|
||||
@@ -1,138 +0,0 @@
|
||||
/**
|
||||
* Run Jane product discovery for stores in database
|
||||
* Usage: npx ts-node scripts/run-jane-product-discovery.ts [DISPENSARY_ID]
|
||||
* Example: npx ts-node scripts/run-jane-product-discovery.ts 4220
|
||||
* Or run for all Jane stores: npx ts-node scripts/run-jane-product-discovery.ts all
|
||||
*/
|
||||
|
||||
import { Pool } from 'pg';
|
||||
import { fetchProductsByStoreIdDirect } from '../src/platforms/jane';
|
||||
import { saveRawPayload } from '../src/utils/payload-storage';
|
||||
|
||||
async function main() {
|
||||
const arg = process.argv[2];
|
||||
|
||||
console.log('='.repeat(60));
|
||||
console.log('Jane Product Discovery');
|
||||
console.log('='.repeat(60));
|
||||
|
||||
const pool = new Pool({
|
||||
connectionString: process.env.DATABASE_URL,
|
||||
});
|
||||
|
||||
try {
|
||||
// Get dispensaries to process
|
||||
let dispensaries: any[];
|
||||
|
||||
if (arg === 'all') {
|
||||
const result = await pool.query(
|
||||
`SELECT id, name, menu_url, platform_dispensary_id
|
||||
FROM dispensaries
|
||||
WHERE platform = 'jane' AND menu_url IS NOT NULL
|
||||
ORDER BY id`
|
||||
);
|
||||
dispensaries = result.rows;
|
||||
} else if (arg) {
|
||||
const result = await pool.query(
|
||||
`SELECT id, name, menu_url, platform_dispensary_id
|
||||
FROM dispensaries
|
||||
WHERE id = $1`,
|
||||
[parseInt(arg)]
|
||||
);
|
||||
dispensaries = result.rows;
|
||||
} else {
|
||||
// Default: get first Jane store
|
||||
const result = await pool.query(
|
||||
`SELECT id, name, menu_url, platform_dispensary_id
|
||||
FROM dispensaries
|
||||
WHERE platform = 'jane' AND menu_url IS NOT NULL
|
||||
ORDER BY id LIMIT 1`
|
||||
);
|
||||
dispensaries = result.rows;
|
||||
}
|
||||
|
||||
if (dispensaries.length === 0) {
|
||||
console.log('No Jane dispensaries found');
|
||||
return;
|
||||
}
|
||||
|
||||
console.log(`Processing ${dispensaries.length} dispensary(ies)...\n`);
|
||||
|
||||
let successCount = 0;
|
||||
let failCount = 0;
|
||||
|
||||
for (const disp of dispensaries) {
|
||||
console.log(`\n${'─'.repeat(60)}`);
|
||||
console.log(`${disp.name} (ID: ${disp.id}, Jane ID: ${disp.platform_dispensary_id})`);
|
||||
console.log('─'.repeat(60));
|
||||
|
||||
try {
|
||||
const result = await fetchProductsByStoreIdDirect(disp.platform_dispensary_id);
|
||||
|
||||
if (result.products.length === 0) {
|
||||
console.log(' ✗ No products captured');
|
||||
failCount++;
|
||||
continue;
|
||||
}
|
||||
|
||||
console.log(` ✓ Captured ${result.products.length} products`);
|
||||
|
||||
// Build payload
|
||||
const rawPayload = {
|
||||
hits: result.products.map(p => p.raw),
|
||||
store: result.store?.raw || null,
|
||||
capturedAt: new Date().toISOString(),
|
||||
platform: 'jane',
|
||||
dispensaryId: disp.id,
|
||||
storeId: disp.platform_dispensary_id,
|
||||
};
|
||||
|
||||
// Save payload
|
||||
const { id: payloadId, sizeBytes } = await saveRawPayload(
|
||||
pool,
|
||||
disp.id,
|
||||
rawPayload,
|
||||
null,
|
||||
result.products.length,
|
||||
'jane'
|
||||
);
|
||||
|
||||
console.log(` ✓ Saved payload ${payloadId} (${Math.round(sizeBytes / 1024)}KB)`);
|
||||
|
||||
// Update dispensary
|
||||
await pool.query(
|
||||
`UPDATE dispensaries
|
||||
SET stage = 'hydrating',
|
||||
last_fetch_at = NOW(),
|
||||
product_count = $2,
|
||||
consecutive_successes = consecutive_successes + 1,
|
||||
consecutive_failures = 0,
|
||||
updated_at = NOW()
|
||||
WHERE id = $1`,
|
||||
[disp.id, result.products.length]
|
||||
);
|
||||
|
||||
console.log(` ✓ Updated dispensary (product_count: ${result.products.length})`);
|
||||
successCount++;
|
||||
|
||||
} catch (error: any) {
|
||||
console.log(` ✗ Error: ${error.message}`);
|
||||
failCount++;
|
||||
}
|
||||
}
|
||||
|
||||
console.log('\n' + '='.repeat(60));
|
||||
console.log('RESULTS');
|
||||
console.log('='.repeat(60));
|
||||
console.log(`Success: ${successCount}`);
|
||||
console.log(`Failed: ${failCount}`);
|
||||
|
||||
} catch (error: any) {
|
||||
console.error('Error:', error.message);
|
||||
process.exit(1);
|
||||
} finally {
|
||||
await pool.end();
|
||||
}
|
||||
}
|
||||
|
||||
main();
|
||||
@@ -1,137 +0,0 @@
|
||||
/**
|
||||
* Run Jane store discovery and insert into database
|
||||
* Usage: npx ts-node scripts/run-jane-store-discovery.ts [STATE_CODE]
|
||||
* Example: npx ts-node scripts/run-jane-store-discovery.ts AZ
|
||||
*/
|
||||
|
||||
import { Pool } from 'pg';
|
||||
import { discoverStoresByState } from '../src/platforms/jane';
|
||||
|
||||
/**
|
||||
* Generate slug from store name
|
||||
* e.g., "Hana Meds - Phoenix (REC)" -> "hana-meds-phoenix-rec"
|
||||
*/
|
||||
function generateSlug(name: string): string {
|
||||
return name
|
||||
.toLowerCase()
|
||||
.replace(/[()]/g, '') // Remove parentheses
|
||||
.replace(/[^a-z0-9\s-]/g, '') // Remove special chars
|
||||
.replace(/\s+/g, '-') // Spaces to hyphens
|
||||
.replace(/-+/g, '-') // Collapse multiple hyphens
|
||||
.replace(/^-|-$/g, ''); // Trim hyphens
|
||||
}
|
||||
|
||||
async function main() {
|
||||
const stateCode = process.argv[2] || 'AZ';
|
||||
|
||||
console.log('='.repeat(60));
|
||||
console.log(`Jane Store Discovery - ${stateCode}`);
|
||||
console.log('='.repeat(60));
|
||||
|
||||
// Connect to database
|
||||
const pool = new Pool({
|
||||
connectionString: process.env.DATABASE_URL,
|
||||
});
|
||||
|
||||
try {
|
||||
// Test connection
|
||||
const testResult = await pool.query('SELECT COUNT(*) FROM dispensaries WHERE platform = $1', ['jane']);
|
||||
console.log(`Current Jane stores in DB: ${testResult.rows[0].count}`);
|
||||
|
||||
// Discover stores
|
||||
console.log(`\nDiscovering Jane stores in ${stateCode}...`);
|
||||
const stores = await discoverStoresByState(stateCode);
|
||||
|
||||
if (stores.length === 0) {
|
||||
console.log(`No stores found in ${stateCode}`);
|
||||
return;
|
||||
}
|
||||
|
||||
console.log(`\nFound ${stores.length} stores. Inserting into database...`);
|
||||
|
||||
// Insert stores
|
||||
let inserted = 0;
|
||||
let updated = 0;
|
||||
const newIds: number[] = [];
|
||||
|
||||
for (const store of stores) {
|
||||
const menuUrl = `https://www.iheartjane.com/stores/${store.storeId}/${store.urlSlug || 'menu'}`;
|
||||
const slug = generateSlug(store.name);
|
||||
|
||||
try {
|
||||
const result = await pool.query(
|
||||
`INSERT INTO dispensaries (
|
||||
name, slug, address1, city, state, zipcode,
|
||||
latitude, longitude, menu_url, menu_type, platform,
|
||||
platform_dispensary_id, is_medical, is_recreational,
|
||||
stage, created_at, updated_at
|
||||
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, NOW(), NOW())
|
||||
ON CONFLICT (platform_dispensary_id) WHERE platform_dispensary_id IS NOT NULL
|
||||
DO UPDATE SET
|
||||
name = EXCLUDED.name,
|
||||
slug = EXCLUDED.slug,
|
||||
address1 = EXCLUDED.address1,
|
||||
city = EXCLUDED.city,
|
||||
latitude = EXCLUDED.latitude,
|
||||
longitude = EXCLUDED.longitude,
|
||||
menu_url = EXCLUDED.menu_url,
|
||||
is_medical = EXCLUDED.is_medical,
|
||||
is_recreational = EXCLUDED.is_recreational,
|
||||
updated_at = NOW()
|
||||
RETURNING id, (xmax = 0) AS is_new`,
|
||||
[
|
||||
store.name,
|
||||
slug,
|
||||
store.address,
|
||||
store.city,
|
||||
stateCode,
|
||||
store.zip,
|
||||
store.lat,
|
||||
store.long,
|
||||
menuUrl,
|
||||
'embedded', // menu_type: how it's displayed
|
||||
'jane', // platform: who provides the menu
|
||||
store.storeId,
|
||||
store.medical,
|
||||
store.recreational,
|
||||
'discovered',
|
||||
]
|
||||
);
|
||||
|
||||
if (result.rows.length > 0) {
|
||||
const { id, is_new } = result.rows[0];
|
||||
if (is_new) {
|
||||
inserted++;
|
||||
newIds.push(id);
|
||||
console.log(` + Inserted: ${store.name} (DB ID: ${id}, Jane ID: ${store.storeId})`);
|
||||
} else {
|
||||
updated++;
|
||||
console.log(` ~ Updated: ${store.name} (DB ID: ${id})`);
|
||||
}
|
||||
}
|
||||
} catch (error: any) {
|
||||
console.error(` ! Error inserting ${store.name}: ${error.message}`);
|
||||
}
|
||||
}
|
||||
|
||||
console.log('\n' + '='.repeat(60));
|
||||
console.log('RESULTS');
|
||||
console.log('='.repeat(60));
|
||||
console.log(`Stores discovered: ${stores.length}`);
|
||||
console.log(`New stores inserted: ${inserted}`);
|
||||
console.log(`Existing stores updated: ${updated}`);
|
||||
console.log(`New dispensary IDs: ${newIds.join(', ') || '(none)'}`);
|
||||
|
||||
// Show final count
|
||||
const finalResult = await pool.query('SELECT COUNT(*) FROM dispensaries WHERE platform = $1', ['jane']);
|
||||
console.log(`\nTotal Jane stores in DB: ${finalResult.rows[0].count}`);
|
||||
|
||||
} catch (error: any) {
|
||||
console.error('Error:', error.message);
|
||||
process.exit(1);
|
||||
} finally {
|
||||
await pool.end();
|
||||
}
|
||||
}
|
||||
|
||||
main();
|
||||
@@ -1,179 +0,0 @@
|
||||
import puppeteer from 'puppeteer';
|
||||
|
||||
async function sleep(ms: number): Promise<void> {
|
||||
return new Promise(resolve => setTimeout(resolve, ms));
|
||||
}
|
||||
|
||||
async function main() {
|
||||
console.log('Loading ALL brands from https://shop.bestdispensary.com/brands');
|
||||
|
||||
const browser = await puppeteer.launch({
|
||||
headless: true,
|
||||
args: ['--no-sandbox', '--disable-setuid-sandbox'],
|
||||
});
|
||||
|
||||
const page = await browser.newPage();
|
||||
await page.setViewport({ width: 1920, height: 1080 });
|
||||
|
||||
await page.setRequestInterception(true);
|
||||
page.on('request', (req) => {
|
||||
if (['image', 'font', 'media'].includes(req.resourceType())) {
|
||||
req.abort();
|
||||
} else {
|
||||
req.continue();
|
||||
}
|
||||
});
|
||||
|
||||
await page.goto('https://shop.bestdispensary.com/brands', {
|
||||
waitUntil: 'networkidle2',
|
||||
timeout: 60000
|
||||
});
|
||||
await sleep(3000);
|
||||
|
||||
// Bypass age gate
|
||||
const ageGate = await page.$('[data-testid="age-gate-modal"]');
|
||||
if (ageGate) {
|
||||
console.log('Age gate detected, bypassing...');
|
||||
const btn = await page.$('[data-testid="age-gate-submit-button"]');
|
||||
if (btn) await btn.click();
|
||||
await sleep(2000);
|
||||
}
|
||||
|
||||
console.log('Current URL:', page.url());
|
||||
|
||||
// Get initial brand count
|
||||
let brandCount = await page.evaluate(() => {
|
||||
const seen = new Set<string>();
|
||||
document.querySelectorAll('a[href*="/brand/"]').forEach((a: Element) => {
|
||||
const href = a.getAttribute('href');
|
||||
if (href) seen.add(href);
|
||||
});
|
||||
return seen.size;
|
||||
});
|
||||
console.log(`Initial brand count: ${brandCount}`);
|
||||
|
||||
// Aggressive scrolling
|
||||
console.log('\nScrolling to load ALL brands...');
|
||||
let previousCount = 0;
|
||||
let sameCount = 0;
|
||||
|
||||
for (let i = 0; i < 50; i++) {
|
||||
// Scroll to bottom
|
||||
await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
|
||||
await sleep(1000);
|
||||
|
||||
brandCount = await page.evaluate(() => {
|
||||
const seen = new Set<string>();
|
||||
document.querySelectorAll('a[href*="/brand/"]').forEach((a: Element) => {
|
||||
const href = a.getAttribute('href');
|
||||
if (href) seen.add(href);
|
||||
});
|
||||
return seen.size;
|
||||
});
|
||||
|
||||
if (brandCount === previousCount) {
|
||||
sameCount++;
|
||||
if (sameCount >= 5) {
|
||||
console.log(` Scroll ${i+1}: ${brandCount} brands (stopping - no change)`);
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
sameCount = 0;
|
||||
console.log(` Scroll ${i+1}: ${brandCount} brands`);
|
||||
}
|
||||
previousCount = brandCount;
|
||||
}
|
||||
|
||||
// Get all unique brands
|
||||
const brands = await page.evaluate(() => {
|
||||
const results: { name: string; href: string }[] = [];
|
||||
const seen = new Set<string>();
|
||||
|
||||
document.querySelectorAll('a[href*="/brand/"]').forEach((a: Element) => {
|
||||
const href = a.getAttribute('href') || '';
|
||||
const normalizedHref = href.toLowerCase();
|
||||
if (seen.has(normalizedHref)) return;
|
||||
seen.add(normalizedHref);
|
||||
|
||||
// Get brand name
|
||||
let name = '';
|
||||
const heading = a.querySelector('h3, h4, h5, [class*="name"]');
|
||||
if (heading) {
|
||||
name = heading.textContent?.trim() || '';
|
||||
}
|
||||
if (!name) {
|
||||
name = a.textContent?.trim().split('\n')[0] || '';
|
||||
}
|
||||
if (!name) {
|
||||
name = href.split('/brand/')[1]?.replace(/-/g, ' ') || '';
|
||||
}
|
||||
|
||||
results.push({ name: name.slice(0, 50), href });
|
||||
});
|
||||
|
||||
return results.sort((a, b) => a.name.localeCompare(b.name));
|
||||
});
|
||||
|
||||
console.log('\n' + '='.repeat(60));
|
||||
console.log('TOTAL BRANDS FOUND: ' + brands.length);
|
||||
console.log('='.repeat(60));
|
||||
|
||||
brands.forEach((b, i) => {
|
||||
const num = (i + 1).toString().padStart(3, ' ');
|
||||
console.log(`${num}. ${b.name} (${b.href})`);
|
||||
});
|
||||
|
||||
// Now visit each brand page and count products
|
||||
console.log('\n' + '='.repeat(60));
|
||||
console.log('PRODUCTS PER BRAND');
|
||||
console.log('='.repeat(60));
|
||||
|
||||
const brandProducts: { brand: string; products: number }[] = [];
|
||||
|
||||
for (let i = 0; i < brands.length; i++) {
|
||||
const brand = brands[i];
|
||||
try {
|
||||
const brandUrl = brand.href.startsWith('http')
|
||||
? brand.href
|
||||
: `https://shop.bestdispensary.com${brand.href}`;
|
||||
|
||||
await page.goto(brandUrl, { waitUntil: 'networkidle2', timeout: 30000 });
|
||||
await sleep(1500);
|
||||
|
||||
// Scroll to load products
|
||||
for (let j = 0; j < 10; j++) {
|
||||
await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
|
||||
await sleep(800);
|
||||
}
|
||||
|
||||
const productCount = await page.evaluate(() => {
|
||||
const seen = new Set<string>();
|
||||
document.querySelectorAll('a[href*="/product/"]').forEach((a: Element) => {
|
||||
const img = a.querySelector('img');
|
||||
const name = img?.getAttribute('alt') || a.textContent?.trim() || '';
|
||||
if (name) seen.add(name);
|
||||
});
|
||||
return seen.size;
|
||||
});
|
||||
|
||||
brandProducts.push({ brand: brand.name, products: productCount });
|
||||
console.log(`${(i+1).toString().padStart(3)}. ${brand.name}: ${productCount} products`);
|
||||
|
||||
} catch (err: any) {
|
||||
console.log(`${(i+1).toString().padStart(3)}. ${brand.name}: ERROR - ${err.message?.slice(0, 30)}`);
|
||||
brandProducts.push({ brand: brand.name, products: 0 });
|
||||
}
|
||||
}
|
||||
|
||||
// Summary
|
||||
const totalProducts = brandProducts.reduce((sum, b) => sum + b.products, 0);
|
||||
console.log('\n' + '='.repeat(60));
|
||||
console.log('SUMMARY');
|
||||
console.log('='.repeat(60));
|
||||
console.log(`Total brands: ${brands.length}`);
|
||||
console.log(`Total products: ${totalProducts}`);
|
||||
|
||||
await browser.close();
|
||||
}
|
||||
|
||||
main().catch(console.error);
|
||||
@@ -1,92 +0,0 @@
|
||||
import puppeteer from 'puppeteer';
|
||||
|
||||
async function sleep(ms: number): Promise<void> {
|
||||
return new Promise(resolve => setTimeout(resolve, ms));
|
||||
}
|
||||
|
||||
async function main() {
|
||||
console.log('Navigating to https://shop.bestdispensary.com/brands');
|
||||
|
||||
const browser = await puppeteer.launch({
|
||||
headless: true,
|
||||
args: ['--no-sandbox', '--disable-setuid-sandbox'],
|
||||
});
|
||||
|
||||
const page = await browser.newPage();
|
||||
await page.setViewport({ width: 1920, height: 1080 });
|
||||
|
||||
await page.setRequestInterception(true);
|
||||
page.on('request', (req) => {
|
||||
if (['image', 'font', 'media'].includes(req.resourceType())) {
|
||||
req.abort();
|
||||
} else {
|
||||
req.continue();
|
||||
}
|
||||
});
|
||||
|
||||
// Go directly to the brands page
|
||||
await page.goto('https://shop.bestdispensary.com/brands', {
|
||||
waitUntil: 'networkidle2',
|
||||
timeout: 60000
|
||||
});
|
||||
await sleep(3000);
|
||||
|
||||
// Bypass age gate if present
|
||||
const ageGate = await page.$('[data-testid="age-gate-modal"]');
|
||||
if (ageGate) {
|
||||
console.log('Age gate detected, bypassing...');
|
||||
const btn = await page.$('[data-testid="age-gate-submit-button"]');
|
||||
if (btn) await btn.click();
|
||||
await sleep(2000);
|
||||
}
|
||||
|
||||
console.log('Current URL:', page.url());
|
||||
|
||||
// Scroll to load all content
|
||||
console.log('\nScrolling to load all brands...');
|
||||
let previousHeight = 0;
|
||||
for (let i = 0; i < 20; i++) {
|
||||
await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
|
||||
await sleep(1500);
|
||||
|
||||
const currentHeight = await page.evaluate(() => document.body.scrollHeight);
|
||||
if (currentHeight === previousHeight) {
|
||||
console.log(` Scroll ${i+1}: No new content`);
|
||||
break;
|
||||
}
|
||||
previousHeight = currentHeight;
|
||||
|
||||
const brandCount = await page.evaluate(() =>
|
||||
document.querySelectorAll('a[href*="/brand/"]').length
|
||||
);
|
||||
console.log(` Scroll ${i+1}: height=${currentHeight}, brand links=${brandCount}`);
|
||||
}
|
||||
|
||||
// Get all brand links
|
||||
const brands = await page.evaluate(() => {
|
||||
const results: { name: string; href: string }[] = [];
|
||||
const seen = new Set<string>();
|
||||
|
||||
document.querySelectorAll('a[href*="/brand/"]').forEach((a: Element) => {
|
||||
const href = a.getAttribute('href') || '';
|
||||
if (seen.has(href)) return;
|
||||
seen.add(href);
|
||||
|
||||
const name = a.textContent?.trim() || href.split('/brand/')[1] || '';
|
||||
results.push({ name, href });
|
||||
});
|
||||
|
||||
return results;
|
||||
});
|
||||
|
||||
console.log(`\nFound ${brands.length} brands:`);
|
||||
brands.forEach(b => console.log(` - ${b.name} (${b.href})`));
|
||||
|
||||
// Take screenshot
|
||||
await page.screenshot({ path: '/tmp/bestdispensary-brands.png', fullPage: true });
|
||||
console.log('\nScreenshot saved to /tmp/bestdispensary-brands.png');
|
||||
|
||||
await browser.close();
|
||||
}
|
||||
|
||||
main().catch(console.error);
|
||||
@@ -1,108 +0,0 @@
|
||||
import puppeteer from 'puppeteer';
|
||||
|
||||
async function sleep(ms: number): Promise<void> {
|
||||
return new Promise(resolve => setTimeout(resolve, ms));
|
||||
}
|
||||
|
||||
async function main() {
|
||||
const browser = await puppeteer.launch({
|
||||
headless: true,
|
||||
args: ['--no-sandbox', '--disable-setuid-sandbox'],
|
||||
});
|
||||
|
||||
const page = await browser.newPage();
|
||||
await page.setViewport({ width: 1920, height: 1080 });
|
||||
|
||||
await page.setRequestInterception(true);
|
||||
page.on('request', (req) => {
|
||||
if (['image', 'font', 'media'].includes(req.resourceType())) {
|
||||
req.abort();
|
||||
} else {
|
||||
req.continue();
|
||||
}
|
||||
});
|
||||
|
||||
await page.goto('https://shop.bestdispensary.com/brands', {
|
||||
waitUntil: 'networkidle2',
|
||||
timeout: 60000
|
||||
});
|
||||
await sleep(3000);
|
||||
|
||||
// Bypass age gate
|
||||
const ageGate = await page.$('[data-testid="age-gate-modal"]');
|
||||
if (ageGate) {
|
||||
const btn = await page.$('[data-testid="age-gate-submit-button"]');
|
||||
if (btn) await btn.click();
|
||||
await sleep(2000);
|
||||
}
|
||||
|
||||
// Check Load More button
|
||||
const btnInfo = await page.evaluate(() => {
|
||||
const btn = document.querySelector('button.collection__load-more');
|
||||
if (!btn) return { found: false };
|
||||
|
||||
const rect = btn.getBoundingClientRect();
|
||||
return {
|
||||
found: true,
|
||||
text: btn.textContent?.trim(),
|
||||
visible: rect.width > 0 && rect.height > 0,
|
||||
top: rect.top,
|
||||
disabled: (btn as HTMLButtonElement).disabled,
|
||||
class: btn.className,
|
||||
};
|
||||
});
|
||||
|
||||
console.log('Load More button:', btnInfo);
|
||||
|
||||
// Scroll to button and click
|
||||
console.log('\nScrolling to button and clicking...');
|
||||
|
||||
for (let i = 0; i < 10; i++) {
|
||||
const btn = await page.$('button.collection__load-more');
|
||||
if (!btn) {
|
||||
console.log('Button not found');
|
||||
break;
|
||||
}
|
||||
|
||||
// Scroll button into view
|
||||
await page.evaluate((b) => b.scrollIntoView({ behavior: 'smooth', block: 'center' }), btn);
|
||||
await sleep(500);
|
||||
|
||||
// Check if button is still there and clickable
|
||||
const stillThere = await page.evaluate(() => {
|
||||
const b = document.querySelector('button.collection__load-more');
|
||||
return b ? b.textContent?.trim() : null;
|
||||
});
|
||||
|
||||
if (!stillThere) {
|
||||
console.log('Button disappeared - all loaded');
|
||||
break;
|
||||
}
|
||||
|
||||
// Click it
|
||||
await btn.click();
|
||||
console.log(`Click ${i+1}...`);
|
||||
await sleep(2000);
|
||||
|
||||
const count = await page.evaluate(() =>
|
||||
document.querySelectorAll('.brands-page__list a[href*="/brand/"]').length
|
||||
);
|
||||
console.log(` Brands: ${count}`);
|
||||
}
|
||||
|
||||
// Final count
|
||||
const brands = await page.evaluate(() => {
|
||||
const list: string[] = [];
|
||||
document.querySelectorAll('.brands-page__list a[href*="/brand/"]').forEach((a: Element) => {
|
||||
list.push(a.textContent?.trim() || '');
|
||||
});
|
||||
return list;
|
||||
});
|
||||
|
||||
console.log(`\nTotal brands: ${brands.length}`);
|
||||
console.log(brands.join(', '));
|
||||
|
||||
await browser.close();
|
||||
}
|
||||
|
||||
main().catch(console.error);
|
||||
@@ -1,157 +0,0 @@
|
||||
import puppeteer from 'puppeteer';
|
||||
|
||||
async function sleep(ms: number): Promise<void> {
|
||||
return new Promise(resolve => setTimeout(resolve, ms));
|
||||
}
|
||||
|
||||
/**
 * One-off scraper: loads the /brands page of shop.bestdispensary.com,
 * clicks "Load More" (via Puppeteer ElementHandle clicks) until all brand
 * links are present, then visits each brand page and counts its distinct
 * product links. Prints a per-brand table plus a summary and a top-20 list.
 */
async function main() {
  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  const page = await browser.newPage();
  await page.setViewport({ width: 1920, height: 1080 });

  // Block heavy resources to speed up page loads; all other requests pass through.
  await page.setRequestInterception(true);
  page.on('request', (req) => {
    if (['image', 'font', 'media'].includes(req.resourceType())) {
      req.abort();
    } else {
      req.continue();
    }
  });

  await page.goto('https://shop.bestdispensary.com/brands', {
    waitUntil: 'networkidle2',
    timeout: 60000
  });
  await sleep(3000);

  // Bypass age gate
  const ageGate = await page.$('[data-testid="age-gate-modal"]');
  if (ageGate) {
    console.log('Bypassing age gate...');
    const btn = await page.$('[data-testid="age-gate-submit-button"]');
    if (btn) await btn.click();
    await sleep(2000);
  }

  // Click "LOAD MORE" until all brands are loaded
  console.log('Loading all brands...\n');

  let loadMoreClicks = 0;
  while (true) {
    const loadMoreBtn = await page.$('button.collection__load-more');
    if (!loadMoreBtn) {
      console.log('No more "Load More" button - all brands loaded!');
      break;
    }

    // A zero-size bounding box means the button exists in the DOM but is hidden.
    const isVisible = await page.evaluate((btn) => {
      const rect = btn.getBoundingClientRect();
      return rect.width > 0 && rect.height > 0;
    }, loadMoreBtn);

    if (!isVisible) {
      console.log('Load More button not visible - all brands loaded!');
      break;
    }

    await loadMoreBtn.click();
    loadMoreClicks++;
    await sleep(1500);

    const brandCount = await page.evaluate(() =>
      document.querySelectorAll('.brands-page__list a[href*="/brand/"]').length
    );
    console.log(`  Click ${loadMoreClicks}: ${brandCount} brands loaded`);

    if (loadMoreClicks > 20) break; // Safety limit
  }

  // Get all brands
  const brands = await page.evaluate(() => {
    const results: { name: string; href: string }[] = [];
    document.querySelectorAll('.brands-page__list a[href*="/brand/"]').forEach((a: Element) => {
      const href = a.getAttribute('href') || '';
      const name = a.textContent?.trim() || '';
      if (name && href) {
        results.push({ name, href });
      }
    });
    return results;
  });

  console.log('\n' + '='.repeat(60));
  console.log(`TOTAL BRANDS: ${brands.length}`);
  console.log('='.repeat(60));

  // Visit each brand and count products
  console.log('\nCounting products per brand...\n');

  const results: { brand: string; products: number }[] = [];

  for (let i = 0; i < brands.length; i++) {
    const brand = brands[i];
    const brandUrl = `https://shop.bestdispensary.com${brand.href}`;

    try {
      await page.goto(brandUrl, { waitUntil: 'networkidle2', timeout: 30000 });
      await sleep(1000);

      // Click load more on brand page too (bounded at 10 clicks per brand)
      for (let j = 0; j < 10; j++) {
        const loadMore = await page.$('button.collection__load-more');
        if (!loadMore) break;

        const isVisible = await page.evaluate((btn) => {
          const rect = btn.getBoundingClientRect();
          return rect.width > 0 && rect.height > 0;
        }, loadMore);

        if (!isVisible) break;
        await loadMore.click();
        await sleep(1000);
      }

      // Count distinct product links; a Set dedupes repeated hrefs.
      const productCount = await page.evaluate(() => {
        const seen = new Set<string>();
        document.querySelectorAll('a[href*="/product/"]').forEach((a: Element) => {
          const href = a.getAttribute('href');
          if (href) seen.add(href);
        });
        return seen.size;
      });

      results.push({ brand: brand.name, products: productCount });
      console.log(`${(i+1).toString().padStart(3)}. ${brand.name}: ${productCount} products`);

    // NOTE(review): `err` is bound but never used; a bare `catch {` would do.
    } catch (err: any) {
      console.log(`${(i+1).toString().padStart(3)}. ${brand.name}: ERROR`);
      results.push({ brand: brand.name, products: 0 });
    }
  }

  // Summary
  const totalProducts = results.reduce((sum, r) => sum + r.products, 0);
  const brandsWithProducts = results.filter(r => r.products > 0).length;

  console.log('\n' + '='.repeat(60));
  console.log('SUMMARY');
  console.log('='.repeat(60));
  console.log(`Total brands: ${brands.length}`);
  console.log(`Brands with products: ${brandsWithProducts}`);
  console.log(`Total products: ${totalProducts}`);

  // Top brands by product count (sort mutates `results`; fine for this one-off)
  console.log('\nTop 20 brands by product count:');
  results
    .sort((a, b) => b.products - a.products)
    .slice(0, 20)
    .forEach((r, i) => console.log(`  ${i+1}. ${r.brand}: ${r.products}`));

  await browser.close();
}

main().catch(console.error);
|
||||
@@ -1,108 +0,0 @@
|
||||
import puppeteer from 'puppeteer';
|
||||
|
||||
async function sleep(ms: number): Promise<void> {
|
||||
return new Promise(resolve => setTimeout(resolve, ms));
|
||||
}
|
||||
|
||||
/**
 * One-off scraper (variant): loads the /brands page and clicks "Load More"
 * via in-page JS (`page.evaluate` + DOM `btn.click()`, rather than Puppeteer
 * ElementHandle clicks), then visits each brand page and counts distinct
 * product links. Prints a per-brand table and a total summary.
 */
async function main() {
  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  const page = await browser.newPage();
  await page.setViewport({ width: 1920, height: 1080 });

  await page.goto('https://shop.bestdispensary.com/brands', {
    waitUntil: 'networkidle2',
    timeout: 60000
  });
  await sleep(3000);

  // Bypass age gate
  const ageGate = await page.$('[data-testid="age-gate-modal"]');
  if (ageGate) {
    const btn = await page.$('[data-testid="age-gate-submit-button"]');
    if (btn) await btn.click();
    await sleep(2000);
  }

  // Try clicking Load More multiple times with JS (bounded at 15 clicks)
  console.log('Loading all brands...');
  for (let i = 0; i < 15; i++) {
    const clicked = await page.evaluate(() => {
      const btn = document.querySelector('button.collection__load-more') as HTMLButtonElement;
      if (btn) { btn.click(); return true; }
      return false;
    });
    if (!clicked) break;
    await sleep(2000);
  }

  // Get all brands (name + href) from the brand list
  const brands = await page.evaluate(() => {
    const list: { name: string; href: string }[] = [];
    document.querySelectorAll('.brands-page__list a[href*="/brand/"]').forEach((a: Element) => {
      list.push({
        name: a.textContent?.trim() || '',
        href: a.getAttribute('href') || '',
      });
    });
    return list;
  });

  console.log('Total brands found: ' + brands.length + '\n');
  console.log('PRODUCTS PER BRAND');
  console.log('==================\n');

  const results: { brand: string; products: number }[] = [];

  for (let i = 0; i < brands.length; i++) {
    const brand = brands[i];
    const url = 'https://shop.bestdispensary.com' + brand.href;

    try {
      await page.goto(url, { waitUntil: 'networkidle2', timeout: 30000 });
      await sleep(1000);

      // Click load more on brand page (bounded at 20 clicks per brand)
      for (let j = 0; j < 20; j++) {
        const clicked = await page.evaluate(() => {
          const btn = document.querySelector('button.collection__load-more') as HTMLButtonElement;
          if (btn) { btn.click(); return true; }
          return false;
        });
        if (!clicked) break;
        await sleep(1000);
      }

      // Count distinct product links via a Set of hrefs
      const productCount = await page.evaluate(() => {
        const seen = new Set<string>();
        document.querySelectorAll('a[href*="/product/"]').forEach((a: Element) => {
          const href = a.getAttribute('href');
          if (href) seen.add(href);
        });
        return seen.size;
      });

      results.push({ brand: brand.name, products: productCount });
      const num = (i + 1).toString().padStart(2, ' ');
      console.log(num + '. ' + brand.name + ': ' + productCount);

    } catch (err) {
      // Navigation failure: record zero products and continue with the next brand
      results.push({ brand: brand.name, products: 0 });
      const num = (i + 1).toString().padStart(2, ' ');
      console.log(num + '. ' + brand.name + ': ERROR');
    }
  }

  // Summary
  const total = results.reduce((s, r) => s + r.products, 0);
  console.log('\n==================');
  console.log('TOTAL: ' + brands.length + ' brands, ' + total + ' products');
  console.log('==================');

  await browser.close();
}

main().catch(console.error);
|
||||
@@ -1,130 +0,0 @@
|
||||
import puppeteer from 'puppeteer';
|
||||
|
||||
async function sleep(ms: number): Promise<void> {
|
||||
return new Promise(resolve => setTimeout(resolve, ms));
|
||||
}
|
||||
|
||||
/**
 * One-off DOM-exploration script: loads the /brands page and dumps structural
 * information (sections under <main>, anchor links, grid-like containers) to
 * help find the right selectors for the real scraper. Output is diagnostic
 * console logging only; nothing is persisted.
 */
async function main() {
  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  const page = await browser.newPage();
  await page.setViewport({ width: 1920, height: 1080 });

  // Block heavy resources to keep the exploration fast
  await page.setRequestInterception(true);
  page.on('request', (req) => {
    if (['image', 'font', 'media'].includes(req.resourceType())) {
      req.abort();
    } else {
      req.continue();
    }
  });

  await page.goto('https://shop.bestdispensary.com/brands', {
    waitUntil: 'networkidle2',
    timeout: 60000
  });
  await sleep(3000);

  // Bypass age gate
  const ageGate = await page.$('[data-testid="age-gate-modal"]');
  if (ageGate) {
    const btn = await page.$('[data-testid="age-gate-submit-button"]');
    if (btn) await btn.click();
    await sleep(2000);
  }

  // Use the selector hint: /html/body/main/section
  console.log('Looking at main > section structure...\n');

  // Summarize each <section> under <main>: class, child count, first 10 children
  const sectionInfo = await page.evaluate(() => {
    const main = document.querySelector('main');
    if (!main) return { error: 'No main element' };

    const sections = main.querySelectorAll('section');
    const results: any[] = [];

    sections.forEach((section, i) => {
      const children = section.children;
      const childInfo: string[] = [];

      for (let j = 0; j < Math.min(children.length, 10); j++) {
        const child = children[j];
        childInfo.push(child.tagName + '.' + (child.className?.slice(0, 30) || ''));
      }

      results.push({
        index: i,
        class: section.className?.slice(0, 50),
        childCount: children.length,
        sampleChildren: childInfo,
      });
    });

    return results;
  });

  console.log('Sections in main:');
  console.log(JSON.stringify(sectionInfo, null, 2));

  // Look for brand cards within the section
  console.log('\nLooking for brand cards in main > section...');

  const brandCards = await page.evaluate(() => {
    const section = document.querySelector('main > section');
    if (!section) return [];

    // Get all child elements that might be brand cards
    const cards: { tag: string; text: string; href: string }[] = [];

    section.querySelectorAll('a').forEach((a: Element) => {
      const href = a.getAttribute('href') || '';
      const text = a.textContent?.trim().slice(0, 50) || '';
      cards.push({ tag: 'a', text, href });
    });

    return cards;
  });

  console.log(`Found ${brandCards.length} links in section:`);
  brandCards.slice(0, 30).forEach(c => console.log(`  ${c.text} -> ${c.href}`));

  // Get the grid of brand cards
  console.log('\nLooking for grid container...');

  // Find containers that look like grids and hold more than 5 /brand/ links
  const gridCards = await page.evaluate(() => {
    // Look for grid-like containers
    const grids = document.querySelectorAll('[class*="grid"], [class*="Grid"], main section > div');
    const results: any[] = [];

    grids.forEach((grid, i) => {
      const links = grid.querySelectorAll('a[href*="/brand/"]');
      if (links.length > 5) {
        const brands: string[] = [];
        links.forEach((a: Element) => {
          const text = a.textContent?.trim().split('\n')[0] || '';
          if (text && !brands.includes(text)) brands.push(text);
        });
        results.push({
          class: grid.className?.slice(0, 40),
          brandCount: brands.length,
          brands: brands.slice(0, 50),
        });
      }
    });

    return results;
  });

  console.log('Grid containers with brands:');
  gridCards.forEach(g => {
    console.log(`\n[${g.brandCount} brands] class="${g.class}"`);
    g.brands.forEach((b: string, i: number) => console.log(`  ${i+1}. ${b}`));
  });

  await browser.close();
}

main().catch(console.error);
|
||||
@@ -1,188 +0,0 @@
|
||||
/**
|
||||
* One-off script to test iHeartJane scraping
|
||||
* Mimics remote worker: Puppeteer + stealth + proxy
|
||||
*
|
||||
* Usage: npx ts-node scripts/test-iheartjane.ts
|
||||
*/
|
||||
|
||||
import puppeteer from 'puppeteer-extra';
|
||||
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
|
||||
|
||||
puppeteer.use(StealthPlugin());
|
||||
|
||||
// Dispensary menu page backed by the iHeartJane platform.
const TARGET_URL = 'https://theflowershopusa.com/mesa/menu/';
// Jane store id for this location, used to fetch store metadata directly.
const STORE_ID = 2788;

/**
 * One-off test: load a Jane-backed menu page with stealth Puppeteer, capture
 * every JSON API response from iheartjane.com/algolia hosts, fetch store info
 * directly from the Jane API in the browser context, then dump a summary of
 * the captured payloads, write the raw data to /tmp, and screenshot the page.
 */
async function main() {
  console.log('[iHeartJane Test] Starting...');

  // No proxy for local testing
  const browser = await puppeteer.launch({
    headless: true,
    args: [
      '--no-sandbox',
      '--disable-setuid-sandbox',
      '--disable-dev-shm-usage',
      '--disable-blink-features=AutomationControlled',
    ],
  });

  const page = await browser.newPage();
  await page.setViewport({ width: 1920, height: 1080 });

  // Intercept network requests to capture API calls
  const apiResponses: any[] = [];

  await page.setRequestInterception(true);
  page.on('request', (req) => {
    // Block heavy resources
    const type = req.resourceType();
    if (['image', 'font', 'media', 'stylesheet'].includes(type)) {
      req.abort();
    } else {
      req.continue();
    }
  });

  page.on('response', async (response) => {
    const url = response.url();
    const contentType = response.headers()['content-type'] || '';

    // Capture any JSON response from iheartjane domains
    if ((url.includes('iheartjane.com') || url.includes('algolia')) && contentType.includes('json')) {
      try {
        const json = await response.json();
        // Classify the response by URL substring for the summary below
        const type = url.includes('store') ? 'STORE' :
                     url.includes('product') ? 'PRODUCT' :
                     url.includes('algolia') ? 'ALGOLIA' : 'API';
        apiResponses.push({ type, url, data: json });
        console.log(`[${type}] ${url.substring(0, 120)}...`);
      } catch {
        // Not JSON
      }
    }
  });

  console.log(`[iHeartJane Test] Navigating to ${TARGET_URL}`);

  try {
    await page.goto(TARGET_URL, {
      waitUntil: 'networkidle2',
      timeout: 60000,
    });

    console.log('[iHeartJane Test] Menu page loaded, waiting for data...');

    // Wait a bit for all API calls to complete
    await new Promise(r => setTimeout(r, 3000));

    // Also try to get store info by visiting the store page
    console.log('[iHeartJane Test] Fetching store info...');
    const storeInfoUrl = `https://api.iheartjane.com/v1/stores/${STORE_ID}`;

    // Try to fetch store info via page.evaluate (uses browser context)
    const storeInfo = await page.evaluate(async (storeId) => {
      try {
        const resp = await fetch(`https://api.iheartjane.com/v1/stores/${storeId}`);
        if (resp.ok) return await resp.json();
        return { error: resp.status };
      } catch (e: any) {
        return { error: e.message };
      }
    }, STORE_ID);

    if (storeInfo && !storeInfo.error) {
      apiResponses.push({ type: 'STORE_DIRECT', url: storeInfoUrl, data: storeInfo });
      console.log('[STORE_DIRECT] Got store info via fetch');
    } else {
      console.log(`[STORE_DIRECT] Failed: ${JSON.stringify(storeInfo)}`);
    }

    console.log('[iHeartJane Test] Processing results...');

    // Wait for products to load; swallow the timeout since this is diagnostic only
    await page.waitForSelector('[data-testid="product-card"], .product-card, [class*="ProductCard"]', {
      timeout: 30000,
    }).catch(() => console.log('[iHeartJane Test] No product cards found via selector'));

    // Try to extract product data from the page
    // NOTE(review): `products` is extracted here but never logged or saved below.
    const products = await page.evaluate(() => {
      // Look for product data in various places
      const results: any[] = [];

      // Method 1: Look for __INITIAL_STATE__ or similar
      const scripts = Array.from(document.querySelectorAll('script'));
      for (const script of scripts) {
        const text = script.textContent || '';
        if (text.includes('products') && text.includes('price')) {
          try {
            // Try to find JSON object
            const match = text.match(/\{[\s\S]*"products"[\s\S]*\}/);
            if (match) {
              results.push({ source: 'script', data: match[0].substring(0, 500) });
            }
          } catch {}
        }
      }

      // Method 2: Look for product elements in DOM
      const productElements = document.querySelectorAll('[data-testid="product-card"], .product-card, [class*="product"]');
      for (const el of Array.from(productElements).slice(0, 5)) {
        const name = el.querySelector('[class*="name"], h3, h4')?.textContent;
        const price = el.querySelector('[class*="price"]')?.textContent;
        if (name) {
          results.push({ source: 'dom', name, price });
        }
      }

      return results;
    });

    console.log('\n[iHeartJane Test] === RESULTS ===');
    console.log(`Total API responses captured: ${apiResponses.length}`);

    // Group by type
    const byType: Record<string, any[]> = {};
    for (const r of apiResponses) {
      byType[r.type] = byType[r.type] || [];
      byType[r.type].push(r);
    }

    for (const [type, items] of Object.entries(byType)) {
      console.log(`\n--- ${type} (${items.length} responses) ---`);
      for (const item of items) {
        console.log(`URL: ${item.url}`);
        // Show structure
        if (item.data.hits) {
          console.log(`  Products: ${item.data.hits.length} hits`);
          if (item.data.hits[0]) {
            console.log(`  Fields: ${Object.keys(item.data.hits[0]).join(', ')}`);
          }
        } else if (item.data.store) {
          console.log(`  Store: ${JSON.stringify(item.data.store, null, 2).substring(0, 1000)}`);
        } else {
          console.log(`  Keys: ${Object.keys(item.data).join(', ')}`);
        }
      }
    }

    // Write full data to file
    const fs = await import('fs');
    fs.writeFileSync('/tmp/iheartjane-data.json', JSON.stringify(apiResponses, null, 2));
    console.log('\n[iHeartJane Test] Full data saved to /tmp/iheartjane-data.json');

    // Take screenshot
    await page.screenshot({ path: '/tmp/iheartjane-test.png', fullPage: false });
    console.log('[iHeartJane Test] Screenshot saved to /tmp/iheartjane-test.png');

  } catch (error: any) {
    console.error('[iHeartJane Test] Error:', error.message);
    await page.screenshot({ path: '/tmp/iheartjane-error.png' });
  } finally {
    await browser.close();
  }

  console.log('[iHeartJane Test] Done');
}

main().catch(console.error);
|
||||
@@ -1,224 +0,0 @@
|
||||
/**
|
||||
* Explore Jane API to understand data structure
|
||||
* Usage: npx ts-node scripts/test-jane-api-explore.ts
|
||||
*/
|
||||
|
||||
import puppeteer from 'puppeteer-extra';
|
||||
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
|
||||
|
||||
puppeteer.use(StealthPlugin());
|
||||
|
||||
/**
 * One-off exploration of the iHeartJane API surface. Visits the Jane stores
 * page (capturing JSON responses from store/search/algolia endpoints as they
 * fire), checks for Next.js-style embedded page data, then probes several
 * candidate REST and Algolia endpoints from inside the browser context and
 * dumps the shape (top-level keys, sample records) of whatever answers.
 */
async function main() {
  console.log('Exploring Jane API from browser context...\n');

  const browser = await puppeteer.launch({
    headless: 'new',
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  const page = await browser.newPage();

  // Intercept network requests to find store data API calls
  const capturedResponses: Array<{ url: string; data: any }> = [];

  await page.setRequestInterception(true);
  page.on('request', (req) => req.continue());

  page.on('response', async (response) => {
    const url = response.url();
    if (url.includes('iheartjane.com') &&
        (url.includes('/stores') || url.includes('/search') || url.includes('algolia'))) {
      try {
        const text = await response.text();
        // Cheap JSON sniff before parsing (avoids choking on HTML/error bodies)
        if (text.startsWith('{') || text.startsWith('[')) {
          const data = JSON.parse(text);
          capturedResponses.push({ url, data });
          console.log(`Captured: ${url.substring(0, 100)}...`);
        }
      } catch {
        // Not JSON
      }
    }
  });

  // Visit Jane to establish session
  console.log('Visiting Jane stores page to capture network requests...');
  await page.goto('https://www.iheartjane.com/stores', {
    waitUntil: 'networkidle2',
    timeout: 60000,
  });

  console.log(`\nCaptured ${capturedResponses.length} API responses`);

  for (const resp of capturedResponses) {
    console.log(`\n--- ${resp.url.substring(0, 80)} ---`);
    const keys = Object.keys(resp.data);
    console.log('Keys:', keys);

    // Check for stores array
    if (resp.data.stores && Array.isArray(resp.data.stores)) {
      console.log(`Stores count: ${resp.data.stores.length}`);
      const firstStore = resp.data.stores[0];
      if (firstStore) {
        console.log('First store keys:', Object.keys(firstStore));
        console.log('Sample:', JSON.stringify(firstStore, null, 2).substring(0, 500));
      }
    }

    // Check for hits (Algolia)
    if (resp.data.hits && Array.isArray(resp.data.hits)) {
      console.log(`Hits count: ${resp.data.hits.length}`);
      const firstHit = resp.data.hits[0];
      if (firstHit) {
        console.log('First hit keys:', Object.keys(firstHit));
      }
    }
  }

  // Look for __NEXT_DATA__ or similar embedded data
  console.log('\n--- Checking for embedded page data ---');
  const pageData = await page.evaluate(() => {
    // Check for Next.js data
    const nextData = (window as any).__NEXT_DATA__;
    if (nextData?.props?.pageProps?.stores) {
      return {
        source: '__NEXT_DATA__',
        storeCount: nextData.props.pageProps.stores.length,
        firstStore: nextData.props.pageProps.stores[0],
      };
    }

    // Check for any global store data
    const win = window as any;
    if (win.stores) return { source: 'window.stores', data: win.stores };
    if (win.__stores) return { source: 'window.__stores', data: win.__stores };

    return null;
  });

  if (pageData) {
    console.log('Found embedded data:', pageData.source);
    console.log('Store count:', pageData.storeCount);
    if (pageData.firstStore) {
      console.log('First store keys:', Object.keys(pageData.firstStore));
      console.log('Sample:', JSON.stringify({
        id: pageData.firstStore.id,
        name: pageData.firstStore.name,
        city: pageData.firstStore.city,
        state: pageData.firstStore.state,
      }, null, 2));
    }
  } else {
    console.log('No embedded page data found');
  }

  // Try alternative API endpoints from browser context
  console.log('\n--- Testing alternative API endpoints ---');

  // Try the map endpoint
  const mapData = await page.evaluate(async () => {
    try {
      const res = await fetch('https://api.iheartjane.com/v1/stores/map?per_page=100');
      if (res.ok) return await res.json();
    } catch {}
    return null;
  });

  if (mapData) {
    console.log('\n/v1/stores/map response:');
    console.log('Keys:', Object.keys(mapData));
    if (mapData.stores?.[0]) {
      console.log('First store keys:', Object.keys(mapData.stores[0]));
    }
  }

  // Try index endpoint
  const indexData = await page.evaluate(async () => {
    try {
      const res = await fetch('https://api.iheartjane.com/v1/stores/index?per_page=10');
      if (res.ok) return await res.json();
    } catch {}
    return null;
  });

  if (indexData) {
    console.log('\n/v1/stores/index response:');
    console.log('Keys:', Object.keys(indexData));
    if (indexData.stores?.[0]) {
      console.log('First store keys:', Object.keys(indexData.stores[0]));
    }
  }

  // Try with state parameter
  const stateData = await page.evaluate(async () => {
    try {
      const res = await fetch('https://api.iheartjane.com/v1/stores?state=AZ&per_page=10');
      if (res.ok) return await res.json();
    } catch {}
    return null;
  });

  if (stateData) {
    console.log('\n/v1/stores?state=AZ response:');
    console.log('Keys:', Object.keys(stateData));
    console.log('Stores count:', stateData.stores?.length);
    if (stateData.stores?.[0]) {
      console.log('First store keys:', Object.keys(stateData.stores[0]));
      console.log('Sample:', JSON.stringify(stateData.stores[0], null, 2).substring(0, 300));
    }
  }

  // Try Algolia directly for stores
  console.log('\n--- Testing Algolia for stores ---');
  const algoliaStores = await page.evaluate(async () => {
    try {
      // Common Algolia search pattern
      const res = await fetch('https://search.iheartjane.com/1/indexes/stores-production/query', {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
          'X-Algolia-Application-Id': 'HKXSXRD7RA',
          'X-Algolia-API-Key': 'YjZhYjQxZjU4ZTNjMTRhYzExZTk2YjU2MzliMGE4ZTE5YjJkMmZkZTI2ODllYTY2MThlMzQ3Y2QxOTFkMjI5Y3RhZ0ZpbHRlcnM9',
        },
        body: JSON.stringify({
          query: 'Arizona',
          hitsPerPage: 20,
        }),
      });
      if (res.ok) return await res.json();
    } catch {}
    return null;
  });

  if (algoliaStores) {
    console.log('Algolia stores-production response:');
    console.log('Keys:', Object.keys(algoliaStores));
    console.log('Hits count:', algoliaStores.hits?.length);
    if (algoliaStores.hits?.[0]) {
      console.log('First hit keys:', Object.keys(algoliaStores.hits[0]));
      console.log('Sample:', JSON.stringify(algoliaStores.hits[0], null, 2).substring(0, 500));
    }
  }

  // Check if there's a /v2 endpoint
  const v2Data = await page.evaluate(async () => {
    try {
      const res = await fetch('https://api.iheartjane.com/v2/stores?per_page=10');
      if (res.ok) return await res.json();
    } catch {}
    return null;
  });

  if (v2Data) {
    console.log('\n/v2/stores response:');
    console.log('Keys:', Object.keys(v2Data));
    if (v2Data.stores?.[0]) {
      console.log('First store keys:', Object.keys(v2Data.stores[0]));
    }
  }

  await browser.close();
  console.log('\nDone!');
}

main().catch(console.error);
|
||||
@@ -1,126 +0,0 @@
|
||||
/**
|
||||
* Test script for Jane platform client
|
||||
* Tests the new Jane integration with The Flower Shop Mesa
|
||||
*
|
||||
* Usage: npx ts-node scripts/test-jane-client.ts
|
||||
*/
|
||||
|
||||
import {
|
||||
startSession,
|
||||
endSession,
|
||||
fetchProductsFromUrl,
|
||||
resolveStoreFromUrl,
|
||||
} from '../src/platforms/jane';
|
||||
import { JaneNormalizer } from '../src/hydration/normalizers/jane';
|
||||
|
||||
// Jane-backed menu used as the integration-test target.
const TEST_URL = 'https://theflowershopusa.com/mesa/menu/';

/**
 * Smoke test for the project's Jane platform client: fetches products from a
 * live menu URL via `fetchProductsFromUrl`, prints store/product details, then
 * feeds the captured products through `JaneNormalizer` using a hand-built
 * payload record and prints the normalized output. Exits non-zero on failure.
 */
async function main() {
  console.log('='.repeat(60));
  console.log('Jane Platform Client Test');
  console.log('='.repeat(60));
  console.log(`Test URL: ${TEST_URL}`);
  console.log('');

  try {
    // Test 1: Fetch products from URL
    console.log('[Test 1] Fetching products from menu URL...');
    const result = await fetchProductsFromUrl(TEST_URL);

    console.log('');
    console.log('[Results]');
    console.log(`  Store: ${result.store?.name || 'Not captured'}`);
    console.log(`  Store ID: ${result.store?.id || 'N/A'}`);
    console.log(`  Products captured: ${result.products.length}`);
    console.log(`  API responses: ${result.responses.length}`);

    if (result.store) {
      console.log('');
      console.log('[Store Info]');
      console.log(`  Address: ${result.store.address}, ${result.store.city}, ${result.store.state} ${result.store.zip}`);
      console.log(`  Phone: ${result.store.phone}`);
      console.log(`  Coordinates: ${result.store.lat}, ${result.store.long}`);
      console.log(`  Medical: ${result.store.medical}, Recreational: ${result.store.recreational}`);
      console.log(`  Rating: ${result.store.rating} (${result.store.reviews_count} reviews)`);
      console.log(`  Product count (store): ${result.store.product_count}`);
    }

    if (result.products.length > 0) {
      console.log('');
      console.log('[Sample Products (first 5)]');
      for (const p of result.products.slice(0, 5)) {
        const price = p.price_gram || p.price_each || 'N/A';
        console.log(`  - ${p.name} (${p.brand}) - $${price}`);
        console.log(`    Kind: ${p.kind}, Category: ${p.category}, THC: ${p.percent_thc}%`);
      }

      // Test 2: Normalize products
      console.log('');
      console.log('[Test 2] Testing normalizer...');
      const normalizer = new JaneNormalizer();

      // Build a fake payload structure
      // NOTE(review): field set mirrors the project's payload record type —
      // confirm against the schema if that type changes.
      const fakePayload = {
        id: 'test-payload',
        dispensary_id: 9999,
        crawl_run_id: null,
        platform: 'jane',
        payload_version: 1,
        raw_json: { hits: result.products.map(p => p.raw) },
        product_count: result.products.length,
        pricing_type: null,
        crawl_mode: null,
        fetched_at: new Date(),
        processed: false,
        normalized_at: null,
        hydration_error: null,
        hydration_attempts: 0,
        created_at: new Date(),
      };

      const normalized = normalizer.normalize(fakePayload);

      console.log(`  Products normalized: ${normalized.products.length}`);
      console.log(`  Brands extracted: ${normalized.brands.length}`);
      console.log(`  Categories extracted: ${normalized.categories.length}`);
      console.log(`  Errors: ${normalized.errors.length}`);

      if (normalized.products.length > 0) {
        console.log('');
        console.log('[Sample Normalized Product]');
        const np = normalized.products[0];
        console.log(`  External ID: ${np.externalProductId}`);
        console.log(`  Name: ${np.name}`);
        console.log(`  Brand: ${np.brandName}`);
        console.log(`  Category: ${np.category}`);
        console.log(`  Type: ${np.type}`);
        console.log(`  Strain: ${np.strainType}`);
        console.log(`  THC: ${np.thcPercent}%`);
        console.log(`  CBD: ${np.cbdPercent}%`);
        console.log(`  Image: ${np.primaryImageUrl?.slice(0, 60)}...`);

        const pricing = normalized.pricing.get(np.externalProductId);
        if (pricing) {
          console.log(`  Price (cents): ${pricing.priceRec}`);
          console.log(`  On Special: ${pricing.isOnSpecial}`);
        }
      }
    }

    console.log('');
    console.log('='.repeat(60));
    console.log('TEST PASSED');
    console.log('='.repeat(60));

  } catch (error: any) {
    console.error('');
    console.error('='.repeat(60));
    console.error('TEST FAILED');
    console.error('='.repeat(60));
    console.error(`Error: ${error.message}`);
    console.error(error.stack);
    process.exit(1);
  }
}

main().catch(console.error);
|
||||
@@ -1,50 +0,0 @@
|
||||
/**
|
||||
* Smoke test: Discover Jane stores in Arizona
|
||||
* Usage: npx ts-node scripts/test-jane-discovery-az.ts
|
||||
*/
|
||||
|
||||
import { discoverStoresByState } from '../src/platforms/jane';
|
||||
|
||||
async function main() {
|
||||
console.log('='.repeat(60));
|
||||
console.log('Jane Store Discovery - Arizona Smoke Test');
|
||||
console.log('='.repeat(60));
|
||||
console.log('Using local IP (no proxy)\n');
|
||||
|
||||
try {
|
||||
const stores = await discoverStoresByState('AZ');
|
||||
|
||||
console.log(`\n${'='.repeat(60)}`);
|
||||
console.log(`RESULTS: Found ${stores.length} Jane stores in Arizona`);
|
||||
console.log('='.repeat(60));
|
||||
|
||||
if (stores.length > 0) {
|
||||
console.log('\nSample stores:');
|
||||
for (const store of stores.slice(0, 10)) {
|
||||
console.log(` - ${store.name}`);
|
||||
console.log(` ID: ${store.storeId} | ${store.city}, AZ`);
|
||||
console.log(` Types: ${store.storeTypes?.join(', ') || 'unknown'}`);
|
||||
console.log(` Products: ${store.productCount || 'N/A'}`);
|
||||
console.log('');
|
||||
}
|
||||
|
||||
if (stores.length > 10) {
|
||||
console.log(` ... and ${stores.length - 10} more stores`);
|
||||
}
|
||||
}
|
||||
|
||||
console.log('\n' + '='.repeat(60));
|
||||
console.log('SMOKE TEST PASSED');
|
||||
console.log('='.repeat(60));
|
||||
|
||||
} catch (error: any) {
|
||||
console.error('\n' + '='.repeat(60));
|
||||
console.error('SMOKE TEST FAILED');
|
||||
console.error('='.repeat(60));
|
||||
console.error(`Error: ${error.message}`);
|
||||
console.error(error.stack);
|
||||
process.exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
main();
|
||||
@@ -1,55 +0,0 @@
|
||||
/**
|
||||
* Compare MED vs REC product menus for same location
|
||||
*/
|
||||
import puppeteer from 'puppeteer-extra';
|
||||
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
|
||||
puppeteer.use(StealthPlugin());
|
||||
|
||||
async function main() {
|
||||
const browser = await puppeteer.launch({ headless: 'new', args: ['--no-sandbox'] });
|
||||
const page = await browser.newPage();
|
||||
|
||||
await page.goto('https://www.iheartjane.com/stores', { waitUntil: 'domcontentloaded' });
|
||||
await new Promise(r => setTimeout(r, 2000));
|
||||
|
||||
// Fetch REC products (store 3379)
|
||||
const recProducts: number[] = await page.evaluate(async () => {
|
||||
const res = await fetch('https://search.iheartjane.com/1/indexes/menu-products-production/query', {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({ query: '', hitsPerPage: 100, filters: 'store_id=3379' }),
|
||||
});
|
||||
const data = await res.json();
|
||||
return data.hits?.map((h: any) => h.product_id) || [];
|
||||
});
|
||||
|
||||
// Fetch MED products (store 4540)
|
||||
const medProducts: number[] = await page.evaluate(async () => {
|
||||
const res = await fetch('https://search.iheartjane.com/1/indexes/menu-products-production/query', {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({ query: '', hitsPerPage: 100, filters: 'store_id=4540' }),
|
||||
});
|
||||
const data = await res.json();
|
||||
return data.hits?.map((h: any) => h.product_id) || [];
|
||||
});
|
||||
|
||||
const recSet = new Set(recProducts);
|
||||
const medSet = new Set(medProducts);
|
||||
|
||||
const recOnly = recProducts.filter(id => !medSet.has(id)).length;
|
||||
const medOnly = medProducts.filter(id => !recSet.has(id)).length;
|
||||
const shared = recProducts.filter(id => medSet.has(id)).length;
|
||||
|
||||
console.log('\nHana Phoenix - MED vs REC comparison (100 products each):');
|
||||
console.log(' REC products fetched:', recProducts.length);
|
||||
console.log(' MED products fetched:', medProducts.length);
|
||||
console.log(' REC-only:', recOnly);
|
||||
console.log(' MED-only:', medOnly);
|
||||
console.log(' Shared:', shared);
|
||||
console.log(' Menus are:', shared === 0 ? 'COMPLETELY DIFFERENT' : shared === recProducts.length ? 'IDENTICAL' : 'PARTIALLY OVERLAPPING');
|
||||
|
||||
await browser.close();
|
||||
}
|
||||
|
||||
main().catch(console.error);
|
||||
@@ -1,79 +0,0 @@
|
||||
/**
|
||||
* Find ALL differing fields between MED and REC product payloads
|
||||
*/
|
||||
import puppeteer from 'puppeteer-extra';
|
||||
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
|
||||
puppeteer.use(StealthPlugin());
|
||||
|
||||
async function main() {
|
||||
const browser = await puppeteer.launch({ headless: 'new', args: ['--no-sandbox'] });
|
||||
const page = await browser.newPage();
|
||||
|
||||
await page.goto('https://www.iheartjane.com/stores', { waitUntil: 'domcontentloaded' });
|
||||
await new Promise(r => setTimeout(r, 2000));
|
||||
|
||||
// Get full product payload from REC store
|
||||
const recProduct = await page.evaluate(async () => {
|
||||
const res = await fetch('https://search.iheartjane.com/1/indexes/menu-products-production/query', {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({ query: '', hitsPerPage: 1, filters: 'store_id=3379' }),
|
||||
});
|
||||
const data = await res.json();
|
||||
return data.hits?.[0];
|
||||
});
|
||||
|
||||
const productId = recProduct?.product_id;
|
||||
|
||||
// Get same product from MED store
|
||||
const medProduct = await page.evaluate(async (pid: number) => {
|
||||
const res = await fetch('https://search.iheartjane.com/1/indexes/menu-products-production/query', {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({ query: '', hitsPerPage: 100, filters: 'store_id=4540' }),
|
||||
});
|
||||
const data = await res.json();
|
||||
return data.hits?.find((h: any) => h.product_id === pid);
|
||||
}, productId);
|
||||
|
||||
console.log('Product:', recProduct?.name, '(ID:', productId, ')\n');
|
||||
|
||||
// Get all keys
|
||||
const allKeys = new Set([...Object.keys(recProduct || {}), ...Object.keys(medProduct || {})]);
|
||||
const sortedKeys = [...allKeys].sort();
|
||||
|
||||
console.log('=== ALL KEYS IN PAYLOAD ===');
|
||||
console.log(sortedKeys.join(', '));
|
||||
|
||||
console.log('\n=== FIELDS THAT DIFFER ===');
|
||||
let diffCount = 0;
|
||||
for (const key of sortedKeys) {
|
||||
const recVal = JSON.stringify(recProduct?.[key]);
|
||||
const medVal = JSON.stringify(medProduct?.[key]);
|
||||
if (recVal !== medVal) {
|
||||
diffCount++;
|
||||
console.log(`${key}:`);
|
||||
console.log(` REC: ${recVal?.substring(0, 100)}`);
|
||||
console.log(` MED: ${medVal?.substring(0, 100)}`);
|
||||
}
|
||||
}
|
||||
|
||||
if (diffCount === 0) {
|
||||
console.log('(none - payloads are identical)');
|
||||
}
|
||||
|
||||
// Check for limit/allowance related fields
|
||||
console.log('\n=== LIMIT-RELATED FIELDS ===');
|
||||
const limitFields = sortedKeys.filter(k =>
|
||||
k.includes('limit') || k.includes('max') || k.includes('allow') ||
|
||||
k.includes('quantity') || k.includes('cart') || k.includes('medical') ||
|
||||
k.includes('rec') || k.includes('weight')
|
||||
);
|
||||
for (const key of limitFields) {
|
||||
console.log(`${key}: REC=${JSON.stringify(recProduct?.[key])} | MED=${JSON.stringify(medProduct?.[key])}`);
|
||||
}
|
||||
|
||||
await browser.close();
|
||||
}
|
||||
|
||||
main().catch(console.error);
|
||||
@@ -1,35 +0,0 @@
|
||||
/**
|
||||
* Test script to capture and save full Jane payload
|
||||
* Usage: npx ts-node scripts/test-jane-payload.ts
|
||||
*/
|
||||
|
||||
import * as fs from 'fs';
|
||||
import { fetchProductsFromUrl } from '../src/platforms/jane';
|
||||
|
||||
const TEST_URL = 'https://theflowershopusa.com/mesa/menu/';
|
||||
const OUTPUT_FILE = '/tmp/jane-test-payload.json';
|
||||
|
||||
async function main() {
|
||||
console.log('Fetching Jane payload...');
|
||||
|
||||
const result = await fetchProductsFromUrl(TEST_URL);
|
||||
|
||||
// Build payload structure matching what would be saved
|
||||
const payload = {
|
||||
hits: result.products.map(p => p.raw),
|
||||
store: result.store?.raw || null,
|
||||
capturedAt: new Date().toISOString(),
|
||||
platform: 'jane',
|
||||
storeId: result.store?.id,
|
||||
productCount: result.products.length,
|
||||
responseCount: result.responses.length,
|
||||
};
|
||||
|
||||
// Save to file
|
||||
fs.writeFileSync(OUTPUT_FILE, JSON.stringify(payload, null, 2));
|
||||
console.log(`\nPayload saved to: ${OUTPUT_FILE}`);
|
||||
console.log(`Products: ${result.products.length}`);
|
||||
console.log(`Size: ${Math.round(fs.statSync(OUTPUT_FILE).size / 1024)}KB`);
|
||||
}
|
||||
|
||||
main().catch(console.error);
|
||||
@@ -1,138 +0,0 @@
|
||||
import puppeteer from 'puppeteer';
|
||||
|
||||
async function sleep(ms: number): Promise<void> {
|
||||
return new Promise(resolve => setTimeout(resolve, ms));
|
||||
}
|
||||
|
||||
async function main() {
|
||||
const browser = await puppeteer.launch({
|
||||
headless: true,
|
||||
args: ['--no-sandbox', '--disable-setuid-sandbox'],
|
||||
});
|
||||
|
||||
const page = await browser.newPage();
|
||||
await page.setViewport({ width: 1920, height: 1080 });
|
||||
|
||||
// Capture ALL requests to treez.io
|
||||
const treezRequests: any[] = [];
|
||||
|
||||
page.on('request', (req) => {
|
||||
const url = req.url();
|
||||
if (url.includes('treez.io') && !url.includes('.js') && !url.includes('.css')) {
|
||||
treezRequests.push({
|
||||
url: url,
|
||||
method: req.method(),
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
// Also intercept and capture ES API responses
|
||||
page.on('response', async (res) => {
|
||||
const url = res.url();
|
||||
if (url.includes('gapcommerceapi.com') && res.status() === 200) {
|
||||
try {
|
||||
const json = await res.json();
|
||||
const total = json.hits?.total?.value;
|
||||
const count = json.hits?.hits?.length;
|
||||
if (total || count) {
|
||||
console.log('\nES Response: total=' + total + ', returned=' + count);
|
||||
if (json.hits?.hits?.[0]?._source) {
|
||||
const src = json.hits.hits[0]._source;
|
||||
console.log('First product fields: ' + Object.keys(src).slice(0, 20).join(', '));
|
||||
}
|
||||
}
|
||||
} catch {}
|
||||
}
|
||||
});
|
||||
|
||||
console.log('Loading /shop page...\n');
|
||||
|
||||
await page.goto('https://shop.bestdispensary.com/shop', {
|
||||
waitUntil: 'networkidle2',
|
||||
timeout: 60000
|
||||
});
|
||||
await sleep(3000);
|
||||
|
||||
// Bypass age gate
|
||||
const ageGate = await page.$('[data-testid="age-gate-modal"]');
|
||||
if (ageGate) {
|
||||
const btn = await page.$('[data-testid="age-gate-submit-button"]');
|
||||
if (btn) await btn.click();
|
||||
await sleep(2000);
|
||||
}
|
||||
|
||||
// Click load more several times
|
||||
console.log('\nClicking Load More...');
|
||||
for (let i = 0; i < 5; i++) {
|
||||
const btn = await page.$('button.collection__load-more');
|
||||
if (!btn) break;
|
||||
await btn.click();
|
||||
await sleep(2000);
|
||||
}
|
||||
|
||||
console.log('\n=== TREEZ API ENDPOINTS CALLED ===\n');
|
||||
const uniqueUrls = [...new Set(treezRequests.map(r => r.url.split('?')[0]))];
|
||||
uniqueUrls.forEach(url => console.log(url));
|
||||
|
||||
// Now intercept the ES response data by making a request from browser context
|
||||
console.log('\n=== FETCHING ALL PRODUCTS VIA BROWSER ===\n');
|
||||
|
||||
const allProducts = await page.evaluate(async () => {
|
||||
const apiKey = 'V3jHL9dFzi3Gj4UISM4lr38Nm0GSxcps5OBz1PbS';
|
||||
const url = 'https://search-kyrok9udlk.gapcommerceapi.com/product/search';
|
||||
|
||||
const query = {
|
||||
from: 0,
|
||||
size: 1000,
|
||||
query: {
|
||||
bool: {
|
||||
must: [
|
||||
{ bool: { filter: { range: { customMinPrice: { gte: 0.01, lte: 500000 }}}}},
|
||||
{ bool: { should: [{ match: { isAboveThreshold: true }}]}},
|
||||
{ bool: { should: [{ match: { isHideFromMenu: false }}]}}
|
||||
]
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
try {
|
||||
const response = await fetch(url, {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
'x-api-key': apiKey,
|
||||
},
|
||||
body: JSON.stringify(query),
|
||||
});
|
||||
|
||||
const data = await response.json();
|
||||
return {
|
||||
total: data.hits?.total?.value,
|
||||
count: data.hits?.hits?.length,
|
||||
sample: data.hits?.hits?.[0]?._source,
|
||||
allProducts: data.hits?.hits?.map((h: any) => h._source),
|
||||
};
|
||||
} catch (err: any) {
|
||||
return { error: err.message };
|
||||
}
|
||||
});
|
||||
|
||||
if (allProducts.error) {
|
||||
console.log('Error: ' + allProducts.error);
|
||||
} else {
|
||||
console.log('Total products: ' + allProducts.total);
|
||||
console.log('Returned: ' + allProducts.count);
|
||||
|
||||
if (allProducts.sample) {
|
||||
console.log('\n=== PRODUCT FIELDS ===\n');
|
||||
console.log(Object.keys(allProducts.sample).sort().join('\n'));
|
||||
|
||||
console.log('\n=== SAMPLE PRODUCT ===\n');
|
||||
console.log(JSON.stringify(allProducts.sample, null, 2));
|
||||
}
|
||||
}
|
||||
|
||||
await browser.close();
|
||||
}
|
||||
|
||||
main();
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user