Compare commits

70 Commits — feature/ca...fix/ci-bui

| SHA1 |
|---|
| 97b1ab23d8 |
| 7d3e91b2e6 |
| 74957a9ec5 |
| 2d035c46cf |
| 53445fe72a |
| 37cc8956c5 |
| 197c82f921 |
| 2c52493a9c |
| 2ee2ba6b8c |
| bafcf1694a |
| 95792aab15 |
| 38ae2c3a3e |
| 249d3c1b7f |
| 9647f94f89 |
| afc288d2cf |
| df01ce6aad |
| aea93bc96b |
| 4e84f30f8b |
| b20a0a4fa5 |
| 6eb1babc86 |
| 9a9c2f76a2 |
| 56cc171287 |
| 0295637ed6 |
| 9c6dd37316 |
| 524d13209a |
| 9199db3927 |
| a0652c7c73 |
| 89c262ee20 |
| 7f9cf559cf |
| bbe039c868 |
| 4e5c09a2a5 |
| 7f65598332 |
| 75315ed91e |
| 7fe7d17b43 |
| 7e517b5801 |
| 38ba9021d1 |
| ddebad48d3 |
| 1cebf2e296 |
| 1d6e67d837 |
| cfb4b6e4ce |
| f418c403d6 |
| be4221af46 |
| ca07606b05 |
| baf1bf2eb7 |
| 4ef3a8d72b |
| 09dd756eff |
| ec8ef6210c |
| a9b7a4d7a9 |
| 5119d5ccf9 |
| 91efd1d03d |
| aa776226b0 |
| e9435150e9 |
| d399b966e6 |
| f5f0e25384 |
| 04de33e5f7 |
| 37dfea25e1 |
| e2166bc25f |
| b5e8f039bf |
| 346e6d1cd8 |
| be434d25e3 |
| ecc201e9d4 |
| 67bfdf47a5 |
| 3fa22a6ba1 |
| 9f898f68db |
| f78b05360a |
| 2f483b3084 |
| 9711d594db |
| 39aebfcb82 |
| 5415cac2f3 |
| 70d2364a6f |
@@ -2,37 +2,77 @@ when:
 - event: [push, pull_request]

 steps:
-# Build checks
+# ===========================================
+# PR VALIDATION: Parallel type checks (PRs only)
+# ===========================================
 typecheck-backend:
-image: node:20
+image: code.cannabrands.app/creationshop/node:20
 commands:
 - cd backend
-- npm ci
+- npm ci --prefer-offline
-- npx tsc --noEmit || true
+- npx tsc --noEmit
+depends_on: []
+when:
+event: pull_request

-build-cannaiq:
+typecheck-cannaiq:
-image: node:20
+image: code.cannabrands.app/creationshop/node:20
 commands:
 - cd cannaiq
-- npm ci
+- npm ci --prefer-offline
 - npx tsc --noEmit
-- npm run build
+depends_on: []
+when:
+event: pull_request

-build-findadispo:
+typecheck-findadispo:
-image: node:20
+image: code.cannabrands.app/creationshop/node:20
 commands:
 - cd findadispo/frontend
-- npm ci
+- npm ci --prefer-offline
-- npm run build
+- npx tsc --noEmit 2>/dev/null || true
+depends_on: []
+when:
+event: pull_request

-build-findagram:
+typecheck-findagram:
-image: node:20
+image: code.cannabrands.app/creationshop/node:20
 commands:
 - cd findagram/frontend
-- npm ci
+- npm ci --prefer-offline
-- npm run build
+- npx tsc --noEmit 2>/dev/null || true
+depends_on: []
+when:
+event: pull_request

-# Docker builds - only on master
+# ===========================================
+# AUTO-MERGE: Merge PR after all checks pass
+# ===========================================
+auto-merge:
+image: alpine:latest
+environment:
+GITEA_TOKEN:
+from_secret: gitea_token
+commands:
+- apk add --no-cache curl
+- |
+echo "Merging PR #${CI_COMMIT_PULL_REQUEST}..."
+curl -s -X POST \
+-H "Authorization: token $GITEA_TOKEN" \
+-H "Content-Type: application/json" \
+-d '{"Do":"merge"}' \
+"https://code.cannabrands.app/api/v1/repos/Creationshop/dispensary-scraper/pulls/${CI_COMMIT_PULL_REQUEST}/merge"
+depends_on:
+- typecheck-backend
+- typecheck-cannaiq
+- typecheck-findadispo
+- typecheck-findagram
+when:
+event: pull_request

+# ===========================================
+# MASTER DEPLOY: Parallel Docker builds
+# ===========================================
 docker-backend:
 image: woodpeckerci/plugin-docker-buildx
 settings:
@@ -49,6 +89,12 @@ steps:
 from_secret: registry_password
 platforms: linux/amd64
 provenance: false
+build_args:
+- APP_BUILD_VERSION=${CI_COMMIT_SHA:0:8}
+- APP_GIT_SHA=${CI_COMMIT_SHA}
+- APP_BUILD_TIME=${CI_PIPELINE_CREATED}
+- CONTAINER_IMAGE_TAG=${CI_COMMIT_SHA:0:8}
+depends_on: []
 when:
 branch: master
 event: push
@@ -69,6 +115,7 @@ steps:
 from_secret: registry_password
 platforms: linux/amd64
 provenance: false
+depends_on: []
 when:
 branch: master
 event: push
@@ -89,6 +136,7 @@ steps:
 from_secret: registry_password
 platforms: linux/amd64
 provenance: false
+depends_on: []
 when:
 branch: master
 event: push
@@ -109,32 +157,35 @@ steps:
 from_secret: registry_password
 platforms: linux/amd64
 provenance: false
+depends_on: []
 when:
 branch: master
 event: push

-# Deploy to Kubernetes
+# ===========================================
+# STAGE 3: Deploy (after Docker builds)
+# ===========================================
 deploy:
 image: bitnami/kubectl:latest
 environment:
 KUBECONFIG_CONTENT:
 from_secret: kubeconfig_data
 commands:
-- echo "Deploying to Kubernetes..."
 - mkdir -p ~/.kube
 - echo "$KUBECONFIG_CONTENT" | tr -d '[:space:]' | base64 -d > ~/.kube/config
 - chmod 600 ~/.kube/config
 - kubectl set image deployment/scraper scraper=code.cannabrands.app/creationshop/dispensary-scraper:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
-- kubectl set image deployment/scraper-worker scraper-worker=code.cannabrands.app/creationshop/dispensary-scraper:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
+- kubectl set image deployment/scraper-worker worker=code.cannabrands.app/creationshop/dispensary-scraper:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
 - kubectl set image deployment/cannaiq-frontend cannaiq-frontend=code.cannabrands.app/creationshop/cannaiq-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
 - kubectl set image deployment/findadispo-frontend findadispo-frontend=code.cannabrands.app/creationshop/findadispo-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
 - kubectl set image deployment/findagram-frontend findagram-frontend=code.cannabrands.app/creationshop/findagram-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
 - kubectl rollout status deployment/scraper -n dispensary-scraper --timeout=300s
-- kubectl rollout status deployment/scraper-worker -n dispensary-scraper --timeout=300s
 - kubectl rollout status deployment/cannaiq-frontend -n dispensary-scraper --timeout=120s
-- kubectl rollout status deployment/findadispo-frontend -n dispensary-scraper --timeout=120s
-- kubectl rollout status deployment/findagram-frontend -n dispensary-scraper --timeout=120s
-- echo "All deployments complete!"
+depends_on:
+- docker-backend
+- docker-cannaiq
+- docker-findadispo
+- docker-findagram
 when:
 branch: master
 event: push
224  CLAUDE.md
@@ -213,22 +213,23 @@ CannaiQ has **TWO databases** with distinct purposes:
 | Table | Purpose | Row Count |
 |-------|---------|-----------|
 | `dispensaries` | Store/dispensary records | ~188+ rows |
-| `dutchie_products` | Product catalog | ~37,000+ rows |
+| `store_products` | Product catalog | ~37,000+ rows |
-| `dutchie_product_snapshots` | Price/stock history | ~millions |
+| `store_product_snapshots` | Price/stock history | ~millions |
-| `store_products` | Canonical product schema | ~37,000+ rows |
-| `store_product_snapshots` | Canonical snapshot schema | growing |

 **LEGACY TABLES (EMPTY - DO NOT USE):**

 | Table | Status | Action |
 |-------|--------|--------|
 | `stores` | EMPTY (0 rows) | Use `dispensaries` instead |
-| `products` | EMPTY (0 rows) | Use `dutchie_products` or `store_products` |
+| `products` | EMPTY (0 rows) | Use `store_products` instead |
+| `dutchie_products` | LEGACY (0 rows) | Use `store_products` instead |
+| `dutchie_product_snapshots` | LEGACY (0 rows) | Use `store_product_snapshots` instead |
 | `categories` | EMPTY (0 rows) | Categories stored in product records |

 **Code must NEVER:**
 - Query the `stores` table (use `dispensaries`)
-- Query the `products` table (use `dutchie_products` or `store_products`)
+- Query the `products` table (use `store_products`)
+- Query the `dutchie_products` table (use `store_products`)
 - Query the `categories` table (categories are in product records)

 **CRITICAL RULES:**
@@ -343,23 +344,23 @@ npx tsx src/scripts/etl/042_legacy_import.ts
 - SCHEMA ONLY - no data inserts from legacy tables

 **ETL Script 042** (`backend/src/scripts/etl/042_legacy_import.ts`):
-- Copies data from `dutchie_products` → `store_products`
+- Copies data from legacy `dutchie_legacy.dutchie_products` → `store_products`
-- Copies data from `dutchie_product_snapshots` → `store_product_snapshots`
+- Copies data from legacy `dutchie_legacy.dutchie_product_snapshots` → `store_product_snapshots`
 - Extracts brands from product data into `brands` table
 - Links dispensaries to chains and states
 - INSERT-ONLY and IDEMPOTENT (uses ON CONFLICT DO NOTHING)
 - Run manually: `cd backend && npx tsx src/scripts/etl/042_legacy_import.ts`

 **Tables touched by ETL:**
-| Source Table | Target Table |
-|--------------|--------------|
+| Source Table (dutchie_legacy) | Target Table (dutchie_menus) |
+|-------------------------------|------------------------------|
 | `dutchie_products` | `store_products` |
 | `dutchie_product_snapshots` | `store_product_snapshots` |
 | (brand names extracted) | `brands` |
 | (state codes mapped) | `dispensaries.state_id` |
 | (chain names matched) | `dispensaries.chain_id` |

-**Legacy tables remain intact** - `dutchie_products` and `dutchie_product_snapshots` are not modified.
+**Note:** The legacy `dutchie_products` and `dutchie_product_snapshots` tables in `dutchie_legacy` are read-only sources. All new crawl data goes directly to `store_products` and `store_product_snapshots`.

 **Migration 045** (`backend/migrations/045_add_image_columns.sql`):
 - Adds `thumbnail_url` to `store_products` and `store_product_snapshots`
@@ -459,15 +460,66 @@ const result = await pool.query(`
 ### Local Storage Structure

 ```
-/storage/products/{brand}/{state}/{product_id}/
+/storage/images/products/{state}/{store}/{brand}/{product}/
 image-{hash}.webp
-image-{hash}-medium.webp
-image-{hash}-thumb.webp

-/storage/brands/{brand}/
+/storage/images/brands/{brand}/
 logo-{hash}.webp
 ```

+### Image Proxy API (On-Demand Resizing)
+
+Images are stored at full resolution and resized on-demand via the `/img` endpoint.
+
+**Endpoint:** `GET /img/<path>?<params>`
+
+**Parameters:**
+| Param | Description | Example |
+|-------|-------------|---------|
+| `w` | Width in pixels (max 4000) | `?w=200` |
+| `h` | Height in pixels (max 4000) | `?h=200` |
+| `q` | Quality 1-100 (default 80) | `?q=70` |
+| `fit` | Resize mode: cover, contain, fill, inside, outside | `?fit=cover` |
+| `blur` | Blur sigma 0.3-1000 | `?blur=5` |
+| `gray` | Grayscale (1 = enabled) | `?gray=1` |
+| `format` | Output: webp, jpeg, png, avif (default webp) | `?format=jpeg` |
+
+**Examples:**
+```bash
+# Thumbnail (50px)
+GET /img/products/az/store/brand/product/image-abc123.webp?w=50
+
+# Card image (200px, cover fit)
+GET /img/products/az/store/brand/product/image-abc123.webp?w=200&h=200&fit=cover
+
+# JPEG at 70% quality
+GET /img/products/az/store/brand/product/image-abc123.webp?w=400&format=jpeg&q=70
+
+# Grayscale blur
+GET /img/products/az/store/brand/product/image-abc123.webp?w=200&gray=1&blur=3
+```
+
+**Frontend Usage:**
+```typescript
+import { getImageUrl, ImageSizes } from '../lib/images';
+
+// Returns /img/products/.../image.webp?w=50 for local images
+// Returns original URL for remote images (CDN, etc.)
+const thumbUrl = getImageUrl(product.image_url, ImageSizes.thumb);
+const cardUrl = getImageUrl(product.image_url, ImageSizes.medium);
+const detailUrl = getImageUrl(product.image_url, ImageSizes.detail);
+```
+
+**Size Presets:**
+| Preset | Width | Use Case |
+|--------|-------|----------|
+| `thumb` | 50px | Table thumbnails |
+| `small` | 100px | Small cards |
+| `medium` | 200px | Grid cards |
+| `large` | 400px | Large cards |
+| `detail` | 600px | Product detail |
+| `full` | - | No resize |
+
 ### Storage Adapter

 ```typescript
@@ -480,8 +532,9 @@ import { saveImage, getImageUrl } from '../utils/storage-adapter';

 | File | Purpose |
 |------|---------|
-| `backend/src/utils/local-storage.ts` | Local filesystem adapter |
-| `backend/src/utils/storage-adapter.ts` | Unified storage abstraction |
+| `backend/src/utils/image-storage.ts` | Image download and storage |
+| `backend/src/routes/image-proxy.ts` | On-demand image resizing endpoint |
+| `cannaiq/src/lib/images.ts` | Frontend image URL helper |
 | `docker-compose.local.yml` | Local stack without MinIO |
 | `start-local.sh` | Convenience startup script |
@@ -489,12 +542,78 @@ import { saveImage, getImageUrl } from '../utils/storage-adapter';

 ## UI ANONYMIZATION RULES

-- No vendor names in forward-facing URLs: use `/api/az/...`, `/az`, `/az-schedule`
+- No vendor names in forward-facing URLs
 - No "dutchie", "treez", "jane", "weedmaps", "leafly" visible in consumer UIs
 - Internal admin tools may show provider names for debugging

 ---

+## DUTCHIE DISCOVERY PIPELINE (Added 2025-01)
+
+### Overview
+Automated discovery of Dutchie-powered dispensaries across all US states.
+
+### Flow
+```
+1. getAllCitiesByState GraphQL → Get all cities for a state
+2. ConsumerDispensaries GraphQL → Get stores for each city
+3. Upsert to dutchie_discovery_locations (keyed by platform_location_id)
+4. AUTO-VALIDATE: Check required fields
+5. AUTO-PROMOTE: Create/update dispensaries with crawl_enabled=true
+6. Log all actions to dutchie_promotion_log
+```
+
+### Tables
+| Table | Purpose |
+|-------|---------|
+| `dutchie_discovery_cities` | Cities known to have dispensaries |
+| `dutchie_discovery_locations` | Raw discovered store data |
+| `dispensaries` | Canonical stores (promoted from discovery) |
+| `dutchie_promotion_log` | Audit trail for validation/promotion |
+
+### Files
+| File | Purpose |
+|------|---------|
+| `src/discovery/discovery-crawler.ts` | Main orchestrator |
+| `src/discovery/location-discovery.ts` | GraphQL fetching |
+| `src/discovery/promotion.ts` | Validation & promotion logic |
+| `src/scripts/run-discovery.ts` | CLI interface |
+| `migrations/067_promotion_log.sql` | Audit log table |
+
+### GraphQL Hashes (in `src/platforms/dutchie/client.ts`)
+| Query | Hash |
+|-------|------|
+| `GetAllCitiesByState` | `ae547a0466ace5a48f91e55bf6699eacd87e3a42841560f0c0eabed5a0a920e6` |
+| `ConsumerDispensaries` | `0a5bfa6ca1d64ae47bcccb7c8077c87147cbc4e6982c17ceec97a2a4948b311b` |
+
+### Usage
+```bash
+# Discover all stores in a state
+npx tsx src/scripts/run-discovery.ts discover:state AZ
+npx tsx src/scripts/run-discovery.ts discover:state CA
+
+# Check stats
+npx tsx src/scripts/run-discovery.ts stats
+```
+
+### Validation Rules
+A discovery location must have:
+- `platform_location_id` (MongoDB ObjectId, 24 hex chars)
+- `name`
+- `city`
+- `state_code`
+- `platform_menu_url`
+
+Invalid records are marked `status='rejected'` with errors logged.
+
+### Key Design Decisions
+- `platform_location_id` MUST be MongoDB ObjectId (not slug)
+- Old geo-based discovery stored slugs → deleted as garbage data
+- Rate limit: 2 seconds between city requests to avoid API throttling
+- Promotion is idempotent via `ON CONFLICT (platform_dispensary_id)`
+
+---
+
 ## FUTURE TODO / PENDING FEATURES

 - [ ] Orchestrator observability dashboard
@@ -639,16 +758,19 @@ export default defineConfig({

 - **DB**: Use the single CannaiQ database via `CANNAIQ_DB_*` env vars. No hardcoded names.
 - **Images**: No MinIO. Save to local /images/products/<disp>/<prod>-<hash>.webp (and brands); preserve original URL; serve via backend static.
-- **Dutchie GraphQL**: Endpoint https://dutchie.com/api-3/graphql. Variables must use productsFilter.dispensaryId (platform_dispensary_id). Mode A: Status="Active". Mode B: Status=null/activeOnly:false.
+- **Dutchie GraphQL**: Endpoint https://dutchie.com/api-3/graphql. Variables must use productsFilter.dispensaryId (platform_dispensary_id). **CRITICAL: Use `Status: 'Active'`, NOT `null`** (null returns 0 products).
 - **cName/slug**: Derive cName from each store's menu_url (/embedded-menu/<cName> or /dispensary/<slug>). No hardcoded defaults.
-- **Dual-mode always**: useBothModes:true to get pricing (Mode A) + full coverage (Mode B).
 - **Batch DB writes**: Chunk products/snapshots/missing (100–200) to avoid OOM.
-- **OOS/missing**: Include inactive/OOS in Mode B. Union A+B, dedupe by external_product_id+dispensary_id.
-- **API/Frontend**: Use /api/az/... endpoints (stores/products/brands/categories/summary/dashboard).
+- **API/Frontend**: Use `/api/stores`, `/api/products`, `/api/workers`, `/api/pipeline` endpoints.
 - **Scheduling**: Crawl only menu_type='dutchie' AND platform_dispensary_id IS NOT NULL. 4-hour crawl with jitter.
-- **Monitor**: /scraper-monitor (and /az-schedule) should show active/recent jobs from job_run_logs/crawl_jobs.
+- **THC/CBD values**: Clamp to ≤100 - some products report milligrams as percentages.
+- **Column names**: Use `name_raw`, `brand_name_raw`, `category_raw`, `subcategory_raw` (NOT `name`, `brand_name`, etc.)
+- **Monitor**: `/api/workers` shows active/recent jobs from job queue.
 - **No slug guessing**: Never use defaults. Always derive per store from menu_url and resolve platform IDs per location.

+**📖 Full Documentation: See `docs/DUTCHIE_CRAWL_WORKFLOW.md` for complete pipeline documentation.**
+
 ---

 ### Detailed Rules
@@ -691,7 +813,7 @@ export default defineConfig({
 - Use dutchie GraphQL pipeline only for `menu_type='dutchie'`.

 6) **Frontend**
-- Forward-facing URLs: `/api/az`, `/az`, `/az-schedule`; no vendor names.
+- Forward-facing URLs should not contain vendor names.
 - `/scraper-schedule`: add filters/search, keep as master view for all schedules; reflect platform ID/menu_type status and controls.

 7) **No slug guessing**
@@ -740,24 +862,27 @@ export default defineConfig({

 16) **API Route Semantics**

-**Route Groups:**
-- `/api/admin/...` = Admin/operator actions (crawl triggers, health checks)
-- `/api/az/...` = Arizona data slice (stores, products, metrics)
+**Route Groups (as registered in `src/index.ts`):**
+- `/api/stores` = Store/dispensary CRUD and listing
+- `/api/products` = Product listing and details
+- `/api/workers` = Job queue monitoring (replaces legacy `/api/dutchie-az/...`)
+- `/api/pipeline` = Crawl pipeline triggers
+- `/api/admin/orchestrator` = Orchestrator admin actions
+- `/api/discovery` = Platform discovery (Dutchie, etc.)
 - `/api/v1/...` = Public API for external consumers (WordPress, etc.)

-**Crawl Trigger (CANONICAL):**
-```
-POST /api/admin/crawl/:dispensaryId
-```
+**Crawl Trigger:**
+Check `/api/pipeline` or `/api/admin/orchestrator` routes for crawl triggers.
+The legacy `POST /api/admin/crawl/:dispensaryId` does NOT exist.

 17) **Monitoring and logging**
-- /scraper-monitor (and /az-schedule) should show active/recent jobs from job_run_logs/crawl_jobs
+- `/api/workers` shows active/recent jobs from job queue
 - Auto-refresh every 30 seconds
 - System Logs page should show real log data, not just startup messages

 18) **Dashboard Architecture**
 - **Frontend**: Rebuild the frontend with `VITE_API_URL` pointing to the correct backend and redeploy.
-- **Backend**: `/api/dashboard/stats` MUST use the canonical DB pool. Use the correct tables: `dutchie_products`, `dispensaries`, and views like `v_dashboard_stats`, `v_latest_snapshots`.
+- **Backend**: `/api/dashboard/stats` MUST use the canonical DB pool. Use the correct tables: `store_products`, `dispensaries`, and views like `v_dashboard_stats`, `v_latest_snapshots`.

 19) **Deployment (Gitea + Kubernetes)**
 - **Registry**: Gitea at `code.cannabrands.app/creationshop/dispensary-scraper`
@@ -783,8 +908,8 @@ export default defineConfig({
 - **Job schedules** (managed in `job_schedules` table):
 - `dutchie_az_menu_detection`: Runs daily with 60-min jitter
 - `dutchie_az_product_crawl`: Runs every 4 hours with 30-min jitter
-- **Trigger schedules**: `curl -X POST /api/az/admin/schedules/{id}/trigger`
-- **Check schedule status**: `curl /api/az/admin/schedules`
+- **Monitor jobs**: `GET /api/workers`
+- **Trigger crawls**: Check `/api/pipeline` routes

 21) **Frontend Architecture - AVOID OVER-ENGINEERING**
@@ -1123,3 +1248,32 @@ Every analytics v2 endpoint must:
 ---

 # END Analytics V2 spec extension
+
+---
+
+## WordPress Plugin Versioning
+
+The WordPress plugin version is tracked in `wordpress-plugin/VERSION`.
+
+**Current version:** Check `wordpress-plugin/VERSION` for the latest version.
+
+**Versioning rules:**
+- **Minor bumps (x.x.N)**: Bug fixes, small improvements - default for most changes
+- **Middle bumps (x.N.0)**: New features, significant improvements
+- **Major bumps (N.0.0)**: Breaking changes, major rewrites - only when user explicitly requests
+
+**When making WP plugin changes:**
+1. Read `wordpress-plugin/VERSION` to get current version
+2. Bump the version number (minor by default)
+3. Update both files:
+   - `wordpress-plugin/VERSION`
+   - Plugin header `Version:` in `cannaiq-menus.php` and/or `crawlsy-menus.php`
+   - The `define('..._VERSION', '...')` constant in each plugin file
+
+**Plugin files:**
+| File | Brand | API URL |
+|------|-------|---------|
+| `cannaiq-menus.php` | CannaIQ | `https://cannaiq.co/api/v1` |
+| `crawlsy-menus.php` | Crawlsy (legacy) | `https://cannaiq.co/api/v1` |
+
+Both plugins use the same API endpoint. The Crawlsy version exists for backward compatibility with existing installations.
3  backend/.gitignore (vendored, new file)
@@ -0,0 +1,3 @@

# IP2Location database (downloaded separately)
data/ip2location/
@@ -1,17 +1,17 @@
 # Build stage
 # Image: code.cannabrands.app/creationshop/dispensary-scraper
-FROM node:20-slim AS builder
+FROM code.cannabrands.app/creationshop/node:20-slim AS builder

 WORKDIR /app

 COPY package*.json ./
-RUN npm ci
+RUN npm install

 COPY . .
 RUN npm run build

 # Production stage
-FROM node:20-slim
+FROM code.cannabrands.app/creationshop/node:20-slim

 # Build arguments for version info
 ARG APP_BUILD_VERSION=dev
@@ -43,10 +43,13 @@ ENV PUPPETEER_EXECUTABLE_PATH=/usr/bin/chromium
 WORKDIR /app

 COPY package*.json ./
-RUN npm ci --omit=dev
+RUN npm install --omit=dev

 COPY --from=builder /app/dist ./dist

+# Copy migrations for auto-migrate on startup
+COPY migrations ./migrations
+
 # Create local images directory for when MinIO is not configured
 RUN mkdir -p /app/public/images/products
538  backend/docs/CRAWL_PIPELINE.md (new file)
@@ -0,0 +1,538 @@
# Crawl Pipeline Documentation

## Overview

The crawl pipeline fetches product data from Dutchie dispensary menus and stores it in the canonical database. This document covers the complete flow from task scheduling to data storage.

---

## Pipeline Stages

```
store_discovery          Find new dispensaries
        │
        ▼
entry_point_discovery    Resolve slug → platform_dispensary_id
        │
        ▼
product_discovery        Initial product crawl
        │
        ▼
product_resync           Recurring crawl (every 4 hours)
```

---

## Stage Details

### 1. Store Discovery
**Purpose:** Find new dispensaries to crawl

**Handler:** `src/tasks/handlers/store-discovery.ts`

**Flow:**
1. Query Dutchie `ConsumerDispensaries` GraphQL for cities/states
2. Extract dispensary info (name, address, menu_url)
3. Insert into `dutchie_discovery_locations`
4. Queue `entry_point_discovery` for each new location

---

### 2. Entry Point Discovery
**Purpose:** Resolve menu URL slug to platform_dispensary_id (MongoDB ObjectId)

**Handler:** `src/tasks/handlers/entry-point-discovery.ts`

**Flow:**
1. Load dispensary from database
2. Extract slug from `menu_url`:
   - `/embedded-menu/<slug>` or `/dispensary/<slug>`
3. Start stealth session (fingerprint + proxy)
4. Query `resolveDispensaryIdWithDetails(slug)` via GraphQL
5. Update dispensary with `platform_dispensary_id`
6. Queue `product_discovery` task

**Example:**
```
menu_url: https://dutchie.com/embedded-menu/deeply-rooted
slug: deeply-rooted
platform_dispensary_id: 6405ef617056e8014d79101b
```

---

### 3. Product Discovery
**Purpose:** Initial crawl of a new dispensary

**Handler:** `src/tasks/handlers/product-discovery.ts`

Same as product_resync but for first-time crawls.

---

### 4. Product Resync
**Purpose:** Recurring crawl to capture price/stock changes

**Handler:** `src/tasks/handlers/product-resync.ts`

**Flow:**

#### Step 1: Load Dispensary Info
```sql
SELECT id, name, platform_dispensary_id, menu_url, state
FROM dispensaries
WHERE id = $1 AND crawl_enabled = true
```

#### Step 2: Start Stealth Session
- Generate random browser fingerprint
- Set locale/timezone matching state
- Optional proxy rotation

#### Step 3: Fetch Products via GraphQL
**Endpoint:** `https://dutchie.com/api-3/graphql`

**Variables:**
```javascript
{
  includeEnterpriseSpecials: false,
  productsFilter: {
    dispensaryId: "<platform_dispensary_id>",
    pricingType: "rec",
    Status: "All",
    types: [],
    useCache: false,
    isDefaultSort: true,
    sortBy: "popularSortIdx",
    sortDirection: 1,
    bypassOnlineThresholds: true,
    isKioskMenu: false,
    removeProductsBelowOptionThresholds: false
  },
  page: 0,
  perPage: 100
}
```

**Key Notes:**
- `Status: "All"` returns all products (Active returns same count)
- `Status: null` returns 0 products (broken)
- `pricingType: "rec"` returns BOTH rec and med prices
- Paginate until `products.length < perPage` or `allProducts.length >= totalCount`
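The pagination loop described above can be sketched as follows. This is a minimal illustration using plain `fetch`; the production client in `src/platforms/dutchie/client.ts` layers proxies, fingerprints, and retries on top of this, and the operation name, query text, and response field names here are assumptions rather than the real values.

```typescript
// Hedged sketch of the paginated product fetch (Step 3).
// OPERATION_NAME / QUERY_TEXT are placeholders; exact response shape may differ.
const ENDPOINT = 'https://dutchie.com/api-3/graphql';

async function fetchAllProducts(platformDispensaryId: string): Promise<unknown[]> {
  const all: unknown[] = [];
  const perPage = 100;
  for (let page = 0; ; page++) {
    const variables = {
      includeEnterpriseSpecials: false,
      productsFilter: {
        dispensaryId: platformDispensaryId,
        pricingType: 'rec',
        Status: 'All', // "All"/"Active" both work; null returns 0 products
        types: [],
        useCache: false,
        isDefaultSort: true,
        sortBy: 'popularSortIdx',
        sortDirection: 1,
        bypassOnlineThresholds: true,
        isKioskMenu: false,
        removeProductsBelowOptionThresholds: false,
      },
      page,
      perPage,
    };
    const res = await fetch(ENDPOINT, {
      method: 'POST',
      headers: { 'content-type': 'application/json' },
      body: JSON.stringify({ operationName: 'OPERATION_NAME', query: 'QUERY_TEXT', variables }),
    });
    const json = await res.json();
    const page_ = json?.data?.filteredProducts ?? {};
    const products: unknown[] = page_.products ?? [];
    const totalCount: number = page_.queryInfo?.totalCount ?? 0;
    all.push(...products);
    // Stop when the page is short or the reported total has been reached.
    if (products.length < perPage || (totalCount > 0 && all.length >= totalCount)) break;
  }
  return all;
}
```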
#### Step 4: Normalize Data
Transform raw Dutchie payload to canonical format via `DutchieNormalizer`.

#### Step 5: Upsert Products
Insert/update `store_products` table with normalized data.

#### Step 6: Create Snapshots
Insert point-in-time record to `store_product_snapshots`.

#### Step 7: Track Missing Products (OOS Detection)
```sql
-- Reset consecutive_misses for products IN the feed
UPDATE store_products
SET consecutive_misses = 0, last_seen_at = NOW()
WHERE dispensary_id = $1
  AND provider = 'dutchie'
  AND provider_product_id = ANY($2)

-- Increment for products NOT in feed
UPDATE store_products
SET consecutive_misses = consecutive_misses + 1
WHERE dispensary_id = $1
  AND provider = 'dutchie'
  AND provider_product_id NOT IN (...)
  AND consecutive_misses < 3

-- Mark OOS at 3 consecutive misses
UPDATE store_products
SET stock_status = 'oos', is_in_stock = false
WHERE dispensary_id = $1
  AND consecutive_misses >= 3
  AND stock_status != 'oos'
```

#### Step 8: Download Images
For new products, download and store images locally.

#### Step 9: Update Dispensary
```sql
UPDATE dispensaries SET last_crawl_at = NOW() WHERE id = $1
```

---

## GraphQL Payload Structure

### Product Fields (from filteredProducts.products[])

| Field | Type | Description |
|-------|------|-------------|
| `_id` / `id` | string | MongoDB ObjectId (24 hex chars) |
| `Name` | string | Product display name |
| `brandName` | string | Brand name |
| `brand.name` | string | Brand name (nested) |
| `brand.description` | string | Brand description |
| `type` | string | Category (Flower, Edible, Concentrate, etc.) |
| `subcategory` | string | Subcategory |
| `strainType` | string | Hybrid, Indica, Sativa, N/A |
| `Status` | string | Always "Active" in feed |
| `Image` | string | Primary image URL |
| `images[]` | array | All product images |

### Pricing Fields

| Field | Type | Description |
|-------|------|-------------|
| `Prices[]` | number[] | Rec prices per option |
| `recPrices[]` | number[] | Rec prices |
| `medicalPrices[]` | number[] | Medical prices |
| `recSpecialPrices[]` | number[] | Rec sale prices |
| `medicalSpecialPrices[]` | number[] | Medical sale prices |
| `Options[]` | string[] | Size options ("1/8oz", "1g", etc.) |
| `rawOptions[]` | string[] | Raw weight options ("3.5g") |

### Inventory Fields (POSMetaData.children[])

| Field | Type | Description |
|-------|------|-------------|
| `quantity` | number | Total inventory count |
| `quantityAvailable` | number | Available for online orders |
| `kioskQuantityAvailable` | number | Available for kiosk orders |
| `option` | string | Which size option this is for |

### Potency Fields

| Field | Type | Description |
|-------|------|-------------|
| `THCContent.range[]` | number[] | THC percentage |
| `CBDContent.range[]` | number[] | CBD percentage |
| `cannabinoidsV2[]` | array | Detailed cannabinoid breakdown |

### Specials (specialData.bogoSpecials[])

| Field | Type | Description |
|-------|------|-------------|
| `specialName` | string | Deal name |
| `specialType` | string | "bogo", "sale", etc. |
| `itemsForAPrice.value` | string | Bundle price |
| `bogoRewards[].totalQuantity.quantity` | number | Required quantity |

---

## OOS Detection Logic

Products disappear from the Dutchie feed when they go out of stock. We track this via `consecutive_misses`:

| Scenario | Action |
|----------|--------|
| Product in feed | `consecutive_misses = 0` |
| Product missing 1st time | `consecutive_misses = 1` |
| Product missing 2nd time | `consecutive_misses = 2` |
| Product missing 3rd time | `consecutive_misses = 3`, mark `stock_status = 'oos'` |
| Product returns to feed | `consecutive_misses = 0`, update stock_status |

**Why 3 misses?**
- Protects against false positives from crawl failures
- Single bad crawl doesn't trigger mass OOS alerts
- Balances detection speed vs accuracy

---

## Database Tables

### store_products
Current state of each product:
- `provider_product_id` - Dutchie's MongoDB ObjectId
- `name_raw`, `brand_name_raw` - Raw values from feed
- `price_rec`, `price_med` - Current prices
- `is_in_stock`, `stock_status` - Availability
- `consecutive_misses` - OOS detection counter
- `last_seen_at` - Last time product was in feed

### store_product_snapshots
Point-in-time records for historical analysis:
- One row per product per crawl
- Captures price, stock, potency at that moment
- Used for price history, analytics

### dispensaries
Store metadata:
- `platform_dispensary_id` - MongoDB ObjectId for GraphQL
- `menu_url` - Source URL
- `last_crawl_at` - Last successful crawl
- `crawl_enabled` - Whether to crawl

---

## Worker Roles

Workers pull tasks from the `worker_tasks` queue based on their assigned role.

| Role | Name | Description | Handler |
|------|------|-------------|---------|
| `product_resync` | Product Resync | Re-crawl dispensary products for price/stock changes | `handleProductResync` |
| `product_discovery` | Product Discovery | Initial product discovery for new dispensaries | `handleProductDiscovery` |
| `store_discovery` | Store Discovery | Discover new dispensary locations | `handleStoreDiscovery` |
| `entry_point_discovery` | Entry Point Discovery | Resolve platform IDs from menu URLs | `handleEntryPointDiscovery` |
| `analytics_refresh` | Analytics Refresh | Refresh materialized views and analytics | `handleAnalyticsRefresh` |

**API Endpoint:** `GET /api/worker-registry/roles`

---

## Scheduling

Crawls are scheduled via the `worker_tasks` table:

| Role | Frequency | Description |
|------|-----------|-------------|
| `product_resync` | Every 4 hours | Regular product refresh |
| `product_discovery` | On-demand | First crawl for new stores |
| `entry_point_discovery` | On-demand | New store setup |
| `store_discovery` | Daily | Find new stores |
| `analytics_refresh` | Daily | Refresh analytics materialized views |

---

## Priority & On-Demand Tasks

Tasks are claimed by workers in order of **priority DESC, created_at ASC**.

### Priority Levels

| Priority | Use Case | Example |
|----------|----------|---------|
| 0 | Scheduled/batch tasks | Daily product_resync generation |
| 10 | On-demand/chained tasks | entry_point → product_discovery |
| Higher | Urgent/manual triggers | Admin-triggered immediate crawl |

### Task Chaining

When a task completes, the system automatically creates follow-up tasks:

```
store_discovery (completed)
  └─► entry_point_discovery (priority: 10) for each new store

entry_point_discovery (completed, success)
  └─► product_discovery (priority: 10) for that store

product_discovery (completed)
  └─► [no chain] Store enters regular resync schedule
```

### On-Demand Task Creation

Use the task service to create high-priority tasks:

```typescript
// Create immediate product resync for a store
await taskService.createTask({
  role: 'product_resync',
  dispensary_id: 123,
  platform: 'dutchie',
  priority: 20, // Higher than batch tasks
});

// Convenience methods with default high priority (10)
await taskService.createEntryPointTask(dispensaryId, 'dutchie');
await taskService.createProductDiscoveryTask(dispensaryId, 'dutchie');
await taskService.createStoreDiscoveryTask('dutchie', 'AZ');
```

### Claim Function

The `claim_task()` SQL function atomically claims tasks:
- Respects priority ordering (higher = first)
- Uses `FOR UPDATE SKIP LOCKED` for concurrency
- Prevents multiple active tasks per store
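A minimal sketch of what that claiming query looks like, assuming the column names from the `worker_tasks` schema in `docs/WORKER_TASK_ARCHITECTURE.md`; the authoritative logic is the `claim_task()` function from migration 074 and may differ in detail.

```typescript
import { Pool } from 'pg';

const pool = new Pool();

// Hedged illustration of atomic claiming with FOR UPDATE SKIP LOCKED.
async function claimNextTask(role: string, workerId: string) {
  const { rows } = await pool.query(
    `UPDATE worker_tasks
        SET status = 'claimed', worker_id = $2, claimed_at = NOW()
      WHERE id = (
        SELECT id
          FROM worker_tasks
         WHERE role = $1
           AND status = 'pending'
           AND (scheduled_for IS NULL OR scheduled_for <= NOW())
         ORDER BY priority DESC, created_at ASC
         LIMIT 1
         FOR UPDATE SKIP LOCKED
      )
      RETURNING *`,
    [role, workerId]
  );
  return rows[0] ?? null; // null when no task is available
}
```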
---

## Image Storage

Images are downloaded from Dutchie's AWS S3 and stored locally with on-demand resizing.

### Storage Path
```
/storage/images/products/<state>/<store>/<brand>/<product_id>/image-<hash>.webp
/storage/images/brands/<brand>/logo-<hash>.webp
```

**Example:**
```
/storage/images/products/az/az-deeply-rooted/bud-bros/6913e3cd444eac3935e928b9/image-ae38b1f9.webp
```

### Image Proxy API
Served via `/img/*` with on-demand resizing using **sharp**:

```
GET /img/products/az/az-deeply-rooted/bud-bros/6913e3cd444eac3935e928b9/image-ae38b1f9.webp?w=200
```

| Param | Description |
|-------|-------------|
| `w` | Width in pixels (max 4000) |
| `h` | Height in pixels (max 4000) |
| `q` | Quality 1-100 (default 80) |
| `fit` | cover, contain, fill, inside, outside |
| `blur` | Blur sigma (0.3-1000) |
| `gray` | Grayscale (1 = enabled) |
| `format` | webp, jpeg, png, avif (default webp) |
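The core of such an endpoint can be sketched with Express and sharp as below. This is a simplified illustration of the resize path only; the real route in `src/routes/image-proxy.ts` also sanitizes paths, caps dimensions, and handles the `blur`, `gray`, and `format` parameters.

```typescript
import express from 'express';
import path from 'path';
import sharp from 'sharp';

const app = express();
const STORAGE_ROOT = '/storage/images'; // assumed storage root

app.get('/img/*', async (req, res) => {
  try {
    // Resolve the requested file under the storage root (real code must validate this).
    const relPath = (req.params as Record<string, string>)[0];
    const filePath = path.join(STORAGE_ROOT, relPath);

    const width = req.query.w ? Math.min(Number(req.query.w), 4000) : undefined;
    const height = req.query.h ? Math.min(Number(req.query.h), 4000) : undefined;
    const quality = req.query.q ? Number(req.query.q) : 80;
    const fit = (req.query.fit as keyof sharp.FitEnum) || 'cover';

    let pipeline = sharp(filePath);
    if (width || height) pipeline = pipeline.resize({ width, height, fit });
    const buffer = await pipeline.webp({ quality }).toBuffer();

    res.type('image/webp').send(buffer);
  } catch {
    res.sendStatus(404);
  }
});
```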
### Key Files
| File | Purpose |
|------|---------|
| `src/utils/image-storage.ts` | Download & save images to local filesystem |
| `src/routes/image-proxy.ts` | On-demand resize/transform at `/img/*` |

### Download Rules

| Scenario | Image Action |
|----------|--------------|
| **New product (first crawl)** | Download if `primaryImageUrl` exists |
| **Existing product (refresh)** | Download only if `local_image_path` is NULL (backfill) |
| **Product already has local image** | Skip download entirely |

**Logic:**
- Images are downloaded **once** and never re-downloaded on subsequent crawls
- `skipIfExists: true` - filesystem check prevents re-download even if queued
- First crawl: all products get images
- Refresh crawl: only new products or products missing local images
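The decision rule above reduces to a small predicate. This is an illustrative sketch only; the helper and field names are hypothetical and do not mirror the actual API of `src/utils/image-storage.ts`.

```typescript
import fs from 'fs';

// Hypothetical shape combining the feed URL and the stored local path.
interface ProductImageInfo {
  primaryImageUrl: string | null; // remote Dutchie URL from the feed
  localImagePath: string | null;  // value of store_products.local_image_path
}

function shouldDownloadImage(p: ProductImageInfo): boolean {
  if (!p.primaryImageUrl) return false;    // nothing to download
  if (!p.localImagePath) return true;      // new product or backfill
  return !fs.existsSync(p.localImagePath); // skipIfExists-style filesystem check
}
```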
### Storage Rules
- **NO MinIO** - local filesystem only (`STORAGE_DRIVER=local`)
- Store full resolution, resize on-demand via `/img` proxy
- Convert to webp for consistency using **sharp**
- Preserve original Dutchie URL as fallback in `image_url` column
- Local path stored in `local_image_path` column

---

## Stealth & Anti-Detection

**PROXIES ARE REQUIRED** - Workers will fail to start if no active proxies are available in the database. All HTTP requests to Dutchie go through a proxy.

Workers automatically initialize anti-detection systems on startup.

### Components

| Component | Purpose | Source |
|-----------|---------|--------|
| **CrawlRotator** | Coordinates proxy + UA rotation | `src/services/crawl-rotator.ts` |
| **ProxyRotator** | Round-robin proxy selection, health tracking | `src/services/crawl-rotator.ts` |
| **UserAgentRotator** | Cycles through realistic browser fingerprints | `src/services/crawl-rotator.ts` |
| **Dutchie Client** | Curl-based HTTP with auto-retry on 403 | `src/platforms/dutchie/client.ts` |

### Initialization Flow

```
Worker Start
  │
  ├─► initializeStealth()
  │     │
  │     ├─► CrawlRotator.initialize()
  │     │     └─► Load proxies from `proxies` table
  │     │
  │     └─► setCrawlRotator(rotator)
  │           └─► Wire to Dutchie client
  │
  └─► Process tasks...
```

### Stealth Session (per task)

Each crawl task starts a stealth session:

```typescript
// In product-refresh.ts, entry-point-discovery.ts
const session = startSession(dispensary.state || 'AZ', 'America/Phoenix');
```

This creates a new identity with:
- **Random fingerprint:** Chrome/Firefox/Safari/Edge on Win/Mac/Linux
- **Accept-Language:** Matches timezone (e.g., `America/Phoenix` → `en-US,en;q=0.9`)
- **sec-ch-ua headers:** Proper Client Hints for the browser profile

### On 403 Block

When Dutchie returns 403, the client automatically:

1. Records failure on current proxy (increments `failure_count`)
2. If proxy has 5+ failures, deactivates it
3. Rotates to next healthy proxy
4. Rotates fingerprint
5. Retries the request
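A compact sketch of that retry loop follows. The rotator method names are illustrative assumptions; the real coordination lives in `CrawlRotator` and the Dutchie client, which also distinguish other error codes.

```typescript
// Hypothetical sketch of the 403 handling described above.
interface RotatorLike {
  recordFailure(): void;      // bump failure_count; deactivate proxy at 5+ failures
  rotateProxy(): void;        // move to the next healthy proxy
  rotateFingerprint(): void;  // new browser profile + Client Hints
}

async function requestWithRotation(
  doRequest: () => Promise<Response>,
  rotator: RotatorLike,
  maxRetries = 3
): Promise<Response> {
  for (let attempt = 0; ; attempt++) {
    const res = await doRequest();
    if (res.status !== 403) return res;
    rotator.recordFailure();
    if (attempt >= maxRetries) return res; // give up after the allowed retries
    rotator.rotateProxy();
    rotator.rotateFingerprint();
  }
}
```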
### Proxy Table Schema

```sql
CREATE TABLE proxies (
  id SERIAL PRIMARY KEY,
  host VARCHAR(255) NOT NULL,
  port INTEGER NOT NULL,
  username VARCHAR(100),
  password VARCHAR(100),
  protocol VARCHAR(10) DEFAULT 'http', -- http, https, socks5
  is_active BOOLEAN DEFAULT true,
  last_used_at TIMESTAMPTZ,
  failure_count INTEGER DEFAULT 0,
  success_count INTEGER DEFAULT 0,
  avg_response_time_ms INTEGER,
  last_failure_at TIMESTAMPTZ,
  last_error TEXT
);
```

### Configuration

Proxies are mandatory. There is no environment variable to disable them. Workers will refuse to start without active proxies in the database.

### Fingerprints Available

The client includes 6 browser fingerprints:
- Chrome 131 on Windows
- Chrome 131 on macOS
- Chrome 120 on Windows
- Firefox 133 on Windows
- Safari 17.2 on macOS
- Edge 131 on Windows

Each includes proper `sec-ch-ua`, `sec-ch-ua-platform`, and `sec-ch-ua-mobile` headers.

---

## Error Handling

- **GraphQL errors:** Logged, task marked failed, retried later
- **Normalization errors:** Logged as warnings, continue with valid products
- **Image download errors:** Non-fatal, logged, continue
- **Database errors:** Task fails, will be retried
- **403 blocks:** Auto-rotate proxy + fingerprint, retry (up to 3 retries)

---

## Files

| File | Purpose |
|------|---------|
| `src/tasks/handlers/product-resync.ts` | Main crawl handler |
| `src/tasks/handlers/entry-point-discovery.ts` | Slug → ID resolution |
| `src/platforms/dutchie/index.ts` | GraphQL client, session management |
| `src/hydration/normalizers/dutchie.ts` | Payload normalization |
| `src/hydration/canonical-upsert.ts` | Database upsert logic |
| `src/utils/image-storage.ts` | Image download and local storage |
| `src/routes/image-proxy.ts` | On-demand image resizing |
| `migrations/075_consecutive_misses.sql` | OOS tracking column |
400  backend/docs/WORKER_TASK_ARCHITECTURE.md (new file)
@@ -0,0 +1,400 @@
# Worker Task Architecture

This document describes the unified task-based worker system that replaces the legacy fragmented job systems.

## Overview

The task worker architecture provides a single, unified system for managing all background work in CannaiQ:

- **Store discovery** - Find new dispensaries on platforms
- **Entry point discovery** - Resolve platform IDs from menu URLs
- **Product discovery** - Initial product fetch for new stores
- **Product resync** - Regular price/stock updates for existing stores
- **Analytics refresh** - Refresh materialized views and analytics

## Architecture

### Database Tables

**`worker_tasks`** - Central task queue
```sql
CREATE TABLE worker_tasks (
  id SERIAL PRIMARY KEY,
  role task_role NOT NULL,              -- What type of work
  dispensary_id INTEGER,                -- Which store (if applicable)
  platform VARCHAR(50),                 -- Which platform (dutchie, etc.)
  status task_status DEFAULT 'pending',
  priority INTEGER DEFAULT 0,           -- Higher = process first
  scheduled_for TIMESTAMP,              -- Don't process before this time
  worker_id VARCHAR(100),               -- Which worker claimed it
  claimed_at TIMESTAMP,
  started_at TIMESTAMP,
  completed_at TIMESTAMP,
  last_heartbeat_at TIMESTAMP,          -- For stale detection
  result JSONB,                         -- Output from handler
  error_message TEXT,
  retry_count INTEGER DEFAULT 0,
  max_retries INTEGER DEFAULT 3,
  created_at TIMESTAMP DEFAULT NOW(),
  updated_at TIMESTAMP DEFAULT NOW()
);
```

**Key indexes:**
- `idx_worker_tasks_pending_priority` - For efficient task claiming
- `idx_worker_tasks_active_dispensary` - Prevents concurrent tasks per store (partial unique index)
|
||||||
|
### Task Roles
|
||||||
|
|
||||||
|
| Role | Purpose | Per-Store | Scheduled |
|
||||||
|
|------|---------|-----------|-----------|
|
||||||
|
| `store_discovery` | Find new stores on a platform | No | Daily |
|
||||||
|
| `entry_point_discovery` | Resolve platform IDs | Yes | On-demand |
|
||||||
|
| `product_discovery` | Initial product fetch | Yes | After entry_point |
|
||||||
|
| `product_resync` | Price/stock updates | Yes | Every 4 hours |
|
||||||
|
| `analytics_refresh` | Refresh MVs | No | Daily |
|
||||||
|
|
||||||
|
### Task Lifecycle
|
||||||
|
|
||||||
|
```
|
||||||
|
pending → claimed → running → completed
|
||||||
|
↓
|
||||||
|
failed
|
||||||
|
```
|
||||||
|
|
||||||
|
1. **pending** - Task is waiting to be picked up
|
||||||
|
2. **claimed** - Worker has claimed it (atomic via SELECT FOR UPDATE SKIP LOCKED)
|
||||||
|
3. **running** - Worker is actively processing
|
||||||
|
4. **completed** - Task finished successfully
|
||||||
|
5. **failed** - Task encountered an error
|
||||||
|
6. **stale** - Task lost its worker (recovered automatically)
|
||||||
|
|
||||||
|
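
For illustration, the claim step can be written as a single UPDATE wrapping a SKIP LOCKED subquery. This is a minimal sketch assuming a node-postgres `Pool`; the actual `TaskService` claim logic may differ in details.

```typescript
import { Pool } from "pg";

// Minimal sketch of an atomic claim using FOR UPDATE SKIP LOCKED.
// Column names follow the worker_tasks schema above; the real TaskService may differ.
export async function claimNextTask(pool: Pool, role: string, workerId: string) {
  const { rows } = await pool.query(
    `UPDATE worker_tasks
        SET status = 'claimed',
            worker_id = $2,
            claimed_at = NOW(),
            last_heartbeat_at = NOW()
      WHERE id = (
        SELECT id
          FROM worker_tasks
         WHERE role = $1
           AND status = 'pending'
           AND (scheduled_for IS NULL OR scheduled_for <= NOW())
         ORDER BY priority DESC, created_at ASC
         FOR UPDATE SKIP LOCKED
         LIMIT 1
      )
      RETURNING *`,
    [role, workerId]
  );
  return rows[0] ?? null; // null when nothing is ready to claim
}
```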

## Files

### Core Files

| File | Purpose |
|------|---------|
| `src/tasks/task-service.ts` | TaskService - CRUD, claiming, capacity metrics |
| `src/tasks/task-worker.ts` | TaskWorker - Main worker loop |
| `src/tasks/index.ts` | Module exports |
| `src/routes/tasks.ts` | API endpoints |
| `migrations/074_worker_task_queue.sql` | Database schema |

### Task Handlers

| File | Role |
|------|------|
| `src/tasks/handlers/store-discovery.ts` | `store_discovery` |
| `src/tasks/handlers/entry-point-discovery.ts` | `entry_point_discovery` |
| `src/tasks/handlers/product-discovery.ts` | `product_discovery` |
| `src/tasks/handlers/product-resync.ts` | `product_resync` |
| `src/tasks/handlers/analytics-refresh.ts` | `analytics_refresh` |

## Running Workers

### Environment Variables

| Variable | Default | Description |
|----------|---------|-------------|
| `WORKER_ROLE` | (required) | Which task role to process |
| `WORKER_ID` | auto-generated | Custom worker identifier |
| `POLL_INTERVAL_MS` | 5000 | How often to check for tasks |
| `HEARTBEAT_INTERVAL_MS` | 30000 | How often to update heartbeat |
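
Put together, the loop these variables drive looks roughly like the following. This is a simplified sketch, not the actual `TaskWorker` source; the claim, handler, and heartbeat functions are stand-ins.

```typescript
// Simplified poll/heartbeat loop; claim/handle/heartbeat are injected stand-ins.
const POLL_INTERVAL_MS = Number(process.env.POLL_INTERVAL_MS ?? 5000);
const HEARTBEAT_INTERVAL_MS = Number(process.env.HEARTBEAT_INTERVAL_MS ?? 30000);

export async function runWorker(
  claim: () => Promise<{ id: number } | null>,
  handle: (taskId: number) => Promise<void>,
  heartbeat: (taskId: number) => Promise<void>
): Promise<void> {
  for (;;) {
    const task = await claim();
    if (!task) {
      // Nothing ready: sleep for the poll interval and check again
      await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS));
      continue;
    }
    // Keep last_heartbeat_at fresh while the handler runs (stale detection)
    const timer = setInterval(() => void heartbeat(task.id), HEARTBEAT_INTERVAL_MS);
    try {
      await handle(task.id); // handler marks the task completed or failed
    } finally {
      clearInterval(timer);
    }
  }
}
```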

### Starting a Worker

```bash
# Start a product resync worker
WORKER_ROLE=product_resync npx tsx src/tasks/task-worker.ts

# Start with custom ID
WORKER_ROLE=product_resync WORKER_ID=resync-1 npx tsx src/tasks/task-worker.ts

# Start multiple workers for different roles
WORKER_ROLE=store_discovery npx tsx src/tasks/task-worker.ts &
WORKER_ROLE=product_resync npx tsx src/tasks/task-worker.ts &
```

### Kubernetes Deployment

```yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: task-worker-resync
spec:
  replicas: 3
  template:
    spec:
      containers:
        - name: worker
          image: code.cannabrands.app/creationshop/dispensary-scraper:latest
          command: ["npx", "tsx", "src/tasks/task-worker.ts"]
          env:
            - name: WORKER_ROLE
              value: "product_resync"
```

## API Endpoints

### Task Management

| Endpoint | Method | Description |
|----------|--------|-------------|
| `/api/tasks` | GET | List tasks with filters |
| `/api/tasks` | POST | Create a new task |
| `/api/tasks/:id` | GET | Get task by ID |
| `/api/tasks/counts` | GET | Get counts by status |
| `/api/tasks/capacity` | GET | Get capacity metrics |
| `/api/tasks/capacity/:role` | GET | Get role-specific capacity |
| `/api/tasks/recover-stale` | POST | Recover tasks from dead workers |
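
As an example, enqueuing a resync for a single store through `POST /api/tasks` could look like the snippet below. The request body here simply mirrors the `worker_tasks` columns, so treat the exact field names as an assumption rather than the documented contract; verify against `src/routes/tasks.ts`.

```typescript
// Assumed request shape: body fields mirror worker_tasks columns
// (role, dispensary_id, platform, priority).
export async function createResyncTask(baseUrl: string, dispensaryId: number) {
  const res = await fetch(`${baseUrl}/api/tasks`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({
      role: "product_resync",
      dispensary_id: dispensaryId,
      platform: "dutchie",
      priority: 0,
    }),
  });
  if (!res.ok) throw new Error(`Failed to create task: ${res.status}`);
  return res.json();
}
```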

### Task Generation

| Endpoint | Method | Description |
|----------|--------|-------------|
| `/api/tasks/generate/resync` | POST | Generate daily resync tasks |
| `/api/tasks/generate/discovery` | POST | Create store discovery task |

### Migration (from legacy systems)

| Endpoint | Method | Description |
|----------|--------|-------------|
| `/api/tasks/migration/status` | GET | Compare old vs new systems |
| `/api/tasks/migration/disable-old-schedules` | POST | Disable job_schedules |
| `/api/tasks/migration/cancel-pending-crawl-jobs` | POST | Cancel old crawl jobs |
| `/api/tasks/migration/create-resync-tasks` | POST | Create tasks for all stores |
| `/api/tasks/migration/full-migrate` | POST | One-click migration |

### Role-Specific Endpoints

| Endpoint | Method | Description |
|----------|--------|-------------|
| `/api/tasks/role/:role/last-completion` | GET | Last completion time |
| `/api/tasks/role/:role/recent` | GET | Recent completions |
| `/api/tasks/store/:id/active` | GET | Check if store has active task |

## Capacity Planning

The `v_worker_capacity` view provides real-time metrics:

```sql
SELECT * FROM v_worker_capacity;
```

Returns:
- `pending_tasks` - Tasks waiting to be claimed
- `ready_tasks` - Tasks ready now (scheduled_for is null or past)
- `claimed_tasks` - Tasks claimed but not started
- `running_tasks` - Tasks actively processing
- `completed_last_hour` - Recent completions
- `failed_last_hour` - Recent failures
- `active_workers` - Workers with recent heartbeats
- `avg_duration_sec` - Average task duration
- `tasks_per_worker_hour` - Throughput estimate
- `estimated_hours_to_drain` - Time to clear queue

### Scaling Recommendations

```javascript
// API: GET /api/tasks/capacity/:role
{
  "role": "product_resync",
  "pending_tasks": 500,
  "active_workers": 3,
  "workers_needed": {
    "for_1_hour": 10,
    "for_4_hours": 3,
    "for_8_hours": 2
  }
}
```
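
The `workers_needed` figures are straightforward arithmetic over the capacity metrics: queue depth divided by per-worker throughput times the target window. A small sketch, with field names following `v_worker_capacity`:

```typescript
// workers needed = ceil(pending / (throughput per worker-hour * target hours))
export function workersNeeded(
  pendingTasks: number,
  tasksPerWorkerHour: number,
  targetHours: number
): number {
  if (tasksPerWorkerHour <= 0) return Infinity; // no throughput data yet
  return Math.ceil(pendingTasks / (tasksPerWorkerHour * targetHours));
}

// Example matching the response above, assuming ~50 tasks per worker-hour:
// 500 pending → 10 workers for 1 hour, 3 for 4 hours, 2 for 8 hours.
```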

## Task Chaining

Tasks can automatically create follow-up tasks:

```
store_discovery → entry_point_discovery → product_discovery
                                                ↓
                          (store has platform_dispensary_id)
                                                ↓
                                      Daily resync tasks
```

The `chainNextTask()` method handles this automatically.
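
A rough sketch of that chaining rule follows, with the caveat that the real logic lives in `chainNextTask()` and may carry extra context (platform, priority, scheduling):

```typescript
type TaskRole =
  | "store_discovery"
  | "entry_point_discovery"
  | "product_discovery"
  | "product_resync"
  | "analytics_refresh";

// Which role follows a completed task for the same store (illustrative only).
export function nextRole(completed: TaskRole, hasPlatformDispensaryId: boolean): TaskRole | null {
  switch (completed) {
    case "entry_point_discovery":
      return "product_discovery";
    case "product_discovery":
      // The store only joins the daily resync rotation once its platform ID is resolved
      return hasPlatformDispensaryId ? "product_resync" : null;
    default:
      return null; // store_discovery fans out per store; resync/analytics do not chain further
  }
}
```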

## Stale Task Recovery

Tasks are considered stale if `last_heartbeat_at` is older than the threshold (default 10 minutes).

```sql
SELECT recover_stale_tasks(10); -- 10 minute threshold
```

Or via API:
```bash
curl -X POST /api/tasks/recover-stale \
  -H 'Content-Type: application/json' \
  -d '{"threshold_minutes": 10}'
```

## Migration from Legacy Systems

### Legacy Systems Replaced

1. **job_schedules + job_run_logs** - Scheduled job definitions
2. **dispensary_crawl_jobs** - Per-dispensary crawl queue
3. **SyncOrchestrator + HydrationWorker** - Raw payload processing

### Migration Steps

**Option 1: One-Click Migration**
```bash
curl -X POST /api/tasks/migration/full-migrate
```

This will:
1. Disable all job_schedules
2. Cancel pending dispensary_crawl_jobs
3. Generate resync tasks for all stores
4. Create discovery and analytics tasks

**Option 2: Manual Migration**
```bash
# 1. Check current status
curl /api/tasks/migration/status

# 2. Disable old schedules
curl -X POST /api/tasks/migration/disable-old-schedules

# 3. Cancel pending crawl jobs
curl -X POST /api/tasks/migration/cancel-pending-crawl-jobs

# 4. Create resync tasks
curl -X POST /api/tasks/migration/create-resync-tasks \
  -H 'Content-Type: application/json' \
  -d '{"state_code": "AZ"}'

# 5. Generate daily resync schedule
curl -X POST /api/tasks/generate/resync \
  -H 'Content-Type: application/json' \
  -d '{"batches_per_day": 6}'
```

## Per-Store Locking

The system prevents concurrent tasks for the same store using a partial unique index:

```sql
CREATE UNIQUE INDEX idx_worker_tasks_active_dispensary
  ON worker_tasks (dispensary_id)
  WHERE dispensary_id IS NOT NULL
    AND status IN ('claimed', 'running');
```

This ensures only one task can be active per store at any time.
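
One practical consequence, sketched below: if creating or claiming a task collides with an already-active task for the same store, Postgres raises `unique_violation` (23505), which callers can treat as "skip this store for now" rather than an error. This is an assumption about how callers handle the index, not documented behavior.

```typescript
// Treat a unique_violation on the partial index as "store already busy" (assumed handling).
export async function ignoreActiveStoreConflict<T>(op: () => Promise<T>): Promise<T | null> {
  try {
    return await op();
  } catch (err) {
    if ((err as { code?: string }).code === "23505") {
      return null; // another task is already claimed/running for this dispensary
    }
    throw err;
  }
}
```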

## Task Priority

Tasks are claimed in priority order (higher first), then by creation time:

```sql
ORDER BY priority DESC, created_at ASC
```

Default priorities:
- `store_discovery`: 0
- `entry_point_discovery`: 10 (high - new stores)
- `product_discovery`: 10 (high - new stores)
- `product_resync`: 0
- `analytics_refresh`: 0

## Scheduled Tasks

Tasks can be scheduled for future execution:

```javascript
await taskService.createTask({
  role: 'product_resync',
  dispensary_id: 123,
  scheduled_for: new Date('2025-01-10T06:00:00Z'),
});
```

The `generate_resync_tasks()` function creates staggered tasks throughout the day:

```sql
SELECT generate_resync_tasks(6, '2025-01-10'); -- 6 batches = every 4 hours
```
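
The staggering itself is simple to reason about: divide the day into `batches_per_day` equal windows and schedule each batch at the start of its window. An illustrative helper (the real work happens inside the SQL function):

```typescript
// Evenly spaced batch start times across one day (UTC), e.g. 6 batches = every 4 hours.
export function batchStartTimes(dayStartUtc: Date, batchesPerDay: number): Date[] {
  const msPerBatch = (24 * 60 * 60 * 1000) / batchesPerDay;
  return Array.from({ length: batchesPerDay }, (_, i) =>
    new Date(dayStartUtc.getTime() + i * msPerBatch)
  );
}

// batchStartTimes(new Date("2025-01-10T00:00:00Z"), 6)
// → 00:00, 04:00, 08:00, 12:00, 16:00, 20:00 UTC
```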

## Dashboard Integration

The admin dashboard shows task queue status in the main overview:

```
Task Queue Summary
------------------
Pending:      45
Running:       3
Completed: 1,234
Failed:       12
```

Full task management is available at `/admin/tasks`.

## Error Handling

Failed tasks include the error message in `error_message` and can be retried:

```sql
-- View failed tasks
SELECT id, role, dispensary_id, error_message, retry_count
FROM worker_tasks
WHERE status = 'failed'
ORDER BY completed_at DESC
LIMIT 20;

-- Retry failed tasks
UPDATE worker_tasks
SET status = 'pending', retry_count = retry_count + 1
WHERE status = 'failed' AND retry_count < max_retries;
```

## Monitoring

### Logs

Workers log to stdout:
```
[TaskWorker] Starting worker worker-product_resync-a1b2c3d4 for role: product_resync
[TaskWorker] Claimed task 123 (product_resync) for dispensary 456
[TaskWorker] Task 123 completed successfully
```

### Health Check

Check if workers are active:
```sql
SELECT worker_id, role, COUNT(*), MAX(last_heartbeat_at)
FROM worker_tasks
WHERE last_heartbeat_at > NOW() - INTERVAL '5 minutes'
GROUP BY worker_id, role;
```

### Metrics

```sql
-- Tasks by status
SELECT status, COUNT(*) FROM worker_tasks GROUP BY status;

-- Tasks by role
SELECT role, status, COUNT(*) FROM worker_tasks GROUP BY role, status;

-- Average duration by role
SELECT role, AVG(EXTRACT(EPOCH FROM (completed_at - started_at))) as avg_seconds
FROM worker_tasks
WHERE status = 'completed' AND completed_at > NOW() - INTERVAL '24 hours'
GROUP BY role;
```
69  backend/k8s/cronjob-ip2location.yaml  Normal file
@@ -0,0 +1,69 @@
apiVersion: batch/v1
kind: CronJob
metadata:
  name: ip2location-update
  namespace: default
spec:
  # Run on the 1st of every month at 3am UTC
  schedule: "0 3 1 * *"
  concurrencyPolicy: Forbid
  successfulJobsHistoryLimit: 3
  failedJobsHistoryLimit: 3
  jobTemplate:
    spec:
      template:
        spec:
          containers:
            - name: ip2location-updater
              image: curlimages/curl:latest
              command:
                - /bin/sh
                - -c
                - |
                  set -e
                  echo "Downloading IP2Location LITE DB5..."

                  # Download to temp
                  cd /tmp
                  curl -L -o ip2location.zip "https://www.ip2location.com/download/?token=${IP2LOCATION_TOKEN}&file=DB5LITEBIN"

                  # Extract
                  unzip -o ip2location.zip

                  # Find and copy the BIN file
                  BIN_FILE=$(ls *.BIN 2>/dev/null | head -1)
                  if [ -z "$BIN_FILE" ]; then
                    echo "ERROR: No BIN file found"
                    exit 1
                  fi

                  # Copy to shared volume
                  cp "$BIN_FILE" /data/IP2LOCATION-LITE-DB5.BIN

                  echo "Done! Database updated: /data/IP2LOCATION-LITE-DB5.BIN"
              env:
                - name: IP2LOCATION_TOKEN
                  valueFrom:
                    secretKeyRef:
                      name: dutchie-backend-secret
                      key: IP2LOCATION_TOKEN
              volumeMounts:
                - name: ip2location-data
                  mountPath: /data
          restartPolicy: OnFailure
          volumes:
            - name: ip2location-data
              persistentVolumeClaim:
                claimName: ip2location-pvc
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: ip2location-pvc
  namespace: default
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 100Mi
@@ -26,6 +26,12 @@ spec:
                name: dutchie-backend-config
            - secretRef:
                name: dutchie-backend-secret
          env:
            - name: IP2LOCATION_DB_PATH
              value: /data/ip2location/IP2LOCATION-LITE-DB5.BIN
          volumeMounts:
            - name: ip2location-data
              mountPath: /data/ip2location
          resources:
            requests:
              memory: "256Mi"
@@ -45,3 +51,7 @@ spec:
            port: 3010
          initialDelaySeconds: 5
          periodSeconds: 5
      volumes:
        - name: ip2location-data
          persistentVolumeClaim:
            claimName: ip2location-pvc
119  backend/migrations/051_worker_definitions.sql  Normal file
@@ -0,0 +1,119 @@
|
|||||||
|
-- Migration 051: Worker Definitions
|
||||||
|
-- Creates a dedicated workers table for named workers with roles and assignments
|
||||||
|
|
||||||
|
-- Workers table - defines named workers with roles
|
||||||
|
CREATE TABLE IF NOT EXISTS workers (
|
||||||
|
id SERIAL PRIMARY KEY,
|
||||||
|
name VARCHAR(100) NOT NULL UNIQUE,
|
||||||
|
role VARCHAR(100) NOT NULL,
|
||||||
|
description TEXT,
|
||||||
|
enabled BOOLEAN DEFAULT TRUE,
|
||||||
|
|
||||||
|
-- Schedule configuration (for dedicated crawl workers)
|
||||||
|
schedule_type VARCHAR(50) DEFAULT 'interval', -- 'interval', 'cron', 'manual'
|
||||||
|
interval_minutes INTEGER DEFAULT 240,
|
||||||
|
cron_expression VARCHAR(100), -- e.g., '0 */4 * * *'
|
||||||
|
jitter_minutes INTEGER DEFAULT 30,
|
||||||
|
|
||||||
|
-- Assignment scope
|
||||||
|
assignment_type VARCHAR(50) DEFAULT 'all', -- 'all', 'state', 'dispensary', 'chain'
|
||||||
|
assigned_state_codes TEXT[], -- e.g., ['AZ', 'CA']
|
||||||
|
assigned_dispensary_ids INTEGER[],
|
||||||
|
assigned_chain_ids INTEGER[],
|
||||||
|
|
||||||
|
-- Job configuration
|
||||||
|
job_type VARCHAR(50) NOT NULL DEFAULT 'dutchie_product_crawl',
|
||||||
|
job_config JSONB DEFAULT '{}',
|
||||||
|
priority INTEGER DEFAULT 0,
|
||||||
|
max_concurrent INTEGER DEFAULT 1,
|
||||||
|
|
||||||
|
-- Status tracking
|
||||||
|
status VARCHAR(50) DEFAULT 'idle', -- 'idle', 'running', 'paused', 'error'
|
||||||
|
last_run_at TIMESTAMPTZ,
|
||||||
|
last_status VARCHAR(50),
|
||||||
|
last_error TEXT,
|
||||||
|
last_duration_ms INTEGER,
|
||||||
|
next_run_at TIMESTAMPTZ,
|
||||||
|
current_job_id INTEGER,
|
||||||
|
|
||||||
|
-- Metrics
|
||||||
|
total_runs INTEGER DEFAULT 0,
|
||||||
|
successful_runs INTEGER DEFAULT 0,
|
||||||
|
failed_runs INTEGER DEFAULT 0,
|
||||||
|
avg_duration_ms INTEGER,
|
||||||
|
|
||||||
|
created_at TIMESTAMPTZ DEFAULT NOW(),
|
||||||
|
updated_at TIMESTAMPTZ DEFAULT NOW()
|
||||||
|
);
|
||||||
|
|
||||||
|
-- Worker run history
|
||||||
|
CREATE TABLE IF NOT EXISTS worker_runs (
|
||||||
|
id SERIAL PRIMARY KEY,
|
||||||
|
worker_id INTEGER NOT NULL REFERENCES workers(id) ON DELETE CASCADE,
|
||||||
|
started_at TIMESTAMPTZ DEFAULT NOW(),
|
||||||
|
completed_at TIMESTAMPTZ,
|
||||||
|
status VARCHAR(50) DEFAULT 'running', -- 'running', 'success', 'error', 'cancelled'
|
||||||
|
duration_ms INTEGER,
|
||||||
|
|
||||||
|
-- What was processed
|
||||||
|
jobs_created INTEGER DEFAULT 0,
|
||||||
|
jobs_completed INTEGER DEFAULT 0,
|
||||||
|
jobs_failed INTEGER DEFAULT 0,
|
||||||
|
dispensaries_crawled INTEGER DEFAULT 0,
|
||||||
|
products_found INTEGER DEFAULT 0,
|
||||||
|
|
||||||
|
error_message TEXT,
|
||||||
|
metadata JSONB DEFAULT '{}',
|
||||||
|
|
||||||
|
created_at TIMESTAMPTZ DEFAULT NOW()
|
||||||
|
);
|
||||||
|
|
||||||
|
-- Index for efficient lookups
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_workers_enabled ON workers(enabled) WHERE enabled = TRUE;
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_workers_next_run ON workers(next_run_at) WHERE enabled = TRUE;
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_workers_status ON workers(status);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_worker_runs_worker_id ON worker_runs(worker_id);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_worker_runs_started_at ON worker_runs(started_at DESC);
|
||||||
|
|
||||||
|
-- Add worker_id to dispensary_crawl_jobs if not exists
|
||||||
|
DO $$
|
||||||
|
BEGIN
|
||||||
|
IF NOT EXISTS (
|
||||||
|
SELECT 1 FROM information_schema.columns
|
||||||
|
WHERE table_name = 'dispensary_crawl_jobs' AND column_name = 'assigned_worker_id'
|
||||||
|
) THEN
|
||||||
|
ALTER TABLE dispensary_crawl_jobs ADD COLUMN assigned_worker_id INTEGER REFERENCES workers(id);
|
||||||
|
END IF;
|
||||||
|
END $$;
|
||||||
|
|
||||||
|
-- Migrate existing job_schedules workers to new workers table
|
||||||
|
INSERT INTO workers (name, role, description, enabled, interval_minutes, jitter_minutes, job_type, job_config, last_run_at, last_status, last_error, last_duration_ms, next_run_at)
|
||||||
|
SELECT
|
||||||
|
worker_name,
|
||||||
|
worker_role,
|
||||||
|
description,
|
||||||
|
enabled,
|
||||||
|
base_interval_minutes,
|
||||||
|
jitter_minutes,
|
||||||
|
job_name,
|
||||||
|
job_config,
|
||||||
|
last_run_at,
|
||||||
|
last_status,
|
||||||
|
last_error_message,
|
||||||
|
last_duration_ms,
|
||||||
|
next_run_at
|
||||||
|
FROM job_schedules
|
||||||
|
WHERE worker_name IS NOT NULL
|
||||||
|
ON CONFLICT (name) DO UPDATE SET
|
||||||
|
updated_at = NOW();
|
||||||
|
|
||||||
|
-- Available worker roles (reference)
|
||||||
|
COMMENT ON TABLE workers IS 'Named workers with specific roles and assignments. Roles include:
|
||||||
|
- product_sync: Crawls products from dispensary menus
|
||||||
|
- store_discovery: Discovers new dispensary locations
|
||||||
|
- entry_point_finder: Detects menu providers and resolves platform IDs
|
||||||
|
- analytics_refresh: Refreshes materialized views and analytics
|
||||||
|
- price_monitor: Monitors price changes and triggers alerts
|
||||||
|
- inventory_sync: Syncs inventory levels
|
||||||
|
- image_processor: Downloads and processes product images
|
||||||
|
- data_validator: Validates data integrity';
|
||||||
49  backend/migrations/052_seo_settings.sql  Normal file
@@ -0,0 +1,49 @@
|
|||||||
|
-- Migration 052: SEO Settings Table
|
||||||
|
-- Key/value store for SEO Orchestrator configuration
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS seo_settings (
|
||||||
|
id SERIAL PRIMARY KEY,
|
||||||
|
key TEXT UNIQUE NOT NULL,
|
||||||
|
value JSONB NOT NULL,
|
||||||
|
created_at TIMESTAMP DEFAULT NOW(),
|
||||||
|
updated_at TIMESTAMP DEFAULT NOW()
|
||||||
|
);
|
||||||
|
|
||||||
|
-- Create index on key for fast lookups
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_seo_settings_key ON seo_settings(key);
|
||||||
|
|
||||||
|
-- Seed with default settings
|
||||||
|
INSERT INTO seo_settings (key, value) VALUES
|
||||||
|
-- Section 1: Global Content Generation Settings
|
||||||
|
('primary_prompt_template', '"You are a cannabis industry content expert. Generate SEO-optimized content for {{page_type}} pages about {{subject}}. Focus on: {{focus_areas}}. Maintain a {{tone}} tone and keep content {{length}}."'),
|
||||||
|
('regeneration_prompt_template', '"Regenerate the following SEO content with fresh perspectives. Original topic: {{subject}}. Improve upon: {{improvement_areas}}. Maintain compliance with cannabis industry standards."'),
|
||||||
|
('default_content_length', '"medium"'),
|
||||||
|
('tone_voice', '"informational"'),
|
||||||
|
|
||||||
|
-- Section 2: Automatic Refresh Rules
|
||||||
|
('auto_refresh_interval', '"weekly"'),
|
||||||
|
('trigger_pct_product_change', 'true'),
|
||||||
|
('trigger_pct_brand_change', 'true'),
|
||||||
|
('trigger_new_stores', 'true'),
|
||||||
|
('trigger_market_shift', 'false'),
|
||||||
|
('webhook_url', '""'),
|
||||||
|
('notify_on_trigger', 'false'),
|
||||||
|
|
||||||
|
-- Section 3: Page-Level Defaults
|
||||||
|
('default_title_template', '"{{state_name}} Dispensaries | Find Cannabis Near You | CannaiQ"'),
|
||||||
|
('default_meta_description_template', '"Discover the best dispensaries in {{state_name}}. Browse {{dispensary_count}}+ licensed retailers, compare prices, and find cannabis products near you."'),
|
||||||
|
('default_slug_template', '"dispensaries-{{state_code_lower}}"'),
|
||||||
|
('default_og_image_template', '"/images/seo/og-{{state_code_lower}}.jpg"'),
|
||||||
|
('enable_ai_images', 'false'),
|
||||||
|
|
||||||
|
-- Section 4: Crawl / Dataset Configuration
|
||||||
|
('primary_data_provider', '"cannaiq"'),
|
||||||
|
('fallback_data_provider', '"dutchie"'),
|
||||||
|
('min_data_freshness_hours', '24'),
|
||||||
|
('stale_data_behavior', '"allow_with_warning"')
|
||||||
|
ON CONFLICT (key) DO NOTHING;
|
||||||
|
|
||||||
|
-- Record migration
|
||||||
|
INSERT INTO schema_migrations (version, name, applied_at)
|
||||||
|
VALUES ('052', 'seo_settings', NOW())
|
||||||
|
ON CONFLICT (version) DO NOTHING;
|
||||||
140  backend/migrations/066_dutchie_field_alignment.sql  Normal file
@@ -0,0 +1,140 @@
|
|||||||
|
-- Migration 066: Align dispensaries and discovery_locations tables with Dutchie field names
|
||||||
|
-- Uses snake_case convention (Postgres standard) mapped from Dutchie's camelCase
|
||||||
|
--
|
||||||
|
-- Changes:
|
||||||
|
-- 1. dispensaries: rename address→address1, zip→zipcode, remove company_name
|
||||||
|
-- 2. dispensaries: add missing Dutchie fields
|
||||||
|
-- 3. dutchie_discovery_locations: add missing Dutchie fields
|
||||||
|
|
||||||
|
-- ============================================================================
|
||||||
|
-- DISPENSARIES TABLE
|
||||||
|
-- ============================================================================
|
||||||
|
|
||||||
|
-- Rename address to address1 (matches Dutchie's address1)
|
||||||
|
ALTER TABLE dispensaries RENAME COLUMN address TO address1;
|
||||||
|
|
||||||
|
-- Rename zip to zipcode (matches Dutchie's zip, but we use zipcode for clarity)
|
||||||
|
ALTER TABLE dispensaries RENAME COLUMN zip TO zipcode;
|
||||||
|
|
||||||
|
-- Drop company_name (redundant with name)
|
||||||
|
ALTER TABLE dispensaries DROP COLUMN IF EXISTS company_name;
|
||||||
|
|
||||||
|
-- Add address2
|
||||||
|
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS address2 VARCHAR(255);
|
||||||
|
|
||||||
|
-- Add country
|
||||||
|
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS country VARCHAR(100) DEFAULT 'United States';
|
||||||
|
|
||||||
|
-- Add timezone
|
||||||
|
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS timezone VARCHAR(50);
|
||||||
|
|
||||||
|
-- Add email
|
||||||
|
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS email VARCHAR(255);
|
||||||
|
|
||||||
|
-- Add description
|
||||||
|
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS description TEXT;
|
||||||
|
|
||||||
|
-- Add logo_image (Dutchie: logoImage)
|
||||||
|
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS logo_image TEXT;
|
||||||
|
|
||||||
|
-- Add banner_image (Dutchie: bannerImage)
|
||||||
|
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS banner_image TEXT;
|
||||||
|
|
||||||
|
-- Add offer_pickup (Dutchie: offerPickup)
|
||||||
|
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS offer_pickup BOOLEAN DEFAULT TRUE;
|
||||||
|
|
||||||
|
-- Add offer_delivery (Dutchie: offerDelivery)
|
||||||
|
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS offer_delivery BOOLEAN DEFAULT FALSE;
|
||||||
|
|
||||||
|
-- Add offer_curbside_pickup (Dutchie: offerCurbsidePickup)
|
||||||
|
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS offer_curbside_pickup BOOLEAN DEFAULT FALSE;
|
||||||
|
|
||||||
|
-- Add is_medical (Dutchie: isMedical)
|
||||||
|
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS is_medical BOOLEAN DEFAULT FALSE;
|
||||||
|
|
||||||
|
-- Add is_recreational (Dutchie: isRecreational)
|
||||||
|
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS is_recreational BOOLEAN DEFAULT FALSE;
|
||||||
|
|
||||||
|
-- Add chain_slug (Dutchie: chain)
|
||||||
|
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS chain_slug VARCHAR(255);
|
||||||
|
|
||||||
|
-- Add enterprise_id (Dutchie: retailer.enterpriseId)
|
||||||
|
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS enterprise_id VARCHAR(100);
|
||||||
|
|
||||||
|
-- Add status (Dutchie: status - open/closed)
|
||||||
|
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS status VARCHAR(50);
|
||||||
|
|
||||||
|
-- Add c_name (Dutchie: cName - the URL slug used in embedded menus)
|
||||||
|
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS c_name VARCHAR(255);
|
||||||
|
|
||||||
|
-- ============================================================================
|
||||||
|
-- DUTCHIE_DISCOVERY_LOCATIONS TABLE
|
||||||
|
-- ============================================================================
|
||||||
|
|
||||||
|
-- Add phone
|
||||||
|
ALTER TABLE dutchie_discovery_locations ADD COLUMN IF NOT EXISTS phone VARCHAR(50);
|
||||||
|
|
||||||
|
-- Add website (Dutchie: embedBackUrl)
|
||||||
|
ALTER TABLE dutchie_discovery_locations ADD COLUMN IF NOT EXISTS website TEXT;
|
||||||
|
|
||||||
|
-- Add email
|
||||||
|
ALTER TABLE dutchie_discovery_locations ADD COLUMN IF NOT EXISTS email VARCHAR(255);
|
||||||
|
|
||||||
|
-- Add description
|
||||||
|
ALTER TABLE dutchie_discovery_locations ADD COLUMN IF NOT EXISTS description TEXT;
|
||||||
|
|
||||||
|
-- Add logo_image
|
||||||
|
ALTER TABLE dutchie_discovery_locations ADD COLUMN IF NOT EXISTS logo_image TEXT;
|
||||||
|
|
||||||
|
-- Add banner_image
|
||||||
|
ALTER TABLE dutchie_discovery_locations ADD COLUMN IF NOT EXISTS banner_image TEXT;
|
||||||
|
|
||||||
|
-- Add chain_slug
|
||||||
|
ALTER TABLE dutchie_discovery_locations ADD COLUMN IF NOT EXISTS chain_slug VARCHAR(255);
|
||||||
|
|
||||||
|
-- Add enterprise_id
|
||||||
|
ALTER TABLE dutchie_discovery_locations ADD COLUMN IF NOT EXISTS enterprise_id VARCHAR(100);
|
||||||
|
|
||||||
|
-- Add c_name
|
||||||
|
ALTER TABLE dutchie_discovery_locations ADD COLUMN IF NOT EXISTS c_name VARCHAR(255);
|
||||||
|
|
||||||
|
-- Add country
|
||||||
|
ALTER TABLE dutchie_discovery_locations ADD COLUMN IF NOT EXISTS country VARCHAR(100) DEFAULT 'United States';
|
||||||
|
|
||||||
|
-- Add store status
|
||||||
|
ALTER TABLE dutchie_discovery_locations ADD COLUMN IF NOT EXISTS store_status VARCHAR(50);
|
||||||
|
|
||||||
|
-- ============================================================================
|
||||||
|
-- INDEXES
|
||||||
|
-- ============================================================================
|
||||||
|
|
||||||
|
-- Index for chain lookups
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_dispensaries_chain_slug ON dispensaries(chain_slug) WHERE chain_slug IS NOT NULL;
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_discovery_locations_chain_slug ON dutchie_discovery_locations(chain_slug) WHERE chain_slug IS NOT NULL;
|
||||||
|
|
||||||
|
-- Index for enterprise lookups (for multi-location chains)
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_dispensaries_enterprise_id ON dispensaries(enterprise_id) WHERE enterprise_id IS NOT NULL;
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_discovery_locations_enterprise_id ON dutchie_discovery_locations(enterprise_id) WHERE enterprise_id IS NOT NULL;
|
||||||
|
|
||||||
|
-- Index for c_name lookups
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_dispensaries_c_name ON dispensaries(c_name) WHERE c_name IS NOT NULL;
|
||||||
|
|
||||||
|
-- ============================================================================
|
||||||
|
-- COMMENTS
|
||||||
|
-- ============================================================================
|
||||||
|
|
||||||
|
COMMENT ON COLUMN dispensaries.address1 IS 'Street address line 1 (Dutchie: address1)';
|
||||||
|
COMMENT ON COLUMN dispensaries.address2 IS 'Street address line 2 (Dutchie: address2)';
|
||||||
|
COMMENT ON COLUMN dispensaries.zipcode IS 'ZIP/postal code (Dutchie: zip)';
|
||||||
|
COMMENT ON COLUMN dispensaries.c_name IS 'Dutchie URL slug for embedded menus (Dutchie: cName)';
|
||||||
|
COMMENT ON COLUMN dispensaries.chain_slug IS 'Chain identifier slug (Dutchie: chain)';
|
||||||
|
COMMENT ON COLUMN dispensaries.enterprise_id IS 'Parent enterprise UUID (Dutchie: retailer.enterpriseId)';
|
||||||
|
COMMENT ON COLUMN dispensaries.logo_image IS 'Logo image URL (Dutchie: logoImage)';
|
||||||
|
COMMENT ON COLUMN dispensaries.banner_image IS 'Banner image URL (Dutchie: bannerImage)';
|
||||||
|
COMMENT ON COLUMN dispensaries.offer_pickup IS 'Offers in-store pickup (Dutchie: offerPickup)';
|
||||||
|
COMMENT ON COLUMN dispensaries.offer_delivery IS 'Offers delivery (Dutchie: offerDelivery)';
|
||||||
|
COMMENT ON COLUMN dispensaries.offer_curbside_pickup IS 'Offers curbside pickup (Dutchie: offerCurbsidePickup)';
|
||||||
|
COMMENT ON COLUMN dispensaries.is_medical IS 'Licensed for medical sales (Dutchie: isMedical)';
|
||||||
|
COMMENT ON COLUMN dispensaries.is_recreational IS 'Licensed for recreational sales (Dutchie: isRecreational)';
|
||||||
|
|
||||||
|
SELECT 'Migration 066 completed: Dutchie field alignment' as status;
|
||||||
24  backend/migrations/067_promotion_log.sql  Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
-- Promotion log table for tracking discovery → dispensary promotions
|
||||||
|
-- Tracks validation and promotion actions for audit/review
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS dutchie_promotion_log (
|
||||||
|
id SERIAL PRIMARY KEY,
|
||||||
|
discovery_id INTEGER REFERENCES dutchie_discovery_locations(id) ON DELETE SET NULL,
|
||||||
|
dispensary_id INTEGER REFERENCES dispensaries(id) ON DELETE SET NULL,
|
||||||
|
action VARCHAR(50) NOT NULL, -- 'validated', 'rejected', 'promoted_create', 'promoted_update', 'skipped'
|
||||||
|
state_code VARCHAR(10),
|
||||||
|
store_name VARCHAR(255),
|
||||||
|
validation_errors TEXT[], -- Array of error messages if rejected
|
||||||
|
field_changes JSONB, -- Before/after snapshot of changed fields
|
||||||
|
triggered_by VARCHAR(100) DEFAULT 'auto', -- 'auto', 'manual', 'api'
|
||||||
|
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP
|
||||||
|
);
|
||||||
|
|
||||||
|
-- Indexes for efficient querying
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_promotion_log_discovery_id ON dutchie_promotion_log(discovery_id);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_promotion_log_dispensary_id ON dutchie_promotion_log(dispensary_id);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_promotion_log_action ON dutchie_promotion_log(action);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_promotion_log_state_code ON dutchie_promotion_log(state_code);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_promotion_log_created_at ON dutchie_promotion_log(created_at DESC);
|
||||||
|
|
||||||
|
COMMENT ON TABLE dutchie_promotion_log IS 'Audit log for discovery location validation and promotion to dispensaries';
|
||||||
95  backend/migrations/068_crawler_status_alerts.sql  Normal file
@@ -0,0 +1,95 @@
|
|||||||
|
-- Migration 068: Crawler Status Alerts
|
||||||
|
-- Creates status_alerts table for dashboard notifications and status change logging
|
||||||
|
|
||||||
|
-- ============================================================
|
||||||
|
-- STATUS ALERTS TABLE
|
||||||
|
-- ============================================================
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS crawler_status_alerts (
|
||||||
|
id SERIAL PRIMARY KEY,
|
||||||
|
|
||||||
|
-- References
|
||||||
|
dispensary_id INTEGER REFERENCES dispensaries(id),
|
||||||
|
profile_id INTEGER REFERENCES dispensary_crawler_profiles(id),
|
||||||
|
|
||||||
|
-- Alert info
|
||||||
|
alert_type VARCHAR(50) NOT NULL, -- 'status_change', 'crawl_error', 'validation_failed', 'promoted', 'demoted'
|
||||||
|
severity VARCHAR(20) DEFAULT 'info', -- 'info', 'warning', 'error', 'critical'
|
||||||
|
|
||||||
|
-- Status transition
|
||||||
|
previous_status VARCHAR(50),
|
||||||
|
new_status VARCHAR(50),
|
||||||
|
|
||||||
|
-- Context
|
||||||
|
message TEXT,
|
||||||
|
error_details JSONB,
|
||||||
|
metadata JSONB, -- Additional context (product counts, error codes, etc.)
|
||||||
|
|
||||||
|
-- Tracking
|
||||||
|
acknowledged BOOLEAN DEFAULT FALSE,
|
||||||
|
acknowledged_at TIMESTAMP WITH TIME ZONE,
|
||||||
|
acknowledged_by VARCHAR(100),
|
||||||
|
|
||||||
|
-- Timestamps
|
||||||
|
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP
|
||||||
|
);
|
||||||
|
|
||||||
|
-- Indexes for common queries
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_crawler_status_alerts_dispensary ON crawler_status_alerts(dispensary_id);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_crawler_status_alerts_type ON crawler_status_alerts(alert_type);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_crawler_status_alerts_severity ON crawler_status_alerts(severity);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_crawler_status_alerts_unack ON crawler_status_alerts(acknowledged) WHERE acknowledged = FALSE;
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_crawler_status_alerts_created ON crawler_status_alerts(created_at DESC);
|
||||||
|
|
||||||
|
-- ============================================================
|
||||||
|
-- STATUS DEFINITIONS (for reference/validation)
|
||||||
|
-- ============================================================
|
||||||
|
|
||||||
|
COMMENT ON TABLE crawler_status_alerts IS 'Crawler status change notifications for dashboard alerting';
|
||||||
|
COMMENT ON COLUMN crawler_status_alerts.alert_type IS 'Type: status_change, crawl_error, validation_failed, promoted, demoted';
|
||||||
|
COMMENT ON COLUMN crawler_status_alerts.severity IS 'Severity: info, warning, error, critical';
|
||||||
|
COMMENT ON COLUMN crawler_status_alerts.previous_status IS 'Previous crawler status before change';
|
||||||
|
COMMENT ON COLUMN crawler_status_alerts.new_status IS 'New crawler status after change';
|
||||||
|
|
||||||
|
-- ============================================================
|
||||||
|
-- STATUS TRACKING ON PROFILES
|
||||||
|
-- ============================================================
|
||||||
|
|
||||||
|
-- Add columns for status tracking if not exists
|
||||||
|
DO $$
|
||||||
|
BEGIN
|
||||||
|
-- Consecutive success count for auto-promotion
|
||||||
|
IF NOT EXISTS (SELECT 1 FROM information_schema.columns
|
||||||
|
WHERE table_name = 'dispensary_crawler_profiles' AND column_name = 'consecutive_successes') THEN
|
||||||
|
ALTER TABLE dispensary_crawler_profiles ADD COLUMN consecutive_successes INTEGER DEFAULT 0;
|
||||||
|
END IF;
|
||||||
|
|
||||||
|
-- Consecutive failure count for auto-demotion
|
||||||
|
IF NOT EXISTS (SELECT 1 FROM information_schema.columns
|
||||||
|
WHERE table_name = 'dispensary_crawler_profiles' AND column_name = 'consecutive_failures') THEN
|
||||||
|
ALTER TABLE dispensary_crawler_profiles ADD COLUMN consecutive_failures INTEGER DEFAULT 0;
|
||||||
|
END IF;
|
||||||
|
|
||||||
|
-- Last status change timestamp
|
||||||
|
IF NOT EXISTS (SELECT 1 FROM information_schema.columns
|
||||||
|
WHERE table_name = 'dispensary_crawler_profiles' AND column_name = 'status_changed_at') THEN
|
||||||
|
ALTER TABLE dispensary_crawler_profiles ADD COLUMN status_changed_at TIMESTAMP WITH TIME ZONE;
|
||||||
|
END IF;
|
||||||
|
|
||||||
|
-- Status change reason
|
||||||
|
IF NOT EXISTS (SELECT 1 FROM information_schema.columns
|
||||||
|
WHERE table_name = 'dispensary_crawler_profiles' AND column_name = 'status_reason') THEN
|
||||||
|
ALTER TABLE dispensary_crawler_profiles ADD COLUMN status_reason TEXT;
|
||||||
|
END IF;
|
||||||
|
END $$;
|
||||||
|
|
||||||
|
-- ============================================================
|
||||||
|
-- VALID STATUS VALUES
|
||||||
|
-- ============================================================
|
||||||
|
-- Status values for dispensary_crawler_profiles.status:
|
||||||
|
-- 'sandbox' - Newly created, being validated
|
||||||
|
-- 'production' - Healthy, actively crawled
|
||||||
|
-- 'needs_manual' - Requires human intervention
|
||||||
|
-- 'failing' - Multiple consecutive failures
|
||||||
|
-- 'disabled' - Manually disabled
|
||||||
|
-- 'legacy' - No profile, uses default method (virtual status)
|
||||||
163  backend/migrations/069_six_stage_status.sql  Normal file
@@ -0,0 +1,163 @@
|
|||||||
|
-- Migration 069: Seven-Stage Status System
|
||||||
|
--
|
||||||
|
-- Implements explicit 7-stage pipeline for store lifecycle:
|
||||||
|
-- 1. discovered - Found via Dutchie API, raw data
|
||||||
|
-- 2. validated - Passed field checks, ready for promotion
|
||||||
|
-- 3. promoted - In dispensaries table, has crawler profile
|
||||||
|
-- 4. sandbox - First crawl attempted, testing
|
||||||
|
-- 5. hydrating - Products are being loaded/updated
|
||||||
|
-- 6. production - Healthy, scheduled crawls via Horizon
|
||||||
|
-- 7. failing - Crawl errors, needs attention
|
||||||
|
|
||||||
|
-- ============================================================
|
||||||
|
-- STAGE ENUM TYPE
|
||||||
|
-- ============================================================
|
||||||
|
|
||||||
|
DO $$
|
||||||
|
BEGIN
|
||||||
|
-- Create enum if not exists
|
||||||
|
IF NOT EXISTS (SELECT 1 FROM pg_type WHERE typname = 'store_stage') THEN
|
||||||
|
CREATE TYPE store_stage AS ENUM (
|
||||||
|
'discovered',
|
||||||
|
'validated',
|
||||||
|
'promoted',
|
||||||
|
'sandbox',
|
||||||
|
'hydrating',
|
||||||
|
'production',
|
||||||
|
'failing'
|
||||||
|
);
|
||||||
|
END IF;
|
||||||
|
END $$;
|
||||||
|
|
||||||
|
-- ============================================================
|
||||||
|
-- UPDATE DISCOVERY LOCATIONS TABLE
|
||||||
|
-- ============================================================
|
||||||
|
|
||||||
|
-- Add stage column to discovery locations (replaces status)
|
||||||
|
DO $$
|
||||||
|
BEGIN
|
||||||
|
IF NOT EXISTS (SELECT 1 FROM information_schema.columns
|
||||||
|
WHERE table_name = 'dutchie_discovery_locations' AND column_name = 'stage') THEN
|
||||||
|
ALTER TABLE dutchie_discovery_locations ADD COLUMN stage VARCHAR(20) DEFAULT 'discovered';
|
||||||
|
END IF;
|
||||||
|
END $$;
|
||||||
|
|
||||||
|
-- Migrate existing status values to stage
|
||||||
|
UPDATE dutchie_discovery_locations
|
||||||
|
SET stage = CASE
|
||||||
|
WHEN status = 'discovered' THEN 'discovered'
|
||||||
|
WHEN status = 'verified' THEN 'validated'
|
||||||
|
WHEN status = 'rejected' THEN 'failing'
|
||||||
|
WHEN status = 'merged' THEN 'validated'
|
||||||
|
ELSE 'discovered'
|
||||||
|
END
|
||||||
|
WHERE stage IS NULL OR stage = '';
|
||||||
|
|
||||||
|
-- ============================================================
|
||||||
|
-- UPDATE CRAWLER PROFILES TABLE
|
||||||
|
-- ============================================================
|
||||||
|
|
||||||
|
-- Ensure status column exists and update to new values
|
||||||
|
UPDATE dispensary_crawler_profiles
|
||||||
|
SET status = CASE
|
||||||
|
WHEN status = 'sandbox' THEN 'sandbox'
|
||||||
|
WHEN status = 'production' THEN 'production'
|
||||||
|
WHEN status = 'needs_manual' THEN 'failing'
|
||||||
|
WHEN status = 'failing' THEN 'failing'
|
||||||
|
WHEN status = 'disabled' THEN 'failing'
|
||||||
|
WHEN status IS NULL THEN 'promoted'
|
||||||
|
ELSE 'promoted'
|
||||||
|
END;
|
||||||
|
|
||||||
|
-- ============================================================
|
||||||
|
-- ADD STAGE TRACKING TO DISPENSARIES
|
||||||
|
-- ============================================================
|
||||||
|
|
||||||
|
DO $$
|
||||||
|
BEGIN
|
||||||
|
-- Add stage column to dispensaries for quick filtering
|
||||||
|
IF NOT EXISTS (SELECT 1 FROM information_schema.columns
|
||||||
|
WHERE table_name = 'dispensaries' AND column_name = 'stage') THEN
|
||||||
|
ALTER TABLE dispensaries ADD COLUMN stage VARCHAR(20) DEFAULT 'promoted';
|
||||||
|
END IF;
|
||||||
|
|
||||||
|
-- Add stage_changed_at for tracking
|
||||||
|
IF NOT EXISTS (SELECT 1 FROM information_schema.columns
|
||||||
|
WHERE table_name = 'dispensaries' AND column_name = 'stage_changed_at') THEN
|
||||||
|
ALTER TABLE dispensaries ADD COLUMN stage_changed_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP;
|
||||||
|
END IF;
|
||||||
|
|
||||||
|
-- Add first_crawl_at to track sandbox → production transition
|
||||||
|
IF NOT EXISTS (SELECT 1 FROM information_schema.columns
|
||||||
|
WHERE table_name = 'dispensaries' AND column_name = 'first_crawl_at') THEN
|
||||||
|
ALTER TABLE dispensaries ADD COLUMN first_crawl_at TIMESTAMP WITH TIME ZONE;
|
||||||
|
END IF;
|
||||||
|
|
||||||
|
-- Add last_successful_crawl_at
|
||||||
|
IF NOT EXISTS (SELECT 1 FROM information_schema.columns
|
||||||
|
WHERE table_name = 'dispensaries' AND column_name = 'last_successful_crawl_at') THEN
|
||||||
|
ALTER TABLE dispensaries ADD COLUMN last_successful_crawl_at TIMESTAMP WITH TIME ZONE;
|
||||||
|
END IF;
|
||||||
|
END $$;
|
||||||
|
|
||||||
|
-- Set initial stage for existing dispensaries based on their crawler profile status
|
||||||
|
UPDATE dispensaries d
|
||||||
|
SET stage = COALESCE(
|
||||||
|
(SELECT dcp.status FROM dispensary_crawler_profiles dcp
|
||||||
|
WHERE dcp.dispensary_id = d.id AND dcp.enabled = true
|
||||||
|
ORDER BY dcp.updated_at DESC LIMIT 1),
|
||||||
|
'promoted'
|
||||||
|
)
|
||||||
|
WHERE d.stage IS NULL OR d.stage = '';
|
||||||
|
|
||||||
|
-- ============================================================
|
||||||
|
-- INDEXES FOR STAGE-BASED QUERIES
|
||||||
|
-- ============================================================
|
||||||
|
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_dispensaries_stage ON dispensaries(stage);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_dispensaries_stage_state ON dispensaries(stage, state);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_discovery_locations_stage ON dutchie_discovery_locations(stage);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_crawler_profiles_status ON dispensary_crawler_profiles(status);
|
||||||
|
|
||||||
|
-- ============================================================
|
||||||
|
-- STAGE TRANSITION LOG
|
||||||
|
-- ============================================================
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS stage_transitions (
|
||||||
|
id SERIAL PRIMARY KEY,
|
||||||
|
|
||||||
|
-- What changed
|
||||||
|
entity_type VARCHAR(20) NOT NULL, -- 'discovery_location' or 'dispensary'
|
||||||
|
entity_id INTEGER NOT NULL,
|
||||||
|
|
||||||
|
-- Stage change
|
||||||
|
from_stage VARCHAR(20),
|
||||||
|
to_stage VARCHAR(20) NOT NULL,
|
||||||
|
|
||||||
|
-- Context
|
||||||
|
trigger_type VARCHAR(50) NOT NULL, -- 'api', 'scheduler', 'manual', 'auto'
|
||||||
|
trigger_endpoint VARCHAR(200),
|
||||||
|
|
||||||
|
-- Outcome
|
||||||
|
success BOOLEAN DEFAULT TRUE,
|
||||||
|
error_message TEXT,
|
||||||
|
metadata JSONB,
|
||||||
|
|
||||||
|
-- Timing
|
||||||
|
duration_ms INTEGER,
|
||||||
|
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_stage_transitions_entity ON stage_transitions(entity_type, entity_id);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_stage_transitions_to_stage ON stage_transitions(to_stage);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_stage_transitions_created ON stage_transitions(created_at DESC);
|
||||||
|
|
||||||
|
-- ============================================================
|
||||||
|
-- COMMENTS
|
||||||
|
-- ============================================================
|
||||||
|
|
||||||
|
COMMENT ON TABLE stage_transitions IS 'Audit log for all stage transitions in the pipeline';
|
||||||
|
COMMENT ON COLUMN dispensaries.stage IS 'Current pipeline stage: discovered, validated, promoted, sandbox, hydrating, production, failing';
|
||||||
|
COMMENT ON COLUMN dispensaries.stage_changed_at IS 'When the stage was last changed';
|
||||||
|
COMMENT ON COLUMN dispensaries.first_crawl_at IS 'When the first crawl was attempted (sandbox stage)';
|
||||||
|
COMMENT ON COLUMN dispensaries.last_successful_crawl_at IS 'When the last successful crawl completed';
|
||||||
239  backend/migrations/070_product_variants.sql  Normal file
@@ -0,0 +1,239 @@
|
|||||||
|
-- ============================================================================
|
||||||
|
-- Migration 070: Product Variants Tables
|
||||||
|
-- ============================================================================
|
||||||
|
--
|
||||||
|
-- Purpose: Store variant-level pricing and inventory as first-class entities
|
||||||
|
-- to enable time-series analytics, price comparisons, and sale tracking.
|
||||||
|
--
|
||||||
|
-- Enables queries like:
|
||||||
|
-- - Price history for a specific variant (1g Blue Dream over time)
|
||||||
|
-- - Sale frequency analysis (how often is this on special?)
|
||||||
|
-- - Cross-store price comparison (who has cheapest 1g flower?)
|
||||||
|
-- - Current specials across all stores
|
||||||
|
--
|
||||||
|
-- RULES:
|
||||||
|
-- - STRICTLY ADDITIVE (no DROP, DELETE, TRUNCATE)
|
||||||
|
-- - All new tables use IF NOT EXISTS
|
||||||
|
-- - All indexes use IF NOT EXISTS
|
||||||
|
--
|
||||||
|
-- ============================================================================
|
||||||
|
|
||||||
|
-- ============================================================================
|
||||||
|
-- SECTION 1: PRODUCT_VARIANTS TABLE (Current State)
|
||||||
|
-- ============================================================================
|
||||||
|
-- One row per product+option combination. Tracks current pricing/inventory.
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS product_variants (
|
||||||
|
id SERIAL PRIMARY KEY,
|
||||||
|
store_product_id INTEGER NOT NULL REFERENCES store_products(id) ON DELETE CASCADE,
|
||||||
|
dispensary_id INTEGER NOT NULL REFERENCES dispensaries(id) ON DELETE CASCADE,
|
||||||
|
|
||||||
|
-- Variant identity (from Dutchie POSMetaData.children)
|
||||||
|
option VARCHAR(100) NOT NULL, -- "1g", "3.5g", "1/8oz", "100mg"
|
||||||
|
canonical_sku VARCHAR(100), -- Dutchie canonicalSKU
|
||||||
|
canonical_id VARCHAR(100), -- Dutchie canonicalID
|
||||||
|
canonical_name VARCHAR(500), -- Dutchie canonicalName
|
||||||
|
|
||||||
|
-- Current pricing (in dollars, not cents)
|
||||||
|
price_rec NUMERIC(10,2),
|
||||||
|
price_med NUMERIC(10,2),
|
||||||
|
price_rec_special NUMERIC(10,2),
|
||||||
|
price_med_special NUMERIC(10,2),
|
||||||
|
|
||||||
|
-- Current inventory
|
||||||
|
quantity INTEGER,
|
||||||
|
quantity_available INTEGER,
|
||||||
|
in_stock BOOLEAN DEFAULT TRUE,
|
||||||
|
|
||||||
|
-- Special/sale status
|
||||||
|
is_on_special BOOLEAN DEFAULT FALSE,
|
||||||
|
|
||||||
|
-- Weight/size parsing (for analytics)
|
||||||
|
weight_value NUMERIC(10,2), -- 1, 3.5, 28, etc.
|
||||||
|
weight_unit VARCHAR(20), -- g, oz, mg, ml, etc.
|
||||||
|
|
||||||
|
-- Timestamps
|
||||||
|
first_seen_at TIMESTAMPTZ DEFAULT NOW(),
|
||||||
|
last_seen_at TIMESTAMPTZ DEFAULT NOW(),
|
||||||
|
last_price_change_at TIMESTAMPTZ,
|
||||||
|
last_stock_change_at TIMESTAMPTZ,
|
||||||
|
|
||||||
|
created_at TIMESTAMPTZ DEFAULT NOW(),
|
||||||
|
updated_at TIMESTAMPTZ DEFAULT NOW(),
|
||||||
|
|
||||||
|
UNIQUE(store_product_id, option)
|
||||||
|
);
|
||||||
|
|
||||||
|
-- Indexes for common queries
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_variants_store_product ON product_variants(store_product_id);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_variants_dispensary ON product_variants(dispensary_id);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_variants_option ON product_variants(option);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_variants_in_stock ON product_variants(dispensary_id, in_stock) WHERE in_stock = TRUE;
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_variants_on_special ON product_variants(dispensary_id, is_on_special) WHERE is_on_special = TRUE;
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_variants_canonical_sku ON product_variants(canonical_sku) WHERE canonical_sku IS NOT NULL;
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_variants_price_rec ON product_variants(price_rec) WHERE price_rec IS NOT NULL;
|
||||||
|
|
||||||
|
COMMENT ON TABLE product_variants IS 'Current state of each product variant (weight/size option). One row per product+option.';
|
||||||
|
COMMENT ON COLUMN product_variants.option IS 'Weight/size option string from Dutchie (e.g., "1g", "3.5g", "1/8oz")';
|
||||||
|
COMMENT ON COLUMN product_variants.canonical_sku IS 'Dutchie POS SKU for cross-store matching';
|
||||||
|
|
||||||
|
|
||||||
|
-- ============================================================================
|
||||||
|
-- SECTION 2: PRODUCT_VARIANT_SNAPSHOTS TABLE (Historical Data)
|
||||||
|
-- ============================================================================
|
||||||
|
-- Time-series data for variant pricing. One row per variant per crawl.
|
||||||
|
-- CRITICAL: NEVER DELETE from this table.
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS product_variant_snapshots (
|
||||||
|
id SERIAL PRIMARY KEY,
|
||||||
|
product_variant_id INTEGER NOT NULL REFERENCES product_variants(id) ON DELETE CASCADE,
|
||||||
|
store_product_id INTEGER REFERENCES store_products(id) ON DELETE SET NULL,
|
||||||
|
dispensary_id INTEGER NOT NULL REFERENCES dispensaries(id) ON DELETE CASCADE,
|
||||||
|
crawl_run_id INTEGER REFERENCES crawl_runs(id) ON DELETE SET NULL,
|
||||||
|
|
||||||
|
  -- Variant identity (denormalized for query performance)
  option VARCHAR(100) NOT NULL,

  -- Pricing at time of capture
  price_rec NUMERIC(10,2),
  price_med NUMERIC(10,2),
  price_rec_special NUMERIC(10,2),
  price_med_special NUMERIC(10,2),

  -- Inventory at time of capture
  quantity INTEGER,
  in_stock BOOLEAN DEFAULT TRUE,

  -- Special status at time of capture
  is_on_special BOOLEAN DEFAULT FALSE,

  -- Feed presence (FALSE = variant missing from crawl)
  is_present_in_feed BOOLEAN DEFAULT TRUE,

  -- Capture timestamp
  captured_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),

  created_at TIMESTAMPTZ DEFAULT NOW()
);

-- Indexes for time-series queries
CREATE INDEX IF NOT EXISTS idx_variant_snapshots_variant ON product_variant_snapshots(product_variant_id, captured_at DESC);
CREATE INDEX IF NOT EXISTS idx_variant_snapshots_dispensary ON product_variant_snapshots(dispensary_id, captured_at DESC);
CREATE INDEX IF NOT EXISTS idx_variant_snapshots_crawl ON product_variant_snapshots(crawl_run_id) WHERE crawl_run_id IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_variant_snapshots_captured ON product_variant_snapshots(captured_at DESC);
CREATE INDEX IF NOT EXISTS idx_variant_snapshots_special ON product_variant_snapshots(is_on_special, captured_at DESC) WHERE is_on_special = TRUE;
CREATE INDEX IF NOT EXISTS idx_variant_snapshots_option ON product_variant_snapshots(option, captured_at DESC);

COMMENT ON TABLE product_variant_snapshots IS 'Historical variant pricing/inventory. One row per variant per crawl. NEVER DELETE.';

-- ============================================================================
-- SECTION 3: USEFUL VIEWS
-- ============================================================================

-- View: Current specials across all stores
CREATE OR REPLACE VIEW v_current_specials AS
SELECT
  pv.id as variant_id,
  sp.id as product_id,
  sp.name_raw as product_name,
  sp.brand_name_raw as brand_name,
  sp.category_raw as category,
  d.id as dispensary_id,
  d.name as dispensary_name,
  d.city,
  d.state,
  pv.option,
  pv.price_rec,
  pv.price_rec_special,
  ROUND(((pv.price_rec - pv.price_rec_special) / NULLIF(pv.price_rec, 0)) * 100, 1) as discount_percent,
  pv.quantity,
  pv.in_stock,
  pv.last_seen_at
FROM product_variants pv
JOIN store_products sp ON sp.id = pv.store_product_id
JOIN dispensaries d ON d.id = pv.dispensary_id
WHERE pv.is_on_special = TRUE
  AND pv.in_stock = TRUE
  AND pv.price_rec_special IS NOT NULL
  AND pv.price_rec_special < pv.price_rec;

COMMENT ON VIEW v_current_specials IS 'All products currently on special across all stores';

-- View: Price comparison for a product across stores
CREATE OR REPLACE VIEW v_price_comparison AS
SELECT
  sp.name_raw as product_name,
  sp.brand_name_raw as brand_name,
  sp.category_raw as category,
  pv.option,
  d.id as dispensary_id,
  d.name as dispensary_name,
  d.city,
  pv.price_rec,
  pv.price_rec_special,
  pv.is_on_special,
  pv.in_stock,
  pv.quantity,
  RANK() OVER (PARTITION BY sp.name_raw, pv.option ORDER BY COALESCE(pv.price_rec_special, pv.price_rec) ASC) as price_rank
FROM product_variants pv
JOIN store_products sp ON sp.id = pv.store_product_id
JOIN dispensaries d ON d.id = pv.dispensary_id
WHERE pv.in_stock = TRUE
  AND (pv.price_rec IS NOT NULL OR pv.price_rec_special IS NOT NULL);

COMMENT ON VIEW v_price_comparison IS 'Compare prices for same product across stores, ranked by price';

-- View: Latest snapshot per variant
CREATE OR REPLACE VIEW v_latest_variant_snapshots AS
SELECT DISTINCT ON (product_variant_id)
  pvs.*
FROM product_variant_snapshots pvs
ORDER BY product_variant_id, captured_at DESC;

-- ============================================================================
-- SECTION 4: HELPER FUNCTION FOR SALE FREQUENCY
-- ============================================================================

-- Function to calculate sale frequency for a variant
CREATE OR REPLACE FUNCTION get_variant_sale_stats(p_variant_id INTEGER, p_days INTEGER DEFAULT 30)
RETURNS TABLE (
  total_snapshots BIGINT,
  times_on_special BIGINT,
  special_frequency_pct NUMERIC,
  avg_discount_pct NUMERIC,
  min_price NUMERIC,
  max_price NUMERIC,
  avg_price NUMERIC
) AS $$
BEGIN
  RETURN QUERY
  SELECT
    COUNT(*)::BIGINT as total_snapshots,
    COUNT(*) FILTER (WHERE is_on_special)::BIGINT as times_on_special,
    ROUND((COUNT(*) FILTER (WHERE is_on_special)::NUMERIC / NULLIF(COUNT(*), 0)) * 100, 1) as special_frequency_pct,
    ROUND(AVG(
      CASE WHEN is_on_special AND price_rec_special IS NOT NULL AND price_rec IS NOT NULL
        THEN ((price_rec - price_rec_special) / NULLIF(price_rec, 0)) * 100
      END
    ), 1) as avg_discount_pct,
    MIN(COALESCE(price_rec_special, price_rec)) as min_price,
    MAX(price_rec) as max_price,
    ROUND(AVG(COALESCE(price_rec_special, price_rec)), 2) as avg_price
  FROM product_variant_snapshots
  WHERE product_variant_id = p_variant_id
    AND captured_at >= NOW() - (p_days || ' days')::INTERVAL;
END;
$$ LANGUAGE plpgsql;

COMMENT ON FUNCTION get_variant_sale_stats IS 'Get sale frequency and price stats for a variant over N days';

-- ============================================================================
-- DONE
-- ============================================================================

SELECT 'Migration 070 completed. Product variants tables ready for time-series analytics.' AS status;
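The views and helper function above are meant to be queried directly. A short illustrative session (the variant id 101 is a placeholder, not a real row):

-- Sale frequency and price stats for one variant over the last 30 days
SELECT * FROM get_variant_sale_stats(101, 30);

-- Deepest current discounts in a state, best deals first
SELECT product_name, dispensary_name, price_rec, price_rec_special, discount_percent
FROM v_current_specials
WHERE state = 'AZ'
ORDER BY discount_percent DESC
LIMIT 20;

-- Cheapest store for each matching product/option
SELECT product_name, option, dispensary_name, COALESCE(price_rec_special, price_rec) AS best_price
FROM v_price_comparison
WHERE product_name ILIKE '%gummies%' AND price_rank = 1;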
53  backend/migrations/071_harmonize_store_products.sql  Normal file
@@ -0,0 +1,53 @@
-- Migration 071: Harmonize store_products with dutchie_products
-- Adds missing columns to store_products to consolidate on a single canonical table

-- Product details
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS description TEXT;
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS weight VARCHAR(50);
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS weights JSONB;
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS measurements JSONB;

-- Cannabinoid/terpene data
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS effects JSONB;
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS terpenes JSONB;
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS cannabinoids_v2 JSONB;
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS thc_content NUMERIC(10,4);
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS cbd_content NUMERIC(10,4);

-- Images
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS images JSONB;
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS primary_image_url TEXT;

-- Inventory
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS total_quantity_available INTEGER DEFAULT 0;

-- Status/flags
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS status VARCHAR(50);
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS featured BOOLEAN DEFAULT FALSE;
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS coming_soon BOOLEAN DEFAULT FALSE;
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS visibility_lost BOOLEAN DEFAULT FALSE;
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS visibility_lost_at TIMESTAMP WITH TIME ZONE;
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS visibility_restored_at TIMESTAMP WITH TIME ZONE;

-- Threshold flags (Dutchie-specific)
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS is_below_threshold BOOLEAN DEFAULT FALSE;
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS is_below_kiosk_threshold BOOLEAN DEFAULT FALSE;
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS options_below_threshold BOOLEAN DEFAULT FALSE;
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS options_below_kiosk_threshold BOOLEAN DEFAULT FALSE;
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS certificate_of_analysis_enabled BOOLEAN DEFAULT FALSE;

-- Platform metadata
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS external_product_id VARCHAR(100);
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS c_name VARCHAR(500);
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS past_c_names TEXT[];
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS latest_raw_payload JSONB;
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS created_at_platform TIMESTAMP WITH TIME ZONE;
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS updated_at_platform TIMESTAMP WITH TIME ZONE;

-- Indexes for common queries
CREATE INDEX IF NOT EXISTS idx_store_products_external_id ON store_products(external_product_id);
CREATE INDEX IF NOT EXISTS idx_store_products_visibility_lost ON store_products(visibility_lost) WHERE visibility_lost = TRUE;
CREATE INDEX IF NOT EXISTS idx_store_products_status ON store_products(status);

-- Add comment
COMMENT ON TABLE store_products IS 'Canonical product table - consolidated from dutchie_products';
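Two illustrative queries the harmonized columns enable (not part of the migration; adjust the filters to taste):

-- Products with lab data captured from the platform payload
SELECT id, name_raw, thc_content, cbd_content, terpenes
FROM store_products
WHERE terpenes IS NOT NULL
LIMIT 10;

-- Products that dropped off the menu and have not come back
SELECT id, name_raw, visibility_lost_at
FROM store_products
WHERE visibility_lost = TRUE
  AND visibility_restored_at IS NULL;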
74  backend/migrations/072_product_views.sql  Normal file
@@ -0,0 +1,74 @@
-- Migration 072: Create compatibility views for store_products and store_product_snapshots
-- These views provide backward-compatible column names for API routes

-- v_products view - aliases store_products columns to match legacy dutchie_products naming
CREATE OR REPLACE VIEW v_products AS
SELECT
  id,
  dispensary_id,
  provider_product_id as external_product_id,
  provider_product_id as dutchie_id,
  name_raw as name,
  brand_name_raw as brand_name,
  category_raw as type,
  subcategory_raw as subcategory,
  strain_type,
  thc_percent as thc,
  cbd_percent as cbd,
  stock_status,
  is_in_stock,
  stock_quantity,
  image_url,
  primary_image_url,
  images,
  effects,
  description,
  is_on_special,
  featured,
  medical_only,
  rec_only,
  external_product_id as external_id,
  provider,
  created_at,
  updated_at
FROM store_products;

-- v_product_snapshots view - aliases store_product_snapshots columns to match legacy naming
CREATE OR REPLACE VIEW v_product_snapshots AS
SELECT
  id,
  store_product_id,
  dispensary_id,
  provider,
  provider_product_id,
  crawl_run_id,
  captured_at as crawled_at,
  name_raw,
  brand_name_raw,
  category_raw,
  subcategory_raw,
  -- Convert price_rec (dollars) to rec_min_price_cents (cents)
  CASE WHEN price_rec IS NOT NULL THEN (price_rec * 100)::integer END as rec_min_price_cents,
  CASE WHEN price_rec IS NOT NULL THEN (price_rec * 100)::integer END as rec_max_price_cents,
  CASE WHEN price_rec_special IS NOT NULL THEN (price_rec_special * 100)::integer END as rec_min_special_price_cents,
  CASE WHEN price_med IS NOT NULL THEN (price_med * 100)::integer END as med_min_price_cents,
  CASE WHEN price_med IS NOT NULL THEN (price_med * 100)::integer END as med_max_price_cents,
  CASE WHEN price_med_special IS NOT NULL THEN (price_med_special * 100)::integer END as med_min_special_price_cents,
  is_on_special as special,
  discount_percent,
  is_in_stock,
  stock_quantity,
  stock_status,
  stock_quantity as total_quantity_available,
  thc_percent,
  cbd_percent,
  image_url,
  raw_data as options,
  created_at
FROM store_product_snapshots;

-- Add indexes for the views' underlying tables
CREATE INDEX IF NOT EXISTS idx_store_products_dispensary ON store_products(dispensary_id);
CREATE INDEX IF NOT EXISTS idx_store_products_stock ON store_products(stock_status);
CREATE INDEX IF NOT EXISTS idx_store_snapshots_product ON store_product_snapshots(store_product_id);
CREATE INDEX IF NOT EXISTS idx_store_snapshots_captured ON store_product_snapshots(captured_at DESC);
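The views exist so older API routes keep working: columns are renamed to the legacy names and dollar prices are converted to the cent-based fields the old code expects (a 40.00 price_rec surfaces as rec_min_price_cents = 4000). An illustrative spot check:

SELECT store_product_id, crawled_at, rec_min_price_cents, rec_min_special_price_cents
FROM v_product_snapshots
WHERE rec_min_price_cents IS NOT NULL
ORDER BY crawled_at DESC
LIMIT 10;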
12  backend/migrations/073_proxy_timezone.sql  Normal file
@@ -0,0 +1,12 @@
-- Add timezone column to proxies table for geo-consistent fingerprinting
-- This allows matching Accept-Language and other headers to proxy location

ALTER TABLE proxies
ADD COLUMN IF NOT EXISTS timezone VARCHAR(50);

-- Add timezone to failed_proxies as well
ALTER TABLE failed_proxies
ADD COLUMN IF NOT EXISTS timezone VARCHAR(50);

-- Comment explaining usage
COMMENT ON COLUMN proxies.timezone IS 'IANA timezone (e.g., America/Phoenix) for geo-consistent fingerprinting';
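A sketch of how a crawler might use the new column to pick a geo-consistent proxy; the host and port column names are assumptions for illustration only, not defined by this migration:

SELECT id, host, port, timezone
FROM proxies
WHERE timezone = 'America/Phoenix'
ORDER BY RANDOM()
LIMIT 1;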
322  backend/migrations/074_worker_task_queue.sql  Normal file
@@ -0,0 +1,322 @@
-- Migration 074: Worker Task Queue System
-- Implements role-based task queue with per-store locking and capacity tracking

-- Task queue table
CREATE TABLE IF NOT EXISTS worker_tasks (
  id SERIAL PRIMARY KEY,

  -- Task identification
  role VARCHAR(50) NOT NULL, -- store_discovery, entry_point_discovery, product_discovery, product_resync, analytics_refresh
  dispensary_id INTEGER REFERENCES dispensaries(id) ON DELETE CASCADE,
  platform VARCHAR(20), -- dutchie, jane, treez, etc.

  -- Task state
  status VARCHAR(20) NOT NULL DEFAULT 'pending',
  priority INTEGER DEFAULT 0, -- Higher = more urgent

  -- Scheduling
  scheduled_for TIMESTAMPTZ, -- For batch scheduling (e.g., every 4 hours)

  -- Ownership
  worker_id VARCHAR(100), -- Pod name or worker ID
  claimed_at TIMESTAMPTZ,
  started_at TIMESTAMPTZ,
  completed_at TIMESTAMPTZ,
  last_heartbeat_at TIMESTAMPTZ,

  -- Results
  result JSONB, -- Task output data
  error_message TEXT,
  retry_count INTEGER DEFAULT 0,
  max_retries INTEGER DEFAULT 3,

  -- Metadata
  created_at TIMESTAMPTZ DEFAULT NOW(),
  updated_at TIMESTAMPTZ DEFAULT NOW(),

  -- Constraints
  CONSTRAINT valid_status CHECK (status IN ('pending', 'claimed', 'running', 'completed', 'failed', 'stale'))
);

-- Indexes for efficient task claiming
CREATE INDEX IF NOT EXISTS idx_worker_tasks_pending
  ON worker_tasks(role, priority DESC, created_at ASC)
  WHERE status = 'pending';

CREATE INDEX IF NOT EXISTS idx_worker_tasks_claimed
  ON worker_tasks(worker_id, claimed_at)
  WHERE status = 'claimed';

CREATE INDEX IF NOT EXISTS idx_worker_tasks_running
  ON worker_tasks(worker_id, last_heartbeat_at)
  WHERE status = 'running';

CREATE INDEX IF NOT EXISTS idx_worker_tasks_dispensary
  ON worker_tasks(dispensary_id)
  WHERE dispensary_id IS NOT NULL;

CREATE INDEX IF NOT EXISTS idx_worker_tasks_scheduled
  ON worker_tasks(scheduled_for)
  WHERE status = 'pending' AND scheduled_for IS NOT NULL;

CREATE INDEX IF NOT EXISTS idx_worker_tasks_history
  ON worker_tasks(role, completed_at DESC)
  WHERE status IN ('completed', 'failed');

-- Partial unique index to prevent duplicate active tasks per store
-- Only one task can be claimed/running for a given dispensary at a time
CREATE UNIQUE INDEX IF NOT EXISTS idx_worker_tasks_unique_active_store
  ON worker_tasks(dispensary_id)
  WHERE status IN ('claimed', 'running') AND dispensary_id IS NOT NULL;

-- Worker registration table (tracks active workers)
CREATE TABLE IF NOT EXISTS worker_registry (
  id SERIAL PRIMARY KEY,
  worker_id VARCHAR(100) UNIQUE NOT NULL,
  role VARCHAR(50) NOT NULL,
  pod_name VARCHAR(100),
  hostname VARCHAR(100),
  started_at TIMESTAMPTZ DEFAULT NOW(),
  last_heartbeat_at TIMESTAMPTZ DEFAULT NOW(),
  tasks_completed INTEGER DEFAULT 0,
  tasks_failed INTEGER DEFAULT 0,
  status VARCHAR(20) DEFAULT 'active',

  CONSTRAINT valid_worker_status CHECK (status IN ('active', 'idle', 'offline'))
);

CREATE INDEX IF NOT EXISTS idx_worker_registry_role
  ON worker_registry(role, status);

CREATE INDEX IF NOT EXISTS idx_worker_registry_heartbeat
  ON worker_registry(last_heartbeat_at)
  WHERE status = 'active';

-- Task completion tracking (summarized history)
CREATE TABLE IF NOT EXISTS task_completion_log (
  id SERIAL PRIMARY KEY,
  role VARCHAR(50) NOT NULL,
  date DATE NOT NULL DEFAULT CURRENT_DATE,
  hour INTEGER NOT NULL DEFAULT EXTRACT(HOUR FROM NOW()),

  tasks_created INTEGER DEFAULT 0,
  tasks_completed INTEGER DEFAULT 0,
  tasks_failed INTEGER DEFAULT 0,

  avg_duration_sec NUMERIC(10,2),
  min_duration_sec NUMERIC(10,2),
  max_duration_sec NUMERIC(10,2),

  updated_at TIMESTAMPTZ DEFAULT NOW(),

  UNIQUE(role, date, hour)
);

-- Capacity planning view
CREATE OR REPLACE VIEW v_worker_capacity AS
SELECT
  role,
  COUNT(*) FILTER (WHERE status = 'pending') as pending_tasks,
  COUNT(*) FILTER (WHERE status = 'pending' AND (scheduled_for IS NULL OR scheduled_for <= NOW())) as ready_tasks,
  COUNT(*) FILTER (WHERE status = 'claimed') as claimed_tasks,
  COUNT(*) FILTER (WHERE status = 'running') as running_tasks,
  COUNT(*) FILTER (WHERE status = 'completed' AND completed_at > NOW() - INTERVAL '1 hour') as completed_last_hour,
  COUNT(*) FILTER (WHERE status = 'failed' AND completed_at > NOW() - INTERVAL '1 hour') as failed_last_hour,
  COUNT(DISTINCT worker_id) FILTER (WHERE status IN ('claimed', 'running')) as active_workers,
  AVG(EXTRACT(EPOCH FROM (completed_at - started_at)))
    FILTER (WHERE status = 'completed' AND completed_at > NOW() - INTERVAL '1 hour') as avg_duration_sec,
  -- Capacity planning metrics
  CASE
    WHEN COUNT(*) FILTER (WHERE status = 'completed' AND completed_at > NOW() - INTERVAL '1 hour') > 0
    THEN 3600.0 / NULLIF(AVG(EXTRACT(EPOCH FROM (completed_at - started_at)))
      FILTER (WHERE status = 'completed' AND completed_at > NOW() - INTERVAL '1 hour'), 0)
    ELSE NULL
  END as tasks_per_worker_hour,
  -- Estimated time to drain queue
  CASE
    WHEN COUNT(DISTINCT worker_id) FILTER (WHERE status IN ('claimed', 'running')) > 0
      AND COUNT(*) FILTER (WHERE status = 'completed' AND completed_at > NOW() - INTERVAL '1 hour') > 0
    THEN COUNT(*) FILTER (WHERE status = 'pending') / NULLIF(
      COUNT(DISTINCT worker_id) FILTER (WHERE status IN ('claimed', 'running')) *
      (3600.0 / NULLIF(AVG(EXTRACT(EPOCH FROM (completed_at - started_at)))
        FILTER (WHERE status = 'completed' AND completed_at > NOW() - INTERVAL '1 hour'), 0)),
      0
    )
    ELSE NULL
  END as estimated_hours_to_drain
FROM worker_tasks
GROUP BY role;

-- Task history view (for UI)
CREATE OR REPLACE VIEW v_task_history AS
SELECT
  t.id,
  t.role,
  t.dispensary_id,
  d.name as dispensary_name,
  t.platform,
  t.status,
  t.priority,
  t.worker_id,
  t.scheduled_for,
  t.claimed_at,
  t.started_at,
  t.completed_at,
  t.error_message,
  t.retry_count,
  t.created_at,
  EXTRACT(EPOCH FROM (t.completed_at - t.started_at)) as duration_sec
FROM worker_tasks t
LEFT JOIN dispensaries d ON d.id = t.dispensary_id
ORDER BY t.created_at DESC;

-- Function to claim a task atomically
CREATE OR REPLACE FUNCTION claim_task(
  p_role VARCHAR(50),
  p_worker_id VARCHAR(100)
) RETURNS worker_tasks AS $$
DECLARE
  claimed_task worker_tasks;
BEGIN
  UPDATE worker_tasks
  SET
    status = 'claimed',
    worker_id = p_worker_id,
    claimed_at = NOW(),
    updated_at = NOW()
  WHERE id = (
    SELECT id FROM worker_tasks
    WHERE role = p_role
      AND status = 'pending'
      AND (scheduled_for IS NULL OR scheduled_for <= NOW())
      -- Exclude stores that already have an active task
      AND (dispensary_id IS NULL OR dispensary_id NOT IN (
        SELECT dispensary_id FROM worker_tasks
        WHERE status IN ('claimed', 'running')
          AND dispensary_id IS NOT NULL
      ))
    ORDER BY priority DESC, created_at ASC
    LIMIT 1
    FOR UPDATE SKIP LOCKED
  )
  RETURNING * INTO claimed_task;

  RETURN claimed_task;
END;
$$ LANGUAGE plpgsql;

-- Function to mark stale tasks (workers that died)
CREATE OR REPLACE FUNCTION recover_stale_tasks(
  stale_threshold_minutes INTEGER DEFAULT 10
) RETURNS INTEGER AS $$
DECLARE
  recovered_count INTEGER;
BEGIN
  WITH stale AS (
    UPDATE worker_tasks
    SET
      status = 'pending',
      worker_id = NULL,
      claimed_at = NULL,
      started_at = NULL,
      retry_count = retry_count + 1,
      updated_at = NOW()
    WHERE status IN ('claimed', 'running')
      AND last_heartbeat_at < NOW() - (stale_threshold_minutes || ' minutes')::INTERVAL
      AND retry_count < max_retries
    RETURNING id
  )
  SELECT COUNT(*) INTO recovered_count FROM stale;

  -- Mark tasks that exceeded retries as failed
  UPDATE worker_tasks
  SET
    status = 'failed',
    error_message = 'Exceeded max retries after worker failures',
    completed_at = NOW(),
    updated_at = NOW()
  WHERE status IN ('claimed', 'running')
    AND last_heartbeat_at < NOW() - (stale_threshold_minutes || ' minutes')::INTERVAL
    AND retry_count >= max_retries;

  RETURN recovered_count;
END;
$$ LANGUAGE plpgsql;
-- Function to generate daily resync tasks
CREATE OR REPLACE FUNCTION generate_resync_tasks(
  p_batches_per_day INTEGER DEFAULT 6, -- Every 4 hours
  p_date DATE DEFAULT CURRENT_DATE
) RETURNS INTEGER AS $$
DECLARE
  store_count INTEGER;
  stores_per_batch INTEGER;
  batch_num INTEGER;
  batch_created INTEGER;
  scheduled_time TIMESTAMPTZ;
  created_count INTEGER := 0;
BEGIN
  -- Count active stores that need resync
  SELECT COUNT(*) INTO store_count
  FROM dispensaries
  WHERE crawl_enabled = true
    AND menu_type = 'dutchie'
    AND platform_dispensary_id IS NOT NULL;

  IF store_count = 0 THEN
    RETURN 0;
  END IF;

  stores_per_batch := CEIL(store_count::NUMERIC / p_batches_per_day);

  FOR batch_num IN 0..(p_batches_per_day - 1) LOOP
    -- Spread batches evenly across the day (4 hours apart for the default of 6 batches)
    scheduled_time := p_date + (batch_num * 24.0 / p_batches_per_day) * INTERVAL '1 hour';

    INSERT INTO worker_tasks (role, dispensary_id, platform, scheduled_for, priority)
    SELECT
      'product_resync',
      d.id,
      'dutchie',
      scheduled_time,
      0
    FROM (
      SELECT id, ROW_NUMBER() OVER (ORDER BY id) as rn
      FROM dispensaries
      WHERE crawl_enabled = true
        AND menu_type = 'dutchie'
        AND platform_dispensary_id IS NOT NULL
    ) d
    WHERE d.rn > (batch_num * stores_per_batch)
      AND d.rn <= ((batch_num + 1) * stores_per_batch)
    ON CONFLICT DO NOTHING;

    -- GET DIAGNOSTICS only accepts a plain variable, so accumulate in two steps
    GET DIAGNOSTICS batch_created = ROW_COUNT;
    created_count := created_count + batch_created;
  END LOOP;

  RETURN created_count;
END;
$$ LANGUAGE plpgsql;
-- Trigger to update timestamp
CREATE OR REPLACE FUNCTION update_worker_tasks_timestamp()
RETURNS TRIGGER AS $$
BEGIN
  NEW.updated_at = NOW();
  RETURN NEW;
END;
$$ LANGUAGE plpgsql;

DROP TRIGGER IF EXISTS worker_tasks_updated_at ON worker_tasks;
CREATE TRIGGER worker_tasks_updated_at
  BEFORE UPDATE ON worker_tasks
  FOR EACH ROW
  EXECUTE FUNCTION update_worker_tasks_timestamp();

-- Comments
COMMENT ON TABLE worker_tasks IS 'Central task queue for all worker roles';
COMMENT ON TABLE worker_registry IS 'Registry of active workers and their stats';
COMMENT ON TABLE task_completion_log IS 'Hourly aggregated task completion metrics';
COMMENT ON VIEW v_worker_capacity IS 'Real-time capacity planning metrics per role';
COMMENT ON VIEW v_task_history IS 'Task history with dispensary details for UI';
COMMENT ON FUNCTION claim_task IS 'Atomically claim a task for a worker, respecting per-store locking';
COMMENT ON FUNCTION recover_stale_tasks IS 'Release tasks from dead workers back to pending';
COMMENT ON FUNCTION generate_resync_tasks IS 'Generate daily product resync tasks in batches';
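Sketch of the task lifecycle these objects support, from the SQL side only; the task id (42) and worker id are placeholders supplied by the application:

-- Worker claims the next eligible task for its role
SELECT * FROM claim_task('product_resync', 'pod-abc123');

-- Worker marks the task running and heartbeats while it works
UPDATE worker_tasks
SET status = 'running', started_at = NOW(), last_heartbeat_at = NOW()
WHERE id = 42 AND worker_id = 'pod-abc123';

UPDATE worker_tasks SET last_heartbeat_at = NOW() WHERE id = 42;

-- Worker records the result
UPDATE worker_tasks
SET status = 'completed', completed_at = NOW(), result = '{"products": 120}'::jsonb
WHERE id = 42;

-- A periodic job requeues tasks from dead workers; ops watch capacity
SELECT recover_stale_tasks(10);
SELECT * FROM v_worker_capacity;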
13  backend/migrations/075_consecutive_misses.sql  Normal file
@@ -0,0 +1,13 @@
-- Migration 075: Add consecutive_misses column to store_products
-- Used to track how many consecutive crawls a product has been missing from the feed
-- After 3 consecutive misses, product is marked as OOS

ALTER TABLE store_products
ADD COLUMN IF NOT EXISTS consecutive_misses INTEGER NOT NULL DEFAULT 0;

-- Index for finding products that need OOS check
CREATE INDEX IF NOT EXISTS idx_store_products_consecutive_misses
  ON store_products (dispensary_id, consecutive_misses)
  WHERE consecutive_misses > 0;

COMMENT ON COLUMN store_products.consecutive_misses IS 'Number of consecutive crawls where product was not in feed. Reset to 0 when seen. At 3, mark OOS.';
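Sketch of the crawl-side bookkeeping this column implies; dispensary 123 and the id lists are placeholders for values the crawler supplies:

-- Products not seen in this crawl accumulate misses
UPDATE store_products
SET consecutive_misses = consecutive_misses + 1
WHERE dispensary_id = 123
  AND id <> ALL (ARRAY[101, 102, 103]);

-- Products that were seen reset to zero
UPDATE store_products
SET consecutive_misses = 0
WHERE dispensary_id = 123
  AND id = ANY (ARRAY[101, 102, 103]);

-- At three consecutive misses, treat the product as out of stock
UPDATE store_products
SET is_in_stock = FALSE
WHERE dispensary_id = 123
  AND consecutive_misses >= 3;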
71  backend/migrations/076_visitor_analytics.sql  Normal file
@@ -0,0 +1,71 @@
-- Visitor location analytics for Findagram
-- Tracks visitor locations to understand popular areas

CREATE TABLE IF NOT EXISTS visitor_locations (
  id SERIAL PRIMARY KEY,

  -- Location data (from IP lookup)
  ip_hash VARCHAR(64), -- Hashed IP for privacy (SHA256)
  city VARCHAR(100),
  state VARCHAR(100),
  state_code VARCHAR(10),
  country VARCHAR(100),
  country_code VARCHAR(10),
  latitude DECIMAL(10, 7),
  longitude DECIMAL(10, 7),

  -- Visit metadata
  domain VARCHAR(50) NOT NULL, -- 'findagram.co', 'findadispo.com', etc.
  page_path VARCHAR(255), -- '/products', '/dispensaries/123', etc.
  referrer VARCHAR(500),
  user_agent VARCHAR(500),

  -- Session tracking
  session_id VARCHAR(64), -- For grouping page views in a session

  -- Timestamps
  created_at TIMESTAMPTZ DEFAULT NOW()
);

-- Indexes for analytics queries
CREATE INDEX IF NOT EXISTS idx_visitor_locations_domain ON visitor_locations(domain);
CREATE INDEX IF NOT EXISTS idx_visitor_locations_city_state ON visitor_locations(city, state_code);
CREATE INDEX IF NOT EXISTS idx_visitor_locations_created_at ON visitor_locations(created_at);
CREATE INDEX IF NOT EXISTS idx_visitor_locations_session ON visitor_locations(session_id);

-- Aggregated daily stats (materialized for performance)
CREATE TABLE IF NOT EXISTS visitor_location_stats (
  id SERIAL PRIMARY KEY,
  date DATE NOT NULL,
  domain VARCHAR(50) NOT NULL,
  city VARCHAR(100),
  state VARCHAR(100),
  state_code VARCHAR(10),
  country_code VARCHAR(10),

  -- Metrics
  visit_count INTEGER DEFAULT 0,
  unique_sessions INTEGER DEFAULT 0,

  UNIQUE(date, domain, city, state_code, country_code)
);

CREATE INDEX IF NOT EXISTS idx_visitor_stats_date ON visitor_location_stats(date);
CREATE INDEX IF NOT EXISTS idx_visitor_stats_domain ON visitor_location_stats(domain);
CREATE INDEX IF NOT EXISTS idx_visitor_stats_state ON visitor_location_stats(state_code);

-- View for easy querying of top locations
CREATE OR REPLACE VIEW v_top_visitor_locations AS
SELECT
  domain,
  city,
  state,
  state_code,
  country_code,
  COUNT(*) as total_visits,
  COUNT(DISTINCT session_id) as unique_sessions,
  MAX(created_at) as last_visit
FROM visitor_locations
WHERE created_at > NOW() - INTERVAL '30 days'
GROUP BY domain, city, state, state_code, country_code
ORDER BY total_visits DESC;
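The stats table implies a periodic rollup job that the migration itself does not define; a sketch of a nightly upsert for the previous day:

INSERT INTO visitor_location_stats (date, domain, city, state, state_code, country_code, visit_count, unique_sessions)
SELECT
  created_at::date,
  domain,
  city,
  MAX(state),
  state_code,
  country_code,
  COUNT(*),
  COUNT(DISTINCT session_id)
FROM visitor_locations
WHERE created_at >= CURRENT_DATE - 1
  AND created_at < CURRENT_DATE
GROUP BY created_at::date, domain, city, state_code, country_code
ON CONFLICT (date, domain, city, state_code, country_code)
DO UPDATE SET visit_count = EXCLUDED.visit_count, unique_sessions = EXCLUDED.unique_sessions;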
141  backend/migrations/076_worker_registry.sql  Normal file
@@ -0,0 +1,141 @@
-- Migration 076: Worker Registry for Dynamic Workers
-- Workers register on startup, receive a friendly name, and report heartbeats

-- Name pool for workers (expandable, no hardcoding)
CREATE TABLE IF NOT EXISTS worker_name_pool (
  id SERIAL PRIMARY KEY,
  name VARCHAR(50) UNIQUE NOT NULL,
  in_use BOOLEAN DEFAULT FALSE,
  assigned_to VARCHAR(100), -- worker_id
  assigned_at TIMESTAMPTZ,
  created_at TIMESTAMPTZ DEFAULT NOW()
);

-- Seed with initial names (can add more via API)
INSERT INTO worker_name_pool (name) VALUES
  ('Alice'), ('Bella'), ('Clara'), ('Diana'), ('Elena'),
  ('Fiona'), ('Grace'), ('Hazel'), ('Iris'), ('Julia'),
  ('Katie'), ('Luna'), ('Mia'), ('Nora'), ('Olive'),
  ('Pearl'), ('Quinn'), ('Rosa'), ('Sara'), ('Tara'),
  ('Uma'), ('Vera'), ('Wendy'), ('Xena'), ('Yuki'), ('Zara'),
  ('Amber'), ('Blake'), ('Coral'), ('Dawn'), ('Echo'),
  ('Fleur'), ('Gem'), ('Haven'), ('Ivy'), ('Jade'),
  ('Kira'), ('Lotus'), ('Maple'), ('Nova'), ('Onyx'),
  ('Pixel'), ('Quest'), ('Raven'), ('Sage'), ('Terra'),
  ('Unity'), ('Violet'), ('Willow'), ('Xylo'), ('Yara'), ('Zen')
ON CONFLICT (name) DO NOTHING;

-- Worker registry - tracks active workers
CREATE TABLE IF NOT EXISTS worker_registry (
  id SERIAL PRIMARY KEY,
  worker_id VARCHAR(100) UNIQUE NOT NULL, -- e.g., "pod-abc123" or uuid
  friendly_name VARCHAR(50), -- assigned from pool
  role VARCHAR(50) NOT NULL, -- task role
  pod_name VARCHAR(100), -- k8s pod name
  hostname VARCHAR(100), -- machine hostname
  ip_address VARCHAR(50), -- worker IP
  status VARCHAR(20) DEFAULT 'starting', -- starting, active, idle, offline, terminated
  started_at TIMESTAMPTZ DEFAULT NOW(),
  last_heartbeat_at TIMESTAMPTZ DEFAULT NOW(),
  last_task_at TIMESTAMPTZ,
  tasks_completed INTEGER DEFAULT 0,
  tasks_failed INTEGER DEFAULT 0,
  current_task_id INTEGER,
  metadata JSONB DEFAULT '{}',
  created_at TIMESTAMPTZ DEFAULT NOW(),
  updated_at TIMESTAMPTZ DEFAULT NOW()
);

-- Indexes for worker registry
CREATE INDEX IF NOT EXISTS idx_worker_registry_status ON worker_registry(status);
CREATE INDEX IF NOT EXISTS idx_worker_registry_role ON worker_registry(role);
CREATE INDEX IF NOT EXISTS idx_worker_registry_heartbeat ON worker_registry(last_heartbeat_at);

-- Function to assign a name to a new worker
CREATE OR REPLACE FUNCTION assign_worker_name(p_worker_id VARCHAR(100))
RETURNS VARCHAR(50) AS $$
DECLARE
  v_name VARCHAR(50);
BEGIN
  -- Try to get an unused name
  UPDATE worker_name_pool
  SET in_use = TRUE, assigned_to = p_worker_id, assigned_at = NOW()
  WHERE id = (
    SELECT id FROM worker_name_pool
    WHERE in_use = FALSE
    ORDER BY RANDOM()
    LIMIT 1
    FOR UPDATE SKIP LOCKED
  )
  RETURNING name INTO v_name;

  -- If no names available, generate one
  IF v_name IS NULL THEN
    v_name := 'Worker-' || SUBSTRING(p_worker_id FROM 1 FOR 8);
  END IF;

  RETURN v_name;
END;
$$ LANGUAGE plpgsql;

-- Function to release a worker's name back to the pool
CREATE OR REPLACE FUNCTION release_worker_name(p_worker_id VARCHAR(100))
RETURNS VOID AS $$
BEGIN
  UPDATE worker_name_pool
  SET in_use = FALSE, assigned_to = NULL, assigned_at = NULL
  WHERE assigned_to = p_worker_id;
END;
$$ LANGUAGE plpgsql;
-- Function to mark stale workers as offline
CREATE OR REPLACE FUNCTION mark_stale_workers(stale_threshold_minutes INTEGER DEFAULT 5)
RETURNS INTEGER AS $$
DECLARE
  v_count INTEGER;
BEGIN
  UPDATE worker_registry
  SET status = 'offline', updated_at = NOW()
  WHERE status IN ('active', 'idle', 'starting')
    AND last_heartbeat_at < NOW() - (stale_threshold_minutes || ' minutes')::INTERVAL;

  -- UPDATE cannot RETURNING COUNT(*); read the affected-row count instead
  GET DIAGNOSTICS v_count = ROW_COUNT;

  -- Release names from offline workers
  PERFORM release_worker_name(worker_id)
  FROM worker_registry
  WHERE status = 'offline'
    AND last_heartbeat_at < NOW() - INTERVAL '30 minutes';

  RETURN COALESCE(v_count, 0);
END;
$$ LANGUAGE plpgsql;
-- View for dashboard
CREATE OR REPLACE VIEW v_active_workers AS
SELECT
  wr.id,
  wr.worker_id,
  wr.friendly_name,
  wr.role,
  wr.status,
  wr.pod_name,
  wr.hostname,
  wr.started_at,
  wr.last_heartbeat_at,
  wr.last_task_at,
  wr.tasks_completed,
  wr.tasks_failed,
  wr.current_task_id,
  EXTRACT(EPOCH FROM (NOW() - wr.last_heartbeat_at)) as seconds_since_heartbeat,
  CASE
    WHEN wr.status = 'offline' THEN 'offline'
    WHEN wr.last_heartbeat_at < NOW() - INTERVAL '2 minutes' THEN 'stale'
    WHEN wr.current_task_id IS NOT NULL THEN 'busy'
    ELSE 'ready'
  END as health_status
FROM worker_registry wr
WHERE wr.status != 'terminated'
ORDER BY wr.status = 'active' DESC, wr.last_heartbeat_at DESC;

COMMENT ON TABLE worker_registry IS 'Tracks all workers that have registered with the system';
COMMENT ON TABLE worker_name_pool IS 'Pool of friendly names for workers - expandable via API';
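Sketch of the registration flow a worker would run on startup; the worker id, pod name and hostname are placeholders supplied by the application:

-- Register (or re-activate) and pick up a friendly name from the pool
INSERT INTO worker_registry (worker_id, friendly_name, role, pod_name, hostname, status)
VALUES ('pod-abc123', assign_worker_name('pod-abc123'), 'product_resync', 'worker-7f9c', 'node-02', 'active')
ON CONFLICT (worker_id) DO UPDATE SET status = 'active', last_heartbeat_at = NOW();

-- Periodic heartbeat from the worker, periodic cleanup from a scheduler
UPDATE worker_registry SET last_heartbeat_at = NOW() WHERE worker_id = 'pod-abc123';
SELECT mark_stale_workers(5);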
35  backend/migrations/077_click_events_location.sql  Normal file
@@ -0,0 +1,35 @@
-- Migration: Add visitor location and dispensary name to click events
-- Captures where visitors are clicking from and which dispensary

-- Add visitor location columns
ALTER TABLE product_click_events
ADD COLUMN IF NOT EXISTS visitor_city VARCHAR(100);

ALTER TABLE product_click_events
ADD COLUMN IF NOT EXISTS visitor_state VARCHAR(10);

ALTER TABLE product_click_events
ADD COLUMN IF NOT EXISTS visitor_lat DECIMAL(10, 7);

ALTER TABLE product_click_events
ADD COLUMN IF NOT EXISTS visitor_lng DECIMAL(10, 7);

-- Add dispensary name for easier reporting
ALTER TABLE product_click_events
ADD COLUMN IF NOT EXISTS dispensary_name VARCHAR(255);

-- Create index for location-based analytics
CREATE INDEX IF NOT EXISTS idx_product_click_events_visitor_state
  ON product_click_events(visitor_state)
  WHERE visitor_state IS NOT NULL;

CREATE INDEX IF NOT EXISTS idx_product_click_events_visitor_city
  ON product_click_events(visitor_city)
  WHERE visitor_city IS NOT NULL;

-- Add comments
COMMENT ON COLUMN product_click_events.visitor_city IS 'City where the visitor is located (from IP geolocation)';
COMMENT ON COLUMN product_click_events.visitor_state IS 'State where the visitor is located (from IP geolocation)';
COMMENT ON COLUMN product_click_events.visitor_lat IS 'Visitor latitude (from IP geolocation)';
COMMENT ON COLUMN product_click_events.visitor_lng IS 'Visitor longitude (from IP geolocation)';
COMMENT ON COLUMN product_click_events.dispensary_name IS 'Name of the dispensary (denormalized for easier reporting)';
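An illustrative report these columns enable: clicks by visitor state over the last week. The created_at column on product_click_events is assumed to exist already; it is not added here:

SELECT visitor_state, dispensary_name, COUNT(*) AS clicks
FROM product_click_events
WHERE visitor_state IS NOT NULL
  AND created_at > NOW() - INTERVAL '7 days'
GROUP BY visitor_state, dispensary_name
ORDER BY clicks DESC
LIMIT 25;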
19  backend/node_modules/.package-lock.json  generated  vendored
@@ -1026,6 +1026,17 @@
       "url": "https://github.com/sponsors/fb55"
     }
   },
+  "node_modules/csv-parser": {
+    "version": "3.2.0",
+    "resolved": "https://registry.npmjs.org/csv-parser/-/csv-parser-3.2.0.tgz",
+    "integrity": "sha512-fgKbp+AJbn1h2dcAHKIdKNSSjfp43BZZykXsCjzALjKy80VXQNHPFJ6T9Afwdzoj24aMkq8GwDS7KGcDPpejrA==",
+    "bin": {
+      "csv-parser": "bin/csv-parser"
+    },
+    "engines": {
+      "node": ">= 10"
+    }
+  },
   "node_modules/data-uri-to-buffer": {
     "version": "6.0.2",
     "resolved": "https://registry.npmjs.org/data-uri-to-buffer/-/data-uri-to-buffer-6.0.2.tgz",
@@ -2235,6 +2246,14 @@
       "node": ">= 12"
     }
   },
+  "node_modules/ip2location-nodejs": {
+    "version": "9.7.0",
+    "resolved": "https://registry.npmjs.org/ip2location-nodejs/-/ip2location-nodejs-9.7.0.tgz",
+    "integrity": "sha512-eQ4T5TXm1cx0+pQcRycPiuaiRuoDEMd9O89Be7Ugk555qi9UY9enXSznkkqr3kQRyUaXx7zj5dORC5LGTPOttA==",
+    "dependencies": {
+      "csv-parser": "^3.0.0"
+    }
+  },
   "node_modules/ipaddr.js": {
     "version": "2.2.0",
     "resolved": "https://registry.npmjs.org/ipaddr.js/-/ipaddr.js-2.2.0.tgz",
20  backend/package-lock.json  generated
@@ -21,6 +21,7 @@
         "helmet": "^7.1.0",
         "https-proxy-agent": "^7.0.2",
         "ioredis": "^5.8.2",
+        "ip2location-nodejs": "^9.7.0",
         "ipaddr.js": "^2.2.0",
         "jsonwebtoken": "^9.0.2",
         "minio": "^7.1.3",
@@ -1531,6 +1532,17 @@
       "url": "https://github.com/sponsors/fb55"
     }
   },
+  "node_modules/csv-parser": {
+    "version": "3.2.0",
+    "resolved": "https://registry.npmjs.org/csv-parser/-/csv-parser-3.2.0.tgz",
+    "integrity": "sha512-fgKbp+AJbn1h2dcAHKIdKNSSjfp43BZZykXsCjzALjKy80VXQNHPFJ6T9Afwdzoj24aMkq8GwDS7KGcDPpejrA==",
+    "bin": {
+      "csv-parser": "bin/csv-parser"
+    },
+    "engines": {
+      "node": ">= 10"
+    }
+  },
   "node_modules/data-uri-to-buffer": {
     "version": "6.0.2",
     "resolved": "https://registry.npmjs.org/data-uri-to-buffer/-/data-uri-to-buffer-6.0.2.tgz",
@@ -2754,6 +2766,14 @@
       "node": ">= 12"
     }
   },
+  "node_modules/ip2location-nodejs": {
+    "version": "9.7.0",
+    "resolved": "https://registry.npmjs.org/ip2location-nodejs/-/ip2location-nodejs-9.7.0.tgz",
+    "integrity": "sha512-eQ4T5TXm1cx0+pQcRycPiuaiRuoDEMd9O89Be7Ugk555qi9UY9enXSznkkqr3kQRyUaXx7zj5dORC5LGTPOttA==",
+    "dependencies": {
+      "csv-parser": "^3.0.0"
+    }
+  },
   "node_modules/ipaddr.js": {
     "version": "2.2.0",
     "resolved": "https://registry.npmjs.org/ipaddr.js/-/ipaddr.js-2.2.0.tgz",
@@ -35,6 +35,7 @@
     "helmet": "^7.1.0",
     "https-proxy-agent": "^7.0.2",
     "ioredis": "^5.8.2",
+    "ip2location-nodejs": "^9.7.0",
     "ipaddr.js": "^2.2.0",
     "jsonwebtoken": "^9.0.2",
     "minio": "^7.1.3",
BIN  backend/public/downloads/cannaiq-menus-1.5.4.zip  Normal file
Binary file not shown.
65  backend/scripts/download-ip2location.sh  Executable file
@@ -0,0 +1,65 @@
#!/bin/bash
# Download IP2Location LITE DB3 (City-level) database
# Free for commercial use with attribution
# https://lite.ip2location.com/database/db3-ip-country-region-city

set -e

DATA_DIR="${1:-./data/ip2location}"
DB_FILE="IP2LOCATION-LITE-DB3.BIN"

mkdir -p "$DATA_DIR"
cd "$DATA_DIR"

echo "Downloading IP2Location LITE DB3 database..."

# IP2Location LITE DB3 - includes city, region, country, lat/lng
# You need to register at https://lite.ip2location.com/ to get a download token
# Then set IP2LOCATION_TOKEN environment variable

if [ -z "$IP2LOCATION_TOKEN" ]; then
  echo ""
  echo "ERROR: IP2LOCATION_TOKEN not set"
  echo ""
  echo "To download the database:"
  echo "1. Register free at https://lite.ip2location.com/"
  echo "2. Get your download token from the dashboard"
  echo "3. Run: IP2LOCATION_TOKEN=your_token ./scripts/download-ip2location.sh"
  echo ""
  exit 1
fi

# Download DB3.LITE (IPv4 + City)
DOWNLOAD_URL="https://www.ip2location.com/download/?token=${IP2LOCATION_TOKEN}&file=DB3LITEBIN"

echo "Downloading from IP2Location..."
curl -L -o ip2location.zip "$DOWNLOAD_URL"

echo "Extracting..."
unzip -o ip2location.zip

# Rename to standard name
if [ -f "IP2LOCATION-LITE-DB3.BIN" ]; then
  echo "Database ready: $DATA_DIR/IP2LOCATION-LITE-DB3.BIN"
elif [ -f "IP-COUNTRY-REGION-CITY.BIN" ]; then
  mv "IP-COUNTRY-REGION-CITY.BIN" "$DB_FILE"
  echo "Database ready: $DATA_DIR/$DB_FILE"
else
  # Find whatever BIN file was extracted
  BIN_FILE=$(ls *.BIN 2>/dev/null | head -1)
  if [ -n "$BIN_FILE" ]; then
    mv "$BIN_FILE" "$DB_FILE"
    echo "Database ready: $DATA_DIR/$DB_FILE"
  else
    echo "ERROR: No BIN file found in archive"
    ls -la
    exit 1
  fi
fi

# Cleanup
rm -f ip2location.zip *.txt LICENSE* README*

echo ""
echo "Done! Database saved to: $DATA_DIR/$DB_FILE"
echo "Update monthly by re-running this script."
@@ -29,6 +29,11 @@ const TRUSTED_ORIGINS = [
   'http://localhost:5173',
 ];
+
+// Pattern-based trusted origins (wildcards)
+const TRUSTED_ORIGIN_PATTERNS = [
+  /^https:\/\/.*\.cannabrands\.app$/, // *.cannabrands.app
+];
 
 // Trusted IPs for internal pod-to-pod communication
 const TRUSTED_IPS = [
   '127.0.0.1',
@@ -42,8 +47,16 @@ const TRUSTED_IPS = [
 function isTrustedRequest(req: Request): boolean {
   // Check origin header
   const origin = req.headers.origin;
-  if (origin && TRUSTED_ORIGINS.includes(origin)) {
-    return true;
+  if (origin) {
+    if (TRUSTED_ORIGINS.includes(origin)) {
+      return true;
+    }
+    // Check pattern-based origins (wildcards like *.cannabrands.app)
+    for (const pattern of TRUSTED_ORIGIN_PATTERNS) {
+      if (pattern.test(origin)) {
+        return true;
+      }
+    }
   }
 
   // Check referer header (for same-origin requests without CORS)
@@ -54,6 +67,18 @@ function isTrustedRequest(req: Request): boolean {
         return true;
       }
     }
+    // Check pattern-based referers
+    try {
+      const refererUrl = new URL(referer);
+      const refererOrigin = refererUrl.origin;
+      for (const pattern of TRUSTED_ORIGIN_PATTERNS) {
+        if (pattern.test(refererOrigin)) {
+          return true;
+        }
+      }
+    } catch {
+      // Invalid referer URL, skip
+    }
   }
 
   // Check IP for internal requests (pod-to-pod, localhost)
141  backend/src/db/auto-migrate.ts  Normal file
@@ -0,0 +1,141 @@
/**
 * Auto-Migration System
 *
 * Runs SQL migration files from the migrations/ folder automatically on server startup.
 * Uses a schema_migrations table to track which migrations have been applied.
 *
 * Safe to run multiple times - only applies new migrations.
 */

import { Pool } from 'pg';
import fs from 'fs';
import path from 'path';

const MIGRATIONS_DIR = path.join(__dirname, '../../migrations');

/**
 * Ensure schema_migrations table exists
 */
async function ensureMigrationsTable(pool: Pool): Promise<void> {
  await pool.query(`
    CREATE TABLE IF NOT EXISTS schema_migrations (
      id SERIAL PRIMARY KEY,
      name VARCHAR(255) UNIQUE NOT NULL,
      applied_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
    )
  `);
}

/**
 * Get list of already-applied migrations
 */
async function getAppliedMigrations(pool: Pool): Promise<Set<string>> {
  const result = await pool.query('SELECT name FROM schema_migrations');
  return new Set(result.rows.map(row => row.name));
}

/**
 * Get list of migration files from disk
 */
function getMigrationFiles(): string[] {
  if (!fs.existsSync(MIGRATIONS_DIR)) {
    console.log('[AutoMigrate] No migrations directory found');
    return [];
  }

  return fs.readdirSync(MIGRATIONS_DIR)
    .filter(f => f.endsWith('.sql'))
    .sort(); // Sort alphabetically (001_, 002_, etc.)
}

/**
 * Run a single migration file
 */
async function runMigration(pool: Pool, filename: string): Promise<void> {
  const filepath = path.join(MIGRATIONS_DIR, filename);
  const sql = fs.readFileSync(filepath, 'utf8');

  const client = await pool.connect();
  try {
    await client.query('BEGIN');

    // Run the migration SQL
    await client.query(sql);

    // Record that this migration was applied
    await client.query(
      'INSERT INTO schema_migrations (name) VALUES ($1) ON CONFLICT (name) DO NOTHING',
      [filename]
    );

    await client.query('COMMIT');
    console.log(`[AutoMigrate] ✓ Applied: ${filename}`);
  } catch (error: any) {
    await client.query('ROLLBACK');
    console.error(`[AutoMigrate] ✗ Failed: ${filename}`);
    throw error;
  } finally {
    client.release();
  }
}

/**
 * Run all pending migrations
 *
 * @param pool - Database connection pool
 * @returns Number of migrations applied
 */
export async function runAutoMigrations(pool: Pool): Promise<number> {
  console.log('[AutoMigrate] Checking for pending migrations...');

  try {
    // Ensure migrations table exists
    await ensureMigrationsTable(pool);

    // Get applied and available migrations
    const applied = await getAppliedMigrations(pool);
    const available = getMigrationFiles();

    // Find pending migrations
    const pending = available.filter(f => !applied.has(f));

    if (pending.length === 0) {
      console.log('[AutoMigrate] No pending migrations');
      return 0;
    }

    console.log(`[AutoMigrate] Found ${pending.length} pending migrations`);

    // Run each pending migration in order
    for (const filename of pending) {
      await runMigration(pool, filename);
    }

    console.log(`[AutoMigrate] Successfully applied ${pending.length} migrations`);
    return pending.length;

  } catch (error: any) {
    console.error('[AutoMigrate] Migration failed:', error.message);
    // Don't crash the server - log and continue
    // The specific failing migration will have been rolled back
    return -1;
  }
}

/**
 * Check migration status without running anything
 */
export async function checkMigrationStatus(pool: Pool): Promise<{
  applied: string[];
  pending: string[];
}> {
  await ensureMigrationsTable(pool);

  const applied = await getAppliedMigrations(pool);
  const available = getMigrationFiles();

  return {
    applied: available.filter(f => applied.has(f)),
    pending: available.filter(f => !applied.has(f)),
  };
}
@@ -372,6 +372,51 @@ async function runMigrations() {
       ON CONFLICT (key) DO NOTHING;
     `);
+
+    // SEO Pages table
+    await client.query(`
+      CREATE TABLE IF NOT EXISTS seo_pages (
+        id SERIAL PRIMARY KEY,
+        type VARCHAR(50) NOT NULL,
+        slug VARCHAR(255) NOT NULL UNIQUE,
+        page_key VARCHAR(255) NOT NULL,
+        primary_keyword VARCHAR(255),
+        status VARCHAR(50) DEFAULT 'pending_generation',
+        data_source VARCHAR(100),
+        meta_title VARCHAR(255),
+        meta_description TEXT,
+        last_generated_at TIMESTAMPTZ,
+        last_reviewed_at TIMESTAMPTZ,
+        created_at TIMESTAMPTZ DEFAULT NOW(),
+        updated_at TIMESTAMPTZ DEFAULT NOW()
+      );
+      CREATE INDEX IF NOT EXISTS idx_seo_pages_type ON seo_pages(type);
+      CREATE INDEX IF NOT EXISTS idx_seo_pages_status ON seo_pages(status);
+      CREATE INDEX IF NOT EXISTS idx_seo_pages_slug ON seo_pages(slug);
+    `);
+
+    // SEO Page Contents table
+    await client.query(`
+      CREATE TABLE IF NOT EXISTS seo_page_contents (
+        id SERIAL PRIMARY KEY,
+        page_id INTEGER NOT NULL REFERENCES seo_pages(id) ON DELETE CASCADE,
+        version INTEGER DEFAULT 1,
+        blocks JSONB NOT NULL DEFAULT '[]',
+        meta JSONB NOT NULL DEFAULT '{}',
+        meta_title VARCHAR(255),
+        meta_description TEXT,
+        h1 VARCHAR(255),
+        canonical_url TEXT,
+        og_title VARCHAR(255),
+        og_description TEXT,
+        og_image_url TEXT,
+        generated_by VARCHAR(50) DEFAULT 'claude',
+        created_at TIMESTAMPTZ DEFAULT NOW(),
+        updated_at TIMESTAMPTZ DEFAULT NOW(),
+        UNIQUE(page_id, version)
+      );
+      CREATE INDEX IF NOT EXISTS idx_seo_page_contents_page ON seo_page_contents(page_id);
+    `);
+
     await client.query('COMMIT');
     console.log('✅ Migrations completed successfully');
   } catch (error) {
200  backend/src/db/run-migrations.ts  Normal file
@@ -0,0 +1,200 @@
#!/usr/bin/env npx tsx
/**
 * Database Migration Runner
 *
 * Runs SQL migrations from backend/migrations/*.sql in order.
 * Tracks applied migrations in schema_migrations table.
 *
 * Usage:
 *   npx tsx src/db/run-migrations.ts
 *
 * Environment:
 *   DATABASE_URL or CANNAIQ_DB_* variables
 */

import { Pool } from 'pg';
import * as fs from 'fs/promises';
import * as path from 'path';
import dotenv from 'dotenv';

dotenv.config();

function getConnectionString(): string {
  if (process.env.DATABASE_URL) {
    return process.env.DATABASE_URL;
  }
  if (process.env.CANNAIQ_DB_URL) {
    return process.env.CANNAIQ_DB_URL;
  }

  const host = process.env.CANNAIQ_DB_HOST || 'localhost';
  const port = process.env.CANNAIQ_DB_PORT || '54320';
  const name = process.env.CANNAIQ_DB_NAME || 'dutchie_menus';
  const user = process.env.CANNAIQ_DB_USER || 'dutchie';
  const pass = process.env.CANNAIQ_DB_PASS || 'dutchie_local_pass';

  return `postgresql://${user}:${pass}@${host}:${port}/${name}`;
}

interface MigrationFile {
  filename: string;
  number: number;
  path: string;
}

async function getMigrationFiles(migrationsDir: string): Promise<MigrationFile[]> {
  const files = await fs.readdir(migrationsDir);

  const migrations: MigrationFile[] = files
    .filter(f => f.endsWith('.sql'))
    .map(filename => {
      // Extract number from filename like "005_api_tokens.sql" or "073_proxy_timezone.sql"
      const match = filename.match(/^(\d+)_/);
      if (!match) return null;

      return {
        filename,
        number: parseInt(match[1], 10),
        path: path.join(migrationsDir, filename),
      };
    })
    .filter((m): m is MigrationFile => m !== null)
    .sort((a, b) => a.number - b.number);

  return migrations;
}

async function ensureMigrationsTable(pool: Pool): Promise<void> {
  // Migrate to filename-based tracking (handles duplicate version numbers)
  // Check if old version-based PK exists
  const pkCheck = await pool.query(`
    SELECT constraint_name FROM information_schema.table_constraints
    WHERE table_name = 'schema_migrations' AND constraint_type = 'PRIMARY KEY'
  `);

  if (pkCheck.rows.length === 0) {
    // Table doesn't exist, create with filename as PK
    await pool.query(`
      CREATE TABLE IF NOT EXISTS schema_migrations (
        filename VARCHAR(255) NOT NULL PRIMARY KEY,
        version VARCHAR(10),
        name VARCHAR(255),
        applied_at TIMESTAMPTZ DEFAULT NOW()
      )
    `);
  } else {
    // Table exists - add filename column if missing
    await pool.query(`
      ALTER TABLE schema_migrations ADD COLUMN IF NOT EXISTS filename VARCHAR(255)
    `);
    // Populate filename from version+name for existing rows
    await pool.query(`
      UPDATE schema_migrations SET filename = version || '_' || name || '.sql'
      WHERE filename IS NULL
    `);
  }
}

async function getAppliedMigrations(pool: Pool): Promise<Set<string>> {
  // Try filename first, fall back to version_name combo
  const result = await pool.query(`
    SELECT COALESCE(filename, version || '_' || name || '.sql') as filename
    FROM schema_migrations
  `);
  return new Set(result.rows.map(r => r.filename));
}

async function applyMigration(pool: Pool, migration: MigrationFile): Promise<void> {
  const sql = await fs.readFile(migration.path, 'utf-8');

  // Extract version and name from filename like "005_api_tokens.sql"
  const version = String(migration.number).padStart(3, '0');
  const name = migration.filename.replace(/^\d+_/, '').replace(/\.sql$/, '');

  const client = await pool.connect();
  try {
    await client.query('BEGIN');

    // Run the migration SQL
    await client.query(sql);

    // Record that it was applied - use INSERT with ON CONFLICT for safety
    await client.query(`
      INSERT INTO schema_migrations (filename, version, name)
      VALUES ($1, $2, $3)
      ON CONFLICT DO NOTHING
    `, [migration.filename, version, name]);

    await client.query('COMMIT');
  } catch (error) {
    await client.query('ROLLBACK');
    throw error;
  } finally {
    client.release();
  }
}

async function main() {
  const pool = new Pool({ connectionString: getConnectionString() });

  // Migrations directory relative to this file
  const migrationsDir = path.resolve(__dirname, '../../migrations');

  console.log('╔════════════════════════════════════════════════════════════╗');
  console.log('║                  DATABASE MIGRATION RUNNER                  ║');
  console.log('╚════════════════════════════════════════════════════════════╝');
  console.log(`Migrations dir: ${migrationsDir}`);
  console.log('');

  try {
    // Ensure tracking table exists
    await ensureMigrationsTable(pool);

    // Get all migration files
    const allMigrations = await getMigrationFiles(migrationsDir);
    console.log(`Found ${allMigrations.length} migration files`);

    // Get already-applied migrations
|
||||||
|
const applied = await getAppliedMigrations(pool);
|
||||||
|
console.log(`Already applied: ${applied.size} migrations`);
|
||||||
|
console.log('');
|
||||||
|
|
||||||
|
// Find pending migrations (compare by filename)
|
||||||
|
const pending = allMigrations.filter(m => !applied.has(m.filename));
|
||||||
|
|
||||||
|
if (pending.length === 0) {
|
||||||
|
console.log('✅ No pending migrations. Database is up to date.');
|
||||||
|
await pool.end();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
console.log(`Pending migrations: ${pending.length}`);
|
||||||
|
console.log('─'.repeat(60));
|
||||||
|
|
||||||
|
// Apply each pending migration
|
||||||
|
for (const migration of pending) {
|
||||||
|
process.stdout.write(` ${migration.filename}... `);
|
||||||
|
try {
|
||||||
|
await applyMigration(pool, migration);
|
||||||
|
console.log('✅');
|
||||||
|
} catch (error: any) {
|
||||||
|
console.log('❌');
|
||||||
|
console.error(`\nError applying ${migration.filename}:`);
|
||||||
|
console.error(error.message);
|
||||||
|
process.exit(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
console.log('');
|
||||||
|
console.log('═'.repeat(60));
|
||||||
|
console.log(`✅ Applied ${pending.length} migrations successfully`);
|
||||||
|
|
||||||
|
} catch (error: any) {
|
||||||
|
console.error('Migration runner failed:', error.message);
|
||||||
|
process.exit(1);
|
||||||
|
} finally {
|
||||||
|
await pool.end();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
main();
|
||||||
@@ -3,14 +3,23 @@
|
|||||||
*
|
*
|
||||||
* Main orchestrator for the Dutchie store discovery pipeline.
|
* Main orchestrator for the Dutchie store discovery pipeline.
|
||||||
*
|
*
|
||||||
* Flow:
|
* AUTOMATED FLOW (as of 2025-01):
|
||||||
* 1. Discover cities from Dutchie (or use seeded cities)
|
* 1. Fetch cities dynamically from Dutchie GraphQL (getAllCitiesByState)
|
||||||
* 2. For each city, discover store locations
|
* 2. For each city, discover store locations via ConsumerDispensaries query
|
||||||
* 3. Upsert all data to discovery tables
|
* 3. Upsert locations to dutchie_discovery_locations (keyed by platform_location_id)
|
||||||
* 4. Admin verifies locations manually
|
* 4. AUTO-VALIDATE: Check required fields (name, city, state, platform_menu_url, platform_location_id)
|
||||||
* 5. Verified locations are promoted to canonical dispensaries
|
* 5. AUTO-PROMOTE: Valid locations are upserted to dispensaries table with crawl_enabled=true
|
||||||
|
* 6. All actions logged to dutchie_promotion_log for audit
|
||||||
*
|
*
|
||||||
* This module does NOT create canonical dispensaries automatically.
|
* Tables involved:
|
||||||
|
* - dutchie_discovery_cities: Known cities for each state
|
||||||
|
* - dutchie_discovery_locations: Raw discovered store data
|
||||||
|
* - dispensaries: Canonical store records (promoted from discovery)
|
||||||
|
* - dutchie_promotion_log: Audit trail for validation/promotion
|
||||||
|
*
|
||||||
|
* Usage:
|
||||||
|
* npx tsx src/scripts/run-discovery.ts discover:state AZ
|
||||||
|
* npx tsx src/scripts/run-discovery.ts discover:state CA
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import { Pool } from 'pg';
|
import { Pool } from 'pg';
|
||||||
@@ -24,11 +33,12 @@ import {
|
|||||||
getCitiesToCrawl,
|
getCitiesToCrawl,
|
||||||
getCityBySlug,
|
getCityBySlug,
|
||||||
seedKnownCities,
|
seedKnownCities,
|
||||||
ARIZONA_CITIES,
|
|
||||||
} from './city-discovery';
|
} from './city-discovery';
|
||||||
import {
|
import {
|
||||||
discoverLocationsForCity,
|
discoverLocationsForCity,
|
||||||
|
getCitiesForState,
|
||||||
} from './location-discovery';
|
} from './location-discovery';
|
||||||
|
import { promoteDiscoveredLocations } from './promotion';
|
||||||
|
|
||||||
// ============================================================
|
// ============================================================
|
||||||
// FULL DISCOVERY
|
// FULL DISCOVERY
|
||||||
@@ -162,6 +172,42 @@ export async function runFullDiscovery(
|
|||||||
console.log(`Errors: ${totalErrors}`);
|
console.log(`Errors: ${totalErrors}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Step 4: Auto-validate and promote discovered locations
|
||||||
|
if (!dryRun && totalLocationsUpserted > 0) {
|
||||||
|
console.log('\n[Discovery] Step 4: Auto-promoting discovered locations...');
|
||||||
|
const promotionResult = await promoteDiscoveredLocations(stateCode, false);
|
||||||
|
console.log(`[Discovery] Promotion complete:`);
|
||||||
|
console.log(` Created: ${promotionResult.created} new dispensaries`);
|
||||||
|
console.log(` Updated: ${promotionResult.updated} existing dispensaries`);
|
||||||
|
console.log(` Rejected: ${promotionResult.rejected} (validation failed)`);
|
||||||
|
if (promotionResult.rejectedRecords.length > 0) {
|
||||||
|
console.log(` Rejection reasons:`);
|
||||||
|
promotionResult.rejectedRecords.slice(0, 5).forEach(r => {
|
||||||
|
console.log(` - ${r.name}: ${r.errors.join(', ')}`);
|
||||||
|
});
|
||||||
|
if (promotionResult.rejectedRecords.length > 5) {
|
||||||
|
console.log(` ... and ${promotionResult.rejectedRecords.length - 5} more`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Step 5: Detect dropped stores (in DB but not in discovery results)
|
||||||
|
if (!dryRun) {
|
||||||
|
console.log('\n[Discovery] Step 5: Detecting dropped stores...');
|
||||||
|
const droppedResult = await detectDroppedStores(pool, stateCode);
|
||||||
|
if (droppedResult.droppedCount > 0) {
|
||||||
|
console.log(`[Discovery] Found ${droppedResult.droppedCount} dropped stores:`);
|
||||||
|
droppedResult.droppedStores.slice(0, 10).forEach(s => {
|
||||||
|
console.log(` - ${s.name} (${s.city}, ${s.state}) - last seen: ${s.lastSeenAt}`);
|
||||||
|
});
|
||||||
|
if (droppedResult.droppedCount > 10) {
|
||||||
|
console.log(` ... and ${droppedResult.droppedCount - 10} more`);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
console.log(`[Discovery] No dropped stores detected`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return {
|
return {
|
||||||
cities: cityResult,
|
cities: cityResult,
|
||||||
locations: locationResults,
|
locations: locationResults,
|
||||||
@@ -171,6 +217,107 @@ export async function runFullDiscovery(
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ============================================================
|
||||||
|
// DROPPED STORE DETECTION
|
||||||
|
// ============================================================
|
||||||
|
|
||||||
|
export interface DroppedStoreResult {
|
||||||
|
droppedCount: number;
|
||||||
|
droppedStores: Array<{
|
||||||
|
id: number;
|
||||||
|
name: string;
|
||||||
|
city: string;
|
||||||
|
state: string;
|
||||||
|
platformDispensaryId: string;
|
||||||
|
lastSeenAt: string;
|
||||||
|
}>;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Detect stores that exist in dispensaries but were not found in discovery.
|
||||||
|
* Marks them as status='dropped' for manual review.
|
||||||
|
*
|
||||||
|
* A store is considered "dropped" if:
|
||||||
|
* 1. It has a platform_dispensary_id (was verified via Dutchie)
|
||||||
|
* 2. It was NOT seen in the latest discovery crawl (last_seen_at in discovery < 24h ago)
|
||||||
|
* 3. It's currently marked as 'open' status
|
||||||
|
*/
|
||||||
|
export async function detectDroppedStores(
|
||||||
|
pool: Pool,
|
||||||
|
stateCode?: string
|
||||||
|
): Promise<DroppedStoreResult> {
|
||||||
|
// Find dispensaries that:
|
||||||
|
// 1. Have platform_dispensary_id (verified Dutchie stores)
|
||||||
|
// 2. Are currently 'open' status
|
||||||
|
// 3. Have a linked discovery record that wasn't seen in the last discovery run
|
||||||
|
// (last_seen_at in dutchie_discovery_locations is older than 24 hours)
|
||||||
|
const params: any[] = [];
|
||||||
|
let stateFilter = '';
|
||||||
|
|
||||||
|
if (stateCode) {
|
||||||
|
stateFilter = ` AND d.state = $1`;
|
||||||
|
params.push(stateCode);
|
||||||
|
}
|
||||||
|
|
||||||
|
const query = `
|
||||||
|
WITH recently_seen AS (
|
||||||
|
SELECT DISTINCT platform_location_id
|
||||||
|
FROM dutchie_discovery_locations
|
||||||
|
WHERE last_seen_at > NOW() - INTERVAL '24 hours'
|
||||||
|
AND active = true
|
||||||
|
)
|
||||||
|
SELECT
|
||||||
|
d.id,
|
||||||
|
d.name,
|
||||||
|
d.city,
|
||||||
|
d.state,
|
||||||
|
d.platform_dispensary_id,
|
||||||
|
d.updated_at as last_seen_at
|
||||||
|
FROM dispensaries d
|
||||||
|
WHERE d.platform_dispensary_id IS NOT NULL
|
||||||
|
AND d.platform = 'dutchie'
|
||||||
|
AND (d.status = 'open' OR d.status IS NULL)
|
||||||
|
AND d.crawl_enabled = true
|
||||||
|
AND d.platform_dispensary_id NOT IN (SELECT platform_location_id FROM recently_seen)
|
||||||
|
${stateFilter}
|
||||||
|
ORDER BY d.name
|
||||||
|
`;
|
||||||
|
|
||||||
|
const result = await pool.query(query, params);
|
||||||
|
const droppedStores = result.rows;
|
||||||
|
|
||||||
|
// Mark these stores as 'dropped' status
|
||||||
|
if (droppedStores.length > 0) {
|
||||||
|
const ids = droppedStores.map(s => s.id);
|
||||||
|
await pool.query(`
|
||||||
|
UPDATE dispensaries
|
||||||
|
SET status = 'dropped', updated_at = NOW()
|
||||||
|
WHERE id = ANY($1::int[])
|
||||||
|
`, [ids]);
|
||||||
|
|
||||||
|
// Log to promotion log for audit
|
||||||
|
for (const store of droppedStores) {
|
||||||
|
await pool.query(`
|
||||||
|
INSERT INTO dutchie_promotion_log
|
||||||
|
(dispensary_id, action, state_code, store_name, triggered_by)
|
||||||
|
VALUES ($1, 'dropped', $2, $3, 'discovery_detection')
|
||||||
|
`, [store.id, store.state, store.name]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return {
|
||||||
|
droppedCount: droppedStores.length,
|
||||||
|
droppedStores: droppedStores.map(s => ({
|
||||||
|
id: s.id,
|
||||||
|
name: s.name,
|
||||||
|
city: s.city,
|
||||||
|
state: s.state,
|
||||||
|
platformDispensaryId: s.platform_dispensary_id,
|
||||||
|
lastSeenAt: s.last_seen_at,
|
||||||
|
})),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
// ============================================================
|
// ============================================================
|
||||||
// SINGLE CITY DISCOVERY
|
// SINGLE CITY DISCOVERY
|
||||||
// ============================================================
|
// ============================================================
|
||||||
@@ -235,11 +382,19 @@ export async function discoverState(
|
|||||||
|
|
||||||
console.log(`[Discovery] Discovering state: ${stateCode}`);
|
console.log(`[Discovery] Discovering state: ${stateCode}`);
|
||||||
|
|
||||||
// Seed known cities for this state
|
// Dynamically fetch and seed cities for this state
|
||||||
if (stateCode === 'AZ') {
|
console.log(`[Discovery] Fetching cities for ${stateCode} from Dutchie...`);
|
||||||
console.log('[Discovery] Seeding Arizona cities...');
|
const cityNames = await getCitiesForState(stateCode);
|
||||||
const seeded = await seedKnownCities(pool, ARIZONA_CITIES);
|
if (cityNames.length > 0) {
|
||||||
console.log(`[Discovery] Seeded ${seeded.created} new cities, ${seeded.updated} updated`);
|
const cities = cityNames.map(name => ({
|
||||||
|
name,
|
||||||
|
slug: name.toLowerCase().replace(/\s+/g, '-').replace(/[^a-z0-9-]/g, ''),
|
||||||
|
stateCode,
|
||||||
|
}));
|
||||||
|
const seeded = await seedKnownCities(pool, cities);
|
||||||
|
console.log(`[Discovery] Seeded ${seeded.created} new cities, ${seeded.updated} updated for ${stateCode}`);
|
||||||
|
} else {
|
||||||
|
console.log(`[Discovery] No cities found for ${stateCode}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Run full discovery for this state
|
// Run full discovery for this state
|
||||||
|
|||||||
@@ -13,7 +13,6 @@ export {
|
|||||||
getCitiesToCrawl,
|
getCitiesToCrawl,
|
||||||
getCityBySlug,
|
getCityBySlug,
|
||||||
seedKnownCities,
|
seedKnownCities,
|
||||||
ARIZONA_CITIES,
|
|
||||||
} from './city-discovery';
|
} from './city-discovery';
|
||||||
|
|
||||||
// Location Discovery
|
// Location Discovery
|
||||||
@@ -33,5 +32,17 @@ export {
|
|||||||
DiscoveryStats,
|
DiscoveryStats,
|
||||||
} from './discovery-crawler';
|
} from './discovery-crawler';
|
||||||
|
|
||||||
|
// Promotion
|
||||||
|
export {
|
||||||
|
validateForPromotion,
|
||||||
|
validateDiscoveredLocations,
|
||||||
|
promoteDiscoveredLocations,
|
||||||
|
promoteSingleLocation,
|
||||||
|
ValidationResult,
|
||||||
|
ValidationSummary,
|
||||||
|
PromotionResult,
|
||||||
|
PromotionSummary,
|
||||||
|
} from './promotion';
|
||||||
|
|
||||||
// Routes
|
// Routes
|
||||||
export { createDiscoveryRoutes } from './routes';
|
export { createDiscoveryRoutes } from './routes';
|
||||||
|
|||||||
@@ -134,10 +134,10 @@ export interface StateWithCities {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Fetch all states with their cities from Dutchie's __NEXT_DATA__
|
* Fetch all states with their cities via direct GraphQL query
|
||||||
*
|
*
|
||||||
* This fetches a city page and extracts the statesWithDispensaries data
|
* Uses the getAllCitiesByState persisted query which returns all states
|
||||||
* which contains all states and their cities where Dutchie has dispensaries.
|
* and cities where Dutchie has dispensaries.
|
||||||
*/
|
*/
|
||||||
export async function fetchStatesWithDispensaries(
|
export async function fetchStatesWithDispensaries(
|
||||||
options: { verbose?: boolean } = {}
|
options: { verbose?: boolean } = {}
|
||||||
@@ -147,84 +147,53 @@ export async function fetchStatesWithDispensaries(
|
|||||||
// Initialize proxy if USE_PROXY=true
|
// Initialize proxy if USE_PROXY=true
|
||||||
await initDiscoveryProxy();
|
await initDiscoveryProxy();
|
||||||
|
|
||||||
console.log('[LocationDiscovery] Fetching statesWithDispensaries from Dutchie...');
|
console.log('[LocationDiscovery] Fetching statesWithDispensaries via GraphQL...');
|
||||||
|
|
||||||
// Fetch any city page to get the __NEXT_DATA__ with statesWithDispensaries
|
try {
|
||||||
// Using a known city that's likely to exist
|
// Use direct GraphQL query - much cleaner than scraping __NEXT_DATA__
|
||||||
const result = await fetchPage('/dispensaries/az/phoenix', { maxRetries: 3 });
|
const result = await executeGraphQL(
|
||||||
|
'getAllCitiesByState',
|
||||||
|
{}, // No variables needed
|
||||||
|
GRAPHQL_HASHES.GetAllCitiesByState,
|
||||||
|
{ maxRetries: 3, retryOn403: true }
|
||||||
|
);
|
||||||
|
|
||||||
if (!result || result.status !== 200) {
|
const statesData = result?.data?.statesWithDispensaries;
|
||||||
console.error('[LocationDiscovery] Failed to fetch city page');
|
if (!Array.isArray(statesData)) {
|
||||||
return [];
|
console.error('[LocationDiscovery] statesWithDispensaries not found in response');
|
||||||
}
|
return [];
|
||||||
|
|
||||||
const nextData = extractNextData(result.html);
|
|
||||||
if (!nextData) {
|
|
||||||
console.error('[LocationDiscovery] No __NEXT_DATA__ found');
|
|
||||||
return [];
|
|
||||||
}
|
|
||||||
|
|
||||||
// Extract statesWithDispensaries from Apollo state
|
|
||||||
const apolloState = nextData.props?.pageProps?.initialApolloState;
|
|
||||||
if (!apolloState) {
|
|
||||||
console.error('[LocationDiscovery] No initialApolloState found');
|
|
||||||
return [];
|
|
||||||
}
|
|
||||||
|
|
||||||
// Find ROOT_QUERY.statesWithDispensaries
|
|
||||||
const rootQuery = apolloState['ROOT_QUERY'];
|
|
||||||
if (!rootQuery) {
|
|
||||||
console.error('[LocationDiscovery] No ROOT_QUERY found');
|
|
||||||
return [];
|
|
||||||
}
|
|
||||||
|
|
||||||
// The statesWithDispensaries is at ROOT_QUERY.statesWithDispensaries
|
|
||||||
const statesRefs = rootQuery.statesWithDispensaries;
|
|
||||||
if (!Array.isArray(statesRefs)) {
|
|
||||||
console.error('[LocationDiscovery] statesWithDispensaries not found or not an array');
|
|
||||||
return [];
|
|
||||||
}
|
|
||||||
|
|
||||||
// Resolve the references to actual state data
|
|
||||||
const states: StateWithCities[] = [];
|
|
||||||
for (const ref of statesRefs) {
|
|
||||||
// ref might be { __ref: "StateWithDispensaries:0" } or direct object
|
|
||||||
let stateData: any;
|
|
||||||
|
|
||||||
if (ref && ref.__ref) {
|
|
||||||
stateData = apolloState[ref.__ref];
|
|
||||||
} else {
|
|
||||||
stateData = ref;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (stateData && stateData.name) {
|
// Map to our StateWithCities format
|
||||||
// Parse cities JSON array if it's a string
|
const states: StateWithCities[] = [];
|
||||||
let cities = stateData.cities;
|
for (const state of statesData) {
|
||||||
if (typeof cities === 'string') {
|
if (state && state.name) {
|
||||||
try {
|
// Filter out null cities
|
||||||
cities = JSON.parse(cities);
|
const cities = Array.isArray(state.cities)
|
||||||
} catch {
|
? state.cities.filter((c: string | null) => c !== null)
|
||||||
cities = [];
|
: [];
|
||||||
}
|
|
||||||
|
states.push({
|
||||||
|
name: state.name,
|
||||||
|
country: state.country || 'US',
|
||||||
|
cities,
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
states.push({
|
|
||||||
name: stateData.name,
|
|
||||||
country: stateData.country || 'US',
|
|
||||||
cities: Array.isArray(cities) ? cities : [],
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
if (verbose) {
|
if (verbose) {
|
||||||
console.log(`[LocationDiscovery] Found ${states.length} states`);
|
console.log(`[LocationDiscovery] Found ${states.length} states`);
|
||||||
for (const state of states) {
|
for (const state of states) {
|
||||||
console.log(` ${state.name}: ${state.cities.length} cities`);
|
console.log(` ${state.name}: ${state.cities.length} cities`);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
console.log(`[LocationDiscovery] Loaded ${states.length} states with cities`);
|
console.log(`[LocationDiscovery] Loaded ${states.length} states with cities`);
|
||||||
return states;
|
return states;
|
||||||
|
} catch (error: any) {
|
||||||
|
console.error(`[LocationDiscovery] Failed to fetch states: ${error.message}`);
|
||||||
|
return [];
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -751,31 +720,57 @@ async function scrapeLocationCards(
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Normalize a raw location response to a consistent format.
|
* Normalize a raw location response to a consistent format.
|
||||||
|
* Maps Dutchie camelCase fields to our snake_case equivalents.
|
||||||
*/
|
*/
|
||||||
function normalizeLocationResponse(raw: any): DutchieLocationResponse {
|
function normalizeLocationResponse(raw: any): DutchieLocationResponse {
|
||||||
const slug = raw.slug || raw.cName || raw.urlSlug || '';
|
const slug = raw.slug || raw.cName || raw.urlSlug || '';
|
||||||
const id = raw.id || raw._id || raw.dispensaryId || '';
|
const id = raw.id || raw._id || raw.dispensaryId || '';
|
||||||
|
|
||||||
|
// Extract location data - GraphQL response nests address info in .location
|
||||||
|
const loc = raw.location || {};
|
||||||
|
|
||||||
|
// Extract coordinates from geometry.coordinates [longitude, latitude]
|
||||||
|
const coords = loc.geometry?.coordinates || [];
|
||||||
|
const longitude = coords[0] || raw.longitude || raw.lng || loc.longitude || loc.lng;
|
||||||
|
const latitude = coords[1] || raw.latitude || raw.lat || loc.latitude || loc.lat;
|
||||||
|
|
||||||
return {
|
return {
|
||||||
id,
|
id,
|
||||||
name: raw.name || raw.dispensaryName || '',
|
name: raw.name || raw.dispensaryName || '',
|
||||||
slug,
|
slug,
|
||||||
address: raw.address || raw.fullAddress || '',
|
cName: raw.cName || raw.slug || '',
|
||||||
address1: raw.address1 || raw.addressLine1 || raw.streetAddress || '',
|
address: raw.address || raw.fullAddress || loc.ln1 || '',
|
||||||
address2: raw.address2 || raw.addressLine2 || '',
|
address1: raw.address1 || raw.addressLine1 || raw.streetAddress || loc.ln1 || '',
|
||||||
city: raw.city || '',
|
address2: raw.address2 || raw.addressLine2 || loc.ln2 || '',
|
||||||
state: raw.state || raw.stateCode || '',
|
city: raw.city || loc.city || '',
|
||||||
zip: raw.zip || raw.zipCode || raw.postalCode || '',
|
state: raw.state || raw.stateCode || loc.state || '',
|
||||||
country: raw.country || raw.countryCode || 'US',
|
zip: raw.zip || raw.zipCode || raw.postalCode || loc.zipcode || loc.zip || '',
|
||||||
latitude: raw.latitude || raw.lat || raw.location?.latitude,
|
country: raw.country || raw.countryCode || loc.country || 'United States',
|
||||||
longitude: raw.longitude || raw.lng || raw.location?.longitude,
|
latitude,
|
||||||
|
longitude,
|
||||||
timezone: raw.timezone || raw.tz || '',
|
timezone: raw.timezone || raw.tz || '',
|
||||||
menuUrl: raw.menuUrl || (slug ? `https://dutchie.com/dispensary/${slug}` : ''),
|
menuUrl: raw.menuUrl || (slug ? `https://dutchie.com/dispensary/${slug}` : ''),
|
||||||
retailType: raw.retailType || raw.type || '',
|
retailType: raw.retailType || raw.type || '',
|
||||||
|
// Service offerings
|
||||||
offerPickup: raw.offerPickup ?? raw.storeSettings?.offerPickup ?? true,
|
offerPickup: raw.offerPickup ?? raw.storeSettings?.offerPickup ?? true,
|
||||||
offerDelivery: raw.offerDelivery ?? raw.storeSettings?.offerDelivery ?? false,
|
offerDelivery: raw.offerDelivery ?? raw.storeSettings?.offerDelivery ?? false,
|
||||||
isRecreational: raw.isRecreational ?? raw.retailType?.includes('Recreational') ?? true,
|
offerCurbsidePickup: raw.offerCurbsidePickup ?? false,
|
||||||
isMedical: raw.isMedical ?? raw.retailType?.includes('Medical') ?? true,
|
// License types
|
||||||
|
isRecreational: raw.isRecreational ?? raw.recDispensary ?? raw.retailType?.includes('Recreational') ?? true,
|
||||||
|
isMedical: raw.isMedical ?? raw.medicalDispensary ?? raw.retailType?.includes('Medical') ?? true,
|
||||||
|
// Contact info
|
||||||
|
phone: raw.phone || '',
|
||||||
|
email: raw.email || '',
|
||||||
|
website: raw.embedBackUrl || '',
|
||||||
|
// Branding
|
||||||
|
description: raw.description || '',
|
||||||
|
logoImage: raw.logoImage || '',
|
||||||
|
bannerImage: raw.bannerImage || '',
|
||||||
|
// Chain/enterprise info
|
||||||
|
chainSlug: raw.chain || '',
|
||||||
|
enterpriseId: raw.retailer?.enterpriseId || '',
|
||||||
|
// Status
|
||||||
|
status: raw.status || '',
|
||||||
// Preserve raw data
|
// Preserve raw data
|
||||||
...raw,
|
...raw,
|
||||||
};
|
};
|
||||||
@@ -826,15 +821,27 @@ export async function upsertLocation(
|
|||||||
offers_pickup,
|
offers_pickup,
|
||||||
is_recreational,
|
is_recreational,
|
||||||
is_medical,
|
is_medical,
|
||||||
|
phone,
|
||||||
|
website,
|
||||||
|
email,
|
||||||
|
description,
|
||||||
|
logo_image,
|
||||||
|
banner_image,
|
||||||
|
chain_slug,
|
||||||
|
enterprise_id,
|
||||||
|
c_name,
|
||||||
|
country,
|
||||||
|
store_status,
|
||||||
last_seen_at,
|
last_seen_at,
|
||||||
updated_at
|
updated_at
|
||||||
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20, $21, NOW(), NOW())
|
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20, $21, $22, $23, $24, $25, $26, $27, $28, $29, $30, $31, $32, NOW(), NOW())
|
||||||
ON CONFLICT (platform, platform_location_id)
|
ON CONFLICT (platform, platform_location_id)
|
||||||
DO UPDATE SET
|
DO UPDATE SET
|
||||||
name = EXCLUDED.name,
|
name = EXCLUDED.name,
|
||||||
platform_menu_url = EXCLUDED.platform_menu_url,
|
platform_menu_url = EXCLUDED.platform_menu_url,
|
||||||
raw_address = COALESCE(EXCLUDED.raw_address, dutchie_discovery_locations.raw_address),
|
raw_address = COALESCE(EXCLUDED.raw_address, dutchie_discovery_locations.raw_address),
|
||||||
address_line1 = COALESCE(EXCLUDED.address_line1, dutchie_discovery_locations.address_line1),
|
address_line1 = COALESCE(EXCLUDED.address_line1, dutchie_discovery_locations.address_line1),
|
||||||
|
address_line2 = COALESCE(EXCLUDED.address_line2, dutchie_discovery_locations.address_line2),
|
||||||
city = COALESCE(EXCLUDED.city, dutchie_discovery_locations.city),
|
city = COALESCE(EXCLUDED.city, dutchie_discovery_locations.city),
|
||||||
state_code = COALESCE(EXCLUDED.state_code, dutchie_discovery_locations.state_code),
|
state_code = COALESCE(EXCLUDED.state_code, dutchie_discovery_locations.state_code),
|
||||||
postal_code = COALESCE(EXCLUDED.postal_code, dutchie_discovery_locations.postal_code),
|
postal_code = COALESCE(EXCLUDED.postal_code, dutchie_discovery_locations.postal_code),
|
||||||
@@ -846,6 +853,17 @@ export async function upsertLocation(
|
|||||||
offers_pickup = COALESCE(EXCLUDED.offers_pickup, dutchie_discovery_locations.offers_pickup),
|
offers_pickup = COALESCE(EXCLUDED.offers_pickup, dutchie_discovery_locations.offers_pickup),
|
||||||
is_recreational = COALESCE(EXCLUDED.is_recreational, dutchie_discovery_locations.is_recreational),
|
is_recreational = COALESCE(EXCLUDED.is_recreational, dutchie_discovery_locations.is_recreational),
|
||||||
is_medical = COALESCE(EXCLUDED.is_medical, dutchie_discovery_locations.is_medical),
|
is_medical = COALESCE(EXCLUDED.is_medical, dutchie_discovery_locations.is_medical),
|
||||||
|
phone = COALESCE(EXCLUDED.phone, dutchie_discovery_locations.phone),
|
||||||
|
website = COALESCE(EXCLUDED.website, dutchie_discovery_locations.website),
|
||||||
|
email = COALESCE(EXCLUDED.email, dutchie_discovery_locations.email),
|
||||||
|
description = COALESCE(EXCLUDED.description, dutchie_discovery_locations.description),
|
||||||
|
logo_image = COALESCE(EXCLUDED.logo_image, dutchie_discovery_locations.logo_image),
|
||||||
|
banner_image = COALESCE(EXCLUDED.banner_image, dutchie_discovery_locations.banner_image),
|
||||||
|
chain_slug = COALESCE(EXCLUDED.chain_slug, dutchie_discovery_locations.chain_slug),
|
||||||
|
enterprise_id = COALESCE(EXCLUDED.enterprise_id, dutchie_discovery_locations.enterprise_id),
|
||||||
|
c_name = COALESCE(EXCLUDED.c_name, dutchie_discovery_locations.c_name),
|
||||||
|
country = COALESCE(EXCLUDED.country, dutchie_discovery_locations.country),
|
||||||
|
store_status = COALESCE(EXCLUDED.store_status, dutchie_discovery_locations.store_status),
|
||||||
last_seen_at = NOW(),
|
last_seen_at = NOW(),
|
||||||
updated_at = NOW()
|
updated_at = NOW()
|
||||||
RETURNING id, (xmax = 0) as is_new`,
|
RETURNING id, (xmax = 0) as is_new`,
|
||||||
@@ -861,7 +879,7 @@ export async function upsertLocation(
|
|||||||
location.city || null,
|
location.city || null,
|
||||||
location.state || null,
|
location.state || null,
|
||||||
location.zip || null,
|
location.zip || null,
|
||||||
location.country || 'US',
|
location.country || 'United States',
|
||||||
location.latitude || null,
|
location.latitude || null,
|
||||||
location.longitude || null,
|
location.longitude || null,
|
||||||
location.timezone || null,
|
location.timezone || null,
|
||||||
@@ -871,6 +889,17 @@ export async function upsertLocation(
|
|||||||
location.offerPickup ?? null,
|
location.offerPickup ?? null,
|
||||||
location.isRecreational ?? null,
|
location.isRecreational ?? null,
|
||||||
location.isMedical ?? null,
|
location.isMedical ?? null,
|
||||||
|
location.phone || null,
|
||||||
|
location.website || null,
|
||||||
|
location.email || null,
|
||||||
|
location.description || null,
|
||||||
|
location.logoImage || null,
|
||||||
|
location.bannerImage || null,
|
||||||
|
location.chainSlug || null,
|
||||||
|
location.enterpriseId || null,
|
||||||
|
location.cName || null,
|
||||||
|
location.country || 'United States',
|
||||||
|
location.status || null,
|
||||||
]
|
]
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|||||||
579
backend/src/discovery/promotion.ts
Normal file
579
backend/src/discovery/promotion.ts
Normal file
@@ -0,0 +1,579 @@
|
|||||||
|
/**
|
||||||
|
* Discovery Promotion Service
|
||||||
|
*
|
||||||
|
* Handles the promotion of discovery locations to dispensaries:
|
||||||
|
* 1. Discovery → Raw data in dutchie_discovery_locations (status='discovered')
|
||||||
|
* 2. Validation → Check required fields, reject incomplete records
|
||||||
|
* 3. Promotion → Idempotent upsert to dispensaries, link back via dispensary_id
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { pool } from '../db/pool';
|
||||||
|
import { DiscoveryLocationRow, DiscoveryStatus } from './types';
|
||||||
|
|
||||||
|
// ============================================================
|
||||||
|
// VALIDATION
|
||||||
|
// ============================================================
|
||||||
|
|
||||||
|
export interface ValidationResult {
|
||||||
|
valid: boolean;
|
||||||
|
errors: string[];
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface ValidationSummary {
|
||||||
|
totalChecked: number;
|
||||||
|
validCount: number;
|
||||||
|
invalidCount: number;
|
||||||
|
invalidRecords: Array<{
|
||||||
|
id: number;
|
||||||
|
name: string;
|
||||||
|
errors: string[];
|
||||||
|
}>;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Validate a single discovery location has all required fields for promotion
|
||||||
|
*/
|
||||||
|
export function validateForPromotion(loc: DiscoveryLocationRow): ValidationResult {
|
||||||
|
const errors: string[] = [];
|
||||||
|
|
||||||
|
// Required fields
|
||||||
|
if (!loc.platform_location_id) {
|
||||||
|
errors.push('Missing platform_location_id');
|
||||||
|
}
|
||||||
|
if (!loc.name || loc.name.trim() === '') {
|
||||||
|
errors.push('Missing name');
|
||||||
|
}
|
||||||
|
if (!loc.city || loc.city.trim() === '') {
|
||||||
|
errors.push('Missing city');
|
||||||
|
}
|
||||||
|
if (!loc.state_code || loc.state_code.trim() === '') {
|
||||||
|
errors.push('Missing state_code');
|
||||||
|
}
|
||||||
|
if (!loc.platform_menu_url) {
|
||||||
|
errors.push('Missing platform_menu_url');
|
||||||
|
}
|
||||||
|
|
||||||
|
return {
|
||||||
|
valid: errors.length === 0,
|
||||||
|
errors,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Validate all discovered locations and return summary
|
||||||
|
*/
|
||||||
|
export async function validateDiscoveredLocations(
|
||||||
|
stateCode?: string
|
||||||
|
): Promise<ValidationSummary> {
|
||||||
|
let query = `
|
||||||
|
SELECT * FROM dutchie_discovery_locations
|
||||||
|
WHERE status = 'discovered'
|
||||||
|
`;
|
||||||
|
const params: string[] = [];
|
||||||
|
|
||||||
|
if (stateCode) {
|
||||||
|
query += ` AND state_code = $1`;
|
||||||
|
params.push(stateCode);
|
||||||
|
}
|
||||||
|
|
||||||
|
const result = await pool.query(query, params);
|
||||||
|
const locations = result.rows as DiscoveryLocationRow[];
|
||||||
|
|
||||||
|
const invalidRecords: ValidationSummary['invalidRecords'] = [];
|
||||||
|
let validCount = 0;
|
||||||
|
|
||||||
|
for (const loc of locations) {
|
||||||
|
const validation = validateForPromotion(loc);
|
||||||
|
if (validation.valid) {
|
||||||
|
validCount++;
|
||||||
|
} else {
|
||||||
|
invalidRecords.push({
|
||||||
|
id: loc.id,
|
||||||
|
name: loc.name,
|
||||||
|
errors: validation.errors,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return {
|
||||||
|
totalChecked: locations.length,
|
||||||
|
validCount,
|
||||||
|
invalidCount: invalidRecords.length,
|
||||||
|
invalidRecords,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
// ============================================================
|
||||||
|
// PROMOTION
|
||||||
|
// ============================================================
|
||||||
|
|
||||||
|
export interface PromotionResult {
|
||||||
|
discoveryId: number;
|
||||||
|
dispensaryId: number;
|
||||||
|
action: 'created' | 'updated' | 'skipped';
|
||||||
|
name: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface PromotionSummary {
|
||||||
|
totalProcessed: number;
|
||||||
|
created: number;
|
||||||
|
updated: number;
|
||||||
|
skipped: number;
|
||||||
|
rejected: number;
|
||||||
|
results: PromotionResult[];
|
||||||
|
rejectedRecords: Array<{
|
||||||
|
id: number;
|
||||||
|
name: string;
|
||||||
|
errors: string[];
|
||||||
|
}>;
|
||||||
|
durationMs: number;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Generate a URL-safe slug from name and city
|
||||||
|
*/
|
||||||
|
function generateSlug(name: string, city: string, state: string): string {
|
||||||
|
const base = `${name}-${city}-${state}`
|
||||||
|
.toLowerCase()
|
||||||
|
.replace(/[^a-z0-9]+/g, '-')
|
||||||
|
.replace(/^-|-$/g, '')
|
||||||
|
.substring(0, 100);
|
||||||
|
return base;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Log a promotion action to dutchie_promotion_log
|
||||||
|
*/
|
||||||
|
async function logPromotionAction(
|
||||||
|
action: string,
|
||||||
|
discoveryId: number | null,
|
||||||
|
dispensaryId: number | null,
|
||||||
|
stateCode: string | null,
|
||||||
|
storeName: string | null,
|
||||||
|
validationErrors: string[] | null = null,
|
||||||
|
fieldChanges: Record<string, any> | null = null,
|
||||||
|
triggeredBy: string = 'auto'
|
||||||
|
): Promise<void> {
|
||||||
|
await pool.query(`
|
||||||
|
INSERT INTO dutchie_promotion_log
|
||||||
|
(discovery_id, dispensary_id, action, state_code, store_name, validation_errors, field_changes, triggered_by)
|
||||||
|
VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
|
||||||
|
`, [
|
||||||
|
discoveryId,
|
||||||
|
dispensaryId,
|
||||||
|
action,
|
||||||
|
stateCode,
|
||||||
|
storeName,
|
||||||
|
validationErrors,
|
||||||
|
fieldChanges ? JSON.stringify(fieldChanges) : null,
|
||||||
|
triggeredBy,
|
||||||
|
]);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a status alert for the dashboard
|
||||||
|
*/
|
||||||
|
export async function createStatusAlert(
|
||||||
|
dispensaryId: number,
|
||||||
|
profileId: number | null,
|
||||||
|
alertType: string,
|
||||||
|
severity: 'info' | 'warning' | 'error' | 'critical',
|
||||||
|
message: string,
|
||||||
|
previousStatus?: string | null,
|
||||||
|
newStatus?: string | null,
|
||||||
|
metadata?: Record<string, any>
|
||||||
|
): Promise<number> {
|
||||||
|
const result = await pool.query(`
|
||||||
|
INSERT INTO crawler_status_alerts
|
||||||
|
(dispensary_id, profile_id, alert_type, severity, message, previous_status, new_status, metadata)
|
||||||
|
VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
|
||||||
|
RETURNING id
|
||||||
|
`, [
|
||||||
|
dispensaryId,
|
||||||
|
profileId,
|
||||||
|
alertType,
|
||||||
|
severity,
|
||||||
|
message,
|
||||||
|
previousStatus || null,
|
||||||
|
newStatus || null,
|
||||||
|
metadata ? JSON.stringify(metadata) : null,
|
||||||
|
]);
|
||||||
|
return result.rows[0].id;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create or update crawler profile for a dispensary with initial sandbox status
|
||||||
|
*/
|
||||||
|
async function ensureCrawlerProfile(
|
||||||
|
dispensaryId: number,
|
||||||
|
dispensaryName: string,
|
||||||
|
platformDispensaryId: string
|
||||||
|
): Promise<{ profileId: number; created: boolean }> {
|
||||||
|
// Check if profile already exists
|
||||||
|
const existingResult = await pool.query(`
|
||||||
|
SELECT id FROM dispensary_crawler_profiles
|
||||||
|
WHERE dispensary_id = $1 AND enabled = true
|
||||||
|
LIMIT 1
|
||||||
|
`, [dispensaryId]);
|
||||||
|
|
||||||
|
if (existingResult.rows.length > 0) {
|
||||||
|
return { profileId: existingResult.rows[0].id, created: false };
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create new profile with sandbox status
|
||||||
|
const profileKey = dispensaryName
|
||||||
|
.toLowerCase()
|
||||||
|
.replace(/[^a-z0-9]+/g, '-')
|
||||||
|
.replace(/^-|-$/g, '')
|
||||||
|
.substring(0, 50);
|
||||||
|
|
||||||
|
const insertResult = await pool.query(`
|
||||||
|
INSERT INTO dispensary_crawler_profiles (
|
||||||
|
dispensary_id,
|
||||||
|
profile_name,
|
||||||
|
profile_key,
|
||||||
|
crawler_type,
|
||||||
|
status,
|
||||||
|
status_reason,
|
||||||
|
status_changed_at,
|
||||||
|
config,
|
||||||
|
enabled,
|
||||||
|
consecutive_successes,
|
||||||
|
consecutive_failures,
|
||||||
|
created_at,
|
||||||
|
updated_at
|
||||||
|
) VALUES (
|
||||||
|
$1, $2, $3, 'dutchie', 'sandbox', 'Newly promoted from discovery', CURRENT_TIMESTAMP,
|
||||||
|
$4::jsonb, true, 0, 0, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP
|
||||||
|
)
|
||||||
|
RETURNING id
|
||||||
|
`, [
|
||||||
|
dispensaryId,
|
||||||
|
dispensaryName,
|
||||||
|
profileKey,
|
||||||
|
JSON.stringify({
|
||||||
|
platformDispensaryId,
|
||||||
|
useBothModes: true,
|
||||||
|
downloadImages: true,
|
||||||
|
trackStock: true,
|
||||||
|
}),
|
||||||
|
]);
|
||||||
|
|
||||||
|
const profileId = insertResult.rows[0].id;
|
||||||
|
|
||||||
|
// Create status alert for new sandbox store
|
||||||
|
await createStatusAlert(
|
||||||
|
dispensaryId,
|
||||||
|
profileId,
|
||||||
|
'promoted',
|
||||||
|
'info',
|
||||||
|
`${dispensaryName} promoted to sandbox - awaiting first successful crawl`,
|
||||||
|
null,
|
||||||
|
'sandbox',
|
||||||
|
{ source: 'discovery_promotion', platformDispensaryId }
|
||||||
|
);
|
||||||
|
|
||||||
|
return { profileId, created: true };
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Promote a single discovery location to dispensaries table
|
||||||
|
* Idempotent: uses ON CONFLICT on platform_dispensary_id
|
||||||
|
*/
|
||||||
|
async function promoteLocation(
|
||||||
|
loc: DiscoveryLocationRow
|
||||||
|
): Promise<PromotionResult> {
|
||||||
|
const slug = loc.platform_slug || generateSlug(loc.name, loc.city || '', loc.state_code || '');
|
||||||
|
|
||||||
|
// Upsert into dispensaries
|
||||||
|
// ON CONFLICT by platform_dispensary_id ensures idempotency
|
||||||
|
const upsertResult = await pool.query(`
|
||||||
|
INSERT INTO dispensaries (
|
||||||
|
platform,
|
||||||
|
name,
|
||||||
|
slug,
|
||||||
|
city,
|
||||||
|
state,
|
||||||
|
address1,
|
||||||
|
address2,
|
||||||
|
zipcode,
|
||||||
|
postal_code,
|
||||||
|
phone,
|
||||||
|
website,
|
||||||
|
email,
|
||||||
|
latitude,
|
||||||
|
longitude,
|
||||||
|
timezone,
|
||||||
|
platform_dispensary_id,
|
||||||
|
menu_url,
|
||||||
|
menu_type,
|
||||||
|
description,
|
||||||
|
logo_image,
|
||||||
|
banner_image,
|
||||||
|
offer_pickup,
|
||||||
|
offer_delivery,
|
||||||
|
is_medical,
|
||||||
|
is_recreational,
|
||||||
|
chain_slug,
|
||||||
|
enterprise_id,
|
||||||
|
c_name,
|
||||||
|
country,
|
||||||
|
status,
|
||||||
|
crawl_enabled,
|
||||||
|
dutchie_verified,
|
||||||
|
dutchie_verified_at,
|
||||||
|
dutchie_discovery_id,
|
||||||
|
created_at,
|
||||||
|
updated_at
|
||||||
|
) VALUES (
|
||||||
|
$1, $2, $3, $4, $5, $6, $7, $8, $9, $10,
|
||||||
|
$11, $12, $13, $14, $15, $16, $17, $18, $19, $20,
|
||||||
|
$21, $22, $23, $24, $25, $26, $27, $28, $29, $30,
|
||||||
|
$31, $32, $33, $34, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP
|
||||||
|
)
|
||||||
|
ON CONFLICT (platform_dispensary_id) WHERE platform_dispensary_id IS NOT NULL
|
||||||
|
DO UPDATE SET
|
||||||
|
name = EXCLUDED.name,
|
||||||
|
city = EXCLUDED.city,
|
||||||
|
state = EXCLUDED.state,
|
||||||
|
address1 = EXCLUDED.address1,
|
||||||
|
address2 = EXCLUDED.address2,
|
||||||
|
zipcode = EXCLUDED.zipcode,
|
||||||
|
postal_code = EXCLUDED.postal_code,
|
||||||
|
phone = EXCLUDED.phone,
|
||||||
|
website = EXCLUDED.website,
|
||||||
|
email = EXCLUDED.email,
|
||||||
|
latitude = EXCLUDED.latitude,
|
||||||
|
longitude = EXCLUDED.longitude,
|
||||||
|
timezone = EXCLUDED.timezone,
|
||||||
|
menu_url = EXCLUDED.menu_url,
|
||||||
|
description = EXCLUDED.description,
|
||||||
|
logo_image = EXCLUDED.logo_image,
|
||||||
|
banner_image = EXCLUDED.banner_image,
|
||||||
|
offer_pickup = EXCLUDED.offer_pickup,
|
||||||
|
offer_delivery = EXCLUDED.offer_delivery,
|
||||||
|
is_medical = EXCLUDED.is_medical,
|
||||||
|
is_recreational = EXCLUDED.is_recreational,
|
||||||
|
chain_slug = EXCLUDED.chain_slug,
|
||||||
|
enterprise_id = EXCLUDED.enterprise_id,
|
||||||
|
c_name = EXCLUDED.c_name,
|
||||||
|
country = EXCLUDED.country,
|
||||||
|
status = EXCLUDED.status,
|
||||||
|
dutchie_discovery_id = EXCLUDED.dutchie_discovery_id,
|
||||||
|
updated_at = CURRENT_TIMESTAMP
|
||||||
|
RETURNING id, (xmax = 0) AS inserted
|
||||||
|
`, [
|
||||||
|
loc.platform || 'dutchie', // $1 platform
|
||||||
|
loc.name, // $2 name
|
||||||
|
slug, // $3 slug
|
||||||
|
loc.city, // $4 city
|
||||||
|
loc.state_code, // $5 state
|
||||||
|
loc.address_line1, // $6 address1
|
||||||
|
loc.address_line2, // $7 address2
|
||||||
|
loc.postal_code, // $8 zipcode
|
||||||
|
loc.postal_code, // $9 postal_code
|
||||||
|
loc.phone, // $10 phone
|
||||||
|
loc.website, // $11 website
|
||||||
|
loc.email, // $12 email
|
||||||
|
loc.latitude, // $13 latitude
|
||||||
|
loc.longitude, // $14 longitude
|
||||||
|
loc.timezone, // $15 timezone
|
||||||
|
loc.platform_location_id, // $16 platform_dispensary_id
|
||||||
|
loc.platform_menu_url, // $17 menu_url
|
||||||
|
'dutchie', // $18 menu_type
|
||||||
|
loc.description, // $19 description
|
||||||
|
loc.logo_image, // $20 logo_image
|
||||||
|
loc.banner_image, // $21 banner_image
|
||||||
|
loc.offers_pickup ?? true, // $22 offer_pickup
|
||||||
|
loc.offers_delivery ?? false, // $23 offer_delivery
|
||||||
|
loc.is_medical ?? false, // $24 is_medical
|
||||||
|
loc.is_recreational ?? true, // $25 is_recreational
|
||||||
|
loc.chain_slug, // $26 chain_slug
|
||||||
|
loc.enterprise_id, // $27 enterprise_id
|
||||||
|
loc.c_name, // $28 c_name
|
||||||
|
loc.country || 'United States', // $29 country
|
||||||
|
loc.store_status || 'open', // $30 status
|
||||||
|
true, // $31 crawl_enabled
|
||||||
|
true, // $32 dutchie_verified
|
||||||
|
new Date(), // $33 dutchie_verified_at
|
||||||
|
loc.id, // $34 dutchie_discovery_id
|
||||||
|
]);
|
||||||
|
|
||||||
|
const dispensaryId = upsertResult.rows[0].id;
|
||||||
|
const wasInserted = upsertResult.rows[0].inserted;
|
||||||
|
|
||||||
|
// Link discovery location back to dispensary and update status
|
||||||
|
await pool.query(`
|
||||||
|
UPDATE dutchie_discovery_locations
|
||||||
|
SET
|
||||||
|
dispensary_id = $1,
|
||||||
|
status = 'verified',
|
||||||
|
verified_at = CURRENT_TIMESTAMP,
|
||||||
|
verified_by = 'auto-promotion'
|
||||||
|
WHERE id = $2
|
||||||
|
`, [dispensaryId, loc.id]);
|
||||||
|
|
||||||
|
// Create crawler profile with sandbox status for new dispensaries
|
||||||
|
if (wasInserted && loc.platform_location_id) {
|
||||||
|
await ensureCrawlerProfile(dispensaryId, loc.name, loc.platform_location_id);
|
||||||
|
}
|
||||||
|
|
||||||
|
const action = wasInserted ? 'promoted_create' : 'promoted_update';
|
||||||
|
|
||||||
|
// Log the promotion
|
||||||
|
await logPromotionAction(
|
||||||
|
action,
|
||||||
|
loc.id,
|
||||||
|
dispensaryId,
|
||||||
|
loc.state_code,
|
||||||
|
loc.name,
|
||||||
|
null,
|
||||||
|
{ slug, city: loc.city, platform_location_id: loc.platform_location_id }
|
||||||
|
);
|
||||||
|
|
||||||
|
return {
|
||||||
|
discoveryId: loc.id,
|
||||||
|
dispensaryId,
|
||||||
|
action: wasInserted ? 'created' : 'updated',
|
||||||
|
name: loc.name,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Promote all valid discovered locations to dispensaries
|
||||||
|
*
|
||||||
|
* @param stateCode Optional filter by state (e.g., 'CA', 'AZ')
|
||||||
|
* @param dryRun If true, only validate without making changes
|
||||||
|
*/
|
||||||
|
export async function promoteDiscoveredLocations(
|
||||||
|
stateCode?: string,
|
||||||
|
dryRun = false
|
||||||
|
): Promise<PromotionSummary> {
|
||||||
|
const startTime = Date.now();
|
||||||
|
|
||||||
|
let query = `
|
||||||
|
SELECT * FROM dutchie_discovery_locations
|
||||||
|
WHERE status = 'discovered'
|
||||||
|
`;
|
||||||
|
const params: string[] = [];
|
||||||
|
|
||||||
|
if (stateCode) {
|
||||||
|
query += ` AND state_code = $1`;
|
||||||
|
params.push(stateCode);
|
||||||
|
}
|
||||||
|
|
||||||
|
query += ` ORDER BY id`;
|
||||||
|
|
||||||
|
const result = await pool.query(query, params);
|
||||||
|
const locations = result.rows as DiscoveryLocationRow[];
|
||||||
|
|
||||||
|
const results: PromotionResult[] = [];
|
||||||
|
const rejectedRecords: PromotionSummary['rejectedRecords'] = [];
|
||||||
|
let created = 0;
|
||||||
|
let updated = 0;
|
||||||
|
let skipped = 0;
|
||||||
|
let rejected = 0;
|
||||||
|
|
||||||
|
for (const loc of locations) {
|
||||||
|
// Step 2: Validation
|
||||||
|
const validation = validateForPromotion(loc);
|
||||||
|
|
||||||
|
if (!validation.valid) {
|
||||||
|
rejected++;
|
||||||
|
rejectedRecords.push({
|
||||||
|
id: loc.id,
|
||||||
|
name: loc.name,
|
||||||
|
errors: validation.errors,
|
||||||
|
});
|
||||||
|
|
||||||
|
// Mark as rejected if not dry run
|
||||||
|
if (!dryRun) {
|
||||||
|
await pool.query(`
|
||||||
|
UPDATE dutchie_discovery_locations
|
||||||
|
SET status = 'rejected', notes = $1
|
||||||
|
WHERE id = $2
|
||||||
|
`, [validation.errors.join('; '), loc.id]);
|
||||||
|
|
||||||
|
// Log the rejection
|
||||||
|
await logPromotionAction(
|
||||||
|
'rejected',
|
||||||
|
loc.id,
|
||||||
|
null,
|
||||||
|
loc.state_code,
|
||||||
|
loc.name,
|
||||||
|
validation.errors
|
||||||
|
);
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Step 3: Promotion (skip if dry run)
|
||||||
|
if (dryRun) {
|
||||||
|
skipped++;
|
||||||
|
results.push({
|
||||||
|
discoveryId: loc.id,
|
||||||
|
dispensaryId: 0,
|
||||||
|
action: 'skipped',
|
||||||
|
name: loc.name,
|
||||||
|
});
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
const promotionResult = await promoteLocation(loc);
|
||||||
|
results.push(promotionResult);
|
||||||
|
|
||||||
|
if (promotionResult.action === 'created') {
|
||||||
|
created++;
|
||||||
|
} else {
|
||||||
|
updated++;
|
||||||
|
}
|
||||||
|
} catch (error: any) {
|
||||||
|
console.error(`Failed to promote location ${loc.id} (${loc.name}):`, error.message);
|
||||||
|
rejected++;
|
||||||
|
rejectedRecords.push({
|
||||||
|
id: loc.id,
|
||||||
|
name: loc.name,
|
||||||
|
errors: [`Promotion error: ${error.message}`],
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return {
|
||||||
|
totalProcessed: locations.length,
|
||||||
|
created,
|
||||||
|
updated,
|
||||||
|
skipped,
|
||||||
|
rejected,
|
||||||
|
results,
|
||||||
|
rejectedRecords,
|
||||||
|
durationMs: Date.now() - startTime,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Promote a single discovery location by ID
|
||||||
|
*/
|
||||||
|
export async function promoteSingleLocation(
|
||||||
|
discoveryId: number
|
||||||
|
): Promise<PromotionResult> {
|
||||||
|
const result = await pool.query(
|
||||||
|
`SELECT * FROM dutchie_discovery_locations WHERE id = $1`,
|
||||||
|
[discoveryId]
|
||||||
|
);
|
||||||
|
|
||||||
|
if (result.rows.length === 0) {
|
||||||
|
throw new Error(`Discovery location ${discoveryId} not found`);
|
||||||
|
}
|
||||||
|
|
||||||
|
const loc = result.rows[0] as DiscoveryLocationRow;
|
||||||
|
|
||||||
|
// Validate
|
||||||
|
const validation = validateForPromotion(loc);
|
||||||
|
if (!validation.valid) {
|
||||||
|
throw new Error(`Validation failed: ${validation.errors.join(', ')}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Promote
|
||||||
|
return promoteLocation(loc);
|
||||||
|
}
|
||||||
@@ -18,8 +18,8 @@ import {
|
|||||||
getCitiesToCrawl,
|
getCitiesToCrawl,
|
||||||
getCityBySlug,
|
getCityBySlug,
|
||||||
seedKnownCities,
|
seedKnownCities,
|
||||||
ARIZONA_CITIES,
|
|
||||||
} from './city-discovery';
|
} from './city-discovery';
|
||||||
|
import { getCitiesForState } from './location-discovery';
|
||||||
import {
|
import {
|
||||||
DiscoveryLocation,
|
DiscoveryLocation,
|
||||||
DiscoveryCity,
|
DiscoveryCity,
|
||||||
@@ -27,6 +27,11 @@ import {
|
|||||||
mapLocationRowToLocation,
|
mapLocationRowToLocation,
|
||||||
mapCityRowToCity,
|
mapCityRowToCity,
|
||||||
} from './types';
|
} from './types';
|
||||||
|
import {
|
||||||
|
validateDiscoveredLocations,
|
||||||
|
promoteDiscoveredLocations,
|
||||||
|
promoteSingleLocation,
|
||||||
|
} from './promotion';
|
||||||
|
|
||||||
export function createDiscoveryRoutes(pool: Pool): Router {
|
export function createDiscoveryRoutes(pool: Pool): Router {
|
||||||
const router = Router();
|
const router = Router();
|
||||||
@@ -53,44 +58,44 @@ export function createDiscoveryRoutes(pool: Pool): Router {
|
|||||||
offset = '0',
|
offset = '0',
|
||||||
} = req.query;
|
} = req.query;
|
||||||
|
|
||||||
let whereClause = 'WHERE platform = $1 AND active = TRUE';
|
let whereClause = 'WHERE dl.platform = $1 AND dl.active = TRUE';
|
||||||
const params: any[] = [platform];
|
const params: any[] = [platform];
|
||||||
let paramIndex = 2;
|
let paramIndex = 2;
|
||||||
|
|
||||||
if (status) {
|
if (status) {
|
||||||
whereClause += ` AND status = $${paramIndex}`;
|
whereClause += ` AND dl.status = $${paramIndex}`;
|
||||||
params.push(status);
|
params.push(status);
|
||||||
paramIndex++;
|
paramIndex++;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (stateCode) {
|
if (stateCode) {
|
||||||
whereClause += ` AND state_code = $${paramIndex}`;
|
whereClause += ` AND dl.state_code = $${paramIndex}`;
|
||||||
params.push(stateCode);
|
params.push(stateCode);
|
||||||
paramIndex++;
|
paramIndex++;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (countryCode) {
|
if (countryCode) {
|
||||||
whereClause += ` AND country_code = $${paramIndex}`;
|
whereClause += ` AND dl.country_code = $${paramIndex}`;
|
||||||
params.push(countryCode);
|
params.push(countryCode);
|
||||||
paramIndex++;
|
paramIndex++;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (city) {
|
if (city) {
|
||||||
whereClause += ` AND city ILIKE $${paramIndex}`;
|
whereClause += ` AND dl.city ILIKE $${paramIndex}`;
|
||||||
params.push(`%${city}%`);
|
params.push(`%${city}%`);
|
||||||
paramIndex++;
|
paramIndex++;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (search) {
|
if (search) {
|
||||||
whereClause += ` AND (name ILIKE $${paramIndex} OR platform_slug ILIKE $${paramIndex})`;
|
whereClause += ` AND (dl.name ILIKE $${paramIndex} OR dl.platform_slug ILIKE $${paramIndex})`;
|
||||||
params.push(`%${search}%`);
|
params.push(`%${search}%`);
|
||||||
paramIndex++;
|
paramIndex++;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (hasDispensary === 'true') {
|
if (hasDispensary === 'true') {
|
||||||
whereClause += ' AND dispensary_id IS NOT NULL';
|
whereClause += ' AND dl.dispensary_id IS NOT NULL';
|
||||||
} else if (hasDispensary === 'false') {
|
} else if (hasDispensary === 'false') {
|
||||||
whereClause += ' AND dispensary_id IS NULL';
|
whereClause += ' AND dl.dispensary_id IS NULL';
|
||||||
}
|
}
|
||||||
|
|
||||||
params.push(parseInt(limit as string, 10), parseInt(offset as string, 10));
|
params.push(parseInt(limit as string, 10), parseInt(offset as string, 10));
|
||||||
@@ -705,15 +710,22 @@ export function createDiscoveryRoutes(pool: Pool): Router {
|
|||||||
return res.status(400).json({ error: 'stateCode is required' });
|
return res.status(400).json({ error: 'stateCode is required' });
|
||||||
}
|
}
|
||||||
|
|
||||||
let cities: any[] = [];
|
// Dynamically fetch cities from Dutchie for any state
|
||||||
if (stateCode === 'AZ') {
|
const cityNames = await getCitiesForState(stateCode as string);
|
||||||
cities = ARIZONA_CITIES;
|
|
||||||
} else {
|
if (cityNames.length === 0) {
|
||||||
return res.status(400).json({
|
return res.status(400).json({
|
||||||
error: `No predefined cities for state: ${stateCode}. Add cities to city-discovery.ts`,
|
error: `No cities found for state: ${stateCode}`,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Convert to seed format
|
||||||
|
const cities = cityNames.map(name => ({
|
||||||
|
name,
|
||||||
|
slug: name.toLowerCase().replace(/\s+/g, '-').replace(/[^a-z0-9-]/g, ''),
|
||||||
|
stateCode: stateCode as string,
|
||||||
|
}));
|
||||||
|
|
||||||
const result = await seedKnownCities(pool, cities);
|
const result = await seedKnownCities(pool, cities);
|
||||||
|
|
||||||
res.json({
|
res.json({
|
||||||
@@ -834,6 +846,136 @@ export function createDiscoveryRoutes(pool: Pool): Router {
       }
     });

+  // ============================================================
+  // PROMOTION ENDPOINTS
+  // ============================================================
+
+  /**
+   * GET /api/discovery/admin/validate
+   * Validate discovered locations before promotion
+   */
+  router.get('/admin/validate', async (req: Request, res: Response) => {
+    try {
+      const { stateCode } = req.query;
+      const summary = await validateDiscoveredLocations(stateCode as string | undefined);
+
+      res.json({
+        success: true,
+        ...summary,
+      });
+    } catch (error: any) {
+      res.status(500).json({ error: error.message });
+    }
+  });
+
+  /**
+   * POST /api/discovery/admin/promote
+   * Promote all valid discovered locations to dispensaries (idempotent)
+   *
+   * Query params:
+   * - stateCode: Filter by state (e.g., 'CA', 'AZ')
+   * - dryRun: If true, only validate without making changes
+   */
+  router.post('/admin/promote', async (req: Request, res: Response) => {
+    try {
+      const { stateCode, dryRun = false } = req.body;
+
+      console.log(`[Discovery API] Starting promotion for ${stateCode || 'all states'} (dryRun=${dryRun})`);
+      const summary = await promoteDiscoveredLocations(stateCode, dryRun);
+
+      res.json({
+        success: true,
+        ...summary,
+      });
+    } catch (error: any) {
+      res.status(500).json({ error: error.message });
+    }
+  });
+
+  /**
+   * POST /api/discovery/admin/promote/:id
+   * Promote a single discovery location by ID
+   */
+  router.post('/admin/promote/:id', async (req: Request, res: Response) => {
+    try {
+      const { id } = req.params;
+
+      console.log(`[Discovery API] Promoting single location ${id}`);
+      const result = await promoteSingleLocation(parseInt(id, 10));
+
+      res.json({
+        success: true,
+        ...result,
+      });
+    } catch (error: any) {
+      res.status(500).json({ error: error.message });
+    }
+  });
+
+  // ============================================================
+  // PROMOTION LOG
+  // ============================================================
+
+  /**
+   * GET /api/discovery/promotion-log
+   * Get promotion audit log
+   */
+  router.get('/promotion-log', async (req: Request, res: Response) => {
+    try {
+      const { state, dispensary_id, limit = '100' } = req.query;
+
+      let whereClause = 'WHERE 1=1';
+      const params: any[] = [];
+      let paramIndex = 1;
+
+      if (state) {
+        whereClause += ` AND pl.state_code = $${paramIndex}`;
+        params.push(state);
+        paramIndex++;
+      }
+
+      if (dispensary_id) {
+        whereClause += ` AND pl.dispensary_id = $${paramIndex}`;
+        params.push(parseInt(dispensary_id as string, 10));
+        paramIndex++;
+      }
+
+      params.push(parseInt(limit as string, 10));
+
+      const { rows } = await pool.query(`
+        SELECT
+          pl.*,
+          dl.name as discovery_name,
+          d.name as dispensary_name
+        FROM dutchie_promotion_log pl
+        LEFT JOIN dutchie_discovery_locations dl ON pl.discovery_id = dl.id
+        LEFT JOIN dispensaries d ON pl.dispensary_id = d.id
+        ${whereClause}
+        ORDER BY pl.created_at DESC
+        LIMIT $${paramIndex}
+      `, params);
+
+      res.json({
+        logs: rows.map((r: any) => ({
+          id: r.id,
+          discoveryId: r.discovery_id,
+          dispensaryId: r.dispensary_id,
+          action: r.action,
+          stateCode: r.state_code,
+          storeName: r.store_name,
+          validationErrors: r.validation_errors,
+          fieldChanges: r.field_changes,
+          triggeredBy: r.triggered_by,
+          createdAt: r.created_at,
+          discoveryName: r.discovery_name,
+          dispensaryName: r.dispensary_name,
+        })),
+      });
+    } catch (error: any) {
+      res.status(500).json({ error: error.message });
+    }
+  });
+
   return router;
 }
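Reviewer note: a minimal sketch of how an admin client could exercise the promotion endpoints added above. The base URL, port, and `x-api-key` header are assumptions for illustration, not part of this diff.

```typescript
// Hypothetical admin-side call; API_BASE and API_KEY are placeholders.
const API_BASE = process.env.API_BASE || 'http://localhost:3010';

async function promoteState(stateCode: string, dryRun = true): Promise<void> {
  const res = await fetch(`${API_BASE}/api/discovery/admin/promote`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json', 'x-api-key': process.env.API_KEY || '' },
    body: JSON.stringify({ stateCode, dryRun }),
  });
  if (!res.ok) throw new Error(`Promotion request failed: ${res.status}`);
  console.log(await res.json()); // { success: true, ...promotion summary }
}

promoteState('AZ', true).catch(console.error);
```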
@@ -60,6 +60,7 @@ export interface DiscoveryLocation {
   stateCode: string | null;
   postalCode: string | null;
   countryCode: string | null;
+  country: string | null;
   latitude: number | null;
   longitude: number | null;
   timezone: string | null;
@@ -72,6 +73,18 @@ export interface DiscoveryLocation {
   offersPickup: boolean | null;
   isRecreational: boolean | null;
   isMedical: boolean | null;
+  // New Dutchie fields
+  phone: string | null;
+  website: string | null;
+  email: string | null;
+  description: string | null;
+  logoImage: string | null;
+  bannerImage: string | null;
+  chainSlug: string | null;
+  enterpriseId: string | null;
+  cName: string | null;
+  storeStatus: string | null;
+  // Timestamps
   firstSeenAt: Date;
   lastSeenAt: Date;
   lastCheckedAt: Date | null;
@@ -96,6 +109,7 @@ export interface DiscoveryLocationRow {
   state_code: string | null;
   postal_code: string | null;
   country_code: string | null;
+  country: string | null;
   latitude: number | null;
   longitude: number | null;
   timezone: string | null;
@@ -108,6 +122,18 @@ export interface DiscoveryLocationRow {
   offers_pickup: boolean | null;
   is_recreational: boolean | null;
   is_medical: boolean | null;
+  // New Dutchie fields (snake_case for DB row)
+  phone: string | null;
+  website: string | null;
+  email: string | null;
+  description: string | null;
+  logo_image: string | null;
+  banner_image: string | null;
+  chain_slug: string | null;
+  enterprise_id: string | null;
+  c_name: string | null;
+  store_status: string | null;
+  // Timestamps
   first_seen_at: Date;
   last_seen_at: Date;
   last_checked_at: Date | null;
@@ -245,6 +271,7 @@ export function mapLocationRowToLocation(row: DiscoveryLocationRow): DiscoveryLo
     stateCode: row.state_code,
     postalCode: row.postal_code,
     countryCode: row.country_code,
+    country: row.country,
     latitude: row.latitude,
     longitude: row.longitude,
     timezone: row.timezone,
@@ -257,6 +284,18 @@ export function mapLocationRowToLocation(row: DiscoveryLocationRow): DiscoveryLo
     offersPickup: row.offers_pickup,
     isRecreational: row.is_recreational,
     isMedical: row.is_medical,
+    // New Dutchie fields
+    phone: row.phone,
+    website: row.website,
+    email: row.email,
+    description: row.description,
+    logoImage: row.logo_image,
+    bannerImage: row.banner_image,
+    chainSlug: row.chain_slug,
+    enterpriseId: row.enterprise_id,
+    cName: row.c_name,
+    storeStatus: row.store_status,
+    // Timestamps
     firstSeenAt: row.first_seen_at,
     lastSeenAt: row.last_seen_at,
     lastCheckedAt: row.last_checked_at,
@@ -16,6 +16,12 @@ import {
   NormalizedBrand,
   NormalizationResult,
 } from './types';
+import {
+  downloadProductImage,
+  ProductImageContext,
+  isImageStorageReady,
+  LocalImageSizes,
+} from '../utils/image-storage';

 const BATCH_SIZE = 100;

@@ -23,10 +29,21 @@ const BATCH_SIZE = 100;
 // PRODUCT UPSERTS
 // ============================================================

+export interface NewProductInfo {
+  id: number; // store_products.id
+  externalProductId: string; // provider_product_id
+  name: string;
+  brandName: string | null;
+  primaryImageUrl: string | null;
+  hasLocalImage?: boolean; // True if local_image_path is already set
+}
+
 export interface UpsertProductsResult {
   upserted: number;
   new: number;
   updated: number;
+  newProducts: NewProductInfo[]; // Details of newly created products
+  productsNeedingImages: NewProductInfo[]; // Products (new or updated) that need image downloads
 }

 /**
@@ -41,12 +58,14 @@ export async function upsertStoreProducts(
   options: { dryRun?: boolean } = {}
 ): Promise<UpsertProductsResult> {
   if (products.length === 0) {
-    return { upserted: 0, new: 0, updated: 0 };
+    return { upserted: 0, new: 0, updated: 0, newProducts: [], productsNeedingImages: [] };
   }

   const { dryRun = false } = options;
   let newCount = 0;
   let updatedCount = 0;
+  const newProducts: NewProductInfo[] = [];
+  const productsNeedingImages: NewProductInfo[] = [];

   // Process in batches
   for (let i = 0; i < products.length; i += BATCH_SIZE) {
@@ -68,10 +87,10 @@
       const result = await client.query(
         `INSERT INTO store_products (
           dispensary_id, provider, provider_product_id, provider_brand_id,
-          name, brand_name, category, subcategory,
+          name_raw, brand_name_raw, category_raw, subcategory_raw,
           price_rec, price_med, price_rec_special, price_med_special,
           is_on_special, discount_percent,
-          is_in_stock, stock_status,
+          is_in_stock, stock_status, stock_quantity, total_quantity_available,
           thc_percent, cbd_percent,
           image_url,
           first_seen_at, last_seen_at, updated_at
@@ -80,17 +99,17 @@
           $5, $6, $7, $8,
           $9, $10, $11, $12,
           $13, $14,
-          $15, $16,
-          $17, $18,
-          $19,
+          $15, $16, $17, $17,
+          $18, $19,
+          $20,
           NOW(), NOW(), NOW()
         )
         ON CONFLICT (dispensary_id, provider, provider_product_id)
         DO UPDATE SET
-          name = EXCLUDED.name,
-          brand_name = EXCLUDED.brand_name,
-          category = EXCLUDED.category,
-          subcategory = EXCLUDED.subcategory,
+          name_raw = EXCLUDED.name_raw,
+          brand_name_raw = EXCLUDED.brand_name_raw,
+          category_raw = EXCLUDED.category_raw,
+          subcategory_raw = EXCLUDED.subcategory_raw,
           price_rec = EXCLUDED.price_rec,
           price_med = EXCLUDED.price_med,
           price_rec_special = EXCLUDED.price_rec_special,
@@ -99,12 +118,14 @@
           discount_percent = EXCLUDED.discount_percent,
           is_in_stock = EXCLUDED.is_in_stock,
           stock_status = EXCLUDED.stock_status,
+          stock_quantity = EXCLUDED.stock_quantity,
+          total_quantity_available = EXCLUDED.total_quantity_available,
           thc_percent = EXCLUDED.thc_percent,
           cbd_percent = EXCLUDED.cbd_percent,
           image_url = EXCLUDED.image_url,
           last_seen_at = NOW(),
           updated_at = NOW()
-        RETURNING (xmax = 0) as is_new`,
+        RETURNING id, (xmax = 0) as is_new, (local_image_path IS NOT NULL) as has_local_image`,
         [
           product.dispensaryId,
           product.platform,
@@ -122,16 +143,38 @@
           productPricing?.discountPercent,
           productAvailability?.inStock ?? true,
           productAvailability?.stockStatus || 'unknown',
-          product.thcPercent,
-          product.cbdPercent,
+          productAvailability?.quantity ?? null, // stock_quantity and total_quantity_available
+          // Clamp THC/CBD to valid percentage range (0-100) - some products report mg as %
+          product.thcPercent !== null && product.thcPercent <= 100 ? product.thcPercent : null,
+          product.cbdPercent !== null && product.cbdPercent <= 100 ? product.cbdPercent : null,
           product.primaryImageUrl,
         ]
       );

-      if (result.rows[0]?.is_new) {
+      const row = result.rows[0];
+      const productInfo: NewProductInfo = {
+        id: row.id,
+        externalProductId: product.externalProductId,
+        name: product.name,
+        brandName: product.brandName,
+        primaryImageUrl: product.primaryImageUrl,
+        hasLocalImage: row.has_local_image,
+      };
+
+      if (row.is_new) {
         newCount++;
+        // Track new products
+        newProducts.push(productInfo);
+        // New products always need images (if they have a source URL)
+        if (product.primaryImageUrl && !row.has_local_image) {
+          productsNeedingImages.push(productInfo);
+        }
       } else {
         updatedCount++;
+        // Updated products need images only if they don't have a local image yet
+        if (product.primaryImageUrl && !row.has_local_image) {
+          productsNeedingImages.push(productInfo);
+        }
       }
     }

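A side note on the `RETURNING id, (xmax = 0) as is_new, ...` clause used above: in PostgreSQL, `xmax` is 0 for a freshly inserted row version, so a single upsert can report whether it inserted or updated without a second query. A minimal standalone sketch of just that pattern (the `demo_items` table is hypothetical, not part of this schema):

```typescript
import { Pool } from 'pg';

// Illustrates only the insert-vs-update detection used in the upsert above.
async function upsertDemo(pool: Pool, key: string, value: number): Promise<boolean> {
  const { rows } = await pool.query(
    `INSERT INTO demo_items (key, value) VALUES ($1, $2)
     ON CONFLICT (key) DO UPDATE SET value = EXCLUDED.value
     RETURNING id, (xmax = 0) AS is_new`,
    [key, value]
  );
  return rows[0].is_new; // true when inserted, false when an existing row was updated
}
```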
@@ -148,6 +191,8 @@
     upserted: newCount + updatedCount,
     new: newCount,
     updated: updatedCount,
+    newProducts,
+    productsNeedingImages,
   };
 }

@@ -212,8 +257,9 @@ export async function createStoreProductSnapshots(
       productAvailability?.inStock ?? true,
       productAvailability?.quantity,
       productAvailability?.stockStatus || 'unknown',
-      product.thcPercent,
-      product.cbdPercent,
+      // Clamp THC/CBD to valid percentage range (0-100) - some products report mg as %
+      product.thcPercent !== null && product.thcPercent <= 100 ? product.thcPercent : null,
+      product.cbdPercent !== null && product.cbdPercent <= 100 ? product.cbdPercent : null,
       product.primaryImageUrl,
       JSON.stringify(product.rawProduct),
     ]);
@@ -229,7 +275,7 @@ export async function createStoreProductSnapshots(
       `INSERT INTO store_product_snapshots (
         dispensary_id, provider, provider_product_id, crawl_run_id,
         captured_at,
-        name, brand_name, category, subcategory,
+        name_raw, brand_name_raw, category_raw, subcategory_raw,
         price_rec, price_med, price_rec_special, price_med_special,
         is_on_special, discount_percent,
         is_in_stock, stock_quantity, stock_status,
@@ -245,6 +291,202 @@ export async function createStoreProductSnapshots(
   return { created };
 }

+// ============================================================
+// VARIANT UPSERTS
+// ============================================================
+
+export interface UpsertVariantsResult {
+  upserted: number;
+  new: number;
+  updated: number;
+  snapshotsCreated: number;
+}
+
+/**
+ * Extract variant data from raw Dutchie product
+ */
+function extractVariantsFromRaw(rawProduct: any): any[] {
+  const children = rawProduct?.POSMetaData?.children || [];
+  return children.map((child: any) => ({
+    option: child.option || child.key || '',
+    canonicalSku: child.canonicalSKU || null,
+    canonicalId: child.canonicalID || null,
+    canonicalName: child.canonicalName || null,
+    priceRec: child.recPrice || child.price || null,
+    priceMed: child.medPrice || null,
+    priceRecSpecial: child.recSpecialPrice || null,
+    priceMedSpecial: child.medSpecialPrice || null,
+    quantity: child.quantityAvailable ?? child.quantity ?? null,
+    inStock: (child.quantityAvailable ?? child.quantity ?? 0) > 0,
+  }));
+}
+
+/**
+ * Parse weight value and unit from option string
+ * e.g., "1g" -> { value: 1, unit: "g" }
+ * "3.5g" -> { value: 3.5, unit: "g" }
+ * "1/8oz" -> { value: 0.125, unit: "oz" }
+ */
+function parseWeight(option: string): { value: number | null; unit: string | null } {
+  if (!option) return { value: null, unit: null };
+
+  // Handle fractions like "1/8oz"
+  const fractionMatch = option.match(/^(\d+)\/(\d+)\s*(g|oz|mg|ml)?$/i);
+  if (fractionMatch) {
+    const value = parseInt(fractionMatch[1]) / parseInt(fractionMatch[2]);
+    return { value, unit: fractionMatch[3]?.toLowerCase() || 'oz' };
+  }
+
+  // Handle decimals like "3.5g" or "100mg"
+  const decimalMatch = option.match(/^([\d.]+)\s*(g|oz|mg|ml|each)?$/i);
+  if (decimalMatch) {
+    return {
+      value: parseFloat(decimalMatch[1]),
+      unit: decimalMatch[2]?.toLowerCase() || null
+    };
+  }
+
+  return { value: null, unit: null };
+}
+
+/**
+ * Upsert variants for products and create variant snapshots
+ */
+export async function upsertProductVariants(
+  pool: Pool,
+  dispensaryId: number,
+  products: NormalizedProduct[],
+  crawlRunId: number | null,
+  options: { dryRun?: boolean } = {}
+): Promise<UpsertVariantsResult> {
+  if (products.length === 0) {
+    return { upserted: 0, new: 0, updated: 0, snapshotsCreated: 0 };
+  }
+
+  const { dryRun = false } = options;
+  let newCount = 0;
+  let updatedCount = 0;
+  let snapshotsCreated = 0;
+
+  for (const product of products) {
+    // Get the store_product_id for this product
+    const productResult = await pool.query(
+      `SELECT id FROM store_products
+       WHERE dispensary_id = $1 AND provider = $2 AND provider_product_id = $3`,
+      [dispensaryId, product.platform, product.externalProductId]
+    );
+
+    if (productResult.rows.length === 0) {
+      continue; // Product not found, skip variants
+    }
+
+    const storeProductId = productResult.rows[0].id;
+    const variants = extractVariantsFromRaw(product.rawProduct);
+
+    if (variants.length === 0) {
+      continue; // No variants to process
+    }
+
+    if (dryRun) {
+      console.log(`[DryRun] Would upsert ${variants.length} variants for product ${product.externalProductId}`);
+      continue;
+    }
+
+    for (const variant of variants) {
+      const { value: weightValue, unit: weightUnit } = parseWeight(variant.option);
+      const isOnSpecial = (variant.priceRecSpecial !== null && variant.priceRecSpecial < variant.priceRec) ||
+        (variant.priceMedSpecial !== null && variant.priceMedSpecial < variant.priceMed);
+
+      // Upsert variant
+      const variantResult = await pool.query(
+        `INSERT INTO product_variants (
+          store_product_id, dispensary_id,
+          option, canonical_sku, canonical_id, canonical_name,
+          price_rec, price_med, price_rec_special, price_med_special,
+          quantity, quantity_available, in_stock, is_on_special,
+          weight_value, weight_unit,
+          first_seen_at, last_seen_at, updated_at
+        ) VALUES (
+          $1, $2,
+          $3, $4, $5, $6,
+          $7, $8, $9, $10,
+          $11, $11, $12, $13,
+          $14, $15,
+          NOW(), NOW(), NOW()
+        )
+        ON CONFLICT (store_product_id, option)
+        DO UPDATE SET
+          canonical_sku = COALESCE(EXCLUDED.canonical_sku, product_variants.canonical_sku),
+          canonical_id = COALESCE(EXCLUDED.canonical_id, product_variants.canonical_id),
+          canonical_name = COALESCE(EXCLUDED.canonical_name, product_variants.canonical_name),
+          price_rec = EXCLUDED.price_rec,
+          price_med = EXCLUDED.price_med,
+          price_rec_special = EXCLUDED.price_rec_special,
+          price_med_special = EXCLUDED.price_med_special,
+          quantity = EXCLUDED.quantity,
+          quantity_available = EXCLUDED.quantity_available,
+          in_stock = EXCLUDED.in_stock,
+          is_on_special = EXCLUDED.is_on_special,
+          weight_value = COALESCE(EXCLUDED.weight_value, product_variants.weight_value),
+          weight_unit = COALESCE(EXCLUDED.weight_unit, product_variants.weight_unit),
+          last_seen_at = NOW(),
+          last_price_change_at = CASE
+            WHEN product_variants.price_rec IS DISTINCT FROM EXCLUDED.price_rec
+              OR product_variants.price_rec_special IS DISTINCT FROM EXCLUDED.price_rec_special
+            THEN NOW()
+            ELSE product_variants.last_price_change_at
+          END,
+          last_stock_change_at = CASE
+            WHEN product_variants.quantity IS DISTINCT FROM EXCLUDED.quantity
+            THEN NOW()
+            ELSE product_variants.last_stock_change_at
+          END,
+          updated_at = NOW()
+        RETURNING id, (xmax = 0) as is_new`,
+        [
+          storeProductId, dispensaryId,
+          variant.option, variant.canonicalSku, variant.canonicalId, variant.canonicalName,
+          variant.priceRec, variant.priceMed, variant.priceRecSpecial, variant.priceMedSpecial,
+          variant.quantity, variant.inStock, isOnSpecial,
+          weightValue, weightUnit,
+        ]
+      );

+      const variantId = variantResult.rows[0].id;
+      if (variantResult.rows[0]?.is_new) {
+        newCount++;
+      } else {
+        updatedCount++;
+      }
+
+      // Create variant snapshot
+      await pool.query(
+        `INSERT INTO product_variant_snapshots (
+          product_variant_id, store_product_id, dispensary_id, crawl_run_id,
+          option,
+          price_rec, price_med, price_rec_special, price_med_special,
+          quantity, in_stock, is_on_special,
+          captured_at
+        ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, NOW())`,
+        [
+          variantId, storeProductId, dispensaryId, crawlRunId,
+          variant.option,
+          variant.priceRec, variant.priceMed, variant.priceRecSpecial, variant.priceMedSpecial,
+          variant.quantity, variant.inStock, isOnSpecial,
+        ]
+      );
+      snapshotsCreated++;
+    }
+  }
+
+  return {
+    upserted: newCount + updatedCount,
+    new: newCount,
+    updated: updatedCount,
+    snapshotsCreated,
+  };
+}
+
 // ============================================================
 // DISCONTINUED PRODUCTS
 // ============================================================
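Quick sanity check for the `parseWeight` helper introduced above; the expected values below are inferred from its doc-comment examples rather than from a test suite.

```typescript
// Hypothetical spot-checks (parseWeight is module-private in the diff).
console.log(parseWeight('1g'));    // { value: 1, unit: 'g' }
console.log(parseWeight('3.5g'));  // { value: 3.5, unit: 'g' }
console.log(parseWeight('1/8oz')); // { value: 0.125, unit: 'oz' }
console.log(parseWeight('100mg')); // { value: 100, unit: 'mg' }
console.log(parseWeight(''));      // { value: null, unit: null }
```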
@@ -366,6 +608,19 @@ export async function upsertBrands(
 // FULL HYDRATION
 // ============================================================

+export interface ImageDownloadResult {
+  downloaded: number;
+  skipped: number;
+  failed: number;
+  bytesTotal: number;
+}
+
+export interface DispensaryContext {
+  stateCode: string;
+  storeSlug: string;
+  hasExistingProducts?: boolean; // True if store already has products with local images
+}
+
 export interface HydratePayloadResult {
   productsUpserted: number;
   productsNew: number;
@@ -373,6 +628,157 @@ export interface HydratePayloadResult {
   productsDiscontinued: number;
   snapshotsCreated: number;
   brandsCreated: number;
+  variantsUpserted: number;
+  variantsNew: number;
+  variantSnapshotsCreated: number;
+  imagesDownloaded: number;
+  imagesSkipped: number;
+  imagesFailed: number;
+  imagesBytesTotal: number;
+}
+
+/**
+ * Helper to create slug from string
+ */
+function slugify(str: string): string {
+  return str
+    .toLowerCase()
+    .replace(/[^a-z0-9]+/g, '-')
+    .replace(/^-+|-+$/g, '')
+    .substring(0, 50) || 'unknown';
+}
+
+/**
+ * Download images for new products and update their local paths
+ */
+export async function downloadProductImages(
+  pool: Pool,
+  newProducts: NewProductInfo[],
+  dispensaryContext: DispensaryContext,
+  options: { dryRun?: boolean; concurrency?: number } = {}
+): Promise<ImageDownloadResult> {
+  const { dryRun = false, concurrency = 5 } = options;
+
+  // Filter products that have images to download
+  const productsWithImages = newProducts.filter(p => p.primaryImageUrl);
+
+  if (productsWithImages.length === 0) {
+    return { downloaded: 0, skipped: 0, failed: 0, bytesTotal: 0 };
+  }
+
+  // Check if image storage is ready
+  if (!isImageStorageReady()) {
+    console.warn('[ImageDownload] Image storage not initialized, skipping downloads');
+    return { downloaded: 0, skipped: productsWithImages.length, failed: 0, bytesTotal: 0 };
+  }
+
+  if (dryRun) {
+    console.log(`[DryRun] Would download ${productsWithImages.length} images`);
+    return { downloaded: 0, skipped: productsWithImages.length, failed: 0, bytesTotal: 0 };
+  }
+
+  let downloaded = 0;
+  let skipped = 0;
+  let failed = 0;
+  let bytesTotal = 0;
+
+  // Process in batches with concurrency limit
+  for (let i = 0; i < productsWithImages.length; i += concurrency) {
+    const batch = productsWithImages.slice(i, i + concurrency);
+
+    const results = await Promise.allSettled(
+      batch.map(async (product) => {
+        const ctx: ProductImageContext = {
+          stateCode: dispensaryContext.stateCode,
+          storeSlug: dispensaryContext.storeSlug,
+          brandSlug: slugify(product.brandName || 'unknown'),
+          productId: product.externalProductId,
+        };
+
+        const result = await downloadProductImage(product.primaryImageUrl!, ctx, { skipIfExists: true });
+
+        if (result.success) {
+          // Update the database with local image path
+          const imagesJson = JSON.stringify({
+            full: result.urls!.full,
+            medium: result.urls!.medium,
+            thumb: result.urls!.thumb,
+          });
+
+          await pool.query(
+            `UPDATE store_products
+             SET local_image_path = $1, images = $2
+             WHERE id = $3`,
+            [result.urls!.full, imagesJson, product.id]
+          );
+        }
+
+        return result;
+      })
+    );
+
+    for (const result of results) {
+      if (result.status === 'fulfilled') {
+        const downloadResult = result.value;
+        if (downloadResult.success) {
+          if (downloadResult.skipped) {
+            skipped++;
+          } else {
+            downloaded++;
+            bytesTotal += downloadResult.bytesDownloaded || 0;
+          }
+        } else {
+          failed++;
+          console.warn(`[ImageDownload] Failed: ${downloadResult.error}`);
+        }
+      } else {
+        failed++;
+        console.error(`[ImageDownload] Error:`, result.reason);
+      }
+    }
+  }
+
+  console.log(`[ImageDownload] Downloaded: ${downloaded}, Skipped: ${skipped}, Failed: ${failed}, Bytes: ${bytesTotal}`);
+  return { downloaded, skipped, failed, bytesTotal };
+}
+
+/**
+ * Get dispensary context for image paths
+ * Also checks if this dispensary already has products with local images
+ * to skip unnecessary filesystem checks for existing stores
+ */
+async function getDispensaryContext(pool: Pool, dispensaryId: number): Promise<DispensaryContext | null> {
+  try {
+    const result = await pool.query(
+      `SELECT
+        d.state,
+        d.slug,
+        d.name,
+        EXISTS(
+          SELECT 1 FROM store_products sp
+          WHERE sp.dispensary_id = d.id
+            AND sp.local_image_path IS NOT NULL
+          LIMIT 1
+        ) as has_local_images
+      FROM dispensaries d
+      WHERE d.id = $1`,
+      [dispensaryId]
+    );
+
+    if (result.rows.length === 0) {
+      return null;
+    }
+
+    const row = result.rows[0];
+    return {
+      stateCode: row.state || 'unknown',
+      storeSlug: row.slug || slugify(row.name || `store-${dispensaryId}`),
+      hasExistingProducts: row.has_local_images,
+    };
+  } catch (error) {
+    console.error('[getDispensaryContext] Error:', error);
+    return null;
+  }
 }

 /**
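A minimal sketch of calling `downloadProductImages` on its own, outside the hydration flow shown below; the connection string and product values are placeholders for illustration.

```typescript
import { Pool } from 'pg';

// Hypothetical standalone invocation; normally hydrateToCanonical drives this.
async function demoImageDownload(): Promise<void> {
  const pool = new Pool({ connectionString: process.env.DATABASE_URL });
  const result = await downloadProductImages(
    pool,
    [{
      id: 123,
      externalProductId: 'demo-product-id',
      name: 'Demo Product',
      brandName: 'Demo Brand',
      primaryImageUrl: 'https://example.com/demo.png',
    }],
    { stateCode: 'az', storeSlug: 'demo-store' },
    { dryRun: true, concurrency: 2 }
  );
  console.log(result); // { downloaded, skipped, failed, bytesTotal }
  await pool.end();
}
```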
@@ -383,9 +789,9 @@ export async function hydrateToCanonical(
   dispensaryId: number,
   normResult: NormalizationResult,
   crawlRunId: number | null,
-  options: { dryRun?: boolean } = {}
+  options: { dryRun?: boolean; downloadImages?: boolean } = {}
 ): Promise<HydratePayloadResult> {
-  const { dryRun = false } = options;
+  const { dryRun = false, downloadImages: shouldDownloadImages = true } = options;

   // 1. Upsert brands
   const brandResult = await upsertBrands(pool, normResult.brands, { dryRun });
@@ -399,7 +805,7 @@ export async function hydrateToCanonical(
     { dryRun }
   );

-  // 3. Create snapshots
+  // 3. Create product snapshots
   const snapshotResult = await createStoreProductSnapshots(
     pool,
     dispensaryId,
@@ -410,7 +816,16 @@ export async function hydrateToCanonical(
     { dryRun }
   );

-  // 4. Mark discontinued products
+  // 4. Upsert variants and create variant snapshots
+  const variantResult = await upsertProductVariants(
+    pool,
+    dispensaryId,
+    normResult.products,
+    crawlRunId,
+    { dryRun }
+  );
+
+  // 5. Mark discontinued products
   const currentProductIds = new Set(
     normResult.products.map((p) => p.externalProductId)
   );
@@ -424,6 +839,36 @@ export async function hydrateToCanonical(
     { dryRun }
   );

+  // 6. Download images for products that need them
+  // This includes:
+  // - New products (always need images)
+  // - Updated products that don't have local images yet (backfill)
+  // This avoids:
+  // - Filesystem checks for products that already have local images
+  // - Unnecessary HTTP requests for products with existing images
+  let imageResult: ImageDownloadResult = { downloaded: 0, skipped: 0, failed: 0, bytesTotal: 0 };
+
+  if (shouldDownloadImages && productResult.productsNeedingImages.length > 0) {
+    const dispensaryContext = await getDispensaryContext(pool, dispensaryId);
+
+    if (dispensaryContext) {
+      const newCount = productResult.productsNeedingImages.filter(p => !p.hasLocalImage).length;
+      const backfillCount = productResult.productsNeedingImages.length - newCount;
+      console.log(`[Hydration] Downloading images for ${productResult.productsNeedingImages.length} products (${productResult.new} new, ${backfillCount} backfill)...`);
+      imageResult = await downloadProductImages(
+        pool,
+        productResult.productsNeedingImages,
+        dispensaryContext,
+        { dryRun }
+      );
+    } else {
+      console.warn(`[Hydration] Could not get dispensary context for ID ${dispensaryId}, skipping image downloads`);
+    }
+  } else if (productResult.productsNeedingImages.length === 0 && productResult.upserted > 0) {
+    // All products already have local images
+    console.log(`[Hydration] All ${productResult.upserted} products already have local images, skipping downloads`);
+  }
+
   return {
     productsUpserted: productResult.upserted,
     productsNew: productResult.new,
@@ -431,5 +876,12 @@ export async function hydrateToCanonical(
     productsDiscontinued: discontinuedCount,
     snapshotsCreated: snapshotResult.created,
     brandsCreated: brandResult.new,
+    variantsUpserted: variantResult.upserted,
+    variantsNew: variantResult.new,
+    variantSnapshotsCreated: variantResult.snapshotsCreated,
+    imagesDownloaded: imageResult.downloaded,
+    imagesSkipped: imageResult.skipped,
+    imagesFailed: imageResult.failed,
+    imagesBytesTotal: imageResult.bytesTotal,
   };
 }
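With the variant and image steps folded in, a caller of `hydrateToCanonical` now receives the extended summary. A hedged sketch of a call site; `pool`, `dispensaryId`, `normResult`, and `crawlRunId` are assumed to come from the surrounding pipeline code, not shown in this diff.

```typescript
// Hypothetical call site; downloadImages defaults to true in the diff.
const summary = await hydrateToCanonical(pool, dispensaryId, normResult, crawlRunId, {
  dryRun: false,
  downloadImages: true,
});
console.log(
  `${summary.productsNew} new products, ${summary.variantsUpserted} variants, ` +
  `${summary.imagesDownloaded} images (${summary.imagesBytesTotal} bytes)`
);
```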
@@ -234,99 +234,94 @@ export async function syncProductsToCanonical(

     const result = await pool.query(
       `INSERT INTO store_products (
-        dispensary_id, state_id, provider, provider_product_id,
-        provider_brand_id, provider_dispensary_id, enterprise_product_id,
-        legacy_dutchie_product_id,
-        name, brand_name, category, subcategory, product_type, strain_type,
-        description, effects, cannabinoids,
-        thc_percent, cbd_percent, thc_content_text, cbd_content_text,
-        is_in_stock, stock_status, stock_quantity,
-        total_quantity_available, total_kiosk_quantity_available,
-        image_url, local_image_url, local_image_thumb_url, local_image_medium_url,
-        original_image_url, additional_images,
-        is_on_special, is_featured, medical_only, rec_only,
+        dispensary_id, provider, provider_product_id, provider_brand_id,
+        platform_dispensary_id, external_product_id,
+        name_raw, brand_name_raw, category_raw, subcategory_raw, strain_type,
+        description, effects, cannabinoids_v2,
+        thc_percent, cbd_percent, thc_content, cbd_content,
+        is_in_stock, stock_status, stock_quantity, total_quantity_available,
+        image_url, primary_image_url, images,
+        is_on_special, featured, medical_only, rec_only,
         is_below_threshold, is_below_kiosk_threshold,
-        platform_status, c_name, weight, options, measurements,
-        first_seen_at, last_seen_at, updated_at
+        status, c_name, weight, measurements,
+        first_seen_at, last_seen_at, created_at, updated_at
       ) VALUES (
-        $1, $2, 'dutchie', $3,
-        $4, $5, $6,
-        $7,
-        $8, $9, $10, $11, $12, $13,
-        $14, $15, $16,
-        $17, $18, $19, $20,
-        $21, $22, $23,
-        $24, $25,
-        $26, $27, $28, $29,
-        $30, $31,
-        $32, $33, $34, $35,
-        $36, $37,
-        $38, $39, $40, $41, $42,
-        $43, $44, NOW()
+        $1, 'dutchie', $2, $3,
+        $4, $5,
+        $6, $7, $8, $9, $10,
+        $11, $12, $13,
+        $14, $15, $16, $17,
+        $18, $19, $20, $21,
+        $22, $23, $24,
+        $25, $26, $27, $28,
+        $29, $30,
+        $31, $32, $33, $34,
+        $35, $36, NOW(), NOW()
       )
       ON CONFLICT (dispensary_id, provider, provider_product_id)
       DO UPDATE SET
-        legacy_dutchie_product_id = EXCLUDED.legacy_dutchie_product_id,
-        name = EXCLUDED.name,
-        brand_name = EXCLUDED.brand_name,
-        category = EXCLUDED.category,
-        subcategory = EXCLUDED.subcategory,
+        name_raw = EXCLUDED.name_raw,
+        brand_name_raw = EXCLUDED.brand_name_raw,
+        category_raw = EXCLUDED.category_raw,
+        subcategory_raw = EXCLUDED.subcategory_raw,
+        strain_type = EXCLUDED.strain_type,
         is_in_stock = EXCLUDED.is_in_stock,
         stock_status = EXCLUDED.stock_status,
+        stock_quantity = EXCLUDED.stock_quantity,
+        total_quantity_available = EXCLUDED.total_quantity_available,
         thc_percent = EXCLUDED.thc_percent,
         cbd_percent = EXCLUDED.cbd_percent,
+        thc_content = EXCLUDED.thc_content,
+        cbd_content = EXCLUDED.cbd_content,
         image_url = EXCLUDED.image_url,
-        local_image_url = EXCLUDED.local_image_url,
+        primary_image_url = EXCLUDED.primary_image_url,
         is_on_special = EXCLUDED.is_on_special,
-        platform_status = EXCLUDED.platform_status,
+        status = EXCLUDED.status,
+        description = COALESCE(EXCLUDED.description, store_products.description),
+        effects = COALESCE(EXCLUDED.effects, store_products.effects),
+        cannabinoids_v2 = COALESCE(EXCLUDED.cannabinoids_v2, store_products.cannabinoids_v2),
+        weight = EXCLUDED.weight,
+        measurements = EXCLUDED.measurements,
         last_seen_at = NOW(),
         updated_at = NOW()
       RETURNING (xmax = 0) as is_new`,
       [
-        dispensaryId,
-        stateId,
-        p.external_product_id,
-        p.brand_id,
-        p.platform_dispensary_id,
-        p.enterprise_product_id,
-        p.id,
-        p.name,
-        p.brand_name,
-        p.category || p.type,
-        p.subcategory,
-        p.type,
-        p.strain_type,
-        p.description,
-        p.effects,
-        p.cannabinoids_v2,
-        thcPercent,
-        cbdPercent,
-        p.thc_content,
-        p.cbd_content,
-        isInStock,
-        stockStatus,
-        p.total_quantity_available,
-        p.total_quantity_available,
-        p.total_kiosk_quantity_available,
-        p.primary_image_url,
-        p.local_image_url,
-        p.local_image_thumb_url,
-        p.local_image_medium_url,
-        p.original_image_url,
-        p.additional_images,
-        p.special || false,
-        p.featured || false,
-        p.medical_only || false,
-        p.rec_only || false,
-        p.is_below_threshold || false,
-        p.is_below_kiosk_threshold || false,
-        p.status,
-        p.c_name,
-        p.weight,
-        p.options,
-        p.measurements,
-        p.first_seen_at || p.updated_at,
-        p.last_seen_at || p.updated_at,
+        dispensaryId, // $1
+        p.external_product_id, // $2
+        p.brand_id, // $3
+        p.platform_dispensary_id, // $4
+        p.external_product_id, // $5 external_product_id
+        p.name, // $6
+        p.brand_name, // $7
+        p.type || p.category, // $8 category_raw
+        p.subcategory, // $9
+        p.strain_type, // $10
+        p.description, // $11
+        p.effects, // $12
+        p.cannabinoids_v2, // $13
+        thcPercent, // $14
+        cbdPercent, // $15
+        p.thc_content, // $16
+        p.cbd_content, // $17
+        isInStock, // $18
+        stockStatus, // $19
+        p.total_quantity_available || 0, // $20 stock_quantity
+        p.total_quantity_available || 0, // $21
+        p.primary_image_url, // $22 image_url
+        p.primary_image_url, // $23
+        p.additional_images, // $24 images
+        p.special || false, // $25
+        p.featured || false, // $26
+        p.medical_only || false, // $27
+        p.rec_only || false, // $28
+        p.is_below_threshold || false, // $29
+        p.is_below_kiosk_threshold || false, // $30
+        p.status, // $31
+        p.c_name, // $32
+        p.weight, // $33
+        p.measurements, // $34
+        p.first_seen_at || p.updated_at, // $35
+        p.last_seen_at || p.updated_at, // $36
       ]
     );

@@ -107,7 +107,8 @@ export class HydrationWorker {
       console.log(
         `[HydrationWorker] ${this.options.dryRun ? '[DryRun] ' : ''}Processed payload ${payload.id}: ` +
         `${hydrateResult.productsNew} new, ${hydrateResult.productsUpdated} updated, ` +
-        `${hydrateResult.productsDiscontinued} discontinued, ${hydrateResult.snapshotsCreated} snapshots`
+        `${hydrateResult.productsDiscontinued} discontinued, ${hydrateResult.snapshotsCreated} snapshots, ` +
+        `${hydrateResult.variantsUpserted} variants (${hydrateResult.variantSnapshotsCreated} variant snapshots)`
       );

       return {
@@ -6,22 +6,70 @@ import { initializeMinio, isMinioEnabled } from './utils/minio';
 import { initializeImageStorage } from './utils/image-storage';
 import { logger } from './services/logger';
 import { cleanupOrphanedJobs } from './services/proxyTestQueue';
+import { runAutoMigrations } from './db/auto-migrate';
+import { getPool } from './db/pool';
 import healthRoutes from './routes/health';
+import imageProxyRoutes from './routes/image-proxy';

 dotenv.config();

 const app = express();
 const PORT = process.env.PORT || 3010;

-app.use(cors());
+// CORS configuration - allow requests from any origin with API key auth
+// WordPress plugins need to make requests from their own domains
+app.use(cors({
+  origin: true, // Reflect the request origin
+  credentials: true,
+  methods: ['GET', 'POST', 'PUT', 'DELETE', 'OPTIONS'],
+  allowedHeaders: ['Content-Type', 'Authorization', 'x-api-key', 'X-API-Key'],
+  exposedHeaders: ['Content-Length', 'X-Request-Id'],
+}));
 app.use(express.json());

 // Serve static images when MinIO is not configured
-const LOCAL_IMAGES_PATH = process.env.LOCAL_IMAGES_PATH || '/app/public/images';
+// Uses ./public/images relative to working directory (works for both Docker and local dev)
+const LOCAL_IMAGES_PATH = process.env.LOCAL_IMAGES_PATH || './public/images';
 app.use('/images', express.static(LOCAL_IMAGES_PATH));

+// Image proxy with on-demand resizing
+// Usage: /img/products/az/store/brand/product/image.webp?w=200&h=200
+app.use('/img', imageProxyRoutes);
+
 // Serve static downloads (plugin files, etc.)
-const LOCAL_DOWNLOADS_PATH = process.env.LOCAL_DOWNLOADS_PATH || '/app/public/downloads';
+// Uses ./public/downloads relative to working directory (works for both Docker and local dev)
+const LOCAL_DOWNLOADS_PATH = process.env.LOCAL_DOWNLOADS_PATH || './public/downloads';
+
+// Dynamic "latest" redirect for WordPress plugin - finds highest version automatically
+app.get('/downloads/cannaiq-menus-latest.zip', (req, res) => {
+  const fs = require('fs');
+  const path = require('path');
+  try {
+    const files = fs.readdirSync(LOCAL_DOWNLOADS_PATH);
+    const pluginFiles = files
+      .filter((f: string) => f.match(/^cannaiq-menus-\d+\.\d+\.\d+\.zip$/))
+      .sort((a: string, b: string) => {
+        const vA = a.match(/(\d+)\.(\d+)\.(\d+)/);
+        const vB = b.match(/(\d+)\.(\d+)\.(\d+)/);
+        if (!vA || !vB) return 0;
+        for (let i = 1; i <= 3; i++) {
+          const diff = parseInt(vB[i]) - parseInt(vA[i]);
+          if (diff !== 0) return diff;
+        }
+        return 0;
+      });
+
+    if (pluginFiles.length > 0) {
+      const latestFile = pluginFiles[0];
+      res.redirect(302, `/downloads/${latestFile}`);
+    } else {
+      res.status(404).json({ error: 'No plugin versions found' });
+    }
+  } catch (err) {
+    res.status(500).json({ error: 'Failed to find latest plugin' });
+  }
+});
+
 app.use('/downloads', express.static(LOCAL_DOWNLOADS_PATH));

 // Simple health check for load balancers/K8s probes
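For context on the `cannaiq-menus-latest.zip` route above: it scans the downloads directory, sorts `cannaiq-menus-X.Y.Z.zip` files by version, and 302-redirects to the newest one. A hedged sketch of a client check; host, port, and the example filename are placeholders.

```typescript
// Hypothetical client-side check of the "latest plugin" redirect.
(async () => {
  const res = await fetch('http://localhost:3010/downloads/cannaiq-menus-latest.zip', {
    redirect: 'manual', // keep the 302 instead of following it
  });
  console.log(res.status, res.headers.get('location')); // e.g. 302 /downloads/cannaiq-menus-1.2.3.zip
})();
```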
@@ -61,11 +109,16 @@ import apiPermissionsRoutes from './routes/api-permissions';
|
|||||||
import parallelScrapeRoutes from './routes/parallel-scrape';
|
import parallelScrapeRoutes from './routes/parallel-scrape';
|
||||||
import crawlerSandboxRoutes from './routes/crawler-sandbox';
|
import crawlerSandboxRoutes from './routes/crawler-sandbox';
|
||||||
import versionRoutes from './routes/version';
|
import versionRoutes from './routes/version';
|
||||||
|
import deployStatusRoutes from './routes/deploy-status';
|
||||||
import publicApiRoutes from './routes/public-api';
|
import publicApiRoutes from './routes/public-api';
|
||||||
import usersRoutes from './routes/users';
|
import usersRoutes from './routes/users';
|
||||||
import staleProcessesRoutes from './routes/stale-processes';
|
import staleProcessesRoutes from './routes/stale-processes';
|
||||||
import orchestratorAdminRoutes from './routes/orchestrator-admin';
|
import orchestratorAdminRoutes from './routes/orchestrator-admin';
|
||||||
|
import adminDebugRoutes from './routes/admin-debug';
|
||||||
|
import intelligenceRoutes from './routes/intelligence';
|
||||||
|
import marketsRoutes from './routes/markets';
|
||||||
import workersRoutes from './routes/workers';
|
import workersRoutes from './routes/workers';
|
||||||
|
import jobQueueRoutes from './routes/job-queue';
|
||||||
import { createMultiStateRoutes } from './multi-state';
|
import { createMultiStateRoutes } from './multi-state';
|
||||||
import { trackApiUsage, checkRateLimit } from './middleware/apiTokenTracker';
|
import { trackApiUsage, checkRateLimit } from './middleware/apiTokenTracker';
|
||||||
import { validateWordPressPermissions } from './middleware/wordpressPermissions';
|
import { validateWordPressPermissions } from './middleware/wordpressPermissions';
|
||||||
@@ -75,7 +128,7 @@ import { createPortalRoutes } from './portals';
|
|||||||
import { createStatesRouter } from './routes/states';
|
import { createStatesRouter } from './routes/states';
|
||||||
import { createAnalyticsV2Router } from './routes/analytics-v2';
|
import { createAnalyticsV2Router } from './routes/analytics-v2';
|
||||||
import { createDiscoveryRoutes } from './discovery';
|
import { createDiscoveryRoutes } from './discovery';
|
||||||
import { getPool } from './db/pool';
|
import pipelineRoutes from './routes/pipeline';
|
||||||
|
|
||||||
// Consumer API routes (findadispo.com, findagram.co)
|
// Consumer API routes (findadispo.com, findagram.co)
|
||||||
import consumerAuthRoutes from './routes/consumer-auth';
|
import consumerAuthRoutes from './routes/consumer-auth';
|
||||||
@@ -86,6 +139,9 @@ import consumerDealsRoutes from './routes/consumer-deals';
|
|||||||
import eventsRoutes from './routes/events';
|
import eventsRoutes from './routes/events';
|
||||||
import clickAnalyticsRoutes from './routes/click-analytics';
|
import clickAnalyticsRoutes from './routes/click-analytics';
|
||||||
import seoRoutes from './routes/seo';
|
import seoRoutes from './routes/seo';
|
||||||
|
import priceAnalyticsRoutes from './routes/price-analytics';
|
||||||
|
import tasksRoutes from './routes/tasks';
|
||||||
|
import workerRegistryRoutes from './routes/worker-registry';
|
||||||
|
|
||||||
// Mark requests from trusted domains (cannaiq.co, findagram.co, findadispo.com)
|
// Mark requests from trusted domains (cannaiq.co, findagram.co, findadispo.com)
|
||||||
// These domains can access the API without authentication
|
// These domains can access the API without authentication
|
||||||
@@ -128,11 +184,25 @@ app.use('/api/api-permissions', apiPermissionsRoutes);
|
|||||||
app.use('/api/parallel-scrape', parallelScrapeRoutes);
|
app.use('/api/parallel-scrape', parallelScrapeRoutes);
|
||||||
app.use('/api/crawler-sandbox', crawlerSandboxRoutes);
|
app.use('/api/crawler-sandbox', crawlerSandboxRoutes);
|
||||||
app.use('/api/version', versionRoutes);
|
app.use('/api/version', versionRoutes);
|
||||||
|
app.use('/api/admin/deploy-status', deployStatusRoutes);
|
||||||
|
console.log('[DeployStatus] Routes registered at /api/admin/deploy-status');
|
||||||
app.use('/api/users', usersRoutes);
|
app.use('/api/users', usersRoutes);
|
||||||
app.use('/api/stale-processes', staleProcessesRoutes);
|
 app.use('/api/stale-processes', staleProcessesRoutes);

 // Admin routes - orchestrator actions
 app.use('/api/admin/orchestrator', orchestratorAdminRoutes);

+// Admin routes - debug endpoints (snapshot inspection)
+app.use('/api/admin/debug', adminDebugRoutes);
+console.log('[AdminDebug] Routes registered at /api/admin/debug');
+
+// Admin routes - intelligence (brands, pricing analytics)
+app.use('/api/admin/intelligence', intelligenceRoutes);
+console.log('[Intelligence] Routes registered at /api/admin/intelligence');
+
+// Markets routes - store and product data for admin dashboard
+app.use('/api/markets', marketsRoutes);
+console.log('[Markets] Routes registered at /api/markets');
+
 // SEO orchestrator routes
 app.use('/api/seo', seoRoutes);

@@ -140,7 +210,17 @@ app.use('/api/seo', seoRoutes);
 app.use('/api/workers', workersRoutes);
 // Monitor routes - aliased from workers for convenience
 app.use('/api/monitor', workersRoutes);
-console.log('[Workers] Routes registered at /api/workers and /api/monitor');
+// Job queue management
+app.use('/api/job-queue', jobQueueRoutes);
+console.log('[Workers] Routes registered at /api/workers, /api/monitor, and /api/job-queue');
+
+// Task queue management - worker tasks with capacity planning
+app.use('/api/tasks', tasksRoutes);
+console.log('[Tasks] Routes registered at /api/tasks');
+
+// Worker registry - dynamic worker registration, heartbeats, and name management
+app.use('/api/worker-registry', workerRegistryRoutes);
+console.log('[WorkerRegistry] Routes registered at /api/worker-registry');
+
 // Phase 3: Analytics V2 - Enhanced analytics with rec/med state segmentation
 try {

@@ -174,6 +254,10 @@ console.log('[Events] Routes registered at /api/events');
 app.use('/api/analytics/clicks', clickAnalyticsRoutes);
 console.log('[ClickAnalytics] Routes registered at /api/analytics/clicks');

+// Price Analytics API - price history, specials, and market comparisons
+app.use('/api/analytics/price', priceAnalyticsRoutes);
+console.log('[PriceAnalytics] Routes registered at /api/analytics/price');
+
 // States API routes - cannabis legalization status and targeting
 try {
   const statesRouter = createStatesRouter(getPool());

@@ -213,6 +297,10 @@ try {
   console.warn('[Discovery] Failed to register routes:', error);
 }

+// Pipeline Stage Transitions - Explicit API for moving stores through 6-stage pipeline
+app.use('/api/pipeline', pipelineRoutes);
+console.log('[Pipeline] Routes registered at /api/pipeline');
+
 // Platform-specific Discovery Routes
 // TODO: Rebuild with /platforms/dutchie/ module

@@ -220,6 +308,17 @@ async function startServer() {
   try {
     logger.info('system', 'Starting server...');

+    // Run auto-migrations before anything else
+    const pool = getPool();
+    const migrationsApplied = await runAutoMigrations(pool);
+    if (migrationsApplied > 0) {
+      logger.info('system', `Applied ${migrationsApplied} database migrations`);
+    } else if (migrationsApplied === 0) {
+      logger.info('system', 'Database schema up to date');
+    } else {
+      logger.warn('system', 'Some migrations failed - check logs');
+    }
+
     await initializeMinio();
     await initializeImageStorage();
     logger.info('system', isMinioEnabled() ? 'MinIO storage initialized' : 'Local filesystem storage initialized');
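The startServer() hunk above only shows the call site for runAutoMigrations(pool); the runner itself is not part of this diff. As a rough sketch of what an idempotent runner with that return contract could look like (the migrations/ directory, the schema_migrations bookkeeping table, and the -1 failure return are all assumptions, not code from this repository):

// Hypothetical sketch only - not the project's actual runAutoMigrations implementation.
import fs from 'fs';
import path from 'path';
import { Pool } from 'pg';

export async function runAutoMigrationsSketch(pool: Pool): Promise<number> {
  await pool.query(
    `CREATE TABLE IF NOT EXISTS schema_migrations (name TEXT PRIMARY KEY, applied_at TIMESTAMPTZ DEFAULT NOW())`
  );
  const dir = path.join(__dirname, 'migrations'); // assumed layout
  const files = fs.readdirSync(dir).filter((f) => f.endsWith('.sql')).sort();
  let applied = 0;
  for (const file of files) {
    const { rowCount } = await pool.query('SELECT 1 FROM schema_migrations WHERE name = $1', [file]);
    if (rowCount) continue; // already applied
    const sql = fs.readFileSync(path.join(dir, file), 'utf8');
    try {
      await pool.query('BEGIN');
      await pool.query(sql);
      await pool.query('INSERT INTO schema_migrations (name) VALUES ($1)', [file]);
      await pool.query('COMMIT');
      applied++;
    } catch (err) {
      await pool.query('ROLLBACK');
      return -1; // negative return would hit the "Some migrations failed" warn branch above
    }
  }
  return applied;
}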
@@ -319,12 +319,13 @@ export function createMultiStateRoutes(pool: Pool): Router {
  // =========================================================================

  /**
-   * GET /api/analytics/compare/brand/:brandId
+   * GET /api/analytics/compare/brand/:brandIdOrName
   * Compare a brand across multiple states
+   * Accepts either numeric brand ID or brand name (URL encoded)
   */
-  router.get('/analytics/compare/brand/:brandId', async (req: Request, res: Response) => {
+  router.get('/analytics/compare/brand/:brandIdOrName', async (req: Request, res: Response) => {
    try {
-      const brandId = parseInt(req.params.brandId);
+      const { brandIdOrName } = req.params;
      const statesParam = req.query.states as string;

      // Parse states - either comma-separated or get all active states

@@ -336,7 +337,22 @@ export function createMultiStateRoutes(pool: Pool): Router {
        states = activeStates.map(s => s.code);
      }

-      const comparison = await stateService.compareBrandAcrossStates(brandId, states);
+      // Check if it's a numeric ID or a brand name
+      const brandId = parseInt(brandIdOrName);
+      let comparison;
+
+      if (!isNaN(brandId)) {
+        // Try by ID first
+        try {
+          comparison = await stateService.compareBrandAcrossStates(brandId, states);
+        } catch (idErr: any) {
+          // If brand ID not found, try as name
+          comparison = await stateService.compareBrandByNameAcrossStates(brandIdOrName, states);
+        }
+      } else {
+        // Use brand name directly
+        comparison = await stateService.compareBrandByNameAcrossStates(decodeURIComponent(brandIdOrName), states);
+      }

      res.json({
        success: true,
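Either form of the parameter now works against the route above. A quick usage sketch (host, brand ID, and brand name are placeholders, and the calls assume an async context):

const base = 'http://localhost:3000'; // placeholder host

// Numeric ID goes through compareBrandAcrossStates first:
const byId = await fetch(`${base}/api/analytics/compare/brand/42?states=AZ,MI`);

// A non-numeric, URL-encoded value falls through to compareBrandByNameAcrossStates:
const byName = await fetch(`${base}/api/analytics/compare/brand/${encodeURIComponent('Example Brand')}?states=AZ,MI`);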
@@ -67,18 +67,19 @@ export class StateQueryService {
   */
  async getStateSummary(state: string): Promise<StateSummary | null> {
    // Get base metrics from materialized view
+    // Migration 051 uses dispensary_count column (not store_count)
    const metricsResult = await this.pool.query(`
      SELECT
        state,
        state_name AS "stateName",
-        dispensary_count AS "storeCount",
-        dispensary_count AS "dutchieStores",
-        dispensary_count AS "activeStores",
-        total_products AS "totalProducts",
-        in_stock_products AS "inStockProducts",
-        out_of_stock_products AS "outOfStockProducts",
-        unique_brands AS "uniqueBrands",
-        unique_categories AS "uniqueCategories",
+        COALESCE(dispensary_count, 0) AS "storeCount",
+        COALESCE(dispensary_count, 0) AS "dutchieStores",
+        COALESCE(dispensary_count, 0) AS "activeStores",
+        COALESCE(total_products, 0) AS "totalProducts",
+        COALESCE(in_stock_products, 0) AS "inStockProducts",
+        COALESCE(out_of_stock_products, 0) AS "outOfStockProducts",
+        COALESCE(unique_brands, 0) AS "uniqueBrands",
+        COALESCE(unique_categories, 0) AS "uniqueCategories",
        avg_price_rec AS "avgPriceRec",
        min_price_rec AS "minPriceRec",
        max_price_rec AS "maxPriceRec",

@@ -110,10 +111,25 @@ export class StateQueryService {
    // Get top categories
    const topCategories = await this.getCategoriesByState(state, { limit: 5 });

+    // Parse numeric values from strings (PostgreSQL returns bigint as string)
    return {
-      ...metrics,
-      recentCrawls: parseInt(crawlResult.rows[0]?.recent_crawls || '0'),
-      failedCrawls: parseInt(crawlResult.rows[0]?.failed_crawls || '0'),
+      state: metrics.state,
+      stateName: metrics.stateName,
+      storeCount: parseInt(metrics.storeCount || '0', 10),
+      dutchieStores: parseInt(metrics.dutchieStores || '0', 10),
+      activeStores: parseInt(metrics.activeStores || '0', 10),
+      totalProducts: parseInt(metrics.totalProducts || '0', 10),
+      inStockProducts: parseInt(metrics.inStockProducts || '0', 10),
+      outOfStockProducts: parseInt(metrics.outOfStockProducts || '0', 10),
+      onSpecialProducts: parseInt(metrics.onSpecialProducts || '0', 10),
+      uniqueBrands: parseInt(metrics.uniqueBrands || '0', 10),
+      uniqueCategories: parseInt(metrics.uniqueCategories || '0', 10),
+      avgPriceRec: metrics.avgPriceRec ? parseFloat(metrics.avgPriceRec) : null,
+      minPriceRec: metrics.minPriceRec ? parseFloat(metrics.minPriceRec) : null,
+      maxPriceRec: metrics.maxPriceRec ? parseFloat(metrics.maxPriceRec) : null,
+      refreshedAt: metrics.refreshedAt,
+      recentCrawls: parseInt(crawlResult.rows[0]?.recent_crawls || '0', 10),
+      failedCrawls: parseInt(crawlResult.rows[0]?.failed_crawls || '0', 10),
      lastCrawlAt: crawlResult.rows[0]?.last_crawl_at || null,
      topBrands,
      topCategories,
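The manual parseInt/parseFloat calls are needed because node-postgres returns BIGINT (int8) and NUMERIC columns as strings. An alternative, shown here only as a design option rather than what this service does, is to register global type parsers once at startup:

import { types } from 'pg';

// OID 20 = int8 (bigint), OID 1700 = numeric
types.setTypeParser(20, (val) => parseInt(val, 10));
types.setTypeParser(1700, (val) => parseFloat(val)); // note: loses precision beyond double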
@@ -121,29 +137,49 @@ export class StateQueryService {
  }

  /**
-   * Get metrics for all states
+   * Get metrics for all states (including states with no data)
   */
  async getAllStateMetrics(): Promise<StateMetrics[]> {
+    // Migration 051 uses dispensary_count column (not store_count)
    const result = await this.pool.query(`
      SELECT
-        state,
-        state_name AS "stateName",
-        dispensary_count AS "storeCount",
-        dispensary_count AS "dutchieStores",
-        dispensary_count AS "activeStores",
-        total_products AS "totalProducts",
-        in_stock_products AS "inStockProducts",
-        out_of_stock_products AS "outOfStockProducts",
-        unique_brands AS "uniqueBrands",
-        unique_categories AS "uniqueCategories",
-        avg_price_rec AS "avgPriceRec",
-        min_price_rec AS "minPriceRec",
-        max_price_rec AS "maxPriceRec",
-        refreshed_at AS "refreshedAt"
-      FROM mv_state_metrics
-      ORDER BY dispensary_count DESC
+        s.code AS state,
+        s.name AS "stateName",
+        COALESCE(m.dispensary_count, 0) AS "storeCount",
+        COALESCE(m.dispensary_count, 0) AS "dutchieStores",
+        COALESCE(m.dispensary_count, 0) AS "activeStores",
+        COALESCE(m.total_products, 0) AS "totalProducts",
+        COALESCE(m.in_stock_products, 0) AS "inStockProducts",
+        COALESCE(m.out_of_stock_products, 0) AS "outOfStockProducts",
+        COALESCE(m.unique_brands, 0) AS "uniqueBrands",
+        COALESCE(m.unique_categories, 0) AS "uniqueCategories",
+        m.avg_price_rec AS "avgPriceRec",
+        m.min_price_rec AS "minPriceRec",
+        m.max_price_rec AS "maxPriceRec",
+        m.refreshed_at AS "refreshedAt",
+        0 AS "onSpecialProducts"
+      FROM states s
+      LEFT JOIN mv_state_metrics m ON s.code = m.state
+      ORDER BY COALESCE(m.dispensary_count, 0) DESC, s.name ASC
    `);
-    return result.rows;
+    // Parse numeric values from strings (PostgreSQL returns bigint as string)
+    return result.rows.map((row: any) => ({
+      state: row.state,
+      stateName: row.stateName,
+      storeCount: parseInt(row.storeCount || '0', 10),
+      dutchieStores: parseInt(row.dutchieStores || '0', 10),
+      activeStores: parseInt(row.activeStores || '0', 10),
+      totalProducts: parseInt(row.totalProducts || '0', 10),
+      inStockProducts: parseInt(row.inStockProducts || '0', 10),
+      outOfStockProducts: parseInt(row.outOfStockProducts || '0', 10),
+      uniqueBrands: parseInt(row.uniqueBrands || '0', 10),
+      uniqueCategories: parseInt(row.uniqueCategories || '0', 10),
+      avgPriceRec: row.avgPriceRec ? parseFloat(row.avgPriceRec) : null,
+      minPriceRec: row.minPriceRec ? parseFloat(row.minPriceRec) : null,
+      maxPriceRec: row.maxPriceRec ? parseFloat(row.maxPriceRec) : null,
+      refreshedAt: row.refreshedAt,
+      onSpecialProducts: parseInt(row.onSpecialProducts || '0', 10),
+    }));
  }

  // =========================================================================
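Because the query now drives off states and LEFT JOINs the materialized view, every state comes back even when it has no crawled data, just with zeroed counts. A small usage sketch (how the service is constructed with a pg Pool is assumed, not shown in this diff):

const service = new StateQueryService(getPool()); // constructor shape assumed
const metrics = await service.getAllStateMetrics();

const uncovered = metrics.filter((m) => m.storeCount === 0);
console.log(`${uncovered.length} states have no crawled stores yet`);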
@@ -152,29 +188,37 @@ export class StateQueryService {

  /**
   * Get brands present in a specific state
+   * Uses inline query instead of v_brand_state_presence view for compatibility
   */
  async getBrandsByState(state: string, options: StateQueryOptions = {}): Promise<BrandInState[]> {
    const { limit = 50, offset = 0, sortBy = 'productCount', sortDir = 'desc' } = options;

+    // Sort columns must reference the aliased output names with quotes
    const sortColumn = {
-      productCount: 'product_count',
-      storeCount: 'store_count',
-      avgPrice: 'avg_price',
-      name: 'brand_name',
-    }[sortBy] || 'product_count';
+      productCount: '"productCount"',
+      storeCount: '"storeCount"',
+      avgPrice: '"avgPrice"',
+      name: '"brandName"',
+    }[sortBy] || '"productCount"';

+    // Inline query that aggregates brand data from store_products and dispensaries
+    // Works whether or not v_brand_state_presence view exists
    const result = await this.pool.query(`
      SELECT
-        brand_id AS "brandId",
-        brand_name AS "brandName",
-        brand_slug AS "brandSlug",
-        store_count AS "storeCount",
-        product_count AS "productCount",
-        avg_price AS "avgPrice",
-        first_seen_in_state AS "firstSeenInState",
-        last_seen_in_state AS "lastSeenInState"
-      FROM v_brand_state_presence
-      WHERE state = $1
+        COALESCE(sp.brand_id, 0) AS "brandId",
+        sp.brand_name_raw AS "brandName",
+        LOWER(REPLACE(sp.brand_name_raw, ' ', '-')) AS "brandSlug",
+        COUNT(DISTINCT d.id) AS "storeCount",
+        COUNT(DISTINCT sp.id) AS "productCount",
+        ROUND(AVG(sp.price_rec)::numeric, 2) AS "avgPrice",
+        MIN(sp.first_seen_at) AS "firstSeenInState",
+        MAX(sp.last_seen_at) AS "lastSeenInState"
+      FROM store_products sp
+      JOIN dispensaries d ON sp.dispensary_id = d.id
+      WHERE d.state = $1
+        AND sp.brand_name_raw IS NOT NULL
+        AND sp.brand_name_raw != ''
+      GROUP BY sp.brand_id, sp.brand_name_raw
      ORDER BY ${sortColumn} ${sortDir === 'asc' ? 'ASC' : 'DESC'}
      LIMIT $2 OFFSET $3
    `, [state, limit, offset]);
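Since sortColumn and sortDir are interpolated directly into the SQL string, they are constrained to a fixed whitelist and a two-value ternary, so nothing user-controlled reaches the query text. The same guard, isolated as a small helper for clarity (a sketch, not an exported function in this diff):

function buildOrderBy(sortBy?: string, sortDir?: string): string {
  const allowed: Record<string, string> = {
    productCount: '"productCount"',
    storeCount: '"storeCount"',
    avgPrice: '"avgPrice"',
    name: '"brandName"',
  };
  const column = allowed[sortBy ?? ''] ?? '"productCount"'; // unknown keys fall back safely
  const direction = sortDir === 'asc' ? 'ASC' : 'DESC';     // only two literal outcomes
  return `ORDER BY ${column} ${direction}`;
}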
@@ -184,18 +228,48 @@ export class StateQueryService {

  /**
   * Get brand penetration across all states
+   * Uses inline query instead of fn_brand_state_penetration function for compatibility
   */
  async getBrandStatePenetration(brandId: number): Promise<BrandStatePenetration[]> {
+    // Inline query that calculates brand penetration by state
    const result = await this.pool.query(`
+      WITH state_totals AS (
+        SELECT
+          d.state,
+          s.name AS state_name,
+          COUNT(DISTINCT d.id) AS total_stores
+        FROM dispensaries d
+        JOIN states s ON d.state = s.code
+        WHERE d.state IS NOT NULL
+        GROUP BY d.state, s.name
+      ),
+      brand_presence AS (
+        SELECT
+          d.state,
+          COUNT(DISTINCT d.id) AS stores_with_brand,
+          COUNT(DISTINCT sp.id) AS product_count,
+          ROUND(AVG(sp.price_rec)::numeric, 2) AS avg_price
+        FROM store_products sp
+        JOIN dispensaries d ON sp.dispensary_id = d.id
+        WHERE (sp.brand_id = $1 OR sp.brand_name_raw = (SELECT name FROM brands WHERE id = $1))
+          AND d.state IS NOT NULL
+        GROUP BY d.state
+      )
      SELECT
-        state,
-        state_name AS "stateName",
-        total_stores AS "totalStores",
-        stores_with_brand AS "storesWithBrand",
-        penetration_pct AS "penetrationPct",
-        product_count AS "productCount",
-        avg_price AS "avgPrice"
-      FROM fn_brand_state_penetration($1)
+        st.state,
+        st.state_name AS "stateName",
+        st.total_stores AS "totalStores",
+        COALESCE(bp.stores_with_brand, 0) AS "storesWithBrand",
+        CASE
+          WHEN st.total_stores > 0
+          THEN ROUND((COALESCE(bp.stores_with_brand, 0)::numeric / st.total_stores) * 100, 2)
+          ELSE 0
+        END AS "penetrationPct",
+        COALESCE(bp.product_count, 0) AS "productCount",
+        bp.avg_price AS "avgPrice"
+      FROM state_totals st
+      LEFT JOIN brand_presence bp ON st.state = bp.state
+      ORDER BY COALESCE(bp.stores_with_brand, 0) DESC
    `, [brandId]);

    return result.rows;
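For reference, the CASE expression computes penetration as stores carrying the brand over total stores in the state, rounded to two decimals. A worked example with made-up figures:

const storesWithBrand = 37;  // example values only
const totalStores = 150;

const penetrationPct = totalStores > 0
  ? Math.round((storesWithBrand / totalStores) * 100 * 100) / 100
  : 0;

console.log(penetrationPct); // 24.67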
@@ -257,33 +331,128 @@ export class StateQueryService {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Compare a brand by name across multiple states
|
||||||
|
* Used when we only have a brand name (not an ID from the brands table)
|
||||||
|
*/
|
||||||
|
async compareBrandByNameAcrossStates(
|
||||||
|
brandName: string,
|
||||||
|
states: string[]
|
||||||
|
): Promise<BrandCrossStateComparison> {
|
||||||
|
// Get penetration data by brand name
|
||||||
|
const penetrationResult = await this.pool.query(`
|
||||||
|
WITH state_totals AS (
|
||||||
|
SELECT
|
||||||
|
d.state,
|
||||||
|
s.name AS state_name,
|
||||||
|
COUNT(DISTINCT d.id) AS total_stores
|
||||||
|
FROM dispensaries d
|
||||||
|
JOIN states s ON d.state = s.code
|
||||||
|
WHERE d.state IS NOT NULL
|
||||||
|
GROUP BY d.state, s.name
|
||||||
|
),
|
||||||
|
brand_presence AS (
|
||||||
|
SELECT
|
||||||
|
d.state,
|
||||||
|
COUNT(DISTINCT d.id) AS stores_with_brand,
|
||||||
|
COUNT(DISTINCT sp.id) AS product_count,
|
||||||
|
ROUND(AVG(sp.price_rec)::numeric, 2) AS avg_price
|
||||||
|
FROM store_products sp
|
||||||
|
JOIN dispensaries d ON sp.dispensary_id = d.id
|
||||||
|
WHERE sp.brand_name_raw ILIKE $1
|
||||||
|
AND d.state IS NOT NULL
|
||||||
|
GROUP BY d.state
|
||||||
|
)
|
||||||
|
SELECT
|
||||||
|
st.state,
|
||||||
|
st.state_name AS "stateName",
|
||||||
|
st.total_stores AS "totalStores",
|
||||||
|
COALESCE(bp.stores_with_brand, 0) AS "storesWithBrand",
|
||||||
|
CASE
|
||||||
|
WHEN st.total_stores > 0
|
||||||
|
THEN ROUND((COALESCE(bp.stores_with_brand, 0)::numeric / st.total_stores) * 100, 2)
|
||||||
|
ELSE 0
|
||||||
|
END AS "penetrationPct",
|
||||||
|
COALESCE(bp.product_count, 0) AS "productCount",
|
||||||
|
bp.avg_price AS "avgPrice"
|
||||||
|
FROM state_totals st
|
||||||
|
LEFT JOIN brand_presence bp ON st.state = bp.state
|
||||||
|
ORDER BY COALESCE(bp.stores_with_brand, 0) DESC
|
||||||
|
`, [brandName]);
|
||||||
|
|
||||||
|
// Filter by requested states
|
||||||
|
const filteredStates = penetrationResult.rows.filter((p: any) =>
|
||||||
|
states.includes(p.state)
|
||||||
|
);
|
||||||
|
|
||||||
|
// Calculate national metrics
|
||||||
|
const nationalResult = await this.pool.query(`
|
||||||
|
SELECT
|
||||||
|
COUNT(DISTINCT d.id) AS total_stores,
|
||||||
|
COUNT(DISTINCT CASE WHEN sp.brand_name_raw ILIKE $1 THEN d.id END) AS stores_with_brand,
|
||||||
|
AVG(sp.price_rec) FILTER (WHERE sp.brand_name_raw ILIKE $1) AS avg_price
|
||||||
|
FROM dispensaries d
|
||||||
|
LEFT JOIN store_products sp ON d.id = sp.dispensary_id
|
||||||
|
WHERE d.state IS NOT NULL
|
||||||
|
`, [brandName]);
|
||||||
|
|
||||||
|
const nationalData = nationalResult.rows[0];
|
||||||
|
const nationalPenetration = nationalData.total_stores > 0
|
||||||
|
? (nationalData.stores_with_brand / nationalData.total_stores) * 100
|
||||||
|
: 0;
|
||||||
|
|
||||||
|
// Find best/worst states
|
||||||
|
const sortedByPenetration = [...filteredStates].sort(
|
||||||
|
(a: any, b: any) => parseFloat(b.penetrationPct) - parseFloat(a.penetrationPct)
|
||||||
|
);
|
||||||
|
|
||||||
|
return {
|
||||||
|
brandId: 0, // No ID when using brand name
|
||||||
|
brandName,
|
||||||
|
states: filteredStates,
|
||||||
|
nationalPenetration: Math.round(nationalPenetration * 100) / 100,
|
||||||
|
nationalAvgPrice: nationalData.avg_price
|
||||||
|
? Math.round(parseFloat(nationalData.avg_price) * 100) / 100
|
||||||
|
: null,
|
||||||
|
bestPerformingState: sortedByPenetration[0]?.state || null,
|
||||||
|
worstPerformingState: sortedByPenetration[sortedByPenetration.length - 1]?.state || null,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
// =========================================================================
|
// =========================================================================
|
||||||
// Category Queries
|
// Category Queries
|
||||||
// =========================================================================
|
// =========================================================================
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get categories in a specific state
|
* Get categories in a specific state
|
||||||
|
* Uses inline query instead of v_category_state_distribution view for compatibility
|
||||||
*/
|
*/
|
||||||
async getCategoriesByState(state: string, options: StateQueryOptions = {}): Promise<CategoryInState[]> {
|
async getCategoriesByState(state: string, options: StateQueryOptions = {}): Promise<CategoryInState[]> {
|
||||||
const { limit = 50, offset = 0, sortBy = 'productCount', sortDir = 'desc' } = options;
|
const { limit = 50, offset = 0, sortBy = 'productCount', sortDir = 'desc' } = options;
|
||||||
|
|
||||||
|
// Sort columns must reference the aliased output names with quotes
|
||||||
const sortColumn = {
|
const sortColumn = {
|
||||||
productCount: 'product_count',
|
productCount: '"productCount"',
|
||||||
storeCount: 'store_count',
|
storeCount: '"storeCount"',
|
||||||
avgPrice: 'avg_price',
|
avgPrice: '"avgPrice"',
|
||||||
category: 'category',
|
category: 'category',
|
||||||
}[sortBy] || 'product_count';
|
}[sortBy] || '"productCount"';
|
||||||
|
|
||||||
|
// Inline query that aggregates category data from store_products and dispensaries
|
||||||
const result = await this.pool.query(`
|
const result = await this.pool.query(`
|
||||||
SELECT
|
SELECT
|
||||||
category,
|
sp.category_raw AS category,
|
||||||
product_count AS "productCount",
|
COUNT(DISTINCT sp.id) AS "productCount",
|
||||||
store_count AS "storeCount",
|
COUNT(DISTINCT d.id) AS "storeCount",
|
||||||
avg_price AS "avgPrice",
|
ROUND(AVG(sp.price_rec)::numeric, 2) AS "avgPrice",
|
||||||
in_stock_count AS "inStockCount",
|
COUNT(DISTINCT CASE WHEN sp.is_in_stock THEN sp.id END) AS "inStockCount",
|
||||||
on_special_count AS "onSpecialCount"
|
0 AS "onSpecialCount"
|
||||||
FROM v_category_state_distribution
|
FROM store_products sp
|
||||||
WHERE state = $1
|
JOIN dispensaries d ON sp.dispensary_id = d.id
|
||||||
|
WHERE d.state = $1
|
||||||
|
AND sp.category_raw IS NOT NULL
|
||||||
|
AND sp.category_raw != ''
|
||||||
|
GROUP BY sp.category_raw
|
||||||
ORDER BY ${sortColumn} ${sortDir === 'asc' ? 'ASC' : 'DESC'}
|
ORDER BY ${sortColumn} ${sortDir === 'asc' ? 'ASC' : 'DESC'}
|
||||||
LIMIT $2 OFFSET $3
|
LIMIT $2 OFFSET $3
|
||||||
`, [state, limit, offset]);
|
`, [state, limit, offset]);
|
||||||
@@ -293,25 +462,38 @@ export class StateQueryService {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Compare a category across multiple states
|
* Compare a category across multiple states
|
||||||
|
* Uses inline query instead of v_category_state_distribution view for compatibility
|
||||||
*/
|
*/
|
||||||
async compareCategoryAcrossStates(
|
async compareCategoryAcrossStates(
|
||||||
category: string,
|
category: string,
|
||||||
states: string[]
|
states: string[]
|
||||||
): Promise<CategoryCrossStateComparison> {
|
): Promise<CategoryCrossStateComparison> {
|
||||||
|
// Inline query for category distribution by state
|
||||||
const result = await this.pool.query(`
|
const result = await this.pool.query(`
|
||||||
|
WITH category_stats AS (
|
||||||
|
SELECT
|
||||||
|
d.state,
|
||||||
|
sp.category_raw AS category,
|
||||||
|
COUNT(DISTINCT sp.id) AS product_count,
|
||||||
|
COUNT(DISTINCT d.id) AS store_count,
|
||||||
|
ROUND(AVG(sp.price_rec)::numeric, 2) AS avg_price
|
||||||
|
FROM store_products sp
|
||||||
|
JOIN dispensaries d ON sp.dispensary_id = d.id
|
||||||
|
WHERE sp.category_raw = $1
|
||||||
|
AND d.state = ANY($2)
|
||||||
|
GROUP BY d.state, sp.category_raw
|
||||||
|
)
|
||||||
SELECT
|
SELECT
|
||||||
v.state,
|
cs.state,
|
||||||
s.name AS "stateName",
|
s.name AS "stateName",
|
||||||
v.category,
|
cs.category,
|
||||||
v.product_count AS "productCount",
|
cs.product_count AS "productCount",
|
||||||
v.store_count AS "storeCount",
|
cs.store_count AS "storeCount",
|
||||||
v.avg_price AS "avgPrice",
|
cs.avg_price AS "avgPrice",
|
||||||
ROUND(v.product_count::NUMERIC / SUM(v.product_count) OVER () * 100, 2) AS "marketShare"
|
ROUND(cs.product_count::NUMERIC / NULLIF(SUM(cs.product_count) OVER (), 0) * 100, 2) AS "marketShare"
|
||||||
FROM v_category_state_distribution v
|
FROM category_stats cs
|
||||||
JOIN states s ON v.state = s.code
|
JOIN states s ON cs.state = s.code
|
||||||
WHERE v.category = $1
|
ORDER BY cs.product_count DESC
|
||||||
AND v.state = ANY($2)
|
|
||||||
ORDER BY v.product_count DESC
|
|
||||||
`, [category, states]);
|
`, [category, states]);
|
||||||
|
|
||||||
// Get national totals
|
// Get national totals
|
||||||
@@ -345,41 +527,49 @@ export class StateQueryService {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Get stores in a specific state
|
* Get stores in a specific state
|
||||||
|
* Uses inline query for compatibility - does not depend on v_store_state_summary view
|
||||||
*/
|
*/
|
||||||
async getStoresByState(state: string, options: StateQueryOptions = {}): Promise<StoreInState[]> {
|
async getStoresByState(state: string, options: StateQueryOptions = {}): Promise<StoreInState[]> {
|
||||||
const { limit = 100, offset = 0, includeInactive = false, sortBy = 'productCount', sortDir = 'desc' } = options;
|
const { limit = 100, offset = 0, includeInactive = false, sortBy = 'productCount', sortDir = 'desc' } = options;
|
||||||
|
|
||||||
|
// Sort columns must reference the aliased output names with quotes
|
||||||
const sortColumn = {
|
const sortColumn = {
|
||||||
productCount: 'product_count',
|
productCount: '"productCount"',
|
||||||
brandCount: 'brand_count',
|
brandCount: '"brandCount"',
|
||||||
avgPrice: 'avg_price',
|
avgPrice: '"avgPrice"',
|
||||||
name: 'dispensary_name',
|
name: '"dispensaryName"',
|
||||||
city: 'city',
|
city: 'city',
|
||||||
lastCrawl: 'last_crawl_at',
|
lastCrawl: '"lastCrawlAt"',
|
||||||
}[sortBy] || 'product_count';
|
}[sortBy] || '"productCount"';
|
||||||
|
|
||||||
let whereClause = 'WHERE state = $1';
|
let whereClause = 'WHERE d.state = $1';
|
||||||
if (!includeInactive) {
|
if (!includeInactive) {
|
||||||
whereClause += ` AND crawl_status != 'disabled'`;
|
// Use stage column instead of crawl_status (which doesn't exist)
|
||||||
|
whereClause += ` AND (d.stage IS NULL OR d.stage NOT IN ('disabled', 'failing'))`;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Inline query that aggregates store data from dispensaries and store_products
|
||||||
|
// Works whether or not v_store_state_summary view exists
|
||||||
|
// Uses 'stage' column instead of 'crawl_status' which doesn't exist in this schema
|
||||||
const result = await this.pool.query(`
|
const result = await this.pool.query(`
|
||||||
SELECT
|
SELECT
|
||||||
dispensary_id AS "dispensaryId",
|
d.id AS "dispensaryId",
|
||||||
dispensary_name AS "dispensaryName",
|
d.name AS "dispensaryName",
|
||||||
dispensary_slug AS "dispensarySlug",
|
d.slug AS "dispensarySlug",
|
||||||
state,
|
d.state,
|
||||||
city,
|
d.city,
|
||||||
menu_type AS "menuType",
|
d.menu_type AS "menuType",
|
||||||
crawl_status AS "crawlStatus",
|
d.stage AS "crawlStatus",
|
||||||
last_crawl_at AS "lastCrawlAt",
|
d.last_crawl_at AS "lastCrawlAt",
|
||||||
product_count AS "productCount",
|
COUNT(DISTINCT sp.id) AS "productCount",
|
||||||
in_stock_count AS "inStockCount",
|
COUNT(DISTINCT CASE WHEN sp.is_in_stock THEN sp.id END) AS "inStockCount",
|
||||||
brand_count AS "brandCount",
|
COUNT(DISTINCT sp.brand_id) AS "brandCount",
|
||||||
avg_price AS "avgPrice",
|
ROUND(AVG(sp.price_rec)::numeric, 2) AS "avgPrice",
|
||||||
special_count AS "specialCount"
|
COUNT(DISTINCT CASE WHEN sp.is_on_special THEN sp.id END) AS "specialCount"
|
||||||
FROM v_store_state_summary
|
FROM dispensaries d
|
||||||
|
LEFT JOIN store_products sp ON d.id = sp.dispensary_id
|
||||||
${whereClause}
|
${whereClause}
|
||||||
|
GROUP BY d.id, d.name, d.slug, d.state, d.city, d.menu_type, d.stage, d.last_crawl_at
|
||||||
ORDER BY ${sortColumn} ${sortDir === 'asc' ? 'ASC' : 'DESC'} NULLS LAST
|
ORDER BY ${sortColumn} ${sortDir === 'asc' ? 'ASC' : 'DESC'} NULLS LAST
|
||||||
LIMIT $2 OFFSET $3
|
LIMIT $2 OFFSET $3
|
||||||
`, [state, limit, offset]);
|
`, [state, limit, offset]);
|
||||||
@@ -393,6 +583,7 @@ export class StateQueryService {

  /**
   * Get price distribution by state
+   * Uses inline query instead of fn_national_price_comparison for compatibility
   */
  async getStorePriceDistribution(
    state: string,
@@ -400,44 +591,104 @@ export class StateQueryService {
|
|||||||
): Promise<StatePriceDistribution[]> {
|
): Promise<StatePriceDistribution[]> {
|
||||||
const { category, brandId } = options;
|
const { category, brandId } = options;
|
||||||
|
|
||||||
|
// Build WHERE conditions dynamically
|
||||||
|
const conditions = ['d.state = $1', 'sp.price_rec IS NOT NULL', 'sp.price_rec > 0'];
|
||||||
|
const params: any[] = [state];
|
||||||
|
let paramIndex = 2;
|
||||||
|
|
||||||
|
if (category) {
|
||||||
|
conditions.push(`sp.category_raw = $${paramIndex}`);
|
||||||
|
params.push(category);
|
||||||
|
paramIndex++;
|
||||||
|
}
|
||||||
|
if (brandId) {
|
||||||
|
conditions.push(`sp.brand_id = $${paramIndex}`);
|
||||||
|
params.push(brandId);
|
||||||
|
paramIndex++;
|
||||||
|
}
|
||||||
|
|
||||||
const result = await this.pool.query(`
|
const result = await this.pool.query(`
|
||||||
SELECT * FROM fn_national_price_comparison($1, $2)
|
SELECT
|
||||||
WHERE state = $3
|
d.state,
|
||||||
`, [category || null, brandId || null, state]);
|
s.name AS state_name,
|
||||||
|
COUNT(DISTINCT sp.id) AS product_count,
|
||||||
|
ROUND(AVG(sp.price_rec)::numeric, 2) AS avg_price,
|
||||||
|
MIN(sp.price_rec) AS min_price,
|
||||||
|
MAX(sp.price_rec) AS max_price,
|
||||||
|
ROUND(PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY sp.price_rec)::numeric, 2) AS median_price,
|
||||||
|
ROUND(STDDEV(sp.price_rec)::numeric, 2) AS price_stddev
|
||||||
|
FROM dispensaries d
|
||||||
|
JOIN states s ON d.state = s.code
|
||||||
|
JOIN store_products sp ON d.id = sp.dispensary_id
|
||||||
|
WHERE ${conditions.join(' AND ')}
|
||||||
|
GROUP BY d.state, s.name
|
||||||
|
ORDER BY avg_price DESC
|
||||||
|
`, params);
|
||||||
|
|
||||||
return result.rows.map(row => ({
|
return result.rows.map(row => ({
|
||||||
state: row.state,
|
state: row.state,
|
||||||
stateName: row.state_name,
|
stateName: row.state_name,
|
||||||
productCount: parseInt(row.product_count),
|
productCount: parseInt(row.product_count || '0'),
|
||||||
avgPrice: parseFloat(row.avg_price),
|
avgPrice: parseFloat(row.avg_price || '0'),
|
||||||
minPrice: parseFloat(row.min_price),
|
minPrice: parseFloat(row.min_price || '0'),
|
||||||
maxPrice: parseFloat(row.max_price),
|
maxPrice: parseFloat(row.max_price || '0'),
|
||||||
medianPrice: parseFloat(row.median_price),
|
medianPrice: parseFloat(row.median_price || '0'),
|
||||||
priceStddev: parseFloat(row.price_stddev),
|
priceStddev: parseFloat(row.price_stddev || '0'),
|
||||||
}));
|
}));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get national price comparison across all states
|
* Get national price comparison across all states
|
||||||
|
* Uses inline query instead of fn_national_price_comparison for compatibility
|
||||||
*/
|
*/
|
||||||
async getNationalPriceComparison(
|
async getNationalPriceComparison(
|
||||||
options: { category?: string; brandId?: number } = {}
|
options: { category?: string; brandId?: number } = {}
|
||||||
): Promise<StatePriceDistribution[]> {
|
): Promise<StatePriceDistribution[]> {
|
||||||
const { category, brandId } = options;
|
const { category, brandId } = options;
|
||||||
|
|
||||||
|
// Build WHERE conditions dynamically
|
||||||
|
const conditions = ['d.state IS NOT NULL', 'sp.price_rec IS NOT NULL', 'sp.price_rec > 0'];
|
||||||
|
const params: any[] = [];
|
||||||
|
let paramIndex = 1;
|
||||||
|
|
||||||
|
if (category) {
|
||||||
|
conditions.push(`sp.category_raw = $${paramIndex}`);
|
||||||
|
params.push(category);
|
||||||
|
paramIndex++;
|
||||||
|
}
|
||||||
|
if (brandId) {
|
||||||
|
conditions.push(`sp.brand_id = $${paramIndex}`);
|
||||||
|
params.push(brandId);
|
||||||
|
paramIndex++;
|
||||||
|
}
|
||||||
|
|
||||||
const result = await this.pool.query(`
|
const result = await this.pool.query(`
|
||||||
SELECT * FROM fn_national_price_comparison($1, $2)
|
SELECT
|
||||||
`, [category || null, brandId || null]);
|
d.state,
|
||||||
|
s.name AS state_name,
|
||||||
|
COUNT(DISTINCT sp.id) AS product_count,
|
||||||
|
ROUND(AVG(sp.price_rec)::numeric, 2) AS avg_price,
|
||||||
|
MIN(sp.price_rec) AS min_price,
|
||||||
|
MAX(sp.price_rec) AS max_price,
|
||||||
|
ROUND(PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY sp.price_rec)::numeric, 2) AS median_price,
|
||||||
|
ROUND(STDDEV(sp.price_rec)::numeric, 2) AS price_stddev
|
||||||
|
FROM dispensaries d
|
||||||
|
JOIN states s ON d.state = s.code
|
||||||
|
JOIN store_products sp ON d.id = sp.dispensary_id
|
||||||
|
WHERE ${conditions.join(' AND ')}
|
||||||
|
GROUP BY d.state, s.name
|
||||||
|
ORDER BY avg_price DESC
|
||||||
|
`, params);
|
||||||
|
|
||||||
return result.rows.map(row => ({
|
return result.rows.map(row => ({
|
||||||
state: row.state,
|
state: row.state,
|
||||||
stateName: row.state_name,
|
stateName: row.state_name,
|
||||||
productCount: parseInt(row.product_count),
|
productCount: parseInt(row.product_count || '0'),
|
||||||
avgPrice: parseFloat(row.avg_price),
|
avgPrice: parseFloat(row.avg_price || '0'),
|
||||||
minPrice: parseFloat(row.min_price),
|
minPrice: parseFloat(row.min_price || '0'),
|
||||||
maxPrice: parseFloat(row.max_price),
|
maxPrice: parseFloat(row.max_price || '0'),
|
||||||
medianPrice: parseFloat(row.median_price),
|
medianPrice: parseFloat(row.median_price || '0'),
|
||||||
priceStddev: parseFloat(row.price_stddev),
|
priceStddev: parseFloat(row.price_stddev || '0'),
|
||||||
}));
|
}));
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -498,7 +749,7 @@ export class StateQueryService {
      switch (metric) {
        case 'stores':
          query = `
-            SELECT state, state_name AS "stateName", dispensary_count AS value, 'stores' AS label
+            SELECT state, state_name AS "stateName", COALESCE(dispensary_count, 0) AS value, 'stores' AS label
            FROM mv_state_metrics
            WHERE state IS NOT NULL
            ORDER BY state

@@ -507,7 +758,7 @@ export class StateQueryService {

        case 'products':
          query = `
-            SELECT state, state_name AS "stateName", total_products AS value, 'products' AS label
+            SELECT state, state_name AS "stateName", COALESCE(total_products, 0) AS value, 'products' AS label
            FROM mv_state_metrics
            WHERE state IS NOT NULL
            ORDER BY state

@@ -516,7 +767,7 @@ export class StateQueryService {

        case 'brands':
          query = `
-            SELECT state, state_name AS "stateName", unique_brands AS value, 'brands' AS label
+            SELECT state, state_name AS "stateName", COALESCE(unique_brands, 0) AS value, 'brands' AS label
            FROM mv_state_metrics
            WHERE state IS NOT NULL
            ORDER BY state
@@ -536,10 +787,33 @@ export class StateQueryService {
        if (!options.brandId) {
          throw new Error('brandId required for penetration heatmap');
        }
+        // Inline query instead of fn_brand_state_penetration function
        query = `
-          SELECT state, state_name AS "stateName", penetration_pct AS value, 'penetration %' AS label
-          FROM fn_brand_state_penetration($1)
-          ORDER BY state
+          WITH state_totals AS (
+            SELECT d.state, s.name AS state_name, COUNT(DISTINCT d.id) AS total_stores
+            FROM dispensaries d
+            JOIN states s ON d.state = s.code
+            WHERE d.state IS NOT NULL
+            GROUP BY d.state, s.name
+          ),
+          brand_presence AS (
+            SELECT d.state, COUNT(DISTINCT d.id) AS stores_with_brand
+            FROM store_products sp
+            JOIN dispensaries d ON sp.dispensary_id = d.id
+            WHERE (sp.brand_id = $1 OR sp.brand_name_raw = (SELECT name FROM brands WHERE id = $1))
+              AND d.state IS NOT NULL
+            GROUP BY d.state
+          )
+          SELECT
+            st.state,
+            st.state_name AS "stateName",
+            CASE WHEN st.total_stores > 0
+              THEN ROUND((COALESCE(bp.stores_with_brand, 0)::numeric / st.total_stores) * 100, 2)
+              ELSE 0 END AS value,
+            'penetration %' AS label
+          FROM state_totals st
+          LEFT JOIN brand_presence bp ON st.state = bp.state
+          ORDER BY st.state
        `;
        params = [options.brandId];
        break;
@@ -549,7 +823,14 @@ export class StateQueryService {
    }

    const result = await this.pool.query(query, params);
-    return result.rows;
+    // Parse numeric values from strings (PostgreSQL returns bigint as string)
+    // Round to 2 decimal places for display
+    return result.rows.map((row: any) => ({
+      state: row.state,
+      stateName: row.stateName,
+      value: row.value !== null ? Math.round(parseFloat(row.value) * 100) / 100 : 0,
+      label: row.label,
+    }));
  }

  /**
@@ -17,6 +17,7 @@ export interface StateMetrics {
  activeStores: number;
  totalProducts: number;
  inStockProducts: number;
+  outOfStockProducts: number;
  onSpecialProducts: number;
  uniqueBrands: number;
  uniqueCategories: number;
@@ -159,6 +159,7 @@ export const GRAPHQL_HASHES = {
  GetAddressBasedDispensaryData: '13461f73abf7268770dfd05fe7e10c523084b2bb916a929c08efe3d87531977b',
  ConsumerDispensaries: '0a5bfa6ca1d64ae47bcccb7c8077c87147cbc4e6982c17ceec97a2a4948b311b',
  DispensaryInfo: '13461f73abf7268770dfd05fe7e10c523084b2bb916a929c08efe3d87531977b',
+  GetAllCitiesByState: 'ae547a0466ace5a48f91e55bf6699eacd87e3a42841560f0c0eabed5a0a920e6',
};

// ============================================================
@@ -212,7 +213,24 @@ const FINGERPRINTS: Fingerprint[] = [

let currentFingerprintIndex = 0;

+// Forward declaration for session (actual CrawlSession interface defined later)
+let currentSession: {
+  sessionId: string;
+  fingerprint: Fingerprint;
+  proxyUrl: string | null;
+  stateCode?: string;
+  timezone?: string;
+  startedAt: Date;
+} | null = null;
+
+/**
+ * Get current fingerprint - returns session fingerprint if active, otherwise default
+ */
export function getFingerprint(): Fingerprint {
+  // Use session fingerprint if a session is active
+  if (currentSession) {
+    return currentSession.fingerprint;
+  }
  return FINGERPRINTS[currentFingerprintIndex];
}
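With the forward-declared session in place, getFingerprint() prefers the session identity over the rotating default. Typical usage with the startSession/endSession helpers added later in this file (the state code and timezone are example values):

const session = startSession('AZ', 'America/Phoenix');
try {
  const fp = getFingerprint(); // returns session.fingerprint while the session is active
  console.log(session.sessionId, fp.acceptLanguage);
  // ... crawl the store with this identity ...
} finally {
  endSession(); // getFingerprint() falls back to FINGERPRINTS[currentFingerprintIndex]
}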
@@ -227,6 +245,103 @@ export function resetFingerprint(): void {
|
|||||||
currentFingerprintIndex = 0;
|
currentFingerprintIndex = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get a random fingerprint from the pool
|
||||||
|
*/
|
||||||
|
export function getRandomFingerprint(): Fingerprint {
|
||||||
|
const index = Math.floor(Math.random() * FINGERPRINTS.length);
|
||||||
|
return FINGERPRINTS[index];
|
||||||
|
}
|
||||||
|
|
||||||
|
// ============================================================
|
||||||
|
// SESSION MANAGEMENT
|
||||||
|
// Per-session fingerprint rotation for stealth
|
||||||
|
// ============================================================
|
||||||
|
|
||||||
|
export interface CrawlSession {
|
||||||
|
sessionId: string;
|
||||||
|
fingerprint: Fingerprint;
|
||||||
|
proxyUrl: string | null;
|
||||||
|
stateCode?: string;
|
||||||
|
timezone?: string;
|
||||||
|
startedAt: Date;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Note: currentSession variable declared earlier in file for proper scoping
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Timezone to Accept-Language mapping
|
||||||
|
* US timezones all use en-US but this can be extended for international
|
||||||
|
*/
|
||||||
|
const TIMEZONE_TO_LOCALE: Record<string, string> = {
|
||||||
|
'America/Phoenix': 'en-US,en;q=0.9',
|
||||||
|
'America/Los_Angeles': 'en-US,en;q=0.9',
|
||||||
|
'America/Denver': 'en-US,en;q=0.9',
|
||||||
|
'America/Chicago': 'en-US,en;q=0.9',
|
||||||
|
'America/New_York': 'en-US,en;q=0.9',
|
||||||
|
'America/Detroit': 'en-US,en;q=0.9',
|
||||||
|
'America/Anchorage': 'en-US,en;q=0.9',
|
||||||
|
'Pacific/Honolulu': 'en-US,en;q=0.9',
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get Accept-Language header for a given timezone
|
||||||
|
*/
|
||||||
|
export function getLocaleForTimezone(timezone?: string): string {
|
||||||
|
if (!timezone) return 'en-US,en;q=0.9';
|
||||||
|
return TIMEZONE_TO_LOCALE[timezone] || 'en-US,en;q=0.9';
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Start a new crawl session with a random fingerprint
|
||||||
|
* Call this before crawling a store to get a fresh identity
|
||||||
|
*/
|
||||||
|
export function startSession(stateCode?: string, timezone?: string): CrawlSession {
|
||||||
|
const baseFp = getRandomFingerprint();
|
||||||
|
|
||||||
|
// Override Accept-Language based on timezone for geographic consistency
|
||||||
|
const fingerprint: Fingerprint = {
|
||||||
|
...baseFp,
|
||||||
|
acceptLanguage: getLocaleForTimezone(timezone),
|
||||||
|
};
|
||||||
|
|
||||||
|
currentSession = {
|
||||||
|
sessionId: `session_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`,
|
||||||
|
fingerprint,
|
||||||
|
proxyUrl: currentProxy,
|
||||||
|
stateCode,
|
||||||
|
timezone,
|
||||||
|
startedAt: new Date(),
|
||||||
|
};
|
||||||
|
|
||||||
|
console.log(`[Dutchie Client] Started session ${currentSession.sessionId}`);
|
||||||
|
console.log(`[Dutchie Client] Fingerprint: ${fingerprint.userAgent.slice(0, 50)}...`);
|
||||||
|
console.log(`[Dutchie Client] Accept-Language: ${fingerprint.acceptLanguage}`);
|
||||||
|
if (timezone) {
|
||||||
|
console.log(`[Dutchie Client] Timezone: ${timezone}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
return currentSession;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* End the current crawl session
|
||||||
|
*/
|
||||||
|
export function endSession(): void {
|
||||||
|
if (currentSession) {
|
||||||
|
const duration = Math.round((Date.now() - currentSession.startedAt.getTime()) / 1000);
|
||||||
|
console.log(`[Dutchie Client] Ended session ${currentSession.sessionId} (${duration}s)`);
|
||||||
|
currentSession = null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get current active session
|
||||||
|
*/
|
||||||
|
export function getCurrentSession(): CrawlSession | null {
|
||||||
|
return currentSession;
|
||||||
|
}
|
||||||
|
|
||||||
// ============================================================
|
// ============================================================
|
||||||
// CURL HTTP CLIENT
|
// CURL HTTP CLIENT
|
||||||
// ============================================================
|
// ============================================================
|
||||||
@@ -366,7 +481,7 @@ export function curlGet(url: string, headers: Record<string, string>, timeout =
export interface ExecuteGraphQLOptions {
  maxRetries?: number;
  retryOn403?: boolean;
-  cName: string;
+  cName?: string; // Optional - used for Referer header, defaults to 'cities'
}

/**

@@ -378,7 +493,7 @@ export async function executeGraphQL(
  hash: string,
  options: ExecuteGraphQLOptions
): Promise<any> {
-  const { maxRetries = 3, retryOn403 = true, cName } = options;
+  const { maxRetries = 3, retryOn403 = true, cName = 'cities' } = options;

  const body = {
    operationName,

@@ -419,7 +534,8 @@ export async function executeGraphQL(
    }

    if (response.status === 403 && retryOn403) {
-      console.warn(`[Dutchie Client] 403 blocked - rotating fingerprint...`);
+      console.warn(`[Dutchie Client] 403 blocked - rotating proxy and fingerprint...`);
+      await rotateProxyOn403('403 Forbidden on GraphQL');
      rotateFingerprint();
      attempt++;
      await sleep(1000 * attempt);

@@ -502,7 +618,8 @@ export async function fetchPage(
    }

    if (response.status === 403 && retryOn403) {
-      console.warn(`[Dutchie Client] 403 blocked - rotating fingerprint...`);
+      console.warn(`[Dutchie Client] 403 blocked - rotating proxy and fingerprint...`);
+      await rotateProxyOn403('403 Forbidden on page fetch');
      rotateFingerprint();
      attempt++;
      await sleep(1000 * attempt);
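Both call sites now pair proxy rotation with fingerprint rotation and back off linearly before retrying. The retry shape, pulled out as a standalone sketch (curlGet and the CurlResponse status field are taken from this module's exports; the helper itself is illustrative, not part of the diff):

async function fetchWith403Retry(url: string, headers: Record<string, string>, maxRetries = 3) {
  let attempt = 0;
  while (attempt < maxRetries) {
    const response = await curlGet(url, headers);
    if (response.status !== 403) return response;

    console.warn(`[Dutchie Client] 403 blocked - rotating proxy and fingerprint...`);
    await rotateProxyOn403('403 Forbidden on page fetch');
    rotateFingerprint();

    attempt++;
    await sleep(1000 * attempt); // linear backoff: 1s, 2s, 3s...
  }
  throw new Error(`Still blocked after ${maxRetries} attempts`);
}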
@@ -18,6 +18,13 @@ export {
  getFingerprint,
  rotateFingerprint,
  resetFingerprint,
+  getRandomFingerprint,
+  getLocaleForTimezone,
+
+  // Session Management (per-store fingerprint rotation)
+  startSession,
+  endSession,
+  getCurrentSession,

  // Proxy
  setProxy,

@@ -32,6 +39,7 @@ export {
  // Types
  type CurlResponse,
  type Fingerprint,
+  type CrawlSession,
  type ExecuteGraphQLOptions,
  type FetchPageOptions,
} from './client';
168
backend/src/routes/admin-debug.ts
Normal file
168
backend/src/routes/admin-debug.ts
Normal file
@@ -0,0 +1,168 @@
|
|||||||
|
/**
|
||||||
|
* Admin Debug Routes
|
||||||
|
*
|
||||||
|
* Debug endpoints for inspecting crawl snapshots and raw payloads.
|
||||||
|
* Uses canonical store_* tables (not legacy dutchie_* tables).
|
||||||
|
*/
|
||||||
|
import { Router, Request, Response } from 'express';
|
||||||
|
import { authMiddleware } from '../auth/middleware';
|
||||||
|
import { pool } from '../db/pool';
|
||||||
|
|
||||||
|
const router = Router();
|
||||||
|
router.use(authMiddleware);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* GET /api/admin/debug/stores/:id/snapshots
|
||||||
|
* List recent snapshots for a store's products
|
||||||
|
*/
|
||||||
|
router.get('/stores/:id/snapshots', async (req: Request, res: Response) => {
|
||||||
|
try {
|
||||||
|
const { id } = req.params;
|
||||||
|
const { limit = '50', offset = '0' } = req.query;
|
||||||
|
|
||||||
|
const dispensaryId = parseInt(id, 10);
|
||||||
|
const limitNum = Math.min(parseInt(limit as string, 10), 200);
|
||||||
|
const offsetNum = parseInt(offset as string, 10);
|
||||||
|
|
||||||
|
// Get snapshots with product info
|
||||||
|
const { rows } = await pool.query(`
|
||||||
|
SELECT
|
||||||
|
sps.id,
|
||||||
|
sps.store_product_id as product_id,
|
||||||
|
COALESCE(sps.name_raw, sp.name_raw, 'Unknown Product') as product_name,
|
||||||
|
COALESCE(sps.brand_name_raw, sp.brand_name_raw) as brand_name,
|
||||||
|
sps.captured_at as crawled_at,
|
||||||
|
COALESCE(sps.stock_status, 'unknown') as stock_status,
|
||||||
|
sps.price_rec as regular_price,
|
||||||
|
sps.price_rec_special as sale_price,
|
||||||
|
sps.raw_data as raw_payload
|
||||||
|
FROM store_product_snapshots sps
|
||||||
|
LEFT JOIN store_products sp ON sp.id = sps.store_product_id
|
||||||
|
WHERE sps.dispensary_id = $1
|
||||||
|
ORDER BY sps.captured_at DESC
|
||||||
|
LIMIT $2 OFFSET $3
|
||||||
|
`, [dispensaryId, limitNum, offsetNum]);
|
||||||
|
|
||||||
|
// Get total count
|
||||||
|
const { rows: countRows } = await pool.query(
|
||||||
|
`SELECT COUNT(*) as total FROM store_product_snapshots WHERE dispensary_id = $1`,
|
||||||
|
[dispensaryId]
|
||||||
|
);
|
||||||
|
|
||||||
|
res.json({
|
||||||
|
snapshots: rows.map((r: any) => ({
|
||||||
|
id: r.id,
|
||||||
|
productId: r.product_id,
|
||||||
|
productName: r.product_name,
|
||||||
|
brandName: r.brand_name,
|
||||||
|
crawledAt: r.crawled_at,
|
||||||
|
stockStatus: r.stock_status,
|
||||||
|
regularPrice: r.regular_price ? parseFloat(r.regular_price) : null,
|
||||||
|
salePrice: r.sale_price ? parseFloat(r.sale_price) : null,
|
||||||
|
rawPayload: r.raw_payload,
|
||||||
|
})),
|
||||||
|
total: parseInt(countRows[0]?.total || '0', 10),
|
||||||
|
limit: limitNum,
|
||||||
|
offset: offsetNum,
|
||||||
|
});
|
||||||
|
} catch (error: any) {
|
||||||
|
console.error('[AdminDebug] Error fetching store snapshots:', error.message);
|
||||||
|
res.status(500).json({ error: error.message });
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
/**
|
||||||
|
* GET /api/admin/debug/snapshots/:id/raw-payload
|
||||||
|
* Get the raw payload for a specific snapshot
|
||||||
|
*/
|
||||||
|
router.get('/snapshots/:id/raw-payload', async (req: Request, res: Response) => {
|
||||||
|
try {
|
||||||
|
const { id } = req.params;
|
||||||
|
const snapshotId = parseInt(id, 10);
|
||||||
|
|
||||||
|
const { rows } = await pool.query(`
|
||||||
|
SELECT
|
||||||
|
sps.id,
|
||||||
|
sps.store_product_id as product_id,
|
||||||
|
COALESCE(sps.name_raw, sp.name_raw, 'Unknown Product') as product_name,
|
||||||
|
sps.dispensary_id,
|
||||||
|
d.name as dispensary_name,
|
||||||
|
sps.captured_at as crawled_at,
|
||||||
|
sps.raw_data as raw_payload
|
||||||
|
FROM store_product_snapshots sps
|
||||||
|
LEFT JOIN store_products sp ON sp.id = sps.store_product_id
|
||||||
|
LEFT JOIN dispensaries d ON d.id = sps.dispensary_id
|
||||||
|
WHERE sps.id = $1
|
||||||
|
`, [snapshotId]);
|
||||||
|
|
||||||
|
if (rows.length === 0) {
|
||||||
|
return res.status(404).json({ error: 'Snapshot not found' });
|
||||||
|
}
|
||||||
|
|
||||||
|
const r = rows[0];
|
||||||
|
res.json({
|
||||||
|
snapshot: {
|
||||||
|
id: r.id,
|
||||||
|
productId: r.product_id,
|
||||||
|
productName: r.product_name,
|
||||||
|
dispensaryId: r.dispensary_id,
|
||||||
|
dispensaryName: r.dispensary_name,
|
||||||
|
crawledAt: r.crawled_at,
|
||||||
|
rawPayload: r.raw_payload,
|
||||||
|
},
|
||||||
|
});
|
||||||
|
} catch (error: any) {
|
||||||
|
console.error('[AdminDebug] Error fetching snapshot raw payload:', error.message);
|
||||||
|
res.status(500).json({ error: error.message });
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
/**
|
||||||
|
* GET /api/admin/debug/products/:id/raw-payload
|
||||||
|
* Get raw payload and metadata for a specific product
|
||||||
|
*/
|
||||||
|
router.get('/products/:id/raw-payload', async (req: Request, res: Response) => {
|
||||||
|
try {
|
||||||
|
const { id } = req.params;
|
||||||
|
const productId = parseInt(id, 10);
|
||||||
|
|
||||||
|
// Query store_products for the product and any raw_payload/metadata
|
||||||
|
const { rows } = await pool.query(`
|
||||||
|
SELECT
|
||||||
|
sp.id,
|
||||||
|
sp.name_raw as name,
|
||||||
|
sp.dispensary_id,
|
||||||
|
d.name as dispensary_name,
|
||||||
|
sp.raw_payload,
|
||||||
|
sp.provider_metadata as metadata,
|
||||||
|
sp.created_at,
|
||||||
|
sp.updated_at
|
||||||
|
FROM store_products sp
|
||||||
|
LEFT JOIN dispensaries d ON d.id = sp.dispensary_id
|
||||||
|
WHERE sp.id = $1
|
||||||
|
`, [productId]);
|
||||||
|
|
||||||
|
if (rows.length === 0) {
|
||||||
|
return res.status(404).json({ error: 'Product not found' });
|
||||||
|
}
|
||||||
|
|
||||||
|
const r = rows[0];
|
||||||
|
res.json({
|
||||||
|
product: {
|
||||||
|
id: r.id,
|
||||||
|
name: r.name,
|
||||||
|
dispensaryId: r.dispensary_id,
|
||||||
|
dispensaryName: r.dispensary_name,
|
||||||
|
rawPayload: r.raw_payload,
|
||||||
|
metadata: r.metadata,
|
||||||
|
createdAt: r.created_at,
|
||||||
|
updatedAt: r.updated_at,
|
||||||
|
},
|
||||||
|
});
|
||||||
|
} catch (error: any) {
|
||||||
|
console.error('[AdminDebug] Error fetching product raw payload:', error.message);
|
||||||
|
res.status(500).json({ error: error.message });
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
export default router;
|
||||||
@@ -231,6 +231,34 @@ export function createAnalyticsV2Router(pool: Pool): Router {
    }
  });

+  /**
+   * GET /brand/:name/promotions
+   * Get brand promotional history - tracks specials, discounts, duration, and sales estimates
+   *
+   * Query params:
+   * - window: 7d|30d|90d (default: 90d)
+   * - state: state code filter (e.g., AZ)
+   * - category: category filter (e.g., Flower)
+   */
+  router.get('/brand/:name/promotions', async (req: Request, res: Response) => {
+    try {
+      const brandName = decodeURIComponent(req.params.name);
+      const window = parseTimeWindow(req.query.window as string) || '90d';
+      const stateCode = req.query.state as string | undefined;
+      const category = req.query.category as string | undefined;
+
+      const result = await brandService.getBrandPromotionalHistory(brandName, {
+        window,
+        stateCode,
+        category,
+      });
+      res.json(result);
+    } catch (error) {
+      console.error('[AnalyticsV2] Brand promotions error:', error);
+      res.status(500).json({ error: 'Failed to fetch brand promotional history' });
+    }
+  });
+
  // ============================================================
  // CATEGORY ANALYTICS
  // ============================================================
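A request sketch for the new endpoint. Only the router-relative path /brand/:name/promotions appears in this diff; the prefix the Analytics V2 router is mounted under and the host are assumptions:

const url = `http://localhost:3000/api/analytics/v2/brand/${encodeURIComponent('Example Brand')}/promotions`
  + '?window=30d&state=AZ&category=Flower';

const res = await fetch(url);
const promotions = await res.json(); // specials, discount depth, duration, sales estimates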
@@ -400,6 +428,31 @@ export function createAnalyticsV2Router(pool: Pool): Router {
     }
   });
 
+  /**
+   * GET /store/:id/quantity-changes
+   * Get quantity changes for a store (increases/decreases)
+   * Useful for estimating sales (decreases) or restocks (increases)
+   *
+   * Query params:
+   * - window: 7d|30d|90d (default: 7d)
+   * - direction: increase|decrease|all (default: all)
+   * - limit: number (default: 100)
+   */
+  router.get('/store/:id/quantity-changes', async (req: Request, res: Response) => {
+    try {
+      const dispensaryId = parseInt(req.params.id);
+      const window = parseTimeWindow(req.query.window as string);
+      const direction = (req.query.direction as 'increase' | 'decrease' | 'all') || 'all';
+      const limit = req.query.limit ? parseInt(req.query.limit as string) : 100;
+
+      const result = await storeService.getQuantityChanges(dispensaryId, { window, direction, limit });
+      res.json(result);
+    } catch (error) {
+      console.error('[AnalyticsV2] Store quantity changes error:', error);
+      res.status(500).json({ error: 'Failed to fetch store quantity changes' });
+    }
+  });
+
   /**
    * GET /store/:id/inventory
    * Get store inventory composition
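Review note: a usage sketch for estimating sales from quantity decreases (route prefix assumed, query params as documented above).

// Hypothetical call; '/api/analytics/v2' prefix is an assumption.
const res = await fetch(`/api/analytics/v2/store/${storeId}/quantity-changes?window=7d&direction=decrease&limit=50`);
const changes = await res.json(); // decreases over the window approximate unit sales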
@@ -35,11 +35,11 @@ router.get('/overview', async (req, res) => {
 
     // Top products
     const topProductsResult = await pool.query(`
-      SELECT p.id, p.name, p.price, COUNT(c.id) as click_count
+      SELECT p.id, p.name_raw as name, p.price_rec as price, COUNT(c.id) as click_count
       FROM clicks c
-      JOIN products p ON c.product_id = p.id
+      JOIN store_products p ON c.product_id = p.id
       WHERE c.clicked_at >= NOW() - INTERVAL '${parseInt(days as string)} days'
-      GROUP BY p.id, p.name, p.price
+      GROUP BY p.id, p.name_raw, p.price_rec
       ORDER BY click_count DESC
      LIMIT 10
    `);
@@ -109,12 +109,12 @@ router.get('/campaigns/:id', async (req, res) => {
 
     // Clicks by product in this campaign
     const byProductResult = await pool.query(`
-      SELECT p.id, p.name, COUNT(c.id) as clicks
+      SELECT p.id, p.name_raw as name, COUNT(c.id) as clicks
       FROM clicks c
-      JOIN products p ON c.product_id = p.id
+      JOIN store_products p ON c.product_id = p.id
       WHERE c.campaign_id = $1
         AND c.clicked_at >= NOW() - INTERVAL '${parseInt(days as string)} days'
-      GROUP BY p.id, p.name
+      GROUP BY p.id, p.name_raw
       ORDER BY clicks DESC
     `, [id]);
 
@@ -154,7 +154,7 @@ router.patch('/:id/toggle', requireRole('superadmin', 'admin'), async (req, res)
 
     const result = await pool.query(`
       UPDATE wp_dutchie_api_permissions
-      SET is_active = NOT is_active
+      SET is_active = CASE WHEN is_active = 1 THEN 0 ELSE 1 END
       WHERE id = $1
       RETURNING *
     `, [id]);
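Review note: replacing `NOT is_active` with a CASE expression suggests the column is stored as an integer flag (0/1) in this WordPress-style table rather than a Postgres boolean, where boolean negation would not apply. If that assumption holds, an equivalent arithmetic form would be:

// Assumption: is_active is an integer 0/1 column; this is an alternative sketch, not the committed code.
await pool.query(`UPDATE wp_dutchie_api_permissions SET is_active = 1 - is_active WHERE id = $1 RETURNING *`, [id]);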
@@ -37,8 +37,22 @@ router.get('/:id', async (req, res) => {
     }
 
     const productsResult = await pool.query(`
-      SELECT p.*, cp.display_order
-      FROM products p
+      SELECT
+        p.id,
+        p.dispensary_id,
+        p.name_raw as name,
+        p.brand_name_raw as brand,
+        p.category_raw as category,
+        p.subcategory_raw as subcategory,
+        p.price_rec as price,
+        p.thc_percent,
+        p.cbd_percent,
+        p.strain_type,
+        p.primary_image_url as image_url,
+        p.stock_status,
+        p.is_in_stock as in_stock,
+        cp.display_order
+      FROM store_products p
       JOIN campaign_products cp ON p.id = cp.product_id
       WHERE cp.campaign_id = $1
       ORDER BY cp.display_order
@@ -5,33 +5,37 @@ import { pool } from '../db/pool';
 const router = Router();
 router.use(authMiddleware);
 
-// Get categories (flat list)
+// Get categories (flat list) - derived from actual product data
 router.get('/', async (req, res) => {
   try {
-    const { store_id } = req.query;
+    const { store_id, in_stock_only } = req.query;
 
     let query = `
       SELECT
-        c.*,
-        COUNT(DISTINCT p.id) as product_count,
-        pc.name as parent_name
-      FROM categories c
-      LEFT JOIN products p ON c.id = p.category_id
-      LEFT JOIN categories pc ON c.parent_id = pc.id
+        category_raw as name,
+        category_raw as slug,
+        COUNT(*) as product_count,
+        COUNT(*) FILTER (WHERE is_in_stock = true) as in_stock_count
+      FROM store_products
+      WHERE category_raw IS NOT NULL
     `;
 
     const params: any[] = [];
 
     if (store_id) {
-      query += ' WHERE c.store_id = $1';
       params.push(store_id);
+      query += ` AND dispensary_id = $${params.length}`;
     }
 
+    if (in_stock_only === 'true') {
+      query += ` AND is_in_stock = true`;
+    }
+
     query += `
-      GROUP BY c.id, pc.name
-      ORDER BY c.display_order, c.name
+      GROUP BY category_raw
+      ORDER BY category_raw
     `;
 
     const result = await pool.query(query, params);
     res.json({ categories: result.rows });
   } catch (error) {
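Review note: a sketch of the response shape after this rewrite. Field names follow the SELECT aliases above; values are illustrative, and the counts are typically returned as strings by node-postgres since they are bigint aggregates.

// Illustrative only; values are made up.
type CategoryRow = { name: string; slug: string; product_count: string; in_stock_count: string };
// GET /categories?store_id=42&in_stock_only=true
// -> { categories: [{ name: 'Flower', slug: 'Flower', product_count: '120', in_stock_count: '98' }, ...] }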
@@ -40,50 +44,86 @@ router.get('/', async (req, res) => {
   }
 });
 
-// Get category tree (hierarchical)
+// Get category tree (hierarchical) - category -> subcategory structure from product data
 router.get('/tree', async (req, res) => {
   try {
-    const { store_id } = req.query;
+    const { store_id, in_stock_only } = req.query;
 
-    if (!store_id) {
-      return res.status(400).json({ error: 'store_id is required' });
-    }
-
-    // Get all categories for the store
-    const result = await pool.query(`
-      SELECT
-        c.*,
-        COUNT(DISTINCT p.id) as product_count
-      FROM categories c
-      LEFT JOIN products p ON c.id = p.category_id AND p.in_stock = true
-      WHERE c.store_id = $1
-      GROUP BY c.id
-      ORDER BY c.display_order, c.name
-    `, [store_id]);
-
-    // Build tree structure
-    const categories = result.rows;
-    const categoryMap = new Map();
-    const tree: any[] = [];
-
-    // First pass: create map
-    categories.forEach((cat: { id: number; parent_id?: number }) => {
-      categoryMap.set(cat.id, { ...cat, children: [] });
-    });
-
-    // Second pass: build tree
-    categories.forEach((cat: { id: number; parent_id?: number }) => {
-      const node = categoryMap.get(cat.id);
-      if (cat.parent_id) {
-        const parent = categoryMap.get(cat.parent_id);
-        if (parent) {
-          parent.children.push(node);
-        }
-      } else {
-        tree.push(node);
-      }
-    });
+    // Get category + subcategory combinations with counts
+    let query = `
+      SELECT
+        category_raw as category,
+        subcategory_raw as subcategory,
+        COUNT(*) as product_count,
+        COUNT(*) FILTER (WHERE is_in_stock = true) as in_stock_count
+      FROM store_products
+      WHERE category_raw IS NOT NULL
+    `;
+
+    const params: any[] = [];
+
+    if (store_id) {
+      params.push(store_id);
+      query += ` AND dispensary_id = $${params.length}`;
+    }
+
+    if (in_stock_only === 'true') {
+      query += ` AND is_in_stock = true`;
+    }
+
+    query += `
+      GROUP BY category_raw, subcategory_raw
+      ORDER BY category_raw, subcategory_raw
+    `;
+
+    const result = await pool.query(query, params);
+
+    // Build tree structure: category -> subcategories
+    const categoryMap = new Map<string, {
+      name: string;
+      slug: string;
+      product_count: number;
+      in_stock_count: number;
+      subcategories: Array<{
+        name: string;
+        slug: string;
+        product_count: number;
+        in_stock_count: number;
+      }>;
+    }>();
+
+    for (const row of result.rows) {
+      const category = row.category;
+      const subcategory = row.subcategory;
+      const count = parseInt(row.product_count);
+      const inStockCount = parseInt(row.in_stock_count);
+
+      if (!categoryMap.has(category)) {
+        categoryMap.set(category, {
+          name: category,
+          slug: category.toLowerCase().replace(/\s+/g, '-'),
+          product_count: 0,
+          in_stock_count: 0,
+          subcategories: []
+        });
+      }
+
+      const cat = categoryMap.get(category)!;
+      cat.product_count += count;
+      cat.in_stock_count += inStockCount;
+
+      if (subcategory) {
+        cat.subcategories.push({
+          name: subcategory,
+          slug: subcategory.toLowerCase().replace(/\s+/g, '-'),
+          product_count: count,
+          in_stock_count: inStockCount
+        });
+      }
+    }
+
+    const tree = Array.from(categoryMap.values());
 
     res.json({ tree });
   } catch (error) {
     console.error('Error fetching category tree:', error);
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Get all unique subcategories for a category
|
||||||
|
router.get('/:category/subcategories', async (req, res) => {
|
||||||
|
try {
|
||||||
|
const { category } = req.params;
|
||||||
|
const { store_id, in_stock_only } = req.query;
|
||||||
|
|
||||||
|
let query = `
|
||||||
|
SELECT
|
||||||
|
subcategory_raw as name,
|
||||||
|
subcategory_raw as slug,
|
||||||
|
COUNT(*) as product_count,
|
||||||
|
COUNT(*) FILTER (WHERE is_in_stock = true) as in_stock_count
|
||||||
|
FROM store_products
|
||||||
|
WHERE category_raw = $1
|
||||||
|
AND subcategory_raw IS NOT NULL
|
||||||
|
`;
|
||||||
|
|
||||||
|
const params: any[] = [category];
|
||||||
|
|
||||||
|
if (store_id) {
|
||||||
|
params.push(store_id);
|
||||||
|
query += ` AND dispensary_id = $${params.length}`;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (in_stock_only === 'true') {
|
||||||
|
query += ` AND is_in_stock = true`;
|
||||||
|
}
|
||||||
|
|
||||||
|
query += `
|
||||||
|
GROUP BY subcategory_raw
|
||||||
|
ORDER BY subcategory_raw
|
||||||
|
`;
|
||||||
|
|
||||||
|
const result = await pool.query(query, params);
|
||||||
|
res.json({
|
||||||
|
category,
|
||||||
|
subcategories: result.rows
|
||||||
|
});
|
||||||
|
} catch (error) {
|
||||||
|
console.error('Error fetching subcategories:', error);
|
||||||
|
res.status(500).json({ error: 'Failed to fetch subcategories' });
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
// Get global category summary (across all stores)
|
||||||
|
router.get('/summary', async (req, res) => {
|
||||||
|
try {
|
||||||
|
const { state } = req.query;
|
||||||
|
|
||||||
|
let query = `
|
||||||
|
SELECT
|
||||||
|
sp.category_raw as category,
|
||||||
|
COUNT(DISTINCT sp.id) as product_count,
|
||||||
|
COUNT(DISTINCT sp.dispensary_id) as store_count,
|
||||||
|
COUNT(*) FILTER (WHERE sp.is_in_stock = true) as in_stock_count
|
||||||
|
FROM store_products sp
|
||||||
|
`;
|
||||||
|
|
||||||
|
const params: any[] = [];
|
||||||
|
|
||||||
|
if (state) {
|
||||||
|
query += `
|
||||||
|
JOIN dispensaries d ON sp.dispensary_id = d.id
|
||||||
|
WHERE sp.category_raw IS NOT NULL
|
||||||
|
AND d.state = $1
|
||||||
|
`;
|
||||||
|
params.push(state);
|
||||||
|
} else {
|
||||||
|
query += ` WHERE sp.category_raw IS NOT NULL`;
|
||||||
|
}
|
||||||
|
|
||||||
|
query += `
|
||||||
|
GROUP BY sp.category_raw
|
||||||
|
ORDER BY product_count DESC
|
||||||
|
`;
|
||||||
|
|
||||||
|
const result = await pool.query(query, params);
|
||||||
|
res.json({
|
||||||
|
categories: result.rows,
|
||||||
|
total_categories: result.rows.length
|
||||||
|
});
|
||||||
|
} catch (error) {
|
||||||
|
console.error('Error fetching category summary:', error);
|
||||||
|
res.status(500).json({ error: 'Failed to fetch category summary' });
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
export default router;
|
export default router;
|
||||||
|
|||||||
@@ -92,9 +92,9 @@ router.get('/brands', async (req: Request, res: Response) => {
 
     if (brandIds.length > 0) {
       const brandNamesResult = await pool.query(`
-        SELECT DISTINCT brand_name
-        FROM dutchie_products
-        WHERE brand_name = ANY($1)
+        SELECT DISTINCT brand_name_raw as brand_name
+        FROM store_products
+        WHERE brand_name_raw = ANY($1)
       `, [brandIds]);
 
       brandNamesResult.rows.forEach(r => {
@@ -201,14 +201,14 @@ router.get('/products', async (req: Request, res: Response) => {
       // Try to match by external_id or id
       const productDetailsResult = await pool.query(`
         SELECT
-          external_id,
+          provider_product_id as external_id,
           id::text as product_id,
-          name,
-          brand_name,
-          type,
-          subcategory
-        FROM dutchie_products
-        WHERE external_id = ANY($1) OR id::text = ANY($1)
+          name_raw as name,
+          brand_name_raw as brand_name,
+          category_raw as type,
+          subcategory_raw as subcategory
+        FROM store_products
+        WHERE provider_product_id = ANY($1) OR id::text = ANY($1)
       `, [productIds]);
 
       productDetailsResult.rows.forEach(r => {
@@ -26,10 +26,10 @@ router.get('/stats', async (req, res) => {
         COUNT(*) as total,
         COUNT(*) FILTER (WHERE stock_status = 'in_stock') as in_stock,
         COUNT(*) FILTER (WHERE primary_image_url IS NOT NULL) as with_images,
-        COUNT(DISTINCT brand_name) FILTER (WHERE brand_name IS NOT NULL AND brand_name != '') as unique_brands,
+        COUNT(DISTINCT brand_name_raw) FILTER (WHERE brand_name_raw IS NOT NULL AND brand_name_raw != '') as unique_brands,
         COUNT(DISTINCT dispensary_id) as dispensaries_with_products,
         COUNT(*) FILTER (WHERE created_at >= NOW() - INTERVAL '24 hours') as new_products_24h
-      FROM dutchie_products
+      FROM store_products
       )
       SELECT
         ds.total as store_total, ds.active as store_active,
@@ -96,25 +96,25 @@ router.get('/activity', async (req, res) => {
     const scrapesResult = await pool.query(`
       SELECT
         d.name,
-        d.last_crawled_at as last_scraped_at,
+        d.last_crawl_at as last_scraped_at,
         d.product_count
       FROM dispensaries d
-      WHERE d.last_crawled_at IS NOT NULL
-      ORDER BY d.last_crawled_at DESC
+      WHERE d.last_crawl_at IS NOT NULL
+      ORDER BY d.last_crawl_at DESC
       LIMIT $1
     `, [limit]);
 
-    // Recent products from dutchie_products
+    // Recent products from store_products (canonical)
     const productsResult = await pool.query(`
       SELECT
-        p.name,
-        0 as price,
-        p.brand_name as brand,
-        p.thc as thc_percentage,
-        p.cbd as cbd_percentage,
+        p.name_raw as name,
+        p.price_rec as price,
+        p.brand_name_raw as brand,
+        p.thc_percent as thc_percentage,
+        p.cbd_percent as cbd_percentage,
         d.name as store_name,
         p.created_at as first_seen_at
-      FROM dutchie_products p
+      FROM store_products p
       JOIN dispensaries d ON p.dispensary_id = d.id
       ORDER BY p.created_at DESC
       LIMIT $1
backend/src/routes/deploy-status.ts (new file, 269 lines)
@@ -0,0 +1,269 @@
|
|||||||
|
import { Router, Request, Response } from 'express';
|
||||||
|
import axios from 'axios';
|
||||||
|
|
||||||
|
const router = Router();
|
||||||
|
|
||||||
|
// Woodpecker API config - uses env vars or falls back
|
||||||
|
const WOODPECKER_SERVER = process.env.WOODPECKER_SERVER || 'https://ci.cannabrands.app';
|
||||||
|
const WOODPECKER_TOKEN = process.env.WOODPECKER_TOKEN;
|
||||||
|
const GITEA_SERVER = process.env.GITEA_SERVER || 'https://code.cannabrands.app';
|
||||||
|
const GITEA_TOKEN = process.env.GITEA_TOKEN;
|
||||||
|
const REPO_OWNER = 'Creationshop';
|
||||||
|
const REPO_NAME = 'dispensary-scraper';
|
||||||
|
|
||||||
|
interface PipelineStep {
|
||||||
|
name: string;
|
||||||
|
state: 'pending' | 'running' | 'success' | 'failure' | 'skipped';
|
||||||
|
started?: number;
|
||||||
|
stopped?: number;
|
||||||
|
}
|
||||||
|
|
||||||
|
interface PipelineInfo {
|
||||||
|
number: number;
|
||||||
|
status: string;
|
||||||
|
event: string;
|
||||||
|
branch: string;
|
||||||
|
message: string;
|
||||||
|
commit: string;
|
||||||
|
author: string;
|
||||||
|
created: number;
|
||||||
|
started?: number;
|
||||||
|
finished?: number;
|
||||||
|
steps?: PipelineStep[];
|
||||||
|
}
|
||||||
|
|
||||||
|
interface DeployStatusResponse {
|
||||||
|
running: {
|
||||||
|
sha: string;
|
||||||
|
sha_full: string;
|
||||||
|
build_time: string;
|
||||||
|
image_tag: string;
|
||||||
|
};
|
||||||
|
latest: {
|
||||||
|
sha: string;
|
||||||
|
sha_full: string;
|
||||||
|
message: string;
|
||||||
|
author: string;
|
||||||
|
timestamp: string;
|
||||||
|
} | null;
|
||||||
|
is_latest: boolean;
|
||||||
|
commits_behind: number;
|
||||||
|
pipeline: PipelineInfo | null;
|
||||||
|
error?: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Fetch latest commit from Gitea
|
||||||
|
*/
|
||||||
|
async function getLatestCommit(): Promise<{
|
||||||
|
sha: string;
|
||||||
|
message: string;
|
||||||
|
author: string;
|
||||||
|
timestamp: string;
|
||||||
|
} | null> {
|
||||||
|
if (!GITEA_TOKEN) {
|
||||||
|
console.warn('[DeployStatus] GITEA_TOKEN not set, skipping latest commit fetch');
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
const response = await axios.get(
|
||||||
|
`${GITEA_SERVER}/api/v1/repos/${REPO_OWNER}/${REPO_NAME}/commits?limit=1`,
|
||||||
|
{
|
||||||
|
headers: { Authorization: `token ${GITEA_TOKEN}` },
|
||||||
|
timeout: 5000,
|
||||||
|
}
|
||||||
|
);
|
||||||
|
|
||||||
|
if (response.data && response.data.length > 0) {
|
||||||
|
const commit = response.data[0];
|
||||||
|
return {
|
||||||
|
sha: commit.sha,
|
||||||
|
message: commit.commit?.message?.split('\n')[0] || '',
|
||||||
|
author: commit.commit?.author?.name || commit.author?.login || 'unknown',
|
||||||
|
timestamp: commit.commit?.author?.date || commit.created,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
} catch (error: any) {
|
||||||
|
console.error('[DeployStatus] Failed to fetch latest commit:', error.message);
|
||||||
|
}
|
||||||
|
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Fetch latest pipeline from Woodpecker
|
||||||
|
*/
|
||||||
|
async function getLatestPipeline(): Promise<PipelineInfo | null> {
|
||||||
|
if (!WOODPECKER_TOKEN) {
|
||||||
|
console.warn('[DeployStatus] WOODPECKER_TOKEN not set, skipping pipeline fetch');
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
// Get latest pipeline
|
||||||
|
const listResponse = await axios.get(
|
||||||
|
`${WOODPECKER_SERVER}/api/repos/${REPO_OWNER}/${REPO_NAME}/pipelines?page=1&per_page=1`,
|
||||||
|
{
|
||||||
|
headers: { Authorization: `Bearer ${WOODPECKER_TOKEN}` },
|
||||||
|
timeout: 5000,
|
||||||
|
}
|
||||||
|
);
|
||||||
|
|
||||||
|
if (!listResponse.data || listResponse.data.length === 0) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
const pipeline = listResponse.data[0];
|
||||||
|
|
||||||
|
// Get pipeline steps
|
||||||
|
let steps: PipelineStep[] = [];
|
||||||
|
try {
|
||||||
|
const stepsResponse = await axios.get(
|
||||||
|
`${WOODPECKER_SERVER}/api/repos/${REPO_OWNER}/${REPO_NAME}/pipelines/${pipeline.number}`,
|
||||||
|
{
|
||||||
|
headers: { Authorization: `Bearer ${WOODPECKER_TOKEN}` },
|
||||||
|
timeout: 5000,
|
||||||
|
}
|
||||||
|
);
|
||||||
|
|
||||||
|
if (stepsResponse.data?.workflows) {
|
||||||
|
for (const workflow of stepsResponse.data.workflows) {
|
||||||
|
if (workflow.children) {
|
||||||
|
for (const step of workflow.children) {
|
||||||
|
steps.push({
|
||||||
|
name: step.name,
|
||||||
|
state: step.state,
|
||||||
|
started: step.start_time,
|
||||||
|
stopped: step.end_time,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (stepError) {
|
||||||
|
// Steps fetch failed, continue without them
|
||||||
|
}
|
||||||
|
|
||||||
|
return {
|
||||||
|
number: pipeline.number,
|
||||||
|
status: pipeline.status,
|
||||||
|
event: pipeline.event,
|
||||||
|
branch: pipeline.branch,
|
||||||
|
message: pipeline.message?.split('\n')[0] || '',
|
||||||
|
commit: pipeline.commit?.slice(0, 8) || '',
|
||||||
|
author: pipeline.author || 'unknown',
|
||||||
|
created: pipeline.created_at,
|
||||||
|
started: pipeline.started_at,
|
||||||
|
finished: pipeline.finished_at,
|
||||||
|
steps,
|
||||||
|
};
|
||||||
|
} catch (error: any) {
|
||||||
|
console.error('[DeployStatus] Failed to fetch pipeline:', error.message);
|
||||||
|
}
|
||||||
|
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Count commits between two SHAs
|
||||||
|
*/
|
||||||
|
async function countCommitsBetween(fromSha: string, toSha: string): Promise<number> {
|
||||||
|
if (!GITEA_TOKEN || !fromSha || !toSha) return 0;
|
||||||
|
if (fromSha === toSha) return 0;
|
||||||
|
|
||||||
|
try {
|
||||||
|
const response = await axios.get(
|
||||||
|
`${GITEA_SERVER}/api/v1/repos/${REPO_OWNER}/${REPO_NAME}/commits?sha=${toSha}&limit=50`,
|
||||||
|
{
|
||||||
|
headers: { Authorization: `token ${GITEA_TOKEN}` },
|
||||||
|
timeout: 5000,
|
||||||
|
}
|
||||||
|
);
|
||||||
|
|
||||||
|
if (response.data) {
|
||||||
|
const commits = response.data;
|
||||||
|
for (let i = 0; i < commits.length; i++) {
|
||||||
|
if (commits[i].sha.startsWith(fromSha)) {
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// If not found in first 50, assume more than 50 behind
|
||||||
|
return commits.length;
|
||||||
|
}
|
||||||
|
} catch (error: any) {
|
||||||
|
console.error('[DeployStatus] Failed to count commits:', error.message);
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* GET /api/admin/deploy-status
|
||||||
|
* Returns deployment status with version comparison and CI info
|
||||||
|
*/
|
||||||
|
router.get('/', async (req: Request, res: Response) => {
|
||||||
|
try {
|
||||||
|
// Get running version from env vars (set during Docker build)
|
||||||
|
const runningSha = process.env.APP_GIT_SHA || 'unknown';
|
||||||
|
const running = {
|
||||||
|
sha: runningSha.slice(0, 8),
|
||||||
|
sha_full: runningSha,
|
||||||
|
build_time: process.env.APP_BUILD_TIME || new Date().toISOString(),
|
||||||
|
image_tag: process.env.CONTAINER_IMAGE_TAG?.slice(0, 8) || 'local',
|
||||||
|
};
|
||||||
|
|
||||||
|
// Fetch latest commit and pipeline in parallel
|
||||||
|
const [latestCommit, pipeline] = await Promise.all([
|
||||||
|
getLatestCommit(),
|
||||||
|
getLatestPipeline(),
|
||||||
|
]);
|
||||||
|
|
||||||
|
// Build latest info
|
||||||
|
const latest = latestCommit ? {
|
||||||
|
sha: latestCommit.sha.slice(0, 8),
|
||||||
|
sha_full: latestCommit.sha,
|
||||||
|
message: latestCommit.message,
|
||||||
|
author: latestCommit.author,
|
||||||
|
timestamp: latestCommit.timestamp,
|
||||||
|
} : null;
|
||||||
|
|
||||||
|
// Determine if running latest
|
||||||
|
const isLatest = latest
|
||||||
|
? runningSha.startsWith(latest.sha_full.slice(0, 8)) ||
|
||||||
|
latest.sha_full.startsWith(runningSha.slice(0, 8))
|
||||||
|
: true;
|
||||||
|
|
||||||
|
// Count commits behind
|
||||||
|
const commitsBehind = isLatest
|
||||||
|
? 0
|
||||||
|
: await countCommitsBetween(runningSha, latest?.sha_full || '');
|
||||||
|
|
||||||
|
const response: DeployStatusResponse = {
|
||||||
|
running,
|
||||||
|
latest,
|
||||||
|
is_latest: isLatest,
|
||||||
|
commits_behind: commitsBehind,
|
||||||
|
pipeline,
|
||||||
|
};
|
||||||
|
|
||||||
|
res.json(response);
|
||||||
|
} catch (error: any) {
|
||||||
|
console.error('[DeployStatus] Error:', error);
|
||||||
|
res.status(500).json({
|
||||||
|
error: error.message,
|
||||||
|
running: {
|
||||||
|
sha: process.env.APP_GIT_SHA?.slice(0, 8) || 'unknown',
|
||||||
|
sha_full: process.env.APP_GIT_SHA || 'unknown',
|
||||||
|
build_time: process.env.APP_BUILD_TIME || 'unknown',
|
||||||
|
image_tag: process.env.CONTAINER_IMAGE_TAG?.slice(0, 8) || 'local',
|
||||||
|
},
|
||||||
|
latest: null,
|
||||||
|
is_latest: true,
|
||||||
|
commits_behind: 0,
|
||||||
|
pipeline: null,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
export default router;
|
||||||
@@ -8,32 +8,51 @@ router.use(authMiddleware);
 // Valid menu_type values
 const VALID_MENU_TYPES = ['dutchie', 'treez', 'jane', 'weedmaps', 'leafly', 'meadow', 'blaze', 'flowhub', 'dispense', 'cova', 'other', 'unknown'];
 
-// Get all dispensaries
+// Get all dispensaries (with pagination)
 router.get('/', async (req, res) => {
   try {
-    const { menu_type, city, state } = req.query;
+    const { menu_type, city, state, crawl_enabled, dutchie_verified, status, limit, offset, search } = req.query;
+    const pageLimit = Math.min(parseInt(limit as string) || 50, 500);
+    const pageOffset = parseInt(offset as string) || 0;
 
     let query = `
       SELECT
         id,
         name,
-        company_name,
         slug,
-        address,
+        address1,
+        address2,
         city,
         state,
-        zip,
+        zipcode,
         phone,
         website,
+        email,
         dba_name,
         latitude,
         longitude,
+        timezone,
         menu_url,
         menu_type,
         platform,
         platform_dispensary_id,
+        c_name,
+        chain_slug,
+        enterprise_id,
+        description,
+        logo_image,
+        banner_image,
+        offer_pickup,
+        offer_delivery,
+        offer_curbside_pickup,
+        is_medical,
+        is_recreational,
+        status,
+        country,
         product_count,
         last_crawl_at,
+        crawl_enabled,
+        dutchie_verified,
         created_at,
         updated_at
       FROM dispensaries
@@ -48,10 +67,10 @@ router.get('/', async (req, res) => {
       params.push(menu_type);
     }
 
-    // Filter by city if provided
+    // Filter by city if provided (supports partial match)
     if (city) {
       conditions.push(`city ILIKE $${params.length + 1}`);
-      params.push(city);
+      params.push(`%${city}%`);
     }
 
     // Filter by state if provided
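Review note: with the wildcard wrapping, a request like `?city=phoe` now matches "Phoenix" instead of requiring an exact, case-insensitive match.

// Resulting fragment (parameter index illustrative):
// conditions: ["city ILIKE $1"], params: ["%phoe%"]  ->  ... WHERE city ILIKE '%phoe%'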
@@ -60,15 +79,61 @@ router.get('/', async (req, res) => {
       params.push(state);
     }
 
-    if (conditions.length > 0) {
-      query += ` WHERE ${conditions.join(' AND ')}`;
+    // Filter by crawl_enabled - defaults to showing only enabled
+    if (crawl_enabled === 'false' || crawl_enabled === '0') {
+      // Explicitly show disabled only
+      conditions.push(`(crawl_enabled = false OR crawl_enabled IS NULL)`);
+    } else if (crawl_enabled === 'all') {
+      // Show all (no filter)
+    } else {
+      // Default: show only enabled
+      conditions.push(`crawl_enabled = true`);
     }
 
+    // Filter by dutchie_verified if provided
+    if (dutchie_verified !== undefined) {
+      const verified = dutchie_verified === 'true' || dutchie_verified === '1';
+      if (verified) {
+        conditions.push(`dutchie_verified = true`);
+      } else {
+        conditions.push(`(dutchie_verified = false OR dutchie_verified IS NULL)`);
+      }
+    }
+
+    // Filter by status (e.g., 'dropped', 'open', 'closed')
+    if (status) {
+      conditions.push(`status = $${params.length + 1}`);
+      params.push(status);
+    }
+
+    // Search filter (name, dba_name, city, company_name)
+    if (search) {
+      conditions.push(`(name ILIKE $${params.length + 1} OR dba_name ILIKE $${params.length + 1} OR city ILIKE $${params.length + 1})`);
+      params.push(`%${search}%`);
+    }
+
+    // Build WHERE clause
+    const whereClause = conditions.length > 0 ? ` WHERE ${conditions.join(' AND ')}` : '';
+
+    // Get total count first
+    const countResult = await pool.query(`SELECT COUNT(*) FROM dispensaries${whereClause}`, params);
+    const total = parseInt(countResult.rows[0].count);
+
+    // Add pagination
+    query += whereClause;
     query += ` ORDER BY name`;
+    query += ` LIMIT $${params.length + 1} OFFSET $${params.length + 2}`;
+    params.push(pageLimit, pageOffset);
 
     const result = await pool.query(query, params);
 
-    res.json({ dispensaries: result.rows });
+    res.json({
+      dispensaries: result.rows,
+      total,
+      limit: pageLimit,
+      offset: pageOffset,
+      hasMore: pageOffset + result.rows.length < total
+    });
   } catch (error) {
     console.error('Error fetching dispensaries:', error);
     res.status(500).json({ error: 'Failed to fetch dispensaries' });
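Review note: a minimal sketch of paging through the new response envelope. The admin mount path is an assumption; the response fields match the res.json shape above.

// Hypothetical client loop; '/api/admin/dispensaries' prefix is assumed.
let offset = 0;
const limit = 100;
let hasMore = true;
while (hasMore) {
  const res = await fetch(`/api/admin/dispensaries?state=AZ&crawl_enabled=all&limit=${limit}&offset=${offset}`);
  const page = await res.json(); // { dispensaries, total, limit, offset, hasMore }
  offset += page.dispensaries.length;
  hasMore = page.hasMore;
}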
@@ -91,6 +156,75 @@ router.get('/stats/menu-types', async (req, res) => {
   }
 });
 
+// Get crawl status stats
+router.get('/stats/crawl-status', async (req, res) => {
+  try {
+    const { state, city } = req.query;
+
+    let query = `
+      SELECT
+        COUNT(*) FILTER (WHERE crawl_enabled = true) as enabled_count,
+        COUNT(*) FILTER (WHERE crawl_enabled = false OR crawl_enabled IS NULL) as disabled_count,
+        COUNT(*) FILTER (WHERE dutchie_verified = true) as verified_count,
+        COUNT(*) FILTER (WHERE dutchie_verified = false OR dutchie_verified IS NULL) as unverified_count,
+        COUNT(*) FILTER (WHERE status = 'dropped') as dropped_count,
+        COUNT(*) as total_count
+      FROM dispensaries
+    `;
+
+    const params: any[] = [];
+    const conditions: string[] = [];
+
+    if (state) {
+      conditions.push(`state = $${params.length + 1}`);
+      params.push(state);
+    }
+
+    if (city) {
+      conditions.push(`city ILIKE $${params.length + 1}`);
+      params.push(`%${city}%`);
+    }
+
+    if (conditions.length > 0) {
+      query += ` WHERE ${conditions.join(' AND ')}`;
+    }
+
+    const result = await pool.query(query, params);
+    res.json(result.rows[0]);
+  } catch (error) {
+    console.error('Error fetching crawl status stats:', error);
+    res.status(500).json({ error: 'Failed to fetch crawl status stats' });
+  }
+});
+
+// Get dropped stores count (for dashboard alert)
+router.get('/stats/dropped', async (req, res) => {
+  try {
+    const result = await pool.query(`
+      SELECT
+        COUNT(*) as dropped_count,
+        json_agg(json_build_object(
+          'id', id,
+          'name', name,
+          'city', city,
+          'state', state,
+          'dropped_at', updated_at
+        ) ORDER BY updated_at DESC) FILTER (WHERE status = 'dropped') as dropped_stores
+      FROM dispensaries
+      WHERE status = 'dropped'
+    `);
+
+    const row = result.rows[0];
+    res.json({
+      dropped_count: parseInt(row.dropped_count) || 0,
+      dropped_stores: row.dropped_stores || []
+    });
+  } catch (error) {
+    console.error('Error fetching dropped stores:', error);
+    res.status(500).json({ error: 'Failed to fetch dropped stores' });
+  }
+});
+
 // Get single dispensary by slug or ID
 router.get('/:slugOrId', async (req, res) => {
   try {
@@ -101,21 +235,36 @@ router.get('/:slugOrId', async (req, res) => {
       SELECT
         id,
         name,
-        company_name,
         slug,
-        address,
+        address1,
+        address2,
         city,
         state,
-        zip,
+        zipcode,
         phone,
         website,
+        email,
         dba_name,
         latitude,
         longitude,
+        timezone,
         menu_url,
         menu_type,
         platform,
         platform_dispensary_id,
+        c_name,
+        chain_slug,
+        enterprise_id,
+        description,
+        logo_image,
+        banner_image,
+        offer_pickup,
+        offer_delivery,
+        offer_curbside_pickup,
+        is_medical,
+        is_recreational,
+        status,
+        country,
         product_count,
         last_crawl_at,
         raw_metadata,
|
|||||||
const {
|
const {
|
||||||
name,
|
name,
|
||||||
dba_name,
|
dba_name,
|
||||||
company_name,
|
|
||||||
website,
|
website,
|
||||||
phone,
|
phone,
|
||||||
address,
|
email,
|
||||||
|
address1,
|
||||||
|
address2,
|
||||||
city,
|
city,
|
||||||
state,
|
state,
|
||||||
zip,
|
zipcode,
|
||||||
latitude,
|
latitude,
|
||||||
longitude,
|
longitude,
|
||||||
|
timezone,
|
||||||
menu_url,
|
menu_url,
|
||||||
menu_type,
|
menu_type,
|
||||||
platform,
|
platform,
|
||||||
platform_dispensary_id,
|
platform_dispensary_id,
|
||||||
|
c_name,
|
||||||
|
chain_slug,
|
||||||
|
enterprise_id,
|
||||||
|
description,
|
||||||
|
logo_image,
|
||||||
|
banner_image,
|
||||||
|
offer_pickup,
|
||||||
|
offer_delivery,
|
||||||
|
offer_curbside_pickup,
|
||||||
|
is_medical,
|
||||||
|
is_recreational,
|
||||||
|
status,
|
||||||
|
country,
|
||||||
slug,
|
slug,
|
||||||
} = req.body;
|
} = req.body;
|
||||||
|
|
||||||
@@ -171,39 +335,69 @@ router.put('/:id', async (req, res) => {
|
|||||||
SET
|
SET
|
||||||
name = COALESCE($1, name),
|
name = COALESCE($1, name),
|
||||||
dba_name = COALESCE($2, dba_name),
|
dba_name = COALESCE($2, dba_name),
|
||||||
company_name = COALESCE($3, company_name),
|
website = COALESCE($3, website),
|
||||||
website = COALESCE($4, website),
|
phone = COALESCE($4, phone),
|
||||||
phone = COALESCE($5, phone),
|
email = COALESCE($5, email),
|
||||||
address = COALESCE($6, address),
|
address1 = COALESCE($6, address1),
|
||||||
city = COALESCE($7, city),
|
address2 = COALESCE($7, address2),
|
||||||
state = COALESCE($8, state),
|
city = COALESCE($8, city),
|
||||||
zip = COALESCE($9, zip),
|
state = COALESCE($9, state),
|
||||||
latitude = COALESCE($10, latitude),
|
zipcode = COALESCE($10, zipcode),
|
||||||
longitude = COALESCE($11, longitude),
|
latitude = COALESCE($11, latitude),
|
||||||
menu_url = COALESCE($12, menu_url),
|
longitude = COALESCE($12, longitude),
|
||||||
menu_type = COALESCE($13, menu_type),
|
timezone = COALESCE($13, timezone),
|
||||||
platform = COALESCE($14, platform),
|
menu_url = COALESCE($14, menu_url),
|
||||||
platform_dispensary_id = COALESCE($15, platform_dispensary_id),
|
menu_type = COALESCE($15, menu_type),
|
||||||
slug = COALESCE($16, slug),
|
platform = COALESCE($16, platform),
|
||||||
|
platform_dispensary_id = COALESCE($17, platform_dispensary_id),
|
||||||
|
c_name = COALESCE($18, c_name),
|
||||||
|
chain_slug = COALESCE($19, chain_slug),
|
||||||
|
enterprise_id = COALESCE($20, enterprise_id),
|
||||||
|
description = COALESCE($21, description),
|
||||||
|
logo_image = COALESCE($22, logo_image),
|
||||||
|
banner_image = COALESCE($23, banner_image),
|
||||||
|
offer_pickup = COALESCE($24, offer_pickup),
|
||||||
|
offer_delivery = COALESCE($25, offer_delivery),
|
||||||
|
offer_curbside_pickup = COALESCE($26, offer_curbside_pickup),
|
||||||
|
is_medical = COALESCE($27, is_medical),
|
||||||
|
is_recreational = COALESCE($28, is_recreational),
|
||||||
|
status = COALESCE($29, status),
|
||||||
|
country = COALESCE($30, country),
|
||||||
|
slug = COALESCE($31, slug),
|
||||||
updated_at = CURRENT_TIMESTAMP
|
updated_at = CURRENT_TIMESTAMP
|
||||||
WHERE id = $17
|
WHERE id = $32
|
||||||
RETURNING *
|
RETURNING *
|
||||||
`, [
|
`, [
|
||||||
name,
|
name,
|
||||||
dba_name,
|
dba_name,
|
||||||
company_name,
|
|
||||||
website,
|
website,
|
||||||
phone,
|
phone,
|
||||||
address,
|
email,
|
||||||
|
address1,
|
||||||
|
address2,
|
||||||
city,
|
city,
|
||||||
state,
|
state,
|
||||||
zip,
|
zipcode,
|
||||||
latitude,
|
latitude,
|
||||||
longitude,
|
longitude,
|
||||||
|
timezone,
|
||||||
menu_url,
|
menu_url,
|
||||||
menu_type,
|
menu_type,
|
||||||
platform,
|
platform,
|
||||||
platform_dispensary_id,
|
platform_dispensary_id,
|
||||||
|
c_name,
|
||||||
|
chain_slug,
|
||||||
|
enterprise_id,
|
||||||
|
description,
|
||||||
|
logo_image,
|
||||||
|
banner_image,
|
||||||
|
offer_pickup,
|
||||||
|
offer_delivery,
|
||||||
|
offer_curbside_pickup,
|
||||||
|
is_medical,
|
||||||
|
is_recreational,
|
||||||
|
status,
|
||||||
|
country,
|
||||||
slug,
|
slug,
|
||||||
id
|
id
|
||||||
]);
|
]);
|
||||||
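Review note: because every column is wrapped in COALESCE($n, column), fields omitted from the request body arrive as null parameters and leave the stored value untouched, so this behaves as a partial update. A sketch (endpoint mount path assumed):

// Hypothetical request; only phone and status change, all other columns keep their current values.
await fetch(`/api/admin/dispensaries/${id}`, {
  method: 'PUT',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({ phone: '602-555-0100', status: 'open' }),
});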
@@ -236,40 +430,42 @@ router.get('/:slug/products', async (req, res) => {
 
     const dispensaryId = dispensaryResult.rows[0].id;
 
-    // Build query for products
+    // Build query for products using canonical store_products table
     let query = `
       SELECT
-        p.id,
-        p.name,
-        p.brand,
-        p.variant,
-        p.slug,
-        p.description,
-        p.regular_price,
-        p.sale_price,
-        p.thc_percentage,
-        p.cbd_percentage,
-        p.strain_type,
-        p.terpenes,
-        p.effects,
-        p.flavors,
-        p.image_url,
-        p.dutchie_url,
-        p.in_stock,
-        p.created_at,
-        p.updated_at
-      FROM products p
-      WHERE p.dispensary_id = $1
+        sp.id,
+        sp.name_raw as name,
+        sp.brand_name_raw as brand,
+        sp.description,
+        COALESCE(sp.stock_quantity, sp.total_quantity_available, 0) as quantity,
+        sp.price_rec as regular_price,
+        CASE WHEN sp.price_rec_special IS NOT NULL AND sp.price_rec_special > 0
+             THEN sp.price_rec_special
+             ELSE NULL END as sale_price,
+        sp.thc_percent as thc_percentage,
+        sp.cbd_percent as cbd_percentage,
+        sp.strain_type,
+        sp.effects,
+        sp.primary_image_url as image_url,
+        sp.stock_status,
+        sp.stock_status = 'in_stock' as in_stock,
+        sp.is_on_special as on_special,
+        sp.category_raw as category,
+        sp.subcategory_raw as subcategory,
+        sp.created_at,
+        sp.updated_at
+      FROM store_products sp
+      WHERE sp.dispensary_id = $1
     `;
 
     const params: any[] = [dispensaryId];
 
     if (category) {
-      query += ` AND p.category = $2`;
+      query += ` AND sp.category_raw = $2`;
       params.push(category);
     }
 
-    query += ` ORDER BY p.created_at DESC`;
+    query += ` ORDER BY sp.name_raw ASC`;
 
     const result = await pool.query(query, params);
 
|
|||||||
|
|
||||||
const dispensaryId = dispensaryResult.rows[0].id;
|
const dispensaryId = dispensaryResult.rows[0].id;
|
||||||
|
|
||||||
// Build query with optional search filter
|
// Build query with optional search filter using canonical tables
|
||||||
let query = `
|
let query = `
|
||||||
SELECT DISTINCT
|
SELECT DISTINCT
|
||||||
brand,
|
brand_name as brand,
|
||||||
COUNT(*) as product_count
|
COUNT(*) as product_count
|
||||||
FROM products
|
FROM v_products
|
||||||
WHERE dispensary_id = $1 AND brand IS NOT NULL
|
WHERE dispensary_id = $1 AND brand_name IS NOT NULL
|
||||||
`;
|
`;
|
||||||
const params: any[] = [dispensaryId];
|
const params: any[] = [dispensaryId];
|
||||||
|
|
||||||
// Add search filter if provided
|
// Add search filter if provided
|
||||||
if (search) {
|
if (search) {
|
||||||
query += ` AND brand ILIKE $2`;
|
query += ` AND brand_name ILIKE $2`;
|
||||||
params.push(`%${search}%`);
|
params.push(`%${search}%`);
|
||||||
}
|
}
|
||||||
|
|
||||||
query += ` GROUP BY brand ORDER BY product_count DESC, brand ASC`;
|
query += ` GROUP BY brand_name ORDER BY product_count DESC, brand_name ASC`;
|
||||||
|
|
||||||
const result = await pool.query(query, params);
|
const result = await pool.query(query, params);
|
||||||
|
|
||||||
@@ -341,44 +537,48 @@ router.get('/:slug/specials', async (req, res) => {
|
|||||||
|
|
||||||
const dispensaryId = dispensaryResult.rows[0].id;
|
const dispensaryId = dispensaryResult.rows[0].id;
|
||||||
|
|
||||||
// Build query to get products with discounts
|
// Build query to get products with specials/discounts using canonical tables
|
||||||
let query = `
|
let query = `
|
||||||
SELECT
|
SELECT
|
||||||
p.id,
|
p.id,
|
||||||
p.name,
|
p.name,
|
||||||
p.brand,
|
p.brand_name as brand,
|
||||||
p.variant,
|
p.subcategory as variant,
|
||||||
p.slug,
|
sp.description,
|
||||||
p.description,
|
COALESCE(snap.rec_min_price_cents, 0)::numeric / 100.0 as regular_price,
|
||||||
p.regular_price,
|
snap.rec_min_special_price_cents::numeric / 100.0 as sale_price,
|
||||||
p.sale_price,
|
snap.discount_percent,
|
||||||
p.discount_type,
|
p.thc as thc_percentage,
|
||||||
p.discount_value,
|
p.cbd as cbd_percentage,
|
||||||
p.thc_percentage,
|
|
||||||
p.cbd_percentage,
|
|
||||||
p.strain_type,
|
p.strain_type,
|
||||||
p.terpenes,
|
sp.effects,
|
||||||
p.effects,
|
p.primary_image_url as image_url,
|
||||||
p.flavors,
|
p.stock_status = 'in_stock' as in_stock,
|
||||||
p.image_url,
|
p.stock_status,
|
||||||
p.dutchie_url,
|
true as on_special,
|
||||||
p.in_stock,
|
|
||||||
p.created_at,
|
p.created_at,
|
||||||
p.updated_at
|
p.updated_at
|
||||||
FROM products p
|
FROM v_products p
|
||||||
|
JOIN store_products sp ON sp.id = p.id
|
||||||
|
INNER JOIN LATERAL (
|
||||||
|
SELECT rec_min_price_cents, rec_min_special_price_cents, discount_percent, special
|
||||||
|
FROM v_product_snapshots vps
|
||||||
|
WHERE vps.store_product_id = p.id
|
||||||
|
AND (vps.special = true OR vps.rec_min_special_price_cents > 0)
|
||||||
|
ORDER BY vps.crawled_at DESC
|
||||||
|
LIMIT 1
|
||||||
|
) snap ON true
|
||||||
WHERE p.dispensary_id = $1
|
WHERE p.dispensary_id = $1
|
||||||
AND p.discount_type IS NOT NULL
|
|
||||||
AND p.discount_value IS NOT NULL
|
|
||||||
`;
|
`;
|
||||||
const params: any[] = [dispensaryId];
|
const params: any[] = [dispensaryId];
|
||||||
|
|
||||||
// Add search filter if provided
|
// Add search filter if provided
|
||||||
if (search) {
|
if (search) {
|
||||||
query += ` AND (p.name ILIKE $2 OR p.brand ILIKE $2 OR p.description ILIKE $2)`;
|
query += ` AND (p.name ILIKE $2 OR p.brand_name ILIKE $2 OR sp.description ILIKE $2)`;
|
||||||
params.push(`%${search}%`);
|
params.push(`%${search}%`);
|
||||||
}
|
}
|
||||||
|
|
||||||
query += ` ORDER BY p.created_at DESC`;
|
query += ` ORDER BY p.updated_at DESC`;
|
||||||
|
|
||||||
const result = await pool.query(query, params);
|
const result = await pool.query(query, params);
|
||||||
|
|
||||||
|
|||||||
@@ -22,11 +22,17 @@ interface ProductClickEventPayload {
   store_id?: string;
   brand_id?: string;
   campaign_id?: string;
+  dispensary_name?: string;
   action: 'view' | 'open_store' | 'open_product' | 'compare' | 'other';
   source: string;
   page_type?: string; // Page where event occurred (e.g., StoreDetailPage, BrandsIntelligence)
   url_path?: string; // URL path for debugging
   occurred_at?: string;
+  // Visitor location (from frontend IP geolocation)
+  visitor_city?: string;
+  visitor_state?: string;
+  visitor_lat?: number;
+  visitor_lng?: number;
 }
 
 /**
@@ -77,13 +83,14 @@ router.post('/product-click', optionalAuthMiddleware, async (req: Request, res:
     // Insert the event with enhanced fields
     await pool.query(
       `INSERT INTO product_click_events
-        (product_id, store_id, brand_id, campaign_id, action, source, user_id, ip_address, user_agent, occurred_at, event_type, page_type, url_path, device_type)
-       VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14)`,
+        (product_id, store_id, brand_id, campaign_id, dispensary_name, action, source, user_id, ip_address, user_agent, occurred_at, event_type, page_type, url_path, device_type, visitor_city, visitor_state, visitor_lat, visitor_lng)
+       VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19)`,
       [
        payload.product_id,
        payload.store_id || null,
        payload.brand_id || null,
        payload.campaign_id || null,
+       payload.dispensary_name || null,
        payload.action,
        payload.source,
        userId,
@@ -93,7 +100,11 @@ router.post('/product-click', optionalAuthMiddleware, async (req: Request, res:
        'product_click', // event_type
        payload.page_type || null,
        payload.url_path || null,
-       deviceType
+       deviceType,
+       payload.visitor_city || null,
+       payload.visitor_state || null,
+       payload.visitor_lat || null,
+       payload.visitor_lng || null
       ]
     );
 
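Review note: a sketch of the beacon the frontend would send for the expanded payload. The endpoint path is an assumption; the fields match ProductClickEventPayload above.

// Hypothetical call; '/api/events/product-click' path is assumed, field names come from the interface.
await fetch('/api/events/product-click', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({
    product_id: 'abc123',
    store_id: '42',
    dispensary_name: 'Example Dispensary',
    action: 'open_product',
    source: 'findadispo',
    page_type: 'StoreDetailPage',
    visitor_city: 'Phoenix',
    visitor_state: 'AZ',
  }),
});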
@@ -45,6 +45,8 @@ interface ApiHealth extends HealthStatus {
   uptime: number;
   timestamp: string;
   version: string;
+  build_sha: string | null;
+  build_time: string | null;
 }
 
 interface DbHealth extends HealthStatus {
@@ -113,6 +115,8 @@ async function getApiHealth(): Promise<ApiHealth> {
     uptime: Math.floor((Date.now() - serverStartTime) / 1000),
     timestamp: new Date().toISOString(),
     version: packageVersion,
+    build_sha: process.env.APP_GIT_SHA && process.env.APP_GIT_SHA !== 'unknown' ? process.env.APP_GIT_SHA : null,
+    build_time: process.env.APP_BUILD_TIME && process.env.APP_BUILD_TIME !== 'unknown' ? process.env.APP_BUILD_TIME : null,
   };
 }
 
@@ -138,14 +142,16 @@ async function getDbHealth(): Promise<DbHealth> {
 
 async function getRedisHealth(): Promise<RedisHealth> {
   const start = Date.now();
+  const isLocal = process.env.NODE_ENV === 'development' || process.env.NODE_ENV === 'local' || !process.env.NODE_ENV;
 
   // Check if Redis is configured
   if (!process.env.REDIS_URL && !process.env.REDIS_HOST) {
+    // Redis is optional in local dev, required in prod/staging
     return {
-      status: 'ok', // Redis is optional
+      status: isLocal ? 'ok' : 'error',
      connected: false,
      latency_ms: 0,
-      error: 'Redis not configured',
+      error: isLocal ? 'Redis not configured (optional in local)' : 'Redis not configured (required in production)',
    };
  }
 
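Review note: with build_sha/build_time surfaced in the health payload, the running image can be cross-checked against the new deploy-status endpoint. A sketch only; both URL paths and the assumption that ApiHealth fields appear at the top level of the health response are not confirmed by this diff.

// Illustrative check; '/api/health' path and response nesting are assumptions.
const [health, deploy] = await Promise.all([
  fetch('/api/health').then(r => r.json()),
  fetch('/api/admin/deploy-status').then(r => r.json()),
]);
console.log(health.build_sha === deploy.running.sha_full ? 'container is current' : 'container is behind');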
backend/src/routes/image-proxy.ts (new file, 214 lines)
@@ -0,0 +1,214 @@
|
|||||||
|
/**
|
||||||
|
* Image Proxy Route
|
||||||
|
*
|
||||||
|
* On-demand image resizing service. Serves images with URL-based transforms.
|
||||||
|
*
|
||||||
|
* Usage:
|
||||||
|
* /img/<path>?w=200&h=200&q=80&fit=cover
|
||||||
|
*
|
||||||
|
* Parameters:
|
||||||
|
* w - width (pixels)
|
||||||
|
* h - height (pixels)
|
||||||
|
* q - quality (1-100, default 80)
|
||||||
|
* fit - resize fit: cover, contain, fill, inside, outside (default: inside)
|
||||||
|
* blur - blur sigma (0.3-1000)
|
||||||
|
* gray - grayscale (1 = enabled)
|
||||||
|
* format - output format: webp, jpeg, png, avif (default: webp)
|
||||||
|
*
|
||||||
|
* Examples:
|
||||||
|
* /img/products/az/store/brand/product/image.webp?w=200
|
||||||
|
* /img/products/az/store/brand/product/image.webp?w=600&h=400&fit=cover
|
||||||
|
* /img/products/az/store/brand/product/image.webp?w=100&blur=5&gray=1
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { Router, Request, Response } from 'express';
|
||||||
|
import * as fs from 'fs/promises';
|
||||||
|
import * as path from 'path';
|
||||||
|
// @ts-ignore
|
||||||
|
const sharp = require('sharp');
|
||||||
|
|
||||||
|
const router = Router();
|
||||||
|
|
||||||
|
// Base path for images
|
||||||
|
function getImagesBasePath(): string {
|
||||||
|
if (process.env.IMAGES_PATH) {
|
||||||
|
return process.env.IMAGES_PATH;
|
||||||
|
}
|
||||||
|
if (process.env.STORAGE_BASE_PATH) {
|
||||||
|
return path.join(process.env.STORAGE_BASE_PATH, 'images');
|
||||||
|
}
|
||||||
|
return './storage/images';
|
||||||
|
}
|
||||||
|
|
||||||
|
const IMAGES_BASE_PATH = getImagesBasePath();
|
||||||
|
|
||||||
|
// Allowed fit modes
|
||||||
|
const ALLOWED_FITS = ['cover', 'contain', 'fill', 'inside', 'outside'] as const;
|
||||||
|
type FitMode = typeof ALLOWED_FITS[number];
|
||||||
|
|
||||||
|
// Allowed formats
|
||||||
|
const ALLOWED_FORMATS = ['webp', 'jpeg', 'jpg', 'png', 'avif'] as const;
|
||||||
|
type OutputFormat = typeof ALLOWED_FORMATS[number];

// Cache headers (1 year for immutable content-addressed images)
const CACHE_MAX_AGE = 31536000; // 1 year in seconds

interface TransformParams {
  width?: number;
  height?: number;
  quality: number;
  fit: FitMode;
  blur?: number;
  grayscale: boolean;
  format: OutputFormat;
}

function parseTransformParams(query: any): TransformParams {
  return {
    width: query.w ? Math.min(Math.max(parseInt(query.w, 10), 1), 4000) : undefined,
    height: query.h ? Math.min(Math.max(parseInt(query.h, 10), 1), 4000) : undefined,
    quality: query.q ? Math.min(Math.max(parseInt(query.q, 10), 1), 100) : 80,
    fit: ALLOWED_FITS.includes(query.fit) ? query.fit : 'inside',
    blur: query.blur ? Math.min(Math.max(parseFloat(query.blur), 0.3), 1000) : undefined,
    grayscale: query.gray === '1' || query.grayscale === '1',
    format: ALLOWED_FORMATS.includes(query.format) ? query.format : 'webp',
  };
}

function getContentType(format: OutputFormat): string {
  switch (format) {
    case 'jpeg':
    case 'jpg':
      return 'image/jpeg';
    case 'png':
      return 'image/png';
    case 'avif':
      return 'image/avif';
    case 'webp':
    default:
      return 'image/webp';
  }
}

/**
 * Image proxy endpoint
 * GET /img/*
 */
router.get('/*', async (req: Request, res: Response) => {
  try {
    // Get the image path from URL (everything after /img/)
    const imagePath = req.params[0];

    if (!imagePath) {
      return res.status(400).json({ error: 'Image path required' });
    }

    // Security: prevent directory traversal
    const normalizedPath = path.normalize(imagePath).replace(/^(\.\.(\/|\\|$))+/, '');
    const basePath = path.resolve(IMAGES_BASE_PATH);
    const fullPath = path.resolve(path.join(IMAGES_BASE_PATH, normalizedPath));

    // Ensure path is within base directory
    if (!fullPath.startsWith(basePath)) {
      console.error(`[ImageProxy] Path traversal attempt: ${fullPath} not in ${basePath}`);
      return res.status(403).json({ error: 'Access denied' });
    }

    // Check if file exists
    try {
      await fs.access(fullPath);
    } catch {
      return res.status(404).json({ error: 'Image not found' });
    }

    // Parse transform parameters
    const params = parseTransformParams(req.query);

    // Check if any transforms are requested
    const hasTransforms = params.width || params.height || params.blur || params.grayscale;

    // Read the original image
    const imageBuffer = await fs.readFile(fullPath);

    let outputBuffer: Buffer;

    if (hasTransforms) {
      // Apply transforms
      let pipeline = sharp(imageBuffer);

      // Resize
      if (params.width || params.height) {
        pipeline = pipeline.resize(params.width, params.height, {
          fit: params.fit,
          withoutEnlargement: true,
        });
      }

      // Blur
      if (params.blur) {
        pipeline = pipeline.blur(params.blur);
      }

      // Grayscale
      if (params.grayscale) {
        pipeline = pipeline.grayscale();
      }

      // Output format
      switch (params.format) {
        case 'jpeg':
        case 'jpg':
          pipeline = pipeline.jpeg({ quality: params.quality });
          break;
        case 'png':
          pipeline = pipeline.png({ quality: params.quality });
          break;
        case 'avif':
          pipeline = pipeline.avif({ quality: params.quality });
          break;
        case 'webp':
        default:
          pipeline = pipeline.webp({ quality: params.quality });
      }

      outputBuffer = await pipeline.toBuffer();
    } else {
      // No transforms - serve original (but maybe convert format)
      if (params.format !== 'webp' || params.quality !== 80) {
        let pipeline = sharp(imageBuffer);
        switch (params.format) {
          case 'jpeg':
          case 'jpg':
            pipeline = pipeline.jpeg({ quality: params.quality });
            break;
          case 'png':
            pipeline = pipeline.png({ quality: params.quality });
            break;
          case 'avif':
            pipeline = pipeline.avif({ quality: params.quality });
            break;
          case 'webp':
          default:
            pipeline = pipeline.webp({ quality: params.quality });
        }
        outputBuffer = await pipeline.toBuffer();
      } else {
        outputBuffer = imageBuffer;
      }
    }

    // Set headers
    res.setHeader('Content-Type', getContentType(params.format));
    res.setHeader('Cache-Control', `public, max-age=${CACHE_MAX_AGE}, immutable`);
    res.setHeader('X-Image-Size', outputBuffer.length);

    // Send image
    res.send(outputBuffer);
  } catch (error: any) {
    console.error('[ImageProxy] Error:', error.message);
    res.status(500).json({ error: 'Failed to process image' });
  }
});

export default router;
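For orientation, a request against this route might look like the sketch below (assumptions: the router is mounted at /img, the host and image path are hypothetical, and a Node 18+ global fetch is available; this is not part of the diff):

// Minimal usage sketch: fetch a 400px-wide, grayscale AVIF rendition of a stored image.
async function fetchThumbnail(): Promise<Buffer> {
  // w/q/fit/format/gray map to parseTransformParams() above.
  const url = 'https://example.test/img/products/abc123.png?w=400&q=70&fit=cover&format=avif&gray=1';
  const res = await fetch(url);
  if (!res.ok) throw new Error(`Image proxy returned ${res.status}`);
  // Content-Type follows getContentType(); Cache-Control is public, max-age=31536000, immutable.
  return Buffer.from(await res.arrayBuffer());
}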
253  backend/src/routes/intelligence.ts  Normal file
@@ -0,0 +1,253 @@
/**
 * Intelligence API Routes
 *
 * Brand and pricing intelligence endpoints for the CannaiQ admin dashboard.
 * Uses canonical store_products table for aggregated analytics.
 */
import { Router, Request, Response } from 'express';
import { authMiddleware } from '../auth/middleware';
import { pool } from '../db/pool';

const router = Router();
router.use(authMiddleware);

/**
 * GET /api/admin/intelligence/brands
 * List all brands with state presence, store counts, and pricing
 */
router.get('/brands', async (req: Request, res: Response) => {
  try {
    const { limit = '500', offset = '0' } = req.query;
    const limitNum = Math.min(parseInt(limit as string, 10), 1000);
    const offsetNum = parseInt(offset as string, 10);

    const { rows } = await pool.query(`
      SELECT
        sp.brand_name_raw as brand_name,
        array_agg(DISTINCT d.state) FILTER (WHERE d.state IS NOT NULL) as states,
        COUNT(DISTINCT d.id) as store_count,
        COUNT(DISTINCT sp.id) as sku_count,
        ROUND(AVG(sp.price_rec) FILTER (WHERE sp.price_rec > 0)::numeric, 2) as avg_price_rec,
        ROUND(AVG(sp.price_med) FILTER (WHERE sp.price_med > 0)::numeric, 2) as avg_price_med
      FROM store_products sp
      JOIN dispensaries d ON sp.dispensary_id = d.id
      WHERE sp.brand_name_raw IS NOT NULL AND sp.brand_name_raw != ''
      GROUP BY sp.brand_name_raw
      ORDER BY store_count DESC, sku_count DESC
      LIMIT $1 OFFSET $2
    `, [limitNum, offsetNum]);

    // Get total count
    const { rows: countRows } = await pool.query(`
      SELECT COUNT(DISTINCT brand_name_raw) as total
      FROM store_products
      WHERE brand_name_raw IS NOT NULL AND brand_name_raw != ''
    `);

    res.json({
      brands: rows.map((r: any) => ({
        brandName: r.brand_name,
        states: r.states || [],
        storeCount: parseInt(r.store_count, 10),
        skuCount: parseInt(r.sku_count, 10),
        avgPriceRec: r.avg_price_rec ? parseFloat(r.avg_price_rec) : null,
        avgPriceMed: r.avg_price_med ? parseFloat(r.avg_price_med) : null,
      })),
      total: parseInt(countRows[0]?.total || '0', 10),
      limit: limitNum,
      offset: offsetNum,
    });
  } catch (error: any) {
    console.error('[Intelligence] Error fetching brands:', error.message);
    res.status(500).json({ error: error.message });
  }
});

/**
 * GET /api/admin/intelligence/brands/:brandName/penetration
 * Get state-by-state penetration for a specific brand
 */
router.get('/brands/:brandName/penetration', async (req: Request, res: Response) => {
  try {
    const { brandName } = req.params;

    const { rows } = await pool.query(`
      WITH state_totals AS (
        SELECT
          d.state,
          s.name AS state_name,
          COUNT(DISTINCT d.id) AS total_stores
        FROM dispensaries d
        JOIN states s ON d.state = s.code
        WHERE d.state IS NOT NULL
        GROUP BY d.state, s.name
      ),
      brand_presence AS (
        SELECT
          d.state,
          COUNT(DISTINCT d.id) AS stores_with_brand,
          COUNT(DISTINCT sp.id) AS product_count,
          ROUND(AVG(sp.price_rec)::numeric, 2) AS avg_price
        FROM store_products sp
        JOIN dispensaries d ON sp.dispensary_id = d.id
        WHERE sp.brand_name_raw ILIKE $1
          AND d.state IS NOT NULL
        GROUP BY d.state
      )
      SELECT
        st.state,
        st.state_name AS "stateName",
        st.total_stores AS "totalStores",
        COALESCE(bp.stores_with_brand, 0) AS "storesWithBrand",
        CASE
          WHEN st.total_stores > 0
          THEN ROUND((COALESCE(bp.stores_with_brand, 0)::numeric / st.total_stores) * 100, 2)
          ELSE 0
        END AS "penetrationPct",
        COALESCE(bp.product_count, 0) AS "productCount",
        bp.avg_price AS "avgPrice"
      FROM state_totals st
      LEFT JOIN brand_presence bp ON st.state = bp.state
      WHERE COALESCE(bp.stores_with_brand, 0) > 0
      ORDER BY COALESCE(bp.stores_with_brand, 0) DESC
    `, [brandName]);

    // Calculate national metrics
    const { rows: nationalRows } = await pool.query(`
      SELECT
        COUNT(DISTINCT d.id) AS total_stores,
        COUNT(DISTINCT CASE WHEN sp.brand_name_raw ILIKE $1 THEN d.id END) AS stores_with_brand,
        AVG(sp.price_rec) FILTER (WHERE sp.brand_name_raw ILIKE $1) AS avg_price
      FROM dispensaries d
      LEFT JOIN store_products sp ON d.id = sp.dispensary_id
      WHERE d.state IS NOT NULL
    `, [brandName]);

    const national = nationalRows[0];
    const nationalPenetration = national.total_stores > 0
      ? (national.stores_with_brand / national.total_stores) * 100
      : 0;

    res.json({
      brandName,
      states: rows,
      nationalPenetration: Math.round(nationalPenetration * 100) / 100,
      nationalAvgPrice: national.avg_price
        ? Math.round(parseFloat(national.avg_price) * 100) / 100
        : null,
      bestPerformingState: rows[0]?.state || null,
      worstPerformingState: rows[rows.length - 1]?.state || null,
    });
  } catch (error: any) {
    console.error('[Intelligence] Error fetching brand penetration:', error.message);
    res.status(500).json({ error: error.message });
  }
});

/**
 * GET /api/admin/intelligence/pricing
 * Get pricing analytics by category
 */
router.get('/pricing', async (req: Request, res: Response) => {
  try {
    const { rows: categoryRows } = await pool.query(`
      SELECT
        sp.category_raw as category,
        ROUND(AVG(sp.price_rec)::numeric, 2) as avg_price,
        MIN(sp.price_rec) as min_price,
        MAX(sp.price_rec) as max_price,
        ROUND(PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY sp.price_rec)::numeric, 2) as median_price,
        COUNT(*) as product_count
      FROM store_products sp
      WHERE sp.category_raw IS NOT NULL AND sp.price_rec > 0
      GROUP BY sp.category_raw
      ORDER BY product_count DESC
    `);

    const { rows: stateRows } = await pool.query(`
      SELECT
        d.state,
        ROUND(AVG(sp.price_rec)::numeric, 2) as avg_price,
        MIN(sp.price_rec) as min_price,
        MAX(sp.price_rec) as max_price,
        COUNT(DISTINCT sp.id) as product_count
      FROM store_products sp
      JOIN dispensaries d ON sp.dispensary_id = d.id
      WHERE d.state IS NOT NULL AND sp.price_rec > 0
      GROUP BY d.state
      ORDER BY avg_price DESC
    `);

    res.json({
      byCategory: categoryRows.map((r: any) => ({
        category: r.category,
        avgPrice: r.avg_price ? parseFloat(r.avg_price) : null,
        minPrice: r.min_price ? parseFloat(r.min_price) : null,
        maxPrice: r.max_price ? parseFloat(r.max_price) : null,
        medianPrice: r.median_price ? parseFloat(r.median_price) : null,
        productCount: parseInt(r.product_count, 10),
      })),
      byState: stateRows.map((r: any) => ({
        state: r.state,
        avgPrice: r.avg_price ? parseFloat(r.avg_price) : null,
        minPrice: r.min_price ? parseFloat(r.min_price) : null,
        maxPrice: r.max_price ? parseFloat(r.max_price) : null,
        productCount: parseInt(r.product_count, 10),
      })),
    });
  } catch (error: any) {
    console.error('[Intelligence] Error fetching pricing:', error.message);
    res.status(500).json({ error: error.message });
  }
});

/**
 * GET /api/admin/intelligence/stores
 * Get store intelligence summary
 */
router.get('/stores', async (req: Request, res: Response) => {
  try {
    const { rows: storeRows } = await pool.query(`
      SELECT
        d.id,
        d.name,
        d.dba_name,
        d.city,
        d.state,
        d.menu_type,
        d.crawl_enabled,
        COUNT(DISTINCT sp.id) as product_count,
        COUNT(DISTINCT sp.brand_name_raw) as brand_count,
        ROUND(AVG(sp.price_rec)::numeric, 2) as avg_price,
        MAX(sp.updated_at) as last_product_update
      FROM dispensaries d
      LEFT JOIN store_products sp ON sp.dispensary_id = d.id
      WHERE d.state IS NOT NULL
      GROUP BY d.id, d.name, d.dba_name, d.city, d.state, d.menu_type, d.crawl_enabled
      ORDER BY product_count DESC
      LIMIT 200
    `);

    res.json({
      stores: storeRows.map((r: any) => ({
        id: r.id,
        name: r.name,
        dbaName: r.dba_name,
        city: r.city,
        state: r.state,
        menuType: r.menu_type,
        crawlEnabled: r.crawl_enabled,
        productCount: parseInt(r.product_count || '0', 10),
        brandCount: parseInt(r.brand_count || '0', 10),
        avgPrice: r.avg_price ? parseFloat(r.avg_price) : null,
        lastProductUpdate: r.last_product_update,
      })),
      total: storeRows.length,
    });
  } catch (error: any) {
    console.error('[Intelligence] Error fetching stores:', error.message);
    res.status(500).json({ error: error.message });
  }
});

export default router;
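As a quick orientation to the routes above, a dashboard client could combine the brand list and penetration endpoints roughly as sketched below (the base URL and Bearer-token header are assumptions; authMiddleware's actual token format is defined elsewhere in the codebase):

// Hedged sketch: list brands, then drill into the top brand's state penetration.
const BASE = 'https://example.test/api/admin/intelligence'; // placeholder base URL
const headers = { Authorization: `Bearer ${process.env.ADMIN_TOKEN ?? ''}` }; // assumed auth scheme

async function topBrandPenetration() {
  const brandsRes = await fetch(`${BASE}/brands?limit=25&offset=0`, { headers });
  const { brands } = (await brandsRes.json()) as {
    brands: Array<{ brandName: string; storeCount: number }>;
  };
  if (brands.length === 0) return null;

  // The server already sorts by store_count DESC, so brands[0] is the widest-distributed brand.
  const top = brands[0];
  const penRes = await fetch(`${BASE}/brands/${encodeURIComponent(top.brandName)}/penetration`, { headers });
  return penRes.json(); // { brandName, states, nationalPenetration, ... }
}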
773  backend/src/routes/job-queue.ts  Normal file
@@ -0,0 +1,773 @@
/**
 * Job Queue Management API Routes
 *
 * Endpoints for viewing and managing the crawl job queue:
 *   GET  /api/job-queue               - List all jobs (with filters)
 *   GET  /api/job-queue/stats         - Queue statistics
 *   GET  /api/job-queue/:id           - Get single job details
 *   PUT  /api/job-queue/:id/priority  - Update job priority
 *   POST /api/job-queue/:id/cancel    - Cancel a pending job
 *   POST /api/job-queue/:id/retry     - Retry a failed job
 *   POST /api/job-queue/bulk-priority - Bulk update priorities
 *   POST /api/job-queue/pause         - Pause queue processing
 *   POST /api/job-queue/resume        - Resume queue processing
 */

import { Router, Request, Response } from 'express';
import { pool } from '../db/pool';

const router = Router();

// In-memory queue state (would be in Redis in production)
let queuePaused = false;

/**
 * GET /api/job-queue - List jobs with filters
 */
router.get('/', async (req: Request, res: Response) => {
  try {
    const {
      status = 'pending',
      limit = '50',
      offset = '0',
      job_type,
      dispensary_id,
      sort_by = 'priority',
      sort_order = 'desc'
    } = req.query;

    let query = `
      SELECT
        j.id,
        j.dispensary_id,
        d.name as dispensary_name,
        d.city,
        d.state,
        j.job_type,
        j.trigger_type,
        j.priority,
        j.status,
        j.scheduled_at,
        j.started_at,
        j.completed_at,
        j.duration_ms,
        j.products_found,
        j.error_message,
        j.retry_count,
        j.max_retries,
        j.worker_id,
        j.locked_by,
        j.created_at
      FROM dispensary_crawl_jobs j
      LEFT JOIN dispensaries d ON d.id = j.dispensary_id
      WHERE 1=1
    `;
    const params: any[] = [];
    let paramIndex = 1;

    if (status && status !== 'all') {
      params.push(status);
      query += ` AND j.status = $${paramIndex++}`;
    }

    if (job_type) {
      params.push(job_type);
      query += ` AND j.job_type = $${paramIndex++}`;
    }

    if (dispensary_id) {
      params.push(dispensary_id);
      query += ` AND j.dispensary_id = $${paramIndex++}`;
    }

    // Sorting
    const validSortColumns = ['priority', 'created_at', 'scheduled_at', 'dispensary_name'];
    const sortCol = validSortColumns.includes(sort_by as string) ? sort_by : 'priority';
    const sortDir = sort_order === 'asc' ? 'ASC' : 'DESC';

    if (sortCol === 'dispensary_name') {
      query += ` ORDER BY d.name ${sortDir} NULLS LAST`;
    } else {
      query += ` ORDER BY j.${sortCol} ${sortDir} NULLS LAST`;
    }

    // Add secondary sort by created_at for consistent ordering
    if (sortCol !== 'created_at') {
      query += `, j.created_at ASC`;
    }

    params.push(parseInt(limit as string));
    query += ` LIMIT $${paramIndex++}`;

    params.push(parseInt(offset as string));
    query += ` OFFSET $${paramIndex++}`;

    const { rows } = await pool.query(query, params);

    // Get total count for pagination
    let countQuery = `
      SELECT COUNT(*) as total
      FROM dispensary_crawl_jobs j
      WHERE 1=1
    `;
    const countParams: any[] = [];
    let countParamIndex = 1;

    if (status && status !== 'all') {
      countParams.push(status);
      countQuery += ` AND j.status = $${countParamIndex++}`;
    }
    if (job_type) {
      countParams.push(job_type);
      countQuery += ` AND j.job_type = $${countParamIndex++}`;
    }
    if (dispensary_id) {
      countParams.push(dispensary_id);
      countQuery += ` AND j.dispensary_id = $${countParamIndex++}`;
    }

    const countResult = await pool.query(countQuery, countParams);
    const total = parseInt(countResult.rows[0].total);

    res.json({
      success: true,
      jobs: rows,
      total,
      limit: parseInt(limit as string),
      offset: parseInt(offset as string),
      queue_paused: queuePaused
    });
  } catch (error: any) {
    console.error('[JobQueue] Error listing jobs:', error);
    res.status(500).json({ success: false, error: error.message });
  }
});

/**
 * GET /api/job-queue/available - List dispensaries available for crawling
 * Query: { state_code?: string, limit?: number }
 * NOTE: Must be defined BEFORE /:id route to avoid conflict
 */
router.get('/available', async (req: Request, res: Response) => {
  try {
    const { state_code, limit = '100' } = req.query;

    let query = `
      SELECT
        d.id,
        d.name,
        d.city,
        s.code as state_code,
        d.platform_dispensary_id,
        d.crawl_enabled,
        (SELECT MAX(created_at) FROM dispensary_crawl_jobs WHERE dispensary_id = d.id AND status = 'completed') as last_crawl,
        EXISTS (
          SELECT 1 FROM dispensary_crawl_jobs
          WHERE dispensary_id = d.id AND status IN ('pending', 'running')
        ) as has_pending_job
      FROM dispensaries d
      LEFT JOIN states s ON s.id = d.state_id
      WHERE d.crawl_enabled = true
        AND d.platform_dispensary_id IS NOT NULL
    `;
    const params: any[] = [];
    let paramIndex = 1;

    if (state_code) {
      params.push((state_code as string).toUpperCase());
      query += ` AND s.code = $${paramIndex++}`;
    }

    query += ` ORDER BY d.name LIMIT $${paramIndex}`;
    params.push(parseInt(limit as string));

    const { rows } = await pool.query(query, params);

    // Get counts by state
    const { rows: stateCounts } = await pool.query(`
      SELECT s.code, COUNT(*) as count
      FROM dispensaries d
      JOIN states s ON s.id = d.state_id
      WHERE d.crawl_enabled = true
        AND d.platform_dispensary_id IS NOT NULL
      GROUP BY s.code
      ORDER BY count DESC
    `);

    res.json({
      success: true,
      dispensaries: rows,
      total: rows.length,
      by_state: stateCounts
    });
  } catch (error: any) {
    console.error('[JobQueue] Error listing available:', error);
    res.status(500).json({ success: false, error: error.message });
  }
});

/**
 * GET /api/job-queue/history - Get recent job history with results
 * Query: { state_code?: string, status?: string, limit?: number, hours?: number }
 * NOTE: Must be defined BEFORE /:id route to avoid conflict
 */
router.get('/history', async (req: Request, res: Response) => {
  try {
    const {
      state_code,
      status,
      limit = '50',
      hours = '24'
    } = req.query;

    let query = `
      SELECT
        j.id,
        j.dispensary_id,
        d.name as dispensary_name,
        s.code as state_code,
        j.job_type,
        j.status,
        j.products_found,
        j.error_message,
        j.started_at,
        j.completed_at,
        j.duration_ms,
        j.created_at
      FROM dispensary_crawl_jobs j
      LEFT JOIN dispensaries d ON d.id = j.dispensary_id
      LEFT JOIN states s ON s.id = d.state_id
      WHERE j.created_at > NOW() - INTERVAL '${parseInt(hours as string)} hours'
    `;
    const params: any[] = [];
    let paramIndex = 1;

    if (status && status !== 'all') {
      params.push(status);
      query += ` AND j.status = $${paramIndex++}`;
    }

    if (state_code) {
      params.push((state_code as string).toUpperCase());
      query += ` AND s.code = $${paramIndex++}`;
    }

    query += ` ORDER BY j.created_at DESC LIMIT $${paramIndex}`;
    params.push(parseInt(limit as string));

    const { rows } = await pool.query(query, params);

    // Get summary stats
    const { rows: stats } = await pool.query(`
      SELECT
        COUNT(*) FILTER (WHERE status = 'completed') as completed,
        COUNT(*) FILTER (WHERE status = 'failed') as failed,
        COUNT(*) FILTER (WHERE status = 'running') as running,
        COUNT(*) FILTER (WHERE status = 'pending') as pending,
        SUM(products_found) FILTER (WHERE status = 'completed') as total_products,
        AVG(duration_ms) FILTER (WHERE status = 'completed') as avg_duration_ms
      FROM dispensary_crawl_jobs
      WHERE created_at > NOW() - INTERVAL '${parseInt(hours as string)} hours'
    `);

    res.json({
      success: true,
      jobs: rows,
      summary: {
        completed: parseInt(stats[0].completed) || 0,
        failed: parseInt(stats[0].failed) || 0,
        running: parseInt(stats[0].running) || 0,
        pending: parseInt(stats[0].pending) || 0,
        total_products: parseInt(stats[0].total_products) || 0,
        avg_duration_ms: Math.round(parseFloat(stats[0].avg_duration_ms)) || null
      },
      hours: parseInt(hours as string)
    });
  } catch (error: any) {
    console.error('[JobQueue] Error getting history:', error);
    res.status(500).json({ success: false, error: error.message });
  }
});

/**
 * GET /api/job-queue/stats - Queue statistics
 */
router.get('/stats', async (_req: Request, res: Response) => {
  try {
    const { rows } = await pool.query(`
      SELECT
        COUNT(*) FILTER (WHERE status = 'pending') as pending_count,
        COUNT(*) FILTER (WHERE status = 'running') as running_count,
        COUNT(*) FILTER (WHERE status = 'completed' AND completed_at > NOW() - INTERVAL '24 hours') as completed_24h,
        COUNT(*) FILTER (WHERE status = 'failed' AND completed_at > NOW() - INTERVAL '24 hours') as failed_24h,
        COUNT(*) FILTER (WHERE status = 'cancelled') as cancelled_count,
        AVG(duration_ms) FILTER (WHERE status = 'completed' AND completed_at > NOW() - INTERVAL '24 hours') as avg_duration_ms,
        MAX(priority) FILTER (WHERE status = 'pending') as max_priority,
        MIN(created_at) FILTER (WHERE status = 'pending') as oldest_pending
      FROM dispensary_crawl_jobs
    `);

    const stats = rows[0];

    // Get jobs by type
    const { rows: byType } = await pool.query(`
      SELECT job_type, COUNT(*) as count
      FROM dispensary_crawl_jobs
      WHERE status = 'pending'
      GROUP BY job_type
      ORDER BY count DESC
    `);

    // Get top priority jobs
    const { rows: topPriority } = await pool.query(`
      SELECT
        j.id,
        j.dispensary_id,
        d.name as dispensary_name,
        j.job_type,
        j.priority,
        j.created_at
      FROM dispensary_crawl_jobs j
      LEFT JOIN dispensaries d ON d.id = j.dispensary_id
      WHERE j.status = 'pending'
      ORDER BY j.priority DESC, j.created_at ASC
      LIMIT 5
    `);

    // Estimate wait time based on avg processing rate
    const pendingCount = parseInt(stats.pending_count) || 0;
    const avgDuration = parseFloat(stats.avg_duration_ms) || 30000; // default 30s
    const runningCount = parseInt(stats.running_count) || 1;
    const estimatedWaitMs = (pendingCount * avgDuration) / Math.max(runningCount, 1);

    res.json({
      success: true,
      stats: {
        pending: parseInt(stats.pending_count) || 0,
        running: parseInt(stats.running_count) || 0,
        completed_24h: parseInt(stats.completed_24h) || 0,
        failed_24h: parseInt(stats.failed_24h) || 0,
        cancelled: parseInt(stats.cancelled_count) || 0,
        avg_duration_ms: Math.round(parseFloat(stats.avg_duration_ms)) || null,
        max_priority: parseInt(stats.max_priority) || 0,
        oldest_pending: stats.oldest_pending,
        estimated_wait_ms: Math.round(estimatedWaitMs),
        queue_paused: queuePaused
      },
      by_type: byType,
      top_priority: topPriority
    });
  } catch (error: any) {
    console.error('[JobQueue] Error getting stats:', error);
    res.status(500).json({ success: false, error: error.message });
  }
});

/**
 * GET /api/job-queue/:id - Get single job
 */
router.get('/:id', async (req: Request, res: Response) => {
  try {
    const { id } = req.params;

    const { rows } = await pool.query(`
      SELECT
        j.*,
        d.name as dispensary_name,
        d.city,
        d.state,
        d.menu_url
      FROM dispensary_crawl_jobs j
      LEFT JOIN dispensaries d ON d.id = j.dispensary_id
      WHERE j.id = $1
    `, [id]);

    if (rows.length === 0) {
      return res.status(404).json({ success: false, error: 'Job not found' });
    }

    res.json({ success: true, job: rows[0] });
  } catch (error: any) {
    console.error('[JobQueue] Error getting job:', error);
    res.status(500).json({ success: false, error: error.message });
  }
});

/**
 * PUT /api/job-queue/:id/priority - Update job priority
 */
router.put('/:id/priority', async (req: Request, res: Response) => {
  try {
    const { id } = req.params;
    const { priority } = req.body;

    if (typeof priority !== 'number' || priority < 0 || priority > 100) {
      return res.status(400).json({
        success: false,
        error: 'Priority must be a number between 0 and 100'
      });
    }

    const { rows } = await pool.query(`
      UPDATE dispensary_crawl_jobs
      SET priority = $1, updated_at = NOW()
      WHERE id = $2 AND status = 'pending'
      RETURNING id, priority, status
    `, [priority, id]);

    if (rows.length === 0) {
      return res.status(404).json({
        success: false,
        error: 'Job not found or not in pending status'
      });
    }

    res.json({ success: true, job: rows[0] });
  } catch (error: any) {
    console.error('[JobQueue] Error updating priority:', error);
    res.status(500).json({ success: false, error: error.message });
  }
});

/**
 * POST /api/job-queue/:id/cancel - Cancel a pending job
 */
router.post('/:id/cancel', async (req: Request, res: Response) => {
  try {
    const { id } = req.params;

    const { rows } = await pool.query(`
      UPDATE dispensary_crawl_jobs
      SET status = 'cancelled', completed_at = NOW(), updated_at = NOW()
      WHERE id = $1 AND status = 'pending'
      RETURNING id, status
    `, [id]);

    if (rows.length === 0) {
      return res.status(404).json({
        success: false,
        error: 'Job not found or not in pending status'
      });
    }

    res.json({ success: true, job: rows[0], message: 'Job cancelled' });
  } catch (error: any) {
    console.error('[JobQueue] Error cancelling job:', error);
    res.status(500).json({ success: false, error: error.message });
  }
});

/**
 * POST /api/job-queue/:id/retry - Retry a failed job
 */
router.post('/:id/retry', async (req: Request, res: Response) => {
  try {
    const { id } = req.params;
    const { priority } = req.body;

    const { rows } = await pool.query(`
      UPDATE dispensary_crawl_jobs
      SET
        status = 'pending',
        priority = COALESCE($2, priority),
        error_message = NULL,
        started_at = NULL,
        completed_at = NULL,
        duration_ms = NULL,
        worker_id = NULL,
        locked_by = NULL,
        locked_at = NULL,
        retry_count = retry_count + 1,
        updated_at = NOW()
      WHERE id = $1 AND status IN ('failed', 'cancelled')
      RETURNING id, status, priority, retry_count
    `, [id, priority]);

    if (rows.length === 0) {
      return res.status(404).json({
        success: false,
        error: 'Job not found or not in failed/cancelled status'
      });
    }

    res.json({ success: true, job: rows[0], message: 'Job queued for retry' });
  } catch (error: any) {
    console.error('[JobQueue] Error retrying job:', error);
    res.status(500).json({ success: false, error: error.message });
  }
});

/**
 * POST /api/job-queue/bulk-priority - Bulk update priorities
 */
router.post('/bulk-priority', async (req: Request, res: Response) => {
  try {
    const { jobs } = req.body; // Array of { id, priority }

    if (!Array.isArray(jobs) || jobs.length === 0) {
      return res.status(400).json({
        success: false,
        error: 'jobs array is required'
      });
    }

    const client = await pool.connect();
    try {
      await client.query('BEGIN');

      let updated = 0;
      for (const job of jobs) {
        if (typeof job.id === 'number' && typeof job.priority === 'number') {
          const result = await client.query(`
            UPDATE dispensary_crawl_jobs
            SET priority = $1, updated_at = NOW()
            WHERE id = $2 AND status = 'pending'
          `, [job.priority, job.id]);
          updated += result.rowCount || 0;
        }
      }

      await client.query('COMMIT');
      res.json({ success: true, updated, message: `Updated ${updated} jobs` });
    } catch (err) {
      await client.query('ROLLBACK');
      throw err;
    } finally {
      client.release();
    }
  } catch (error: any) {
    console.error('[JobQueue] Error bulk updating priorities:', error);
    res.status(500).json({ success: false, error: error.message });
  }
});

/**
 * POST /api/job-queue/enqueue - Add a new job to the queue
 */
router.post('/enqueue', async (req: Request, res: Response) => {
  try {
    const { dispensary_id, job_type = 'dutchie_product_crawl', priority = 0 } = req.body;

    if (!dispensary_id) {
      return res.status(400).json({ success: false, error: 'dispensary_id is required' });
    }

    // Check if job already pending for this dispensary
    const existing = await pool.query(`
      SELECT id FROM dispensary_crawl_jobs
      WHERE dispensary_id = $1 AND job_type = $2 AND status = 'pending'
    `, [dispensary_id, job_type]);

    if (existing.rows.length > 0) {
      // Update priority if higher
      await pool.query(`
        UPDATE dispensary_crawl_jobs
        SET priority = GREATEST(priority, $1), updated_at = NOW()
        WHERE id = $2
      `, [priority, existing.rows[0].id]);

      return res.json({
        success: true,
        job_id: existing.rows[0].id,
        message: 'Job already queued, priority updated'
      });
    }

    const { rows } = await pool.query(`
      INSERT INTO dispensary_crawl_jobs (dispensary_id, job_type, priority, trigger_type)
      VALUES ($1, $2, $3, 'manual')
      RETURNING id
    `, [dispensary_id, job_type, priority]);

    res.json({ success: true, job_id: rows[0].id, message: 'Job enqueued' });
  } catch (error: any) {
    console.error('[JobQueue] Error enqueuing job:', error);
    res.status(500).json({ success: false, error: error.message });
  }
});

/**
 * POST /api/job-queue/pause - Pause queue processing
 */
router.post('/pause', async (_req: Request, res: Response) => {
  queuePaused = true;
  res.json({ success: true, queue_paused: true, message: 'Queue paused' });
});

/**
 * POST /api/job-queue/resume - Resume queue processing
 */
router.post('/resume', async (_req: Request, res: Response) => {
  queuePaused = false;
  res.json({ success: true, queue_paused: false, message: 'Queue resumed' });
});

/**
 * GET /api/job-queue/paused - Check if queue is paused
 */
router.get('/paused', async (_req: Request, res: Response) => {
  res.json({ success: true, queue_paused: queuePaused });
});

/**
 * POST /api/job-queue/enqueue-batch - Queue multiple dispensaries at once
 * Body: { dispensary_ids: number[], job_type?: string, priority?: number }
 */
router.post('/enqueue-batch', async (req: Request, res: Response) => {
  try {
    const { dispensary_ids, job_type = 'dutchie_product_crawl', priority = 0 } = req.body;

    if (!Array.isArray(dispensary_ids) || dispensary_ids.length === 0) {
      return res.status(400).json({ success: false, error: 'dispensary_ids array is required' });
    }

    if (dispensary_ids.length > 500) {
      return res.status(400).json({ success: false, error: 'Maximum 500 dispensaries per batch' });
    }

    // Insert jobs, skipping duplicates
    const { rows } = await pool.query(`
      INSERT INTO dispensary_crawl_jobs (dispensary_id, job_type, priority, trigger_type, status, created_at)
      SELECT
        d.id,
        $2::text,
        $3::integer,
        'api_batch',
        'pending',
        NOW()
      FROM dispensaries d
      WHERE d.id = ANY($1::int[])
        AND d.crawl_enabled = true
        AND d.platform_dispensary_id IS NOT NULL
        AND NOT EXISTS (
          SELECT 1 FROM dispensary_crawl_jobs cj
          WHERE cj.dispensary_id = d.id
            AND cj.job_type = $2::text
            AND cj.status IN ('pending', 'running')
        )
      RETURNING id, dispensary_id
    `, [dispensary_ids, job_type, priority]);

    res.json({
      success: true,
      queued: rows.length,
      requested: dispensary_ids.length,
      job_ids: rows.map(r => r.id),
      message: `Queued ${rows.length} of ${dispensary_ids.length} dispensaries`
    });
  } catch (error: any) {
    console.error('[JobQueue] Error batch enqueuing:', error);
    res.status(500).json({ success: false, error: error.message });
  }
});

/**
 * POST /api/job-queue/enqueue-state - Queue all crawl-enabled dispensaries for a state
 * Body: { state_code: string, job_type?: string, priority?: number, limit?: number }
 */
router.post('/enqueue-state', async (req: Request, res: Response) => {
  try {
    const { state_code, job_type = 'dutchie_product_crawl', priority = 0, limit = 200 } = req.body;

    if (!state_code) {
      return res.status(400).json({ success: false, error: 'state_code is required (e.g., "AZ")' });
    }

    // Get state_id and queue jobs
    const { rows } = await pool.query(`
      WITH target_state AS (
        SELECT id FROM states WHERE code = $1
      )
      INSERT INTO dispensary_crawl_jobs (dispensary_id, job_type, priority, trigger_type, status, created_at)
      SELECT
        d.id,
        $2::text,
        $3::integer,
        'api_state',
        'pending',
        NOW()
      FROM dispensaries d, target_state
      WHERE d.state_id = target_state.id
        AND d.crawl_enabled = true
        AND d.platform_dispensary_id IS NOT NULL
        AND NOT EXISTS (
          SELECT 1 FROM dispensary_crawl_jobs cj
          WHERE cj.dispensary_id = d.id
            AND cj.job_type = $2::text
            AND cj.status IN ('pending', 'running')
        )
      LIMIT $4::integer
      RETURNING id, dispensary_id
    `, [state_code.toUpperCase(), job_type, priority, limit]);

    // Get total available count
    const countResult = await pool.query(`
      WITH target_state AS (
        SELECT id FROM states WHERE code = $1
      )
      SELECT COUNT(*) as total
      FROM dispensaries d, target_state
      WHERE d.state_id = target_state.id
        AND d.crawl_enabled = true
        AND d.platform_dispensary_id IS NOT NULL
    `, [state_code.toUpperCase()]);

    res.json({
      success: true,
      queued: rows.length,
      total_available: parseInt(countResult.rows[0].total),
      state: state_code.toUpperCase(),
      job_type,
      message: `Queued ${rows.length} dispensaries for ${state_code.toUpperCase()}`
    });
  } catch (error: any) {
    console.error('[JobQueue] Error enqueuing state:', error);
    res.status(500).json({ success: false, error: error.message });
  }
});

/**
 * POST /api/job-queue/clear-pending - Clear all pending jobs (optionally filtered)
 * Body: { state_code?: string, job_type?: string }
 */
router.post('/clear-pending', async (req: Request, res: Response) => {
  try {
    const { state_code, job_type } = req.body;

    let query = `
      UPDATE dispensary_crawl_jobs
      SET status = 'cancelled', completed_at = NOW(), updated_at = NOW()
      WHERE status = 'pending'
    `;
    const params: any[] = [];
    let paramIndex = 1;

    if (job_type) {
      params.push(job_type);
      query += ` AND job_type = $${paramIndex++}`;
    }

    if (state_code) {
      params.push((state_code as string).toUpperCase());
      query += ` AND dispensary_id IN (
        SELECT d.id FROM dispensaries d
        JOIN states s ON s.id = d.state_id
        WHERE s.code = $${paramIndex++}
      )`;
    }

    const result = await pool.query(query, params);

    res.json({
      success: true,
      cleared: result.rowCount,
      message: `Cancelled ${result.rowCount} pending jobs`
    });
  } catch (error: any) {
    console.error('[JobQueue] Error clearing pending:', error);
    res.status(500).json({ success: false, error: error.message });
  }
});

export default router;
export { queuePaused };
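The queue endpoints above compose into an "enqueue a state, then watch the queue drain" workflow; a rough sketch follows (the base URL is a placeholder, Node 18+ global fetch is assumed, and estimated_wait_ms comes from the stats route's pending * avg_duration / running heuristic):

// Hedged sketch: queue every crawl-enabled AZ dispensary, then poll queue stats until idle.
const QUEUE = 'https://example.test/api/job-queue'; // placeholder base URL

async function crawlArizona(): Promise<void> {
  const enqueue = await fetch(`${QUEUE}/enqueue-state`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ state_code: 'AZ', priority: 10, limit: 200 }),
  });
  const { queued, total_available } = await enqueue.json();
  console.log(`Queued ${queued} of ${total_available} AZ dispensaries`);

  // Poll /stats until the pending count reaches zero.
  for (;;) {
    const { stats } = await (await fetch(`${QUEUE}/stats`)).json();
    if (stats.pending === 0) break;
    console.log(`pending=${stats.pending} est_wait_ms=${stats.estimated_wait_ms}`);
    await new Promise((resolve) => setTimeout(resolve, 30_000));
  }
}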
667  backend/src/routes/markets.ts  Normal file
@@ -0,0 +1,667 @@
/**
 * Markets API Routes
 *
 * Provider-agnostic store and product endpoints for the CannaiQ admin dashboard.
 * Queries the dispensaries and dutchie_products tables directly.
 */
import { Router, Request, Response } from 'express';
import { authMiddleware } from '../auth/middleware';
import { pool } from '../db/pool';

const router = Router();
router.use(authMiddleware);

/**
 * GET /api/markets/dashboard
 * Dashboard summary with counts for dispensaries, products, brands, etc.
 */
router.get('/dashboard', async (req: Request, res: Response) => {
  try {
    // Get dispensary count
    const { rows: dispRows } = await pool.query(
      `SELECT COUNT(*) as count FROM dispensaries`
    );

    // Get product count from store_products (canonical) or fallback to dutchie_products
    const { rows: productRows } = await pool.query(`
      SELECT COUNT(*) as count FROM store_products
    `);

    // Get brand count
    const { rows: brandRows } = await pool.query(`
      SELECT COUNT(DISTINCT brand_name_raw) as count
      FROM store_products
      WHERE brand_name_raw IS NOT NULL
    `);

    // Get category count
    const { rows: categoryRows } = await pool.query(`
      SELECT COUNT(DISTINCT category_raw) as count
      FROM store_products
      WHERE category_raw IS NOT NULL
    `);

    // Get snapshot count in last 24 hours
    const { rows: snapshotRows } = await pool.query(`
      SELECT COUNT(*) as count
      FROM store_product_snapshots
      WHERE captured_at >= NOW() - INTERVAL '24 hours'
    `);

    // Get last crawl time
    const { rows: lastCrawlRows } = await pool.query(`
      SELECT MAX(completed_at) as last_crawl
      FROM crawl_orchestration_traces
      WHERE success = true
    `);

    // Get failed job count (jobs in last 24h that failed)
    const { rows: failedRows } = await pool.query(`
      SELECT COUNT(*) as count
      FROM crawl_orchestration_traces
      WHERE success = false
        AND started_at >= NOW() - INTERVAL '24 hours'
    `);

    res.json({
      dispensaryCount: parseInt(dispRows[0]?.count || '0', 10),
      productCount: parseInt(productRows[0]?.count || '0', 10),
      brandCount: parseInt(brandRows[0]?.count || '0', 10),
      categoryCount: parseInt(categoryRows[0]?.count || '0', 10),
      snapshotCount24h: parseInt(snapshotRows[0]?.count || '0', 10),
      lastCrawlTime: lastCrawlRows[0]?.last_crawl || null,
      failedJobCount: parseInt(failedRows[0]?.count || '0', 10),
    });
  } catch (error: any) {
    console.error('[Markets] Error fetching dashboard:', error.message);
    res.status(500).json({ error: error.message });
  }
});

/**
 * GET /api/markets/stores
 * List all stores from the dispensaries table
 */
router.get('/stores', async (req: Request, res: Response) => {
  try {
    const { city, hasPlatformId, limit = '100', offset = '0' } = req.query;

    let whereClause = 'WHERE 1=1';
    const params: any[] = [];
    let paramIndex = 1;

    if (city) {
      whereClause += ` AND d.city ILIKE $${paramIndex}`;
      params.push(`%${city}%`);
      paramIndex++;
    }

    if (hasPlatformId === 'true') {
      whereClause += ` AND d.platform_dispensary_id IS NOT NULL`;
    } else if (hasPlatformId === 'false') {
      whereClause += ` AND d.platform_dispensary_id IS NULL`;
    }

    params.push(parseInt(limit as string, 10), parseInt(offset as string, 10));

    const { rows } = await pool.query(`
      SELECT
        d.id,
        d.name,
        d.dba_name,
        d.city,
        d.state,
        d.address1 as address,
        d.zipcode as zip,
        d.phone,
        d.website,
        d.menu_url,
        d.menu_type,
        d.platform_dispensary_id,
        d.crawl_enabled,
        d.dutchie_verified,
        d.last_crawl_at,
        d.product_count,
        d.created_at,
        d.updated_at
      FROM dispensaries d
      ${whereClause}
      ORDER BY d.name
      LIMIT $${paramIndex} OFFSET $${paramIndex + 1}
    `, params);

    // Get total count
    const { rows: countRows } = await pool.query(
      `SELECT COUNT(*) as total FROM dispensaries d ${whereClause}`,
      params.slice(0, -2)
    );

    res.json({
      stores: rows,
      total: parseInt(countRows[0]?.total || '0', 10),
    });
  } catch (error: any) {
    console.error('[Markets] Error fetching stores:', error.message);
    res.status(500).json({ error: error.message });
  }
});

/**
 * GET /api/markets/stores/:id
 * Get a single store by ID
 */
router.get('/stores/:id', async (req: Request, res: Response) => {
  try {
    const { id } = req.params;

    const { rows } = await pool.query(`
      SELECT
        d.id,
        d.name,
        d.dba_name,
        d.city,
        d.state,
        d.address1 as address,
        d.zipcode as zip,
        d.phone,
        d.website,
        d.menu_url,
        d.menu_type,
        d.platform_dispensary_id,
        d.crawl_enabled,
        d.dutchie_verified,
        d.last_crawl_at,
        d.product_count,
        d.created_at,
        d.updated_at
      FROM dispensaries d
      WHERE d.id = $1
    `, [parseInt(id, 10)]);

    if (rows.length === 0) {
      return res.status(404).json({ error: 'Store not found' });
    }

    res.json(rows[0]);
  } catch (error: any) {
    console.error('[Markets] Error fetching store:', error.message);
    res.status(500).json({ error: error.message });
  }
});

/**
 * GET /api/markets/stores/:id/summary
 * Get store summary with aggregated metrics, brands, and categories
 */
router.get('/stores/:id/summary', async (req: Request, res: Response) => {
  try {
    const { id } = req.params;
    const dispensaryId = parseInt(id, 10);

    // Get dispensary info
    const { rows: dispRows } = await pool.query(`
      SELECT
        d.id,
        d.name,
        d.dba_name,
        d.c_name as company_name,
        d.city,
        d.state,
        d.address1 as address,
        d.zipcode as zip,
        d.phone,
        d.website,
        d.menu_url,
        d.menu_type,
        d.platform_dispensary_id,
        d.crawl_enabled,
        d.last_crawl_at
      FROM dispensaries d
      WHERE d.id = $1
    `, [dispensaryId]);

    if (dispRows.length === 0) {
      return res.status(404).json({ error: 'Store not found' });
    }

    const dispensary = dispRows[0];

    // Get product counts using canonical store_products table
    const { rows: countRows } = await pool.query(`
      SELECT
        COUNT(*) as total,
        COUNT(*) FILTER (WHERE stock_status = 'in_stock') as in_stock,
        COUNT(*) FILTER (WHERE stock_status = 'out_of_stock') as out_of_stock,
        COUNT(*) FILTER (WHERE stock_status NOT IN ('in_stock', 'out_of_stock') OR stock_status IS NULL) as unknown,
        COUNT(*) FILTER (WHERE stock_status = 'missing_from_feed') as missing_from_feed
      FROM store_products
      WHERE dispensary_id = $1
    `, [dispensaryId]);

    const counts = countRows[0] || {};

    // Get brands using canonical table
    const { rows: brandRows } = await pool.query(`
      SELECT brand_name_raw as brand_name, COUNT(*) as product_count
      FROM store_products
      WHERE dispensary_id = $1 AND brand_name_raw IS NOT NULL
      GROUP BY brand_name_raw
      ORDER BY product_count DESC, brand_name_raw
    `, [dispensaryId]);

    // Get categories using canonical table
    const { rows: categoryRows } = await pool.query(`
      SELECT category_raw as type, subcategory_raw as subcategory, COUNT(*) as product_count
      FROM store_products
      WHERE dispensary_id = $1
      GROUP BY category_raw, subcategory_raw
      ORDER BY product_count DESC
    `, [dispensaryId]);

    // Get last crawl info from job_run_logs or crawl_orchestration_traces
    const { rows: crawlRows } = await pool.query(`
      SELECT
        completed_at,
        CASE WHEN success THEN 'completed' ELSE 'failed' END as status,
        error_message
      FROM crawl_orchestration_traces
      WHERE dispensary_id = $1
      ORDER BY completed_at DESC
      LIMIT 1
    `, [dispensaryId]);

    const lastCrawl = crawlRows.length > 0 ? crawlRows[0] : null;

    res.json({
      dispensary,
      totalProducts: parseInt(counts.total || '0', 10),
      inStockCount: parseInt(counts.in_stock || '0', 10),
      outOfStockCount: parseInt(counts.out_of_stock || '0', 10),
      unknownStockCount: parseInt(counts.unknown || '0', 10),
      missingFromFeedCount: parseInt(counts.missing_from_feed || '0', 10),
      brands: brandRows,
      brandCount: brandRows.length,
      categories: categoryRows,
      categoryCount: categoryRows.length,
      lastCrawl,
    });
  } catch (error: any) {
    console.error('[Markets] Error fetching store summary:', error.message);
    res.status(500).json({ error: error.message });
  }
});

/**
 * GET /api/markets/stores/:id/products
 * Get products for a store with filtering and pagination
 */
router.get('/stores/:id/products', async (req: Request, res: Response) => {
  try {
    const { id } = req.params;
    const {
      stockStatus,
      type,
      subcategory,
      brandName,
      search,
      limit = '25',
      offset = '0'
    } = req.query;

    const dispensaryId = parseInt(id, 10);

    let whereClause = 'WHERE sp.dispensary_id = $1';
    const params: any[] = [dispensaryId];
    let paramIndex = 2;

    if (stockStatus) {
      whereClause += ` AND sp.stock_status = $${paramIndex}`;
      params.push(stockStatus);
      paramIndex++;
    }

    if (type) {
      whereClause += ` AND sp.category_raw = $${paramIndex}`;
      params.push(type);
      paramIndex++;
    }

    if (subcategory) {
      whereClause += ` AND sp.subcategory_raw = $${paramIndex}`;
      params.push(subcategory);
      paramIndex++;
    }

    if (brandName) {
      whereClause += ` AND sp.brand_name_raw ILIKE $${paramIndex}`;
      params.push(`%${brandName}%`);
      paramIndex++;
    }

    if (search) {
      whereClause += ` AND (sp.name_raw ILIKE $${paramIndex} OR sp.brand_name_raw ILIKE $${paramIndex})`;
      params.push(`%${search}%`);
      paramIndex++;
    }

    const limitNum = Math.min(parseInt(limit as string, 10), 100);
    const offsetNum = parseInt(offset as string, 10);
    params.push(limitNum, offsetNum);

    // Get products with latest snapshot data using canonical tables
    const { rows } = await pool.query(`
      SELECT
        sp.id,
        sp.external_product_id as external_id,
        sp.name_raw as name,
        sp.brand_name_raw as brand,
        sp.category_raw as type,
        sp.subcategory_raw as subcategory,
        sp.strain_type,
        sp.stock_status,
        sp.stock_status = 'in_stock' as in_stock,
        sp.stock_status != 'missing_from_feed' as is_present_in_feed,
        sp.stock_status = 'missing_from_feed' as missing_from_feed,
        sp.thc_percent as thc_percentage,
        sp.cbd_percent as cbd_percentage,
        sp.primary_image_url as image_url,
        sp.description,
        sp.total_quantity_available as total_quantity,
        sp.first_seen_at,
        sp.last_seen_at,
        sp.updated_at,
        (
          SELECT jsonb_build_object(
            'regular_price', COALESCE(sps.price_rec, 0)::numeric,
            'sale_price', CASE WHEN sps.price_rec_special > 0
                               THEN sps.price_rec_special::numeric
                               ELSE NULL END,
            'med_price', COALESCE(sps.price_med, 0)::numeric,
            'med_sale_price', CASE WHEN sps.price_med_special > 0
                                   THEN sps.price_med_special::numeric
                                   ELSE NULL END,
            'snapshot_at', sps.captured_at
          )
          FROM store_product_snapshots sps
          WHERE sps.store_product_id = sp.id
          ORDER BY sps.captured_at DESC
          LIMIT 1
        ) as pricing
      FROM store_products sp
      ${whereClause}
      ORDER BY sp.name_raw
      LIMIT $${paramIndex} OFFSET $${paramIndex + 1}
    `, params);

    // Flatten pricing into the product object
    const products = rows.map((row: any) => {
      const pricing = row.pricing || {};
      return {
        ...row,
        regular_price: pricing.regular_price || null,
        sale_price: pricing.sale_price || null,
        med_price: pricing.med_price || null,
        med_sale_price: pricing.med_sale_price || null,
        snapshot_at: pricing.snapshot_at || null,
        pricing: undefined, // Remove the nested object
      };
    });

    // Get total count
    const { rows: countRows } = await pool.query(
      `SELECT COUNT(*) as total FROM store_products sp ${whereClause}`,
      params.slice(0, -2)
    );

    res.json({
      products,
      total: parseInt(countRows[0]?.total || '0', 10),
      limit: limitNum,
      offset: offsetNum,
    });
  } catch (error: any) {
    console.error('[Markets] Error fetching store products:', error.message);
    res.status(500).json({ error: error.message });
  }
});

/**
 * GET /api/markets/stores/:id/brands
 * Get brands for a store
 */
router.get('/stores/:id/brands', async (req: Request, res: Response) => {
  try {
    const { id } = req.params;
    const dispensaryId = parseInt(id, 10);

    const { rows } = await pool.query(`
      SELECT brand_name_raw as brand, COUNT(*) as product_count
      FROM store_products
      WHERE dispensary_id = $1 AND brand_name_raw IS NOT NULL
      GROUP BY brand_name_raw
      ORDER BY product_count DESC, brand_name_raw
    `, [dispensaryId]);

    res.json({ brands: rows });
  } catch (error: any) {
    console.error('[Markets] Error fetching store brands:', error.message);
    res.status(500).json({ error: error.message });
  }
});

/**
 * GET /api/markets/stores/:id/categories
 * Get categories for a store
 */
router.get('/stores/:id/categories', async (req: Request, res: Response) => {
  try {
    const { id } = req.params;
    const dispensaryId = parseInt(id, 10);

    const { rows } = await pool.query(`
      SELECT category_raw as type, subcategory_raw as subcategory, COUNT(*) as product_count
      FROM store_products
      WHERE dispensary_id = $1
      GROUP BY category_raw, subcategory_raw
      ORDER BY product_count DESC
    `, [dispensaryId]);

    res.json({ categories: rows });
  } catch (error: any) {
    console.error('[Markets] Error fetching store categories:', error.message);
    res.status(500).json({ error: error.message });
  }
});

/**
 * POST /api/markets/stores/:id/crawl
 * Trigger a crawl for a store (alias for existing crawl endpoint)
 */
router.post('/stores/:id/crawl', async (req: Request, res: Response) => {
  try {
    const { id } = req.params;
    const dispensaryId = parseInt(id, 10);

    // Verify store exists and has platform_dispensary_id
    const { rows } = await pool.query(`
      SELECT id, name, platform_dispensary_id, menu_type
      FROM dispensaries
      WHERE id = $1
    `, [dispensaryId]);

    if (rows.length === 0) {
      return res.status(404).json({ error: 'Store not found' });
    }

    const store = rows[0];

    if (!store.platform_dispensary_id) {
      return res.status(400).json({
        error: 'Store does not have a platform ID resolved. Cannot crawl.',
        store: { id: store.id, name: store.name, menu_type: store.menu_type }
      });
    }

    // Insert a job into the crawl queue
    await pool.query(`
      INSERT INTO crawl_jobs (dispensary_id, job_type, status, created_at)
|
||||||
|
VALUES ($1, 'dutchie_product_crawl', 'pending', NOW())
|
||||||
|
`, [dispensaryId]);
|
||||||
|
|
||||||
|
res.json({
|
||||||
|
success: true,
|
||||||
|
message: `Crawl queued for ${store.name}`,
|
||||||
|
store: { id: store.id, name: store.name }
|
||||||
|
});
|
||||||
|
} catch (error: any) {
|
||||||
|
console.error('[Markets] Error triggering crawl:', error.message);
|
||||||
|
res.status(500).json({ error: error.message });
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
/**
|
||||||
|
* GET /api/markets/brands
|
||||||
|
* List all brands with product counts and store presence
|
||||||
|
*/
|
||||||
|
router.get('/brands', async (req: Request, res: Response) => {
|
||||||
|
try {
|
||||||
|
const { search, limit = '100', offset = '0', sortBy = 'products' } = req.query;
|
||||||
|
const limitNum = Math.min(parseInt(limit as string, 10), 500);
|
||||||
|
const offsetNum = parseInt(offset as string, 10);
|
||||||
|
|
||||||
|
let whereClause = 'WHERE brand_name_raw IS NOT NULL AND brand_name_raw != \'\'';
|
||||||
|
const params: any[] = [];
|
||||||
|
let paramIndex = 1;
|
||||||
|
|
||||||
|
if (search) {
|
||||||
|
whereClause += ` AND brand_name_raw ILIKE $${paramIndex}`;
|
||||||
|
params.push(`%${search}%`);
|
||||||
|
paramIndex++;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Determine sort column
|
||||||
|
let orderBy = 'product_count DESC';
|
||||||
|
if (sortBy === 'stores') {
|
||||||
|
orderBy = 'store_count DESC';
|
||||||
|
} else if (sortBy === 'name') {
|
||||||
|
orderBy = 'brand_name ASC';
|
||||||
|
}
|
||||||
|
|
||||||
|
params.push(limitNum, offsetNum);
|
||||||
|
|
||||||
|
const { rows } = await pool.query(`
|
||||||
|
SELECT
|
||||||
|
brand_name_raw as brand_name,
|
||||||
|
COUNT(*) as product_count,
|
||||||
|
COUNT(DISTINCT dispensary_id) as store_count,
|
||||||
|
AVG(price_rec) FILTER (WHERE price_rec > 0) as avg_price,
|
||||||
|
array_agg(DISTINCT category_raw) FILTER (WHERE category_raw IS NOT NULL) as categories,
|
||||||
|
MIN(first_seen_at) as first_seen_at,
|
||||||
|
MAX(last_seen_at) as last_seen_at
|
||||||
|
FROM store_products
|
||||||
|
${whereClause}
|
||||||
|
GROUP BY brand_name_raw
|
||||||
|
ORDER BY ${orderBy}
|
||||||
|
LIMIT $${paramIndex} OFFSET $${paramIndex + 1}
|
||||||
|
`, params);
|
||||||
|
|
||||||
|
// Get total count
|
||||||
|
const { rows: countRows } = await pool.query(`
|
||||||
|
SELECT COUNT(DISTINCT brand_name_raw) as total
|
||||||
|
FROM store_products
|
||||||
|
${whereClause}
|
||||||
|
`, params.slice(0, -2));
|
||||||
|
|
||||||
|
// Calculate summary stats
|
||||||
|
const { rows: summaryRows } = await pool.query(`
|
||||||
|
SELECT
|
||||||
|
COUNT(DISTINCT brand_name_raw) as total_brands,
|
||||||
|
AVG(product_count) as avg_products_per_brand
|
||||||
|
FROM (
|
||||||
|
SELECT brand_name_raw, COUNT(*) as product_count
|
||||||
|
FROM store_products
|
||||||
|
WHERE brand_name_raw IS NOT NULL AND brand_name_raw != ''
|
||||||
|
GROUP BY brand_name_raw
|
||||||
|
) brand_counts
|
||||||
|
`);
|
||||||
|
|
||||||
|
res.json({
|
||||||
|
brands: rows.map((r: any, idx: number) => ({
|
||||||
|
id: idx + 1 + offsetNum,
|
||||||
|
name: r.brand_name,
|
||||||
|
normalized_name: null,
|
||||||
|
product_count: parseInt(r.product_count, 10),
|
||||||
|
store_count: parseInt(r.store_count, 10),
|
||||||
|
avg_price: r.avg_price ? parseFloat(r.avg_price) : null,
|
||||||
|
categories: r.categories || [],
|
||||||
|
is_portfolio: false,
|
||||||
|
first_seen_at: r.first_seen_at,
|
||||||
|
last_seen_at: r.last_seen_at,
|
||||||
|
})),
|
||||||
|
total: parseInt(countRows[0]?.total || '0', 10),
|
||||||
|
summary: {
|
||||||
|
total_brands: parseInt(summaryRows[0]?.total_brands || '0', 10),
|
||||||
|
portfolio_brands: 0,
|
||||||
|
avg_products_per_brand: Math.round(parseFloat(summaryRows[0]?.avg_products_per_brand || '0')),
|
||||||
|
top_categories: [],
|
||||||
|
},
|
||||||
|
limit: limitNum,
|
||||||
|
offset: offsetNum,
|
||||||
|
});
|
||||||
|
} catch (error: any) {
|
||||||
|
console.error('[Markets] Error fetching brands:', error.message);
|
||||||
|
res.status(500).json({ error: error.message });
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
/**
|
||||||
|
* GET /api/markets/categories
|
||||||
|
* List all categories with product counts
|
||||||
|
*/
|
||||||
|
router.get('/categories', async (req: Request, res: Response) => {
|
||||||
|
try {
|
||||||
|
const { search, limit = '100' } = req.query;
|
||||||
|
const limitNum = Math.min(parseInt(limit as string, 10), 500);
|
||||||
|
|
||||||
|
let whereClause = 'WHERE category_raw IS NOT NULL AND category_raw != \'\'';
|
||||||
|
const params: any[] = [];
|
||||||
|
let paramIndex = 1;
|
||||||
|
|
||||||
|
if (search) {
|
||||||
|
whereClause += ` AND category_raw ILIKE $${paramIndex}`;
|
||||||
|
params.push(`%${search}%`);
|
||||||
|
paramIndex++;
|
||||||
|
}
|
||||||
|
|
||||||
|
params.push(limitNum);
|
||||||
|
|
||||||
|
const { rows } = await pool.query(`
|
||||||
|
SELECT
|
||||||
|
category_raw as name,
|
||||||
|
COUNT(*) as product_count,
|
||||||
|
COUNT(DISTINCT dispensary_id) as store_count,
|
||||||
|
AVG(price_rec) FILTER (WHERE price_rec > 0) as avg_price
|
||||||
|
FROM store_products
|
||||||
|
${whereClause}
|
||||||
|
GROUP BY category_raw
|
||||||
|
ORDER BY product_count DESC
|
||||||
|
LIMIT $${paramIndex}
|
||||||
|
`, params);
|
||||||
|
|
||||||
|
res.json({
|
||||||
|
categories: rows.map((r: any, idx: number) => ({
|
||||||
|
id: idx + 1,
|
||||||
|
name: r.name,
|
||||||
|
product_count: parseInt(r.product_count, 10),
|
||||||
|
store_count: parseInt(r.store_count, 10),
|
||||||
|
avg_price: r.avg_price ? parseFloat(r.avg_price) : null,
|
||||||
|
})),
|
||||||
|
total: rows.length,
|
||||||
|
});
|
||||||
|
} catch (error: any) {
|
||||||
|
console.error('[Markets] Error fetching categories:', error.message);
|
||||||
|
res.status(500).json({ error: error.message });
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
export default router;
|
||||||
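A minimal usage sketch for the brands listing above. The client function name and the /api/markets mount path are assumptions for illustration; only the query parameters and response shape come from the route itself.

// Hypothetical client call against GET /api/markets/brands (sketch, not part of the diff).
async function fetchTopBrands(): Promise<string[]> {
  const res = await fetch('/api/markets/brands?sortBy=stores&limit=25');
  if (!res.ok) throw new Error(`Request failed: ${res.status}`);
  const body = await res.json();
  // body.brands is ordered by the requested sort column; body.summary carries aggregate counts.
  return body.brands.map((b: { name: string; store_count: number }) => `${b.name} (${b.store_count} stores)`);
}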
@@ -24,37 +24,22 @@ const router = Router();
 */
router.get('/metrics', async (_req: Request, res: Response) => {
  try {
-   // Get aggregate metrics
+   // Get aggregate metrics using 7-stage pipeline
    const { rows: metrics } = await pool.query(`
      SELECT
-       (SELECT COUNT(*) FROM dutchie_products) as total_products,
-       (SELECT COUNT(DISTINCT brand_name) FROM dutchie_products WHERE brand_name IS NOT NULL) as total_brands,
-       (SELECT COUNT(*) FROM dispensaries WHERE state = 'AZ') as total_stores,
-       (
-         SELECT COUNT(*)
-         FROM dispensary_crawler_profiles dcp
-         WHERE dcp.enabled = true
-           AND (dcp.status = 'production' OR (dcp.config->>'status')::text = 'production')
-       ) as healthy_count,
-       (
-         SELECT COUNT(*)
-         FROM dispensary_crawler_profiles dcp
-         WHERE dcp.enabled = true
-           AND (dcp.status = 'sandbox' OR (dcp.config->>'status')::text = 'sandbox')
-       ) as sandbox_count,
-       (
-         SELECT COUNT(*)
-         FROM dispensary_crawler_profiles dcp
-         WHERE dcp.enabled = true
-           AND (dcp.status = 'needs_manual' OR (dcp.config->>'status')::text = 'needs_manual')
-       ) as needs_manual_count,
-       (
-         SELECT COUNT(*)
-         FROM dispensary_crawler_profiles dcp
-         JOIN dispensaries d ON d.id = dcp.dispensary_id
-         WHERE d.state = 'AZ'
-           AND dcp.status = 'needs_manual'
-       ) as failing_count
+       (SELECT COUNT(*) FROM store_products) as total_products,
+       (SELECT COUNT(DISTINCT brand_name_raw) FROM store_products WHERE brand_name_raw IS NOT NULL) as total_brands,
+       (SELECT COUNT(*) FROM dispensaries WHERE menu_type = 'dutchie' AND crawl_enabled = true) as total_stores,
+       -- Stage counts from dispensaries table (7-stage pipeline)
+       (SELECT COUNT(*) FROM dispensaries WHERE stage = 'discovered') as discovered_count,
+       (SELECT COUNT(*) FROM dispensaries WHERE stage = 'validated') as validated_count,
+       (SELECT COUNT(*) FROM dispensaries WHERE stage = 'promoted') as promoted_count,
+       (SELECT COUNT(*) FROM dispensaries WHERE stage = 'sandbox') as sandbox_count,
+       (SELECT COUNT(*) FROM dispensaries WHERE stage = 'hydrating') as hydrating_count,
+       (SELECT COUNT(*) FROM dispensaries WHERE stage = 'production') as production_count,
+       (SELECT COUNT(*) FROM dispensaries WHERE stage = 'failing') as failing_count,
+       -- Discovery pipeline counts
+       (SELECT COUNT(*) FROM dutchie_discovery_locations WHERE stage = 'discovered' AND active = true) as discovery_pending
    `);

    const row = metrics[0] || {};

@@ -63,13 +48,22 @@ router.get('/metrics', async (_req: Request, res: Response) => {
      total_products: parseInt(row.total_products || '0', 10),
      total_brands: parseInt(row.total_brands || '0', 10),
      total_stores: parseInt(row.total_stores || '0', 10),
-     // Placeholder sentiment values - these would come from actual analytics
-     market_sentiment: 'neutral',
-     market_direction: 'stable',
-     // Health counts
-     healthy_count: parseInt(row.healthy_count || '0', 10),
+     // 7-Stage Pipeline Counts
+     stages: {
+       discovered: parseInt(row.discovered_count || '0', 10),
+       validated: parseInt(row.validated_count || '0', 10),
+       promoted: parseInt(row.promoted_count || '0', 10),
+       sandbox: parseInt(row.sandbox_count || '0', 10),
+       hydrating: parseInt(row.hydrating_count || '0', 10),
+       production: parseInt(row.production_count || '0', 10),
+       failing: parseInt(row.failing_count || '0', 10),
+     },
+     // Discovery pipeline
+     discovery_pending: parseInt(row.discovery_pending || '0', 10),
+     // Legacy compatibility
+     healthy_count: parseInt(row.production_count || '0', 10),
      sandbox_count: parseInt(row.sandbox_count || '0', 10),
-     needs_manual_count: parseInt(row.needs_manual_count || '0', 10),
+     needs_manual_count: parseInt(row.failing_count || '0', 10),
      failing_count: parseInt(row.failing_count || '0', 10),
    });
  } catch (error: any) {

@@ -117,12 +111,13 @@ router.get('/states', async (_req: Request, res: Response) => {
 * Returns list of stores with orchestrator status info
 * Query params:
 * - state: Filter by state (e.g., "AZ")
+ * - crawl_enabled: Filter by crawl status (default: true, use "all" to show all, "false" for disabled only)
 * - limit: Max results (default 100)
 * - offset: Pagination offset
 */
router.get('/stores', async (req: Request, res: Response) => {
  try {
-   const { state, limit = '100', offset = '0' } = req.query;
+   const { state, crawl_enabled, limit = '100', offset = '0' } = req.query;

    let whereClause = 'WHERE 1=1';
    const params: any[] = [];

@@ -134,6 +129,16 @@ router.get('/stores', async (req: Request, res: Response) => {
      paramIndex++;
    }

+   // Filter by crawl_enabled - defaults to showing only enabled
+   if (crawl_enabled === 'false' || crawl_enabled === '0') {
+     whereClause += ` AND (d.crawl_enabled = false OR d.crawl_enabled IS NULL)`;
+   } else if (crawl_enabled === 'all') {
+     // Show all (no filter)
+   } else {
+     // Default: show only enabled
+     whereClause += ` AND d.crawl_enabled = true`;
+   }
+
    params.push(parseInt(limit as string, 10), parseInt(offset as string, 10));

    const { rows } = await pool.query(`

@@ -145,9 +150,15 @@ router.get('/stores', async (req: Request, res: Response) => {
        d.menu_type as provider,
        d.platform_dispensary_id,
        d.last_crawl_at,
+       d.crawl_enabled,
+       d.stage,
+       d.stage_changed_at,
+       d.first_crawl_at,
+       d.last_successful_crawl_at,
        dcp.id as profile_id,
        dcp.profile_key,
-       COALESCE(dcp.status, dcp.config->>'status', 'legacy') as crawler_status,
+       dcp.consecutive_successes,
+       dcp.consecutive_failures,
        (
          SELECT MAX(cot.completed_at)
          FROM crawl_orchestration_traces cot

@@ -160,8 +171,8 @@ router.get('/stores', async (req: Request, res: Response) => {
        ) as last_failure_at,
        (
          SELECT COUNT(*)
-         FROM dutchie_products dp
-         WHERE dp.dispensary_id = d.id
+         FROM store_products sp
+         WHERE sp.dispensary_id = d.id
        ) as product_count
      FROM dispensaries d
      LEFT JOIN dispensary_crawler_profiles dcp

@@ -185,9 +196,17 @@ router.get('/stores', async (req: Request, res: Response) => {
      state: r.state,
      provider: r.provider || 'unknown',
      provider_raw: r.provider || null,
-     provider_display: getProviderDisplayName(r.provider),
+     // Admin routes show actual provider names (not anonymized)
+     provider_display: r.provider || 'Unknown',
      platformDispensaryId: r.platform_dispensary_id,
-     status: r.crawler_status || (r.platform_dispensary_id ? 'legacy' : 'pending'),
+     crawlEnabled: r.crawl_enabled ?? false,
+     // Use stage from dispensaries table (6-stage pipeline)
+     stage: r.stage || 'discovered',
+     stageChangedAt: r.stage_changed_at,
+     firstCrawlAt: r.first_crawl_at,
+     lastSuccessfulCrawlAt: r.last_successful_crawl_at,
+     consecutiveSuccesses: r.consecutive_successes || 0,
+     consecutiveFailures: r.consecutive_failures || 0,
      profileId: r.profile_id,
      profileKey: r.profile_key,
      lastCrawlAt: r.last_crawl_at,

@@ -425,4 +444,392 @@ router.get('/crawl-traces/:traceId', async (req: Request, res: Response) => {
  }
});

// ============================================================
// STATUS MANAGEMENT
// ============================================================

// 6-Stage Pipeline Statuses
const VALID_STAGES = ['discovered', 'validated', 'promoted', 'sandbox', 'production', 'failing'] as const;

/**
 * POST /api/admin/orchestrator/stores/:id/stage
 * Manually update the stage for a store (use /api/pipeline for proper transitions)
 * Body: { stage: 'discovered' | 'validated' | 'promoted' | 'sandbox' | 'production' | 'failing', reason?: string }
 */
router.post('/stores/:id/stage', async (req: Request, res: Response) => {
  try {
    const { id } = req.params;
    const { stage: status, reason } = req.body;

    if (!status || !VALID_STAGES.includes(status)) {
      return res.status(400).json({
        error: `Invalid stage. Must be one of: ${VALID_STAGES.join(', ')}`,
      });
    }

    const dispensaryId = parseInt(id, 10);

    // Get current profile and status
    const { rows: profileRows } = await pool.query(`
      SELECT dcp.id, dcp.status as current_status, d.name as dispensary_name
      FROM dispensary_crawler_profiles dcp
      JOIN dispensaries d ON d.id = dcp.dispensary_id
      WHERE dcp.dispensary_id = $1 AND dcp.enabled = true
      ORDER BY dcp.updated_at DESC
      LIMIT 1
    `, [dispensaryId]);

    if (profileRows.length === 0) {
      return res.status(404).json({ error: 'No crawler profile found for this store' });
    }

    const profileId = profileRows[0].id;
    const currentStatus = profileRows[0].current_status;
    const dispensaryName = profileRows[0].dispensary_name;

    // Update the status
    await pool.query(`
      UPDATE dispensary_crawler_profiles
      SET
        status = $1,
        status_reason = $2,
        status_changed_at = CURRENT_TIMESTAMP,
        updated_at = CURRENT_TIMESTAMP
      WHERE id = $3
    `, [status, reason || `Manual status change to ${status}`, profileId]);

    // Create status alert
    const severity = status === 'production' ? 'info'
      : status === 'needs_manual' ? 'warning'
      : status === 'failing' ? 'error'
      : 'info';

    await pool.query(`
      INSERT INTO crawler_status_alerts
        (dispensary_id, profile_id, alert_type, severity, message, previous_status, new_status, metadata)
      VALUES ($1, $2, 'status_change', $3, $4, $5, $6, $7)
    `, [
      dispensaryId,
      profileId,
      severity,
      `${dispensaryName}: Status changed from ${currentStatus || 'unknown'} to ${status}`,
      currentStatus,
      status,
      JSON.stringify({ reason, changedBy: 'admin_api' }),
    ]);

    res.json({
      success: true,
      dispensaryId,
      profileId,
      previousStatus: currentStatus,
      newStatus: status,
      message: `Status updated to ${status}`,
    });
  } catch (error: any) {
    console.error('[OrchestratorAdmin] Error updating status:', error.message);
    res.status(500).json({ error: error.message });
  }
});

/**
 * GET /api/admin/orchestrator/alerts
 * Get recent status alerts for the dashboard
 * Query params:
 * - severity: Filter by severity (info, warning, error, critical)
 * - acknowledged: Filter by acknowledged status (true/false)
 * - limit: Max results (default 50)
 */
router.get('/alerts', async (req: Request, res: Response) => {
  try {
    const { severity, acknowledged, dispensary_id, limit = '50' } = req.query;

    let whereClause = 'WHERE 1=1';
    const params: any[] = [];
    let paramIndex = 1;

    if (severity) {
      whereClause += ` AND csa.severity = $${paramIndex}`;
      params.push(severity);
      paramIndex++;
    }

    if (acknowledged === 'true') {
      whereClause += ' AND csa.acknowledged = true';
    } else if (acknowledged === 'false') {
      whereClause += ' AND csa.acknowledged = false';
    }

    if (dispensary_id) {
      whereClause += ` AND csa.dispensary_id = $${paramIndex}`;
      params.push(parseInt(dispensary_id as string, 10));
      paramIndex++;
    }

    params.push(parseInt(limit as string, 10));

    const { rows } = await pool.query(`
      SELECT
        csa.*,
        d.name as dispensary_name,
        d.city,
        d.state
      FROM crawler_status_alerts csa
      LEFT JOIN dispensaries d ON csa.dispensary_id = d.id
      ${whereClause}
      ORDER BY csa.created_at DESC
      LIMIT $${paramIndex}
    `, params);

    // Get unacknowledged count by severity
    const { rows: countRows } = await pool.query(`
      SELECT severity, COUNT(*) as count
      FROM crawler_status_alerts
      WHERE acknowledged = false
      GROUP BY severity
    `);

    const unacknowledgedCounts = countRows.reduce((acc: Record<string, number>, row: any) => {
      acc[row.severity] = parseInt(row.count, 10);
      return acc;
    }, {});

    res.json({
      alerts: rows.map((r: any) => ({
        id: r.id,
        dispensaryId: r.dispensary_id,
        dispensaryName: r.dispensary_name,
        city: r.city,
        state: r.state,
        profileId: r.profile_id,
        alertType: r.alert_type,
        severity: r.severity,
        message: r.message,
        previousStatus: r.previous_status,
        newStatus: r.new_status,
        errorDetails: r.error_details,
        metadata: r.metadata,
        acknowledged: r.acknowledged,
        acknowledgedAt: r.acknowledged_at,
        acknowledgedBy: r.acknowledged_by,
        createdAt: r.created_at,
      })),
      unacknowledgedCounts,
    });
  } catch (error: any) {
    console.error('[OrchestratorAdmin] Error fetching alerts:', error.message);
    res.status(500).json({ error: error.message });
  }
});

/**
 * POST /api/admin/orchestrator/alerts/:id/acknowledge
 * Acknowledge an alert
 */
router.post('/alerts/:id/acknowledge', async (req: Request, res: Response) => {
  try {
    const { id } = req.params;
    const { acknowledgedBy = 'admin' } = req.body;

    await pool.query(`
      UPDATE crawler_status_alerts
      SET acknowledged = true, acknowledged_at = CURRENT_TIMESTAMP, acknowledged_by = $1
      WHERE id = $2
    `, [acknowledgedBy, parseInt(id, 10)]);

    res.json({ success: true, alertId: parseInt(id, 10) });
  } catch (error: any) {
    console.error('[OrchestratorAdmin] Error acknowledging alert:', error.message);
    res.status(500).json({ error: error.message });
  }
});

/**
 * POST /api/admin/orchestrator/alerts/acknowledge-all
 * Acknowledge all unacknowledged alerts (optionally filtered)
 */
router.post('/alerts/acknowledge-all', async (req: Request, res: Response) => {
  try {
    const { severity, dispensaryId, acknowledgedBy = 'admin' } = req.body;

    let whereClause = 'WHERE acknowledged = false';
    const params: any[] = [acknowledgedBy];
    let paramIndex = 2;

    if (severity) {
      whereClause += ` AND severity = $${paramIndex}`;
      params.push(severity);
      paramIndex++;
    }

    if (dispensaryId) {
      whereClause += ` AND dispensary_id = $${paramIndex}`;
      params.push(dispensaryId);
      paramIndex++;
    }

    const result = await pool.query(`
      UPDATE crawler_status_alerts
      SET acknowledged = true, acknowledged_at = CURRENT_TIMESTAMP, acknowledged_by = $1
      ${whereClause}
    `, params);

    res.json({ success: true, acknowledgedCount: result.rowCount });
  } catch (error: any) {
    console.error('[OrchestratorAdmin] Error acknowledging alerts:', error.message);
    res.status(500).json({ error: error.message });
  }
});

/**
 * POST /api/admin/orchestrator/crawl-outcome
 * Record a crawl outcome and update status based on success/failure
 * This endpoint is called by the crawler after each crawl attempt
 */
router.post('/crawl-outcome', async (req: Request, res: Response) => {
  try {
    const {
      dispensaryId,
      success,
      productsFound = 0,
      error,
      metadata = {},
    } = req.body;

    if (!dispensaryId) {
      return res.status(400).json({ error: 'dispensaryId is required' });
    }

    // Get current profile
    const { rows: profileRows } = await pool.query(`
      SELECT
        dcp.id,
        dcp.status,
        dcp.consecutive_successes,
        dcp.consecutive_failures,
        d.name as dispensary_name
      FROM dispensary_crawler_profiles dcp
      JOIN dispensaries d ON d.id = dcp.dispensary_id
      WHERE dcp.dispensary_id = $1 AND dcp.enabled = true
      ORDER BY dcp.updated_at DESC
      LIMIT 1
    `, [dispensaryId]);

    if (profileRows.length === 0) {
      return res.status(404).json({ error: 'No crawler profile found' });
    }

    const profile = profileRows[0];
    const currentStatus = profile.status;
    let newStatus = currentStatus;
    let statusChanged = false;
    let consecutiveSuccesses = profile.consecutive_successes || 0;
    let consecutiveFailures = profile.consecutive_failures || 0;

    if (success) {
      consecutiveSuccesses++;
      consecutiveFailures = 0;

      // Auto-promote from sandbox to production after 3 consecutive successes
      if (currentStatus === 'sandbox' && consecutiveSuccesses >= 3) {
        newStatus = 'production';
        statusChanged = true;
      }
      // Auto-recover from needs_manual/failing after 2 consecutive successes
      else if ((currentStatus === 'needs_manual' || currentStatus === 'failing') && consecutiveSuccesses >= 2) {
        newStatus = 'production';
        statusChanged = true;
      }
    } else {
      consecutiveFailures++;
      consecutiveSuccesses = 0;

      // Demote to needs_manual after 2 consecutive failures
      if (currentStatus === 'production' && consecutiveFailures >= 2) {
        newStatus = 'needs_manual';
        statusChanged = true;
      }
      // Demote to failing after 5 consecutive failures
      else if (currentStatus === 'needs_manual' && consecutiveFailures >= 5) {
        newStatus = 'failing';
        statusChanged = true;
      }
      // Keep sandbox as sandbox even with failures (needs manual intervention to fix)
      else if (currentStatus === 'sandbox' && consecutiveFailures >= 3) {
        newStatus = 'needs_manual';
        statusChanged = true;
      }
    }

    // Update profile
    await pool.query(`
      UPDATE dispensary_crawler_profiles
      SET
        consecutive_successes = $1,
        consecutive_failures = $2,
        status = $3,
        status_reason = CASE WHEN $4 THEN $5 ELSE status_reason END,
        status_changed_at = CASE WHEN $4 THEN CURRENT_TIMESTAMP ELSE status_changed_at END,
        updated_at = CURRENT_TIMESTAMP
      WHERE id = $6
    `, [
      consecutiveSuccesses,
      consecutiveFailures,
      newStatus,
      statusChanged,
      statusChanged ? (success ? 'Auto-promoted after consecutive successes' : `Auto-demoted after ${consecutiveFailures} consecutive failures`) : null,
      profile.id,
    ]);

    // Create alert if status changed or error occurred
    if (statusChanged) {
      const severity = newStatus === 'production' ? 'info'
        : newStatus === 'needs_manual' ? 'warning'
        : 'error';

      await pool.query(`
        INSERT INTO crawler_status_alerts
          (dispensary_id, profile_id, alert_type, severity, message, previous_status, new_status, metadata)
        VALUES ($1, $2, 'status_change', $3, $4, $5, $6, $7)
      `, [
        dispensaryId,
        profile.id,
        severity,
        `${profile.dispensary_name}: ${success ? 'Promoted' : 'Demoted'} from ${currentStatus} to ${newStatus}`,
        currentStatus,
        newStatus,
        JSON.stringify({ productsFound, consecutiveSuccesses, consecutiveFailures, ...metadata }),
      ]);
    } else if (!success && error) {
      // Log crawl error as alert
      await pool.query(`
        INSERT INTO crawler_status_alerts
          (dispensary_id, profile_id, alert_type, severity, message, error_details, metadata)
        VALUES ($1, $2, 'crawl_error', $3, $4, $5, $6)
      `, [
        dispensaryId,
        profile.id,
        consecutiveFailures >= 2 ? 'warning' : 'info',
        `${profile.dispensary_name}: Crawl failed - ${error}`,
        JSON.stringify({ error, stack: metadata.stack }),
        JSON.stringify({ consecutiveFailures, ...metadata }),
      ]);
    }

    res.json({
      success: true,
      dispensaryId,
      profileId: profile.id,
      statusChanged,
      previousStatus: currentStatus,
      newStatus,
      consecutiveSuccesses,
      consecutiveFailures,
    });
  } catch (error: any) {
    console.error('[OrchestratorAdmin] Error recording crawl outcome:', error.message);
    res.status(500).json({ error: error.message });
  }
});

export default router;
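A short sketch of how a crawler worker might report results to the crawl-outcome endpoint above. The function name, base path, and lack of auth headers are assumptions for illustration; the request body and response fields match the route.

// Hypothetical reporting call from a crawler worker (sketch, not part of the diff).
async function reportCrawlOutcome(dispensaryId: number, success: boolean, productsFound: number, errorMsg?: string) {
  const res = await fetch('/api/admin/orchestrator/crawl-outcome', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ dispensaryId, success, productsFound, error: errorMsg }),
  });
  // statusChanged reflects the auto-promote/demote rules in the handler:
  // sandbox -> production after 3 straight successes, production -> needs_manual after 2 straight failures.
  return (await res.json()) as { statusChanged: boolean; previousStatus: string; newStatus: string };
}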
backend/src/routes/pipeline.ts (new file, 1084 lines)
File diff suppressed because it is too large
backend/src/routes/price-analytics.ts (new file, 472 lines)
@@ -0,0 +1,472 @@
/**
 * Price Analytics API Routes
 *
 * Endpoints for price history, specials, and price comparison analytics
 * Uses the new product_variants and product_variant_snapshots tables
 */

import { Router, Request, Response } from 'express';
import { pool } from '../db/pool';

const router = Router();

// ============================================================
// PRICE HISTORY
// ============================================================

/**
 * GET /api/price-analytics/products/:id/history
 * Get price and stock history for a product variant
 *
 * Query params:
 * - days: Number of days to look back (default: 30, max: 90)
 * - option: Specific variant option (e.g., "1g", "3.5g")
 */
router.get('/products/:id/history', async (req: Request, res: Response) => {
  try {
    const { id } = req.params;
    const { days = '30', option } = req.query;
    const daysNum = Math.min(parseInt(days as string, 10) || 30, 90);

    // Get product info
    const productResult = await pool.query(`
      SELECT
        sp.id, sp.name_raw as name, sp.brand_name_raw as brand,
        sp.category_raw as category, sp.dispensary_id,
        d.name as dispensary_name
      FROM store_products sp
      JOIN dispensaries d ON d.id = sp.dispensary_id
      WHERE sp.id = $1
    `, [id]);

    if (productResult.rows.length === 0) {
      return res.status(404).json({ error: 'Product not found' });
    }

    const product = productResult.rows[0];

    // Get variant history
    let variantQuery = `
      SELECT
        pv.id as variant_id,
        pv.option,
        pvs.price_rec,
        pvs.price_med,
        pvs.price_rec_special,
        pvs.price_med_special,
        pvs.quantity,
        pvs.in_stock,
        pvs.is_on_special,
        pvs.captured_at
      FROM product_variant_snapshots pvs
      JOIN product_variants pv ON pv.id = pvs.product_variant_id
      WHERE pv.store_product_id = $1
        AND pvs.captured_at >= NOW() - ($2 || ' days')::INTERVAL
    `;
    const params: any[] = [id, daysNum];

    if (option) {
      variantQuery += ` AND pv.option = $3`;
      params.push(option);
    }

    variantQuery += ` ORDER BY pv.option, pvs.captured_at ASC`;

    const historyResult = await pool.query(variantQuery, params);

    // Get current variants
    const currentResult = await pool.query(`
      SELECT
        id, option, price_rec, price_med, price_rec_special, price_med_special,
        quantity, in_stock, is_on_special, last_price_change_at, last_stock_change_at
      FROM product_variants
      WHERE store_product_id = $1
      ORDER BY option
    `, [id]);

    // Get sale stats using the function
    const saleStatsResult = await pool.query(`
      SELECT
        pv.option,
        (get_variant_sale_stats(pv.id, $2)).*
      FROM product_variants pv
      WHERE pv.store_product_id = $1
    `, [id, daysNum]);

    // Group history by variant
    const historyByVariant: Record<string, any[]> = {};
    for (const row of historyResult.rows) {
      if (!historyByVariant[row.option]) {
        historyByVariant[row.option] = [];
      }
      historyByVariant[row.option].push({
        price_rec: row.price_rec ? parseFloat(row.price_rec) : null,
        price_med: row.price_med ? parseFloat(row.price_med) : null,
        price_rec_special: row.price_rec_special ? parseFloat(row.price_rec_special) : null,
        quantity: row.quantity,
        in_stock: row.in_stock,
        is_on_special: row.is_on_special,
        captured_at: row.captured_at,
      });
    }

    res.json({
      product: {
        id: product.id,
        name: product.name,
        brand: product.brand,
        category: product.category,
        dispensary_id: product.dispensary_id,
        dispensary_name: product.dispensary_name,
      },
      current_variants: currentResult.rows.map((v: any) => ({
        ...v,
        price_rec: v.price_rec ? parseFloat(v.price_rec) : null,
        price_med: v.price_med ? parseFloat(v.price_med) : null,
        price_rec_special: v.price_rec_special ? parseFloat(v.price_rec_special) : null,
      })),
      history: historyByVariant,
      sale_stats: saleStatsResult.rows.reduce((acc: any, row: any) => {
        acc[row.option] = {
          total_snapshots: parseInt(row.total_snapshots),
          times_on_special: parseInt(row.times_on_special),
          special_frequency_pct: row.special_frequency_pct ? parseFloat(row.special_frequency_pct) : 0,
          avg_discount_pct: row.avg_discount_pct ? parseFloat(row.avg_discount_pct) : null,
          min_price: row.min_price ? parseFloat(row.min_price) : null,
          max_price: row.max_price ? parseFloat(row.max_price) : null,
          avg_price: row.avg_price ? parseFloat(row.avg_price) : null,
        };
        return acc;
      }, {}),
      days: daysNum,
    });
  } catch (error: any) {
    console.error('Product history error:', error);
    res.status(500).json({ error: 'Failed to fetch product history', message: error.message });
  }
});

// ============================================================
// CURRENT SPECIALS
// ============================================================

/**
 * GET /api/price-analytics/specials
 * Get all products currently on special
 *
 * Query params:
 * - state: Filter by state code
 * - city: Filter by city
 * - category: Filter by category
 * - min_discount: Minimum discount percentage
 * - limit: Max results (default: 100, max: 500)
 * - offset: Pagination offset
 */
router.get('/specials', async (req: Request, res: Response) => {
  try {
    const {
      state,
      city,
      category,
      min_discount = '0',
      limit = '100',
      offset = '0',
    } = req.query;

    const limitNum = Math.min(parseInt(limit as string, 10) || 100, 500);
    const offsetNum = parseInt(offset as string, 10) || 0;
    const minDiscountNum = parseFloat(min_discount as string) || 0;

    let whereClause = `WHERE pv.is_on_special = TRUE AND pv.in_stock = TRUE`;
    const params: any[] = [];
    let paramIndex = 1;

    if (state) {
      whereClause += ` AND d.state = $${paramIndex}`;
      params.push(state);
      paramIndex++;
    }

    if (city) {
      whereClause += ` AND LOWER(d.city) LIKE LOWER($${paramIndex})`;
      params.push(`%${city}%`);
      paramIndex++;
    }

    if (category) {
      whereClause += ` AND LOWER(sp.category_raw) = LOWER($${paramIndex})`;
      params.push(category);
      paramIndex++;
    }

    // Calculate discount and filter
    const discountCalc = `ROUND(((pv.price_rec - pv.price_rec_special) / NULLIF(pv.price_rec, 0)) * 100, 1)`;

    if (minDiscountNum > 0) {
      whereClause += ` AND ${discountCalc} >= $${paramIndex}`;
      params.push(minDiscountNum);
      paramIndex++;
    }

    params.push(limitNum, offsetNum);

    const { rows: specials } = await pool.query(`
      SELECT
        pv.id as variant_id,
        sp.id as product_id,
        sp.name_raw as product_name,
        sp.brand_name_raw as brand_name,
        sp.category_raw as category,
        sp.image_url,
        d.id as dispensary_id,
        d.name as dispensary_name,
        d.city,
        d.state,
        pv.option,
        pv.price_rec,
        pv.price_rec_special,
        ${discountCalc} as discount_percent,
        pv.quantity,
        pv.last_seen_at
      FROM product_variants pv
      JOIN store_products sp ON sp.id = pv.store_product_id
      JOIN dispensaries d ON d.id = pv.dispensary_id
      ${whereClause}
        AND pv.price_rec_special IS NOT NULL
        AND pv.price_rec_special < pv.price_rec
      ORDER BY ${discountCalc} DESC
      LIMIT $${paramIndex} OFFSET $${paramIndex + 1}
    `, params);

    // Get count
    const countParams = params.slice(0, -2);
    const { rows: countRows } = await pool.query(`
      SELECT COUNT(*) as total
      FROM product_variants pv
      JOIN store_products sp ON sp.id = pv.store_product_id
      JOIN dispensaries d ON d.id = pv.dispensary_id
      ${whereClause}
        AND pv.price_rec_special IS NOT NULL
        AND pv.price_rec_special < pv.price_rec
    `, countParams);

    res.json({
      specials: specials.map((s: any) => ({
        ...s,
        price_rec: s.price_rec ? parseFloat(s.price_rec) : null,
        price_rec_special: s.price_rec_special ? parseFloat(s.price_rec_special) : null,
        discount_percent: s.discount_percent ? parseFloat(s.discount_percent) : null,
      })),
      pagination: {
        total: parseInt(countRows[0]?.total || '0', 10),
        limit: limitNum,
        offset: offsetNum,
        has_more: offsetNum + specials.length < parseInt(countRows[0]?.total || '0', 10),
      },
    });
  } catch (error: any) {
    console.error('Specials error:', error);
    res.status(500).json({ error: 'Failed to fetch specials', message: error.message });
  }
});

// ============================================================
// PRICE COMPARISON
// ============================================================

/**
 * GET /api/price-analytics/compare
 * Compare prices for a product across stores
 *
 * Query params:
 * - name: Product name to search
 * - option: Variant option (e.g., "1g", "3.5g")
 * - state: Filter by state
 * - limit: Max results (default: 50)
 */
router.get('/compare', async (req: Request, res: Response) => {
  try {
    const { name, option, state, limit = '50' } = req.query;

    if (!name) {
      return res.status(400).json({ error: 'Product name is required' });
    }

    const limitNum = Math.min(parseInt(limit as string, 10) || 50, 200);

    let whereClause = `WHERE sp.name_raw ILIKE $1 AND pv.in_stock = TRUE`;
    const params: any[] = [`%${name}%`];
    let paramIndex = 2;

    if (option) {
      whereClause += ` AND pv.option = $${paramIndex}`;
      params.push(option);
      paramIndex++;
    }

    if (state) {
      whereClause += ` AND d.state = $${paramIndex}`;
      params.push(state);
      paramIndex++;
    }

    params.push(limitNum);

    const { rows } = await pool.query(`
      SELECT
        sp.id as product_id,
        sp.name_raw as product_name,
        sp.brand_name_raw as brand_name,
        sp.category_raw as category,
        sp.image_url,
        d.id as dispensary_id,
        d.name as dispensary_name,
        d.city,
        d.state,
        pv.option,
        pv.price_rec,
        pv.price_rec_special,
        pv.is_on_special,
        pv.quantity,
        COALESCE(pv.price_rec_special, pv.price_rec) as effective_price,
        RANK() OVER (PARTITION BY pv.option ORDER BY COALESCE(pv.price_rec_special, pv.price_rec) ASC) as price_rank
      FROM product_variants pv
      JOIN store_products sp ON sp.id = pv.store_product_id
      JOIN dispensaries d ON d.id = pv.dispensary_id
      ${whereClause}
        AND (pv.price_rec IS NOT NULL OR pv.price_rec_special IS NOT NULL)
      ORDER BY pv.option, effective_price ASC
      LIMIT $${paramIndex}
    `, params);

    // Group by option
    const byOption: Record<string, any[]> = {};
    for (const row of rows) {
      if (!byOption[row.option]) {
        byOption[row.option] = [];
      }
      byOption[row.option].push({
        product_id: row.product_id,
        product_name: row.product_name,
        brand_name: row.brand_name,
        category: row.category,
        image_url: row.image_url,
        dispensary_id: row.dispensary_id,
        dispensary_name: row.dispensary_name,
        city: row.city,
        state: row.state,
        price_rec: row.price_rec ? parseFloat(row.price_rec) : null,
        price_rec_special: row.price_rec_special ? parseFloat(row.price_rec_special) : null,
        effective_price: row.effective_price ? parseFloat(row.effective_price) : null,
        is_on_special: row.is_on_special,
        quantity: row.quantity,
        price_rank: parseInt(row.price_rank),
      });
    }

    // Calculate stats per option
    const stats: Record<string, any> = {};
    for (const [opt, items] of Object.entries(byOption)) {
      const prices = items.map((i: any) => i.effective_price).filter((p: any) => p !== null);
      stats[opt] = {
        count: items.length,
        min_price: Math.min(...prices),
        max_price: Math.max(...prices),
        avg_price: prices.reduce((a: number, b: number) => a + b, 0) / prices.length,
        cheapest_store: items[0]?.dispensary_name,
        on_special_count: items.filter((i: any) => i.is_on_special).length,
      };
    }

    res.json({
      search_term: name,
      results: byOption,
      stats,
      options: Object.keys(byOption),
    });
  } catch (error: any) {
    console.error('Price compare error:', error);
    res.status(500).json({ error: 'Failed to compare prices', message: error.message });
  }
});

// ============================================================
// MARKET SUMMARY
// ============================================================

/**
 * GET /api/price-analytics/market-summary
 * Get overall market analytics summary
 */
router.get('/market-summary', async (req: Request, res: Response) => {
  try {
    const { state } = req.query;

    let stateFilter = '';
    const params: any[] = [];
    if (state) {
      stateFilter = 'WHERE d.state = $1';
      params.push(state);
    }

    // Get variant counts
    const variantStats = await pool.query(`
      SELECT
        COUNT(DISTINCT pv.id) as total_variants,
        COUNT(DISTINCT pv.id) FILTER (WHERE pv.is_on_special) as on_special,
        COUNT(DISTINCT pv.id) FILTER (WHERE pv.in_stock) as in_stock,
        COUNT(DISTINCT pv.store_product_id) as total_products,
        COUNT(DISTINCT pv.dispensary_id) as total_stores
      FROM product_variants pv
      JOIN dispensaries d ON d.id = pv.dispensary_id
      ${stateFilter}
    `, params);

    // Get category breakdown
    const categoryStats = await pool.query(`
      SELECT
        sp.category_raw as category,
        COUNT(DISTINCT pv.id) as variant_count,
        AVG(COALESCE(pv.price_rec_special, pv.price_rec)) as avg_price,
        COUNT(DISTINCT pv.id) FILTER (WHERE pv.is_on_special) as on_special_count
      FROM product_variants pv
      JOIN store_products sp ON sp.id = pv.store_product_id
      JOIN dispensaries d ON d.id = pv.dispensary_id
      ${stateFilter}
      GROUP BY sp.category_raw
      ORDER BY variant_count DESC
      LIMIT 10
    `, params);

    // Get recent price changes (last 24h)
    const recentChanges = await pool.query(`
      SELECT COUNT(*) as price_changes_24h
      FROM product_variants pv
      JOIN dispensaries d ON d.id = pv.dispensary_id
      ${stateFilter ? stateFilter + ' AND' : 'WHERE'}
        pv.last_price_change_at >= NOW() - INTERVAL '24 hours'
    `, params);

    res.json({
      summary: {
        total_variants: parseInt(variantStats.rows[0]?.total_variants || '0'),
        on_special: parseInt(variantStats.rows[0]?.on_special || '0'),
        in_stock: parseInt(variantStats.rows[0]?.in_stock || '0'),
        total_products: parseInt(variantStats.rows[0]?.total_products || '0'),
        total_stores: parseInt(variantStats.rows[0]?.total_stores || '0'),
        price_changes_24h: parseInt(recentChanges.rows[0]?.price_changes_24h || '0'),
      },
      categories: categoryStats.rows.map((c: any) => ({
        category: c.category || 'Unknown',
        variant_count: parseInt(c.variant_count),
        avg_price: c.avg_price ? parseFloat(c.avg_price).toFixed(2) : null,
        on_special_count: parseInt(c.on_special_count),
      })),
    });
  } catch (error: any) {
    console.error('Market summary error:', error);
    res.status(500).json({ error: 'Failed to fetch market summary', message: error.message });
  }
});

export default router;
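As an illustration only, a client could consume the compare endpoint above like this. The function name is hypothetical and the /api/price-analytics mount path is assumed from the doc comments; the response fields match the handler.

// Hypothetical consumer of GET /api/price-analytics/compare (sketch, not part of the diff).
async function cheapestStoreFor(productName: string, option: string): Promise<string | null> {
  const qs = new URLSearchParams({ name: productName, option, limit: '50' });
  const res = await fetch(`/api/price-analytics/compare?${qs.toString()}`);
  const data = await res.json();
  // stats is keyed by variant option; cheapest_store comes from the lowest effective price.
  return data.stats[option]?.cheapest_store ?? null;
}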
@@ -1,11 +1,29 @@
import { Router } from 'express';
import { authMiddleware } from '../auth/middleware';
import { pool } from '../db/pool';
-import { getImageUrl } from '../utils/minio';

const router = Router();
router.use(authMiddleware);

+/**
+ * Convert local image path to proxy URL
+ * /images/products/... -> /img/products/...
+ */
+function getImageUrl(localPath: string): string {
+  if (!localPath) return '';
+  // If already a full URL, return as-is
+  if (localPath.startsWith('http')) return localPath;
+  // Convert /images/ path to /img/ proxy path
+  if (localPath.startsWith('/images/')) {
+    return '/img' + localPath.substring(7);
+  }
+  // Handle paths without leading slash
+  if (localPath.startsWith('images/')) {
+    return '/img/' + localPath.substring(7);
+  }
+  return '/img/' + localPath;
+}
+
// Freshness threshold: data older than this is considered stale
const STALE_THRESHOLD_HOURS = 4;

@@ -101,10 +119,27 @@ router.get('/', async (req, res) => {
    const sortDirection = (sort_order as string).toLowerCase() === 'asc' ? 'ASC' : 'DESC';

    let query = `
-     SELECT p.*, s.name as store_name, c.name as category_name
-     FROM products p
-     LEFT JOIN stores s ON p.store_id = s.id
-     LEFT JOIN categories c ON p.category_id = c.id
+     SELECT
+       p.id,
+       p.dispensary_id as store_id,
+       p.name_raw as name,
+       p.brand_name_raw as brand,
+       p.category_raw as category_name,
+       p.subcategory_raw as subcategory,
+       p.description,
+       p.price_rec as price,
+       p.thc_percent as thc_percentage,
+       p.cbd_percent as cbd_percentage,
+       p.strain_type,
+       p.primary_image_url as image_url,
+       p.stock_status,
+       p.stock_status = 'in_stock' as in_stock,
+       p.created_at,
+       p.updated_at,
+       p.last_seen_at,
+       d.name as store_name
+     FROM store_products p
+     LEFT JOIN dispensaries d ON p.dispensary_id = d.id
      WHERE 1=1
    `;
    const params: any[] = [];

@@ -112,61 +147,60 @@ router.get('/', async (req, res) => {

    // Store filter
    if (store_id) {
-     query += ` AND p.store_id = $${paramCount}`;
+     query += ` AND p.dispensary_id = $${paramCount}`;
      params.push(store_id);
      paramCount++;
    }

-   // Category filter
+   // Category filter (uses category name now)
    if (category_id) {
-     query += ` AND p.category_id = $${paramCount}`;
+     query += ` AND p.category_raw = $${paramCount}`;
      params.push(category_id);
      paramCount++;
    }

    // Stock filter
    if (in_stock !== undefined) {
-     query += ` AND p.in_stock = $${paramCount}`;
-     params.push(in_stock === 'true');
-     paramCount++;
+     const inStockVal = in_stock === 'true';
+     query += inStockVal ? ` AND p.stock_status = 'in_stock'` : ` AND p.stock_status != 'in_stock'`;
    }

    // Search filter
    if (search) {
-     query += ` AND (p.name ILIKE $${paramCount} OR p.brand ILIKE $${paramCount} OR p.description ILIKE $${paramCount})`;
+     query += ` AND (p.name_raw ILIKE $${paramCount} OR p.brand_name_raw ILIKE $${paramCount} OR p.description ILIKE $${paramCount})`;
      params.push(`%${search}%`);
      paramCount++;
    }

    // Brand filter
    if (brand) {
-     query += ` AND p.brand ILIKE $${paramCount}`;
+     query += ` AND p.brand_name_raw ILIKE $${paramCount}`;
      params.push(`%${brand}%`);
      paramCount++;
    }

    // Price range filter
    if (min_price) {
-     query += ` AND p.price >= $${paramCount}`;
+     query += ` AND p.price_rec >= $${paramCount}`;
      params.push(parseFloat(min_price as string));
      paramCount++;
    }

    if (max_price) {
-     query += ` AND p.price <= $${paramCount}`;
+     query += ` AND p.price_rec <= $${paramCount}`;
      params.push(parseFloat(max_price as string));
      paramCount++;
    }

    // THC range filter
    if (min_thc) {
-     query += ` AND p.thc_percentage >= $${paramCount}`;
+     query += ` AND p.thc_percent >= $${paramCount}`;
      params.push(parseFloat(min_thc as string));
      paramCount++;
    }

    if (max_thc) {
-     query += ` AND p.thc_percentage <= $${paramCount}`;
+     query += ` AND p.thc_percent <= $${paramCount}`;
      params.push(parseFloat(max_thc as string));
      paramCount++;
    }

@@ -199,60 +233,59 @@ router.get('/', async (req, res) => {
    }

    // Get total count (reuse same filters)
-   let countQuery = `SELECT COUNT(*) FROM products p WHERE 1=1`;
+   let countQuery = `SELECT COUNT(*) FROM store_products p WHERE 1=1`;
    const countParams: any[] = [];
    let countParamCount = 1;

    if (store_id) {
-     countQuery += ` AND p.store_id = $${countParamCount}`;
+     countQuery += ` AND p.dispensary_id = $${countParamCount}`;
      countParams.push(store_id);
      countParamCount++;
    }

    if (category_id) {
-     countQuery += ` AND p.category_id = $${countParamCount}`;
+     countQuery += ` AND p.category_raw = $${countParamCount}`;
      countParams.push(category_id);
      countParamCount++;
    }

    if (in_stock !== undefined) {
-     countQuery += ` AND p.in_stock = $${countParamCount}`;
+     const inStockVal = in_stock === 'true';
const inStockVal = in_stock === 'true';
|
||||||
countParams.push(in_stock === 'true');
|
countQuery += inStockVal ? ` AND p.stock_status = 'in_stock'` : ` AND p.stock_status != 'in_stock'`;
|
||||||
countParamCount++;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (search) {
|
if (search) {
|
||||||
countQuery += ` AND (p.name ILIKE $${countParamCount} OR p.brand ILIKE $${countParamCount} OR p.description ILIKE $${countParamCount})`;
|
countQuery += ` AND (p.name_raw ILIKE $${countParamCount} OR p.brand_name_raw ILIKE $${countParamCount} OR p.description ILIKE $${countParamCount})`;
|
||||||
countParams.push(`%${search}%`);
|
countParams.push(`%${search}%`);
|
||||||
countParamCount++;
|
countParamCount++;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (brand) {
|
if (brand) {
|
||||||
countQuery += ` AND p.brand ILIKE $${countParamCount}`;
|
countQuery += ` AND p.brand_name_raw ILIKE $${countParamCount}`;
|
||||||
countParams.push(`%${brand}%`);
|
countParams.push(`%${brand}%`);
|
||||||
countParamCount++;
|
countParamCount++;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (min_price) {
|
if (min_price) {
|
||||||
countQuery += ` AND p.price >= $${countParamCount}`;
|
countQuery += ` AND p.price_rec >= $${countParamCount}`;
|
||||||
countParams.push(parseFloat(min_price as string));
|
countParams.push(parseFloat(min_price as string));
|
||||||
countParamCount++;
|
countParamCount++;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (max_price) {
|
if (max_price) {
|
||||||
countQuery += ` AND p.price <= $${countParamCount}`;
|
countQuery += ` AND p.price_rec <= $${countParamCount}`;
|
||||||
countParams.push(parseFloat(max_price as string));
|
countParams.push(parseFloat(max_price as string));
|
||||||
countParamCount++;
|
countParamCount++;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (min_thc) {
|
if (min_thc) {
|
||||||
countQuery += ` AND p.thc_percentage >= $${countParamCount}`;
|
countQuery += ` AND p.thc_percent >= $${countParamCount}`;
|
||||||
countParams.push(parseFloat(min_thc as string));
|
countParams.push(parseFloat(min_thc as string));
|
||||||
countParamCount++;
|
countParamCount++;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (max_thc) {
|
if (max_thc) {
|
||||||
countQuery += ` AND p.thc_percentage <= $${countParamCount}`;
|
countQuery += ` AND p.thc_percent <= $${countParamCount}`;
|
||||||
countParams.push(parseFloat(max_thc as string));
|
countParams.push(parseFloat(max_thc as string));
|
||||||
countParamCount++;
|
countParamCount++;
|
||||||
}
|
}
|
||||||
@@ -271,7 +304,7 @@ router.get('/', async (req, res) => {
|
|||||||
|
|
||||||
if (store_id) {
|
if (store_id) {
|
||||||
const storeResult = await pool.query(
|
const storeResult = await pool.query(
|
||||||
'SELECT id, name, last_scraped_at FROM stores WHERE id = $1',
|
'SELECT id, name, last_crawled_at as last_scraped_at FROM dispensaries WHERE id = $1',
|
||||||
[store_id]
|
[store_id]
|
||||||
);
|
);
|
||||||
if (storeResult.rows.length > 0) {
|
if (storeResult.rows.length > 0) {
|
||||||
@@ -322,10 +355,27 @@ router.get('/:id', async (req, res) => {
|
|||||||
const { fields } = req.query;
|
const { fields } = req.query;
|
||||||
|
|
||||||
const result = await pool.query(`
|
const result = await pool.query(`
|
||||||
SELECT p.*, s.name as store_name, c.name as category_name
|
SELECT
|
||||||
FROM products p
|
p.id,
|
||||||
LEFT JOIN stores s ON p.store_id = s.id
|
p.dispensary_id as store_id,
|
||||||
LEFT JOIN categories c ON p.category_id = c.id
|
p.name_raw as name,
|
||||||
|
p.brand_name_raw as brand,
|
||||||
|
p.category_raw as category_name,
|
||||||
|
p.subcategory_raw as subcategory,
|
||||||
|
p.description,
|
||||||
|
p.price_rec as price,
|
||||||
|
p.thc_percent as thc_percentage,
|
||||||
|
p.cbd_percent as cbd_percentage,
|
||||||
|
p.strain_type,
|
||||||
|
p.primary_image_url as image_url,
|
||||||
|
p.stock_status,
|
||||||
|
p.stock_status = 'in_stock' as in_stock,
|
||||||
|
p.created_at,
|
||||||
|
p.updated_at,
|
||||||
|
p.last_seen_at,
|
||||||
|
d.name as store_name
|
||||||
|
FROM store_products p
|
||||||
|
LEFT JOIN dispensaries d ON p.dispensary_id = d.id
|
||||||
WHERE p.id = $1
|
WHERE p.id = $1
|
||||||
`, [id]);
|
`, [id]);
|
||||||
|
|
||||||
@@ -359,18 +409,18 @@ router.get('/meta/brands', async (req, res) => {
|
|||||||
const { store_id } = req.query;
|
const { store_id } = req.query;
|
||||||
|
|
||||||
let query = `
|
let query = `
|
||||||
SELECT DISTINCT brand
|
SELECT DISTINCT brand_name_raw as brand
|
||||||
FROM products
|
FROM store_products
|
||||||
WHERE brand IS NOT NULL AND brand != ''
|
WHERE brand_name_raw IS NOT NULL AND brand_name_raw != ''
|
||||||
`;
|
`;
|
||||||
const params: any[] = [];
|
const params: any[] = [];
|
||||||
|
|
||||||
if (store_id) {
|
if (store_id) {
|
||||||
query += ' AND store_id = $1';
|
query += ' AND dispensary_id = $1';
|
||||||
params.push(store_id);
|
params.push(store_id);
|
||||||
}
|
}
|
||||||
|
|
||||||
query += ' ORDER BY brand';
|
query += ' ORDER BY brand_name_raw';
|
||||||
|
|
||||||
const result = await pool.query(query, params);
|
const result = await pool.query(query, params);
|
||||||
const brands = result.rows.map((row: { brand: string }) => row.brand);
|
const brands = result.rows.map((row: { brand: string }) => row.brand);
|
||||||
@@ -389,16 +439,16 @@ router.get('/meta/price-range', async (req, res) => {
|
|||||||
|
|
||||||
let query = `
|
let query = `
|
||||||
SELECT
|
SELECT
|
||||||
MIN(price) as min_price,
|
MIN(price_rec) as min_price,
|
||||||
MAX(price) as max_price,
|
MAX(price_rec) as max_price,
|
||||||
AVG(price) as avg_price
|
AVG(price_rec) as avg_price
|
||||||
FROM products
|
FROM store_products
|
||||||
WHERE price IS NOT NULL
|
WHERE price_rec IS NOT NULL
|
||||||
`;
|
`;
|
||||||
const params: any[] = [];
|
const params: any[] = [];
|
||||||
|
|
||||||
if (store_id) {
|
if (store_id) {
|
||||||
query += ' AND store_id = $1';
|
query += ' AND dispensary_id = $1';
|
||||||
params.push(store_id);
|
params.push(store_id);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -415,4 +465,133 @@ router.get('/meta/price-range', async (req, res) => {
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Get product stats - inventory movement, price history, etc.
|
||||||
|
router.get('/:id/stats', async (req, res) => {
|
||||||
|
try {
|
||||||
|
const { id } = req.params;
|
||||||
|
|
||||||
|
// Get current product info
|
||||||
|
const productResult = await pool.query(`
|
||||||
|
SELECT id, name_raw as name, stock_quantity, total_quantity_available,
|
||||||
|
price_rec, price_rec_special, price_med, price_med_special,
|
||||||
|
first_seen_at, last_seen_at
|
||||||
|
FROM store_products
|
||||||
|
WHERE id = $1
|
||||||
|
`, [id]);
|
||||||
|
|
||||||
|
if (productResult.rows.length === 0) {
|
||||||
|
return res.status(404).json({ error: 'Product not found' });
|
||||||
|
}
|
||||||
|
|
||||||
|
const product = productResult.rows[0];
|
||||||
|
const currentQty = product.stock_quantity || product.total_quantity_available || 0;
|
||||||
|
const currentPrice = parseFloat(product.price_rec) || 0;
|
||||||
|
|
||||||
|
// Get snapshot history for the last 30 days
|
||||||
|
const historyResult = await pool.query(`
|
||||||
|
SELECT
|
||||||
|
DATE(crawled_at) as date,
|
||||||
|
AVG(COALESCE(stock_quantity, total_quantity_available, 0)) as avg_quantity,
|
||||||
|
MIN(COALESCE(stock_quantity, total_quantity_available, 0)) as min_quantity,
|
||||||
|
MAX(COALESCE(stock_quantity, total_quantity_available, 0)) as max_quantity,
|
||||||
|
AVG(price_rec) as avg_price,
|
||||||
|
MIN(price_rec) as min_price,
|
||||||
|
MAX(price_rec) as max_price,
|
||||||
|
COUNT(*) as snapshot_count
|
||||||
|
FROM store_product_snapshots
|
||||||
|
WHERE store_product_id = $1
|
||||||
|
AND crawled_at >= NOW() - INTERVAL '30 days'
|
||||||
|
GROUP BY DATE(crawled_at)
|
||||||
|
ORDER BY date DESC
|
||||||
|
`, [id]);
|
||||||
|
|
||||||
|
// Calculate inventory movement stats
|
||||||
|
const history = historyResult.rows;
|
||||||
|
const today = history[0] || null;
|
||||||
|
const weekAgo = history.find((h: any) => {
|
||||||
|
const date = new Date(h.date);
|
||||||
|
const diff = (Date.now() - date.getTime()) / (1000 * 60 * 60 * 24);
|
||||||
|
return diff >= 6 && diff <= 8;
|
||||||
|
});
|
||||||
|
const monthAgo = history.find((h: any) => {
|
||||||
|
const date = new Date(h.date);
|
||||||
|
const diff = (Date.now() - date.getTime()) / (1000 * 60 * 60 * 24);
|
||||||
|
return diff >= 27 && diff <= 31;
|
||||||
|
});
|
||||||
|
|
||||||
|
// Inventory movement calculations
|
||||||
|
const inventoryStats = {
|
||||||
|
current: currentQty,
|
||||||
|
daily: today ? {
|
||||||
|
change: currentQty - (parseFloat(today.avg_quantity) || 0),
|
||||||
|
start: parseFloat(today.avg_quantity) || 0,
|
||||||
|
end: currentQty
|
||||||
|
} : null,
|
||||||
|
weekly: weekAgo ? {
|
||||||
|
change: currentQty - (parseFloat(weekAgo.avg_quantity) || 0),
|
||||||
|
start: parseFloat(weekAgo.avg_quantity) || 0,
|
||||||
|
end: currentQty,
|
||||||
|
percent_change: weekAgo.avg_quantity > 0
|
||||||
|
? ((currentQty - parseFloat(weekAgo.avg_quantity)) / parseFloat(weekAgo.avg_quantity) * 100).toFixed(1)
|
||||||
|
: null
|
||||||
|
} : null,
|
||||||
|
monthly: monthAgo ? {
|
||||||
|
change: currentQty - (parseFloat(monthAgo.avg_quantity) || 0),
|
||||||
|
start: parseFloat(monthAgo.avg_quantity) || 0,
|
||||||
|
end: currentQty,
|
||||||
|
percent_change: monthAgo.avg_quantity > 0
|
||||||
|
? ((currentQty - parseFloat(monthAgo.avg_quantity)) / parseFloat(monthAgo.avg_quantity) * 100).toFixed(1)
|
||||||
|
: null
|
||||||
|
} : null
|
||||||
|
};
|
||||||
|
|
||||||
|
// Price movement calculations
|
||||||
|
const priceStats = {
|
||||||
|
current: currentPrice,
|
||||||
|
weekly: weekAgo ? {
|
||||||
|
change: currentPrice - (parseFloat(weekAgo.avg_price) || 0),
|
||||||
|
start: parseFloat(weekAgo.avg_price) || 0,
|
||||||
|
end: currentPrice,
|
||||||
|
percent_change: weekAgo.avg_price > 0
|
||||||
|
? ((currentPrice - parseFloat(weekAgo.avg_price)) / parseFloat(weekAgo.avg_price) * 100).toFixed(1)
|
||||||
|
: null
|
||||||
|
} : null,
|
||||||
|
monthly: monthAgo ? {
|
||||||
|
change: currentPrice - (parseFloat(monthAgo.avg_price) || 0),
|
||||||
|
start: parseFloat(monthAgo.avg_price) || 0,
|
||||||
|
end: currentPrice,
|
||||||
|
percent_change: monthAgo.avg_price > 0
|
||||||
|
? ((currentPrice - parseFloat(monthAgo.avg_price)) / parseFloat(monthAgo.avg_price) * 100).toFixed(1)
|
||||||
|
: null
|
||||||
|
} : null
|
||||||
|
};
|
||||||
|
|
||||||
|
// Get total snapshots count
|
||||||
|
const snapshotCountResult = await pool.query(`
|
||||||
|
SELECT COUNT(*) as total_snapshots
|
||||||
|
FROM store_product_snapshots
|
||||||
|
WHERE store_product_id = $1
|
||||||
|
`, [id]);
|
||||||
|
|
||||||
|
res.json({
|
||||||
|
product_id: parseInt(id),
|
||||||
|
product_name: product.name,
|
||||||
|
first_seen: product.first_seen_at,
|
||||||
|
last_seen: product.last_seen_at,
|
||||||
|
total_snapshots: parseInt(snapshotCountResult.rows[0].total_snapshots),
|
||||||
|
inventory: inventoryStats,
|
||||||
|
price: priceStats,
|
||||||
|
history: history.slice(0, 30).map((h: any) => ({
|
||||||
|
date: h.date,
|
||||||
|
avg_quantity: parseFloat(h.avg_quantity) || 0,
|
||||||
|
avg_price: parseFloat(h.avg_price) || 0,
|
||||||
|
snapshots: parseInt(h.snapshot_count)
|
||||||
|
}))
|
||||||
|
});
|
||||||
|
} catch (error) {
|
||||||
|
console.error('Error fetching product stats:', error);
|
||||||
|
res.status(500).json({ error: 'Failed to fetch product stats' });
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
export default router;
|
export default router;
|
||||||
|
|||||||
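Note: the rewritten listing and count queries above build their WHERE clauses incrementally, appending a `$n` placeholder and pushing the matching value in the same step so the placeholder index and the params array never drift apart. A minimal sketch of that pattern (the `addFilter` helper is illustrative, not part of this repo):

  // Append one filter clause and its bound value; '?' marks where the placeholder goes.
  function addFilter(query: string, params: any[], clause: string, value: unknown): string {
    params.push(value);
    return query + ` AND ${clause.replace('?', `$${params.length}`)}`;
  }

  let q = `SELECT COUNT(*) FROM store_products p WHERE 1=1`;
  const params: any[] = [];
  q = addFilter(q, params, 'p.dispensary_id = ?', 42);
  q = addFilter(q, params, 'p.brand_name_raw ILIKE ?', '%brand%');
  // await pool.query(q, params);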
@@ -2,7 +2,7 @@ import { Router } from 'express';
 import { authMiddleware, requireRole } from '../auth/middleware';
 import { pool } from '../db/pool';
 import { testProxy, addProxy, addProxiesFromList } from '../services/proxy';
-import { createProxyTestJob, getProxyTestJob, getActiveProxyTestJob, cancelProxyTestJob } from '../services/proxyTestQueue';
+import { createProxyTestJob, getProxyTestJob, getActiveProxyTestJob, cancelProxyTestJob, ProxyTestMode } from '../services/proxyTestQueue';
 
 const router = Router();
 router.use(authMiddleware);
@@ -11,9 +11,10 @@ router.use(authMiddleware);
 router.get('/', async (req, res) => {
   try {
     const result = await pool.query(`
-      SELECT id, host, port, protocol, active, is_anonymous,
+      SELECT id, host, port, protocol, username, password, active, is_anonymous,
              last_tested_at, test_result, response_time_ms, created_at,
-             city, state, country, country_code, location_updated_at
+             city, state, country, country_code, location_updated_at,
+             COALESCE(max_connections, 1) as max_connections
       FROM proxies
       ORDER BY created_at DESC
     `);
@@ -166,13 +167,39 @@ router.post('/:id/test', requireRole('superadmin', 'admin'), async (req, res) =>
 });
 
 // Start proxy test job
+// Query params: mode=all|failed|inactive, concurrency=10
 router.post('/test-all', requireRole('superadmin', 'admin'), async (req, res) => {
   try {
-    const jobId = await createProxyTestJob();
-    res.json({ jobId, message: 'Proxy test job started' });
-  } catch (error) {
+    const mode = (req.query.mode as ProxyTestMode) || 'all';
+    const concurrency = parseInt(req.query.concurrency as string) || 10;
+
+    // Validate mode
+    if (!['all', 'failed', 'inactive'].includes(mode)) {
+      return res.status(400).json({ error: 'Invalid mode. Use: all, failed, or inactive' });
+    }
+
+    // Validate concurrency (1-50)
+    if (concurrency < 1 || concurrency > 50) {
+      return res.status(400).json({ error: 'Concurrency must be between 1 and 50' });
+    }
+
+    const { jobId, totalProxies } = await createProxyTestJob(mode, concurrency);
+    res.json({ jobId, total: totalProxies, mode, concurrency, message: `Proxy test job started (mode: ${mode}, concurrency: ${concurrency})` });
+  } catch (error: any) {
     console.error('Error starting proxy test job:', error);
-    res.status(500).json({ error: 'Failed to start proxy test job' });
+    res.status(500).json({ error: error.message || 'Failed to start proxy test job' });
+  }
+});
+
+// Convenience endpoint: Test only failed proxies
+router.post('/test-failed', requireRole('superadmin', 'admin'), async (req, res) => {
+  try {
+    const concurrency = parseInt(req.query.concurrency as string) || 10;
+    const { jobId, totalProxies } = await createProxyTestJob('failed', concurrency);
+    res.json({ jobId, total: totalProxies, mode: 'failed', concurrency, message: 'Retesting failed proxies...' });
+  } catch (error: any) {
+    console.error('Error starting failed proxy test:', error);
+    res.status(500).json({ error: error.message || 'Failed to start proxy test job' });
   }
 });
 
@@ -197,8 +224,8 @@ router.post('/test-job/:jobId/cancel', requireRole('superadmin', 'admin'), async
 router.put('/:id', requireRole('superadmin', 'admin'), async (req, res) => {
   try {
     const { id } = req.params;
-    const { host, port, protocol, username, password, active } = req.body;
+    const { host, port, protocol, username, password, active, max_connections } = req.body;
 
     const result = await pool.query(`
       UPDATE proxies
       SET host = COALESCE($1, host),
@@ -207,10 +234,11 @@ router.put('/:id', requireRole('superadmin', 'admin'), async (req, res) => {
           username = COALESCE($4, username),
           password = COALESCE($5, password),
          active = COALESCE($6, active),
+          max_connections = COALESCE($7, max_connections),
          updated_at = CURRENT_TIMESTAMP
-      WHERE id = $7
+      WHERE id = $8
      RETURNING *
-    `, [host, port, protocol, username, password, active, id]);
+    `, [host, port, protocol, username, password, active, max_connections, id]);
 
     if (result.rows.length === 0) {
       return res.status(404).json({ error: 'Proxy not found' });
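Note: with the change above, POST /test-all accepts `mode` (all | failed | inactive) and `concurrency` (1-50) as query parameters and returns the job id plus the number of proxies queued. A hedged client-side sketch (the /api/proxies mount path and the token handling are assumptions, not shown in this diff):

  async function retestFailedProxies(token: string) {
    const res = await fetch('/api/proxies/test-all?mode=failed&concurrency=20', {
      method: 'POST',
      headers: { Authorization: `Bearer ${token}` },
    });
    const { jobId, total, mode, concurrency } = await res.json();
    console.log(`job ${jobId}: testing ${total} proxies (mode=${mode}, concurrency=${concurrency})`);
    return jobId; // the /test-job/:jobId/cancel route shown above can stop it if needed
  }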
File diff suppressed because it is too large
@@ -92,9 +92,9 @@ router.get('/history', async (req, res) => {
         dcj.error_message,
         (
           SELECT COUNT(*)
-          FROM products p
-          WHERE p.dispensary_id = d.id
-            AND p.last_seen_at >= NOW() - INTERVAL '7 days'
+          FROM store_products sp
+          WHERE sp.dispensary_id = d.id
+            AND sp.last_seen_at >= NOW() - INTERVAL '7 days'
         ) as product_count
       FROM dispensary_crawl_jobs dcj
       JOIN dispensaries d ON d.id = dcj.dispensary_id
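Note: the corrected history query counts, per dispensary, how many store_products rows were seen in the last seven days via a correlated subquery. Roughly this shape, shown standalone for clarity (illustrative only):

  const recentProductCountSql = `
    SELECT d.id,
           (SELECT COUNT(*)
              FROM store_products sp
             WHERE sp.dispensary_id = d.id
               AND sp.last_seen_at >= NOW() - INTERVAL '7 days') AS product_count
      FROM dispensaries d
  `;
  // await pool.query(recentProductCountSql);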
@@ -10,6 +10,25 @@ import { getPool } from '../db/pool';
 import { authMiddleware } from '../auth/middleware';
 import { ContentValidator } from '../utils/ContentValidator';
 import { generateSeoPageWithClaude } from '../services/seoGenerator';
+import {
+  getAllSettings,
+  setSetting,
+  setMultipleSettings,
+  resetToDefaults,
+  ensureSettingsExist,
+  DEFAULT_SETTINGS,
+} from '../seo/settings';
+import {
+  applyTemplateVariables,
+  getTemplateForPageType,
+  generatePreview,
+  generatePageContent,
+  regenerateContent,
+  getAllTemplates,
+  validateTemplate,
+  MOCK_DATA,
+  PageType,
+} from '../seo/template-engine';
 
 const router = Router();
 
@@ -160,10 +179,12 @@ router.get('/pages', authMiddleware, async (req: Request, res: Response) => {
       const metricsResult = await pool.query(`
         SELECT COUNT(DISTINCT d.id) as dispensary_count,
                COUNT(DISTINCT p.id) as product_count,
-               COUNT(DISTINCT p.brand_name) as brand_count
+               COUNT(DISTINCT p.brand_name_raw) as brand_count
         FROM dispensaries d
-        LEFT JOIN dutchie_products p ON p.dispensary_id = d.id
+        LEFT JOIN store_products p ON p.dispensary_id = d.id
         WHERE d.state = $1
+          AND d.menu_type = 'dutchie'
+          AND d.platform_dispensary_id IS NOT NULL
       `, [stateCode]);
       const m = metricsResult.rows[0];
       metrics = {
@@ -199,11 +220,13 @@ router.post('/sync-state-pages', authMiddleware, async (req: Request, res: Respo
   try {
     const pool = getPool();
 
-    // Get all states that have dispensaries
+    // Get all states that have active/crawlable dispensaries
     const statesResult = await pool.query(`
       SELECT DISTINCT state, COUNT(*) as dispensary_count
       FROM dispensaries
       WHERE state IS NOT NULL AND state != ''
+        AND menu_type = 'dutchie'
+        AND platform_dispensary_id IS NOT NULL
       GROUP BY state
       HAVING COUNT(*) > 0
       ORDER BY state
@@ -245,6 +268,45 @@ router.post('/sync-state-pages', authMiddleware, async (req: Request, res: Respo
   }
 });
 
+/**
+ * GET /api/seo/state-metrics - Get all state metrics for SEO dashboard
+ */
+router.get('/state-metrics', authMiddleware, async (req: Request, res: Response) => {
+  try {
+    const pool = getPool();
+
+    const result = await pool.query(`
+      SELECT
+        d.state as state_code,
+        COALESCE(s.name, d.state) as state_name,
+        COUNT(DISTINCT d.id) as dispensary_count,
+        COUNT(DISTINCT sp.id) as product_count,
+        COUNT(DISTINCT sp.brand_name_raw) FILTER (WHERE sp.brand_name_raw IS NOT NULL) as brand_count
+      FROM dispensaries d
+      LEFT JOIN states s ON d.state = s.code
+      LEFT JOIN store_products sp ON sp.dispensary_id = d.id
+      WHERE d.state IS NOT NULL AND d.state != ''
+        AND d.menu_type = 'dutchie'
+        AND d.platform_dispensary_id IS NOT NULL
+      GROUP BY d.state, s.name
+      ORDER BY dispensary_count DESC
+    `);
+
+    const states = result.rows.map(row => ({
+      stateCode: row.state_code,
+      stateName: row.state_name || row.state_code,
+      dispensaryCount: parseInt(row.dispensary_count, 10) || 0,
+      productCount: parseInt(row.product_count, 10) || 0,
+      brandCount: parseInt(row.brand_count, 10) || 0,
+    }));
+
+    res.json({ states });
+  } catch (error: any) {
+    console.error('[SEO] Error fetching state metrics:', error.message);
+    res.status(500).json({ error: 'Failed to fetch state metrics' });
+  }
+});
+
 /**
  * GET /api/seo/state/:stateCode - State SEO data with metrics
  */
@@ -257,16 +319,20 @@ router.get('/state/:stateCode', async (req: Request, res: Response) => {
     const metricsResult = await pool.query(`
       SELECT COUNT(DISTINCT d.id) as dispensary_count,
              COUNT(DISTINCT p.id) as product_count,
-             COUNT(DISTINCT p.brand_name) as brand_count
+             COUNT(DISTINCT p.brand_name_raw) as brand_count
       FROM dispensaries d
-      LEFT JOIN dutchie_products p ON p.dispensary_id = d.id
-      WHERE d.state = $1`, [code]);
+      LEFT JOIN store_products p ON p.dispensary_id = d.id
+      WHERE d.state = $1
+        AND d.menu_type = 'dutchie'
+        AND d.platform_dispensary_id IS NOT NULL`, [code]);
 
     const brandsResult = await pool.query(`
-      SELECT brand_name, COUNT(*) as product_count
-      FROM dutchie_products p JOIN dispensaries d ON p.dispensary_id = d.id
-      WHERE d.state = $1 AND p.brand_name IS NOT NULL
-      GROUP BY brand_name ORDER BY product_count DESC LIMIT 10`, [code]);
+      SELECT brand_name_raw as brand_name, COUNT(*) as product_count
+      FROM store_products p JOIN dispensaries d ON p.dispensary_id = d.id
+      WHERE d.state = $1 AND p.brand_name_raw IS NOT NULL
+        AND d.menu_type = 'dutchie'
+        AND d.platform_dispensary_id IS NOT NULL
+      GROUP BY brand_name_raw ORDER BY product_count DESC LIMIT 10`, [code]);
 
     const metrics = metricsResult.rows[0];
     const response = ContentValidator.sanitizeContent({
@@ -359,4 +425,259 @@ router.get('/public/content', async (req: Request, res: Response) => {
   }
 });
 
+// ============================================================================
+// SEO Settings Endpoints
+// ============================================================================
+
+/**
+ * GET /api/seo/settings - Get all SEO settings
+ */
+router.get('/settings', authMiddleware, async (req: Request, res: Response) => {
+  try {
+    // Ensure settings exist on first access
+    await ensureSettingsExist();
+
+    const settings = await getAllSettings();
+    res.json({ settings });
+  } catch (error: any) {
+    console.error('[SEO] Error fetching settings:', error.message);
+    res.status(500).json({ error: 'Failed to fetch SEO settings' });
+  }
+});
+
+/**
+ * POST /api/seo/settings - Save a single setting
+ */
+router.post('/settings', authMiddleware, async (req: Request, res: Response) => {
+  try {
+    const { key, value } = req.body;
+
+    if (!key || typeof key !== 'string') {
+      return res.status(400).json({ error: 'key is required' });
+    }
+
+    if (value === undefined) {
+      return res.status(400).json({ error: 'value is required' });
+    }
+
+    await setSetting(key, value);
+
+    res.json({ success: true, key, value });
+  } catch (error: any) {
+    console.error('[SEO] Error saving setting:', error.message);
+    res.status(500).json({ error: 'Failed to save SEO setting' });
+  }
+});
+
+/**
+ * POST /api/seo/settings/bulk - Save multiple settings at once
+ */
+router.post('/settings/bulk', authMiddleware, async (req: Request, res: Response) => {
+  try {
+    const { settings } = req.body;
+
+    if (!settings || typeof settings !== 'object') {
+      return res.status(400).json({ error: 'settings object is required' });
+    }
+
+    await setMultipleSettings(settings);
+
+    res.json({ success: true, count: Object.keys(settings).length });
+  } catch (error: any) {
+    console.error('[SEO] Error saving bulk settings:', error.message);
+    res.status(500).json({ error: 'Failed to save SEO settings' });
+  }
+});
+
+/**
+ * POST /api/seo/settings/reset - Reset all settings to defaults
+ */
+router.post('/settings/reset', authMiddleware, async (req: Request, res: Response) => {
+  try {
+    const settings = await resetToDefaults();
+
+    res.json({
+      success: true,
+      message: 'Settings reset to defaults',
+      settings,
+    });
+  } catch (error: any) {
+    console.error('[SEO] Error resetting settings:', error.message);
+    res.status(500).json({ error: 'Failed to reset SEO settings' });
+  }
+});
+
+/**
+ * GET /api/seo/settings/defaults - Get default settings (without modifying DB)
+ */
+router.get('/settings/defaults', authMiddleware, async (req: Request, res: Response) => {
+  res.json({ settings: DEFAULT_SETTINGS });
+});
+
+/**
+ * GET /api/seo/settings/preview - Preview merged prompt with sample variables
+ */
+router.post('/settings/preview', authMiddleware, async (req: Request, res: Response) => {
+  try {
+    const { template, variables } = req.body;
+
+    if (!template || typeof template !== 'string') {
+      return res.status(400).json({ error: 'template is required' });
+    }
+
+    // Sample variables for preview
+    const sampleVariables: Record<string, string> = {
+      page_type: 'state',
+      subject: 'Arizona Dispensaries',
+      focus_areas: 'local stores, product variety, pricing',
+      tone: 'informational',
+      length: 'medium',
+      state_name: 'Arizona',
+      state_code: 'AZ',
+      state_code_lower: 'az',
+      dispensary_count: '150',
+      improvement_areas: 'SEO keywords, local relevance',
+      ...variables,
+    };
+
+    let preview = template;
+    for (const [key, value] of Object.entries(sampleVariables)) {
+      preview = preview.replace(new RegExp(`{{${key}}}`, 'g'), value);
+    }
+
+    res.json({ preview, variables: sampleVariables });
+  } catch (error: any) {
+    console.error('[SEO] Error generating preview:', error.message);
+    res.status(500).json({ error: 'Failed to generate preview' });
+  }
+});
+
+// ============================================================================
+// Template Library Endpoints
+// ============================================================================
+
+/**
+ * GET /api/seo/templates - Get all templates with metadata
+ */
+router.get('/templates', authMiddleware, async (req: Request, res: Response) => {
+  try {
+    const templates = await getAllTemplates();
+    res.json({ templates });
+  } catch (error: any) {
+    console.error('[SEO] Error fetching templates:', error.message);
+    res.status(500).json({ error: 'Failed to fetch templates' });
+  }
+});
+
+/**
+ * POST /api/seo/templates/preview - Preview a template with mock data by page type
+ */
+router.post('/templates/preview', authMiddleware, async (req: Request, res: Response) => {
+  try {
+    const { pageType, customTemplate } = req.body;
+
+    if (!pageType || typeof pageType !== 'string') {
+      return res.status(400).json({ error: 'pageType is required' });
+    }
+
+    const result = await generatePreview(pageType, customTemplate);
+    res.json(result);
+  } catch (error: any) {
+    console.error('[SEO] Error generating template preview:', error.message);
+    res.status(500).json({ error: 'Failed to generate template preview' });
+  }
+});
+
+/**
+ * POST /api/seo/templates/validate - Validate a template string
+ */
+router.post('/templates/validate', authMiddleware, async (req: Request, res: Response) => {
+  try {
+    const { template } = req.body;
+
+    if (!template || typeof template !== 'string') {
+      return res.status(400).json({ error: 'template is required' });
+    }
+
+    const validation = validateTemplate(template);
+    res.json(validation);
+  } catch (error: any) {
+    console.error('[SEO] Error validating template:', error.message);
+    res.status(500).json({ error: 'Failed to validate template' });
+  }
+});
+
+/**
+ * POST /api/seo/templates/generate - Generate content using a template
+ */
+router.post('/templates/generate', authMiddleware, async (req: Request, res: Response) => {
+  try {
+    const { pageType, data } = req.body;
+
+    if (!pageType || typeof pageType !== 'string') {
+      return res.status(400).json({ error: 'pageType is required' });
+    }
+
+    if (!data || typeof data !== 'object') {
+      return res.status(400).json({ error: 'data object is required' });
+    }
+
+    const result = await generatePageContent(pageType, data);
+    res.json(result);
+  } catch (error: any) {
+    console.error('[SEO] Error generating from template:', error.message);
+    res.status(500).json({ error: 'Failed to generate content from template' });
+  }
+});
+
+/**
+ * POST /api/seo/templates/regenerate - Regenerate content with improvements
+ */
+router.post('/templates/regenerate', authMiddleware, async (req: Request, res: Response) => {
+  try {
+    const { pageType, originalContent, newData, improvementAreas } = req.body;
+
+    if (!pageType || typeof pageType !== 'string') {
+      return res.status(400).json({ error: 'pageType is required' });
+    }
+
+    if (!originalContent || typeof originalContent !== 'string') {
+      return res.status(400).json({ error: 'originalContent is required' });
+    }
+
+    const result = await regenerateContent(
+      pageType,
+      originalContent,
+      newData || {},
+      improvementAreas
+    );
+
+    res.json(result);
+  } catch (error: any) {
+    console.error('[SEO] Error regenerating content:', error.message);
+    res.status(500).json({ error: 'Failed to regenerate content' });
+  }
+});
+
+/**
+ * GET /api/seo/templates/variables/:pageType - Get available variables for a page type
+ */
+router.get('/templates/variables/:pageType', authMiddleware, async (req: Request, res: Response) => {
+  try {
+    const { pageType } = req.params;
+    const normalizedType = (pageType?.toLowerCase().trim() || 'state') as PageType;
+
+    const mockData = MOCK_DATA[normalizedType] || MOCK_DATA.state;
+
+    res.json({
+      pageType: normalizedType,
+      variables: Object.keys(mockData),
+      sampleValues: mockData,
+    });
+  } catch (error: any) {
+    console.error('[SEO] Error fetching template variables:', error.message);
+    res.status(500).json({ error: 'Failed to fetch template variables' });
+  }
+});
+
 export default router;
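Note: the new /settings/preview handler above substitutes `{{variable}}` tokens with a global regex per key. The same logic, extracted into a standalone sketch for clarity (illustrative only):

  function renderTemplate(template: string, vars: Record<string, string>): string {
    let out = template;
    for (const [key, value] of Object.entries(vars)) {
      // Keys are assumed to be plain identifiers; keys containing regex metacharacters would need escaping.
      out = out.replace(new RegExp(`{{${key}}}`, 'g'), value);
    }
    return out;
  }

  renderTemplate('Dispensaries in {{state_name}} ({{state_code}})', { state_name: 'Arizona', state_code: 'AZ' });
  // => 'Dispensaries in Arizona (AZ)'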
@@ -78,6 +78,60 @@ router.put('/:key', requireRole('superadmin', 'admin'), async (req, res) => {
   }
 });
 
+// Test AI provider connection
+router.post('/test-ai', requireRole('superadmin', 'admin'), async (req, res) => {
+  try {
+    const { provider, apiKey } = req.body;
+
+    if (!provider || !apiKey) {
+      return res.status(400).json({ success: false, error: 'Provider and API key required' });
+    }
+
+    if (provider === 'anthropic') {
+      // Test Anthropic API
+      const response = await fetch('https://api.anthropic.com/v1/messages', {
+        method: 'POST',
+        headers: {
+          'Content-Type': 'application/json',
+          'x-api-key': apiKey,
+          'anthropic-version': '2023-06-01'
+        },
+        body: JSON.stringify({
+          model: 'claude-3-haiku-20240307',
+          max_tokens: 10,
+          messages: [{ role: 'user', content: 'Hi' }]
+        })
+      });
+
+      if (response.ok) {
+        res.json({ success: true, model: 'claude-3-haiku-20240307' });
+      } else {
+        const error = await response.json().catch(() => ({ error: { message: 'Unknown error' } }));
+        res.json({ success: false, error: error.error?.message || 'Invalid API key' });
+      }
+    } else if (provider === 'openai') {
+      // Test OpenAI API
+      const response = await fetch('https://api.openai.com/v1/models', {
+        headers: {
+          'Authorization': `Bearer ${apiKey}`
+        }
+      });
+
+      if (response.ok) {
+        res.json({ success: true, model: 'gpt-4' });
+      } else {
+        const error = await response.json().catch(() => ({ error: { message: 'Unknown error' } }));
+        res.json({ success: false, error: error.error?.message || 'Invalid API key' });
+      }
+    } else {
+      res.status(400).json({ success: false, error: 'Unknown provider' });
+    }
+  } catch (error: any) {
+    console.error('Error testing AI connection:', error);
+    res.json({ success: false, error: error.message || 'Connection failed' });
+  }
+});
+
 // Update multiple settings at once
 router.put('/', requireRole('superadmin', 'admin'), async (req, res) => {
   try {
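Note: the new test-ai endpoint probes the selected provider with a minimal request and reports success or failure without persisting the key. A hedged example of calling it from an admin client (the /api/settings mount path is an assumption; only the route file itself appears in this diff):

  async function testAiKey(token: string, apiKey: string) {
    const res = await fetch('/api/settings/test-ai', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json', Authorization: `Bearer ${token}` },
      body: JSON.stringify({ provider: 'anthropic', apiKey }),
    });
    return res.json(); // { success, model } on success, { success: false, error } otherwise
  }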
@@ -70,7 +70,7 @@ function detectProvider(menuUrl: string | null): string {
|
|||||||
// Get all stores (from dispensaries table)
|
// Get all stores (from dispensaries table)
|
||||||
router.get('/', async (req, res) => {
|
router.get('/', async (req, res) => {
|
||||||
try {
|
try {
|
||||||
const { city, state, menu_type } = req.query;
|
const { city, state, menu_type, crawl_enabled, dutchie_verified } = req.query;
|
||||||
|
|
||||||
let query = `
|
let query = `
|
||||||
SELECT
|
SELECT
|
||||||
@@ -79,18 +79,36 @@ router.get('/', async (req, res) => {
|
|||||||
slug,
|
slug,
|
||||||
city,
|
city,
|
||||||
state,
|
state,
|
||||||
address,
|
address1,
|
||||||
zip,
|
address2,
|
||||||
|
zipcode,
|
||||||
phone,
|
phone,
|
||||||
website,
|
website,
|
||||||
|
email,
|
||||||
latitude,
|
latitude,
|
||||||
longitude,
|
longitude,
|
||||||
|
timezone,
|
||||||
menu_url,
|
menu_url,
|
||||||
menu_type,
|
menu_type,
|
||||||
platform,
|
platform,
|
||||||
platform_dispensary_id,
|
platform_dispensary_id,
|
||||||
|
c_name,
|
||||||
|
chain_slug,
|
||||||
|
enterprise_id,
|
||||||
|
description,
|
||||||
|
logo_image,
|
||||||
|
banner_image,
|
||||||
|
offer_pickup,
|
||||||
|
offer_delivery,
|
||||||
|
offer_curbside_pickup,
|
||||||
|
is_medical,
|
||||||
|
is_recreational,
|
||||||
|
status,
|
||||||
|
country,
|
||||||
product_count,
|
product_count,
|
||||||
last_crawl_at,
|
last_crawl_at,
|
||||||
|
crawl_enabled,
|
||||||
|
dutchie_verified,
|
||||||
created_at,
|
created_at,
|
||||||
updated_at
|
updated_at
|
||||||
FROM dispensaries
|
FROM dispensaries
|
||||||
@@ -99,21 +117,45 @@ router.get('/', async (req, res) => {
|
|||||||
const params: any[] = [];
|
const params: any[] = [];
|
||||||
const conditions: string[] = [];
|
const conditions: string[] = [];
|
||||||
|
|
||||||
|
// Filter by city (partial match)
|
||||||
if (city) {
|
if (city) {
|
||||||
conditions.push(`city ILIKE $${params.length + 1}`);
|
conditions.push(`city ILIKE $${params.length + 1}`);
|
||||||
params.push(city);
|
params.push(`%${city}%`);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Filter by state
|
||||||
if (state) {
|
if (state) {
|
||||||
conditions.push(`state = $${params.length + 1}`);
|
conditions.push(`state = $${params.length + 1}`);
|
||||||
params.push(state);
|
params.push(state);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Filter by menu_type
|
||||||
if (menu_type) {
|
if (menu_type) {
|
||||||
conditions.push(`menu_type = $${params.length + 1}`);
|
conditions.push(`menu_type = $${params.length + 1}`);
|
||||||
params.push(menu_type);
|
params.push(menu_type);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Filter by crawl_enabled - defaults to showing only enabled
|
||||||
|
if (crawl_enabled === 'false' || crawl_enabled === '0') {
|
||||||
|
// Explicitly show disabled only
|
||||||
|
conditions.push(`(crawl_enabled = false OR crawl_enabled IS NULL)`);
|
||||||
|
} else if (crawl_enabled === 'all') {
|
||||||
|
// Show all (no filter)
|
||||||
|
} else {
|
||||||
|
// Default: show only enabled
|
||||||
|
conditions.push(`crawl_enabled = true`);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Filter by dutchie_verified
|
||||||
|
if (dutchie_verified !== undefined) {
|
||||||
|
const verified = dutchie_verified === 'true' || dutchie_verified === '1';
|
||||||
|
if (verified) {
|
||||||
|
conditions.push(`dutchie_verified = true`);
|
||||||
|
} else {
|
||||||
|
conditions.push(`(dutchie_verified = false OR dutchie_verified IS NULL)`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (conditions.length > 0) {
|
if (conditions.length > 0) {
|
||||||
query += ` WHERE ${conditions.join(' AND ')}`;
|
query += ` WHERE ${conditions.join(' AND ')}`;
|
||||||
}
|
}
|
||||||
@@ -129,7 +171,7 @@ router.get('/', async (req, res) => {
|
|||||||
...calculateFreshness(row.last_crawl_at)
|
...calculateFreshness(row.last_crawl_at)
|
||||||
}));
|
}));
|
||||||
|
|
||||||
res.json({ stores });
|
res.json({ stores, total: result.rowCount });
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error('Error fetching stores:', error);
|
console.error('Error fetching stores:', error);
|
||||||
res.status(500).json({ error: 'Failed to fetch stores' });
|
res.status(500).json({ error: 'Failed to fetch stores' });
|
||||||
@@ -148,18 +190,33 @@ router.get('/:id', async (req, res) => {
|
|||||||
slug,
|
slug,
|
||||||
city,
|
city,
|
||||||
state,
|
state,
|
||||||
address,
|
address1,
|
||||||
zip,
|
address2,
|
||||||
|
zipcode,
|
||||||
phone,
|
phone,
|
||||||
website,
|
website,
|
||||||
|
email,
|
||||||
dba_name,
|
dba_name,
|
||||||
company_name,
|
|
||||||
latitude,
|
latitude,
|
||||||
longitude,
|
longitude,
|
||||||
|
timezone,
|
||||||
menu_url,
|
menu_url,
|
||||||
menu_type,
|
menu_type,
|
||||||
platform,
|
platform,
|
||||||
platform_dispensary_id,
|
platform_dispensary_id,
|
||||||
|
c_name,
|
||||||
|
chain_slug,
|
||||||
|
enterprise_id,
|
||||||
|
description,
|
||||||
|
logo_image,
|
||||||
|
banner_image,
|
||||||
|
offer_pickup,
|
||||||
|
offer_delivery,
|
||||||
|
offer_curbside_pickup,
|
||||||
|
is_medical,
|
||||||
|
is_recreational,
|
||||||
|
status,
|
||||||
|
country,
|
||||||
product_count,
|
product_count,
|
||||||
last_crawl_at,
|
last_crawl_at,
|
||||||
raw_metadata,
|
raw_metadata,
|
||||||
@@ -203,16 +260,32 @@ router.post('/', requireRole('superadmin', 'admin'), async (req, res) => {
|
|||||||
slug,
|
slug,
|
||||||
city,
|
city,
|
||||||
state,
|
state,
|
||||||
address,
|
address1,
|
||||||
zip,
|
address2,
|
||||||
|
zipcode,
|
||||||
phone,
|
phone,
|
||||||
website,
|
website,
|
||||||
|
email,
|
||||||
menu_url,
|
menu_url,
|
||||||
menu_type,
|
menu_type,
|
||||||
platform,
|
platform,
|
||||||
platform_dispensary_id,
|
platform_dispensary_id,
|
||||||
|
c_name,
|
||||||
|
chain_slug,
|
||||||
|
enterprise_id,
|
||||||
latitude,
|
latitude,
|
||||||
longitude
|
longitude,
|
||||||
|
timezone,
|
||||||
|
description,
|
||||||
|
logo_image,
|
||||||
|
banner_image,
|
||||||
|
offer_pickup,
|
||||||
|
offer_delivery,
|
||||||
|
offer_curbside_pickup,
|
||||||
|
is_medical,
|
||||||
|
is_recreational,
|
||||||
|
status,
|
||||||
|
country
|
||||||
} = req.body;
|
} = req.body;
|
||||||
|
|
||||||
if (!name || !slug || !city || !state) {
|
if (!name || !slug || !city || !state) {
|
||||||
@@ -221,16 +294,19 @@ router.post('/', requireRole('superadmin', 'admin'), async (req, res) => {
|
|||||||
|
|
||||||
const result = await pool.query(`
|
const result = await pool.query(`
|
||||||
INSERT INTO dispensaries (
|
INSERT INTO dispensaries (
|
||||||
name, slug, city, state, address, zip, phone, website,
|
name, slug, city, state, address1, address2, zipcode, phone, website, email,
|
||||||
menu_url, menu_type, platform, platform_dispensary_id,
|
menu_url, menu_type, platform, platform_dispensary_id, c_name, chain_slug, enterprise_id,
|
||||||
latitude, longitude, created_at, updated_at
|
latitude, longitude, timezone, description, logo_image, banner_image,
|
||||||
|
offer_pickup, offer_delivery, offer_curbside_pickup, is_medical, is_recreational, status, country,
|
||||||
|
created_at, updated_at
|
||||||
)
|
)
|
||||||
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP)
|
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20, $21, $22, $23, $24, $25, $26, $27, $28, $29, $30, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP)
|
||||||
RETURNING *
|
RETURNING *
|
||||||
`, [
|
`, [
|
||||||
name, slug, city, state, address, zip, phone, website,
|
name, slug, city, state, address1, address2, zipcode, phone, website, email,
|
||||||
menu_url, menu_type, platform || 'dutchie', platform_dispensary_id,
|
menu_url, menu_type, platform || 'dutchie', platform_dispensary_id, c_name, chain_slug, enterprise_id,
|
||||||
latitude, longitude
|
latitude, longitude, timezone, description, logo_image, banner_image,
|
||||||
|
offer_pickup, offer_delivery, offer_curbside_pickup, is_medical, is_recreational, status, country || 'United States'
|
||||||
]);
|
]);
|
||||||
|
|
||||||
res.status(201).json(result.rows[0]);
|
res.status(201).json(result.rows[0]);
|
||||||
@@ -253,16 +329,32 @@ router.put('/:id', requireRole('superadmin', 'admin'), async (req, res) => {
|
|||||||
slug,
|
slug,
|
||||||
city,
|
city,
|
||||||
state,
|
state,
|
||||||
address,
|
address1,
|
||||||
zip,
|
address2,
|
||||||
|
zipcode,
|
||||||
phone,
|
phone,
|
||||||
website,
|
website,
|
||||||
|
email,
|
||||||
menu_url,
|
menu_url,
|
||||||
menu_type,
|
menu_type,
|
||||||
platform,
|
platform,
|
||||||
platform_dispensary_id,
|
platform_dispensary_id,
|
||||||
|
c_name,
|
||||||
|
chain_slug,
|
||||||
|
enterprise_id,
|
||||||
latitude,
|
latitude,
|
||||||
longitude
|
longitude,
|
||||||
|
timezone,
|
||||||
|
description,
|
||||||
|
logo_image,
|
||||||
|
banner_image,
|
||||||
|
offer_pickup,
|
||||||
|
offer_delivery,
|
||||||
|
offer_curbside_pickup,
|
||||||
|
is_medical,
|
||||||
|
is_recreational,
|
||||||
|
status,
|
||||||
|
country
|
||||||
} = req.body;
|
} = req.body;
|
||||||
|
|
||||||
const result = await pool.query(`
|
const result = await pool.query(`
|
||||||
@@ -272,23 +364,40 @@ router.put('/:id', requireRole('superadmin', 'admin'), async (req, res) => {
|
|||||||
slug = COALESCE($2, slug),
|
slug = COALESCE($2, slug),
|
||||||
city = COALESCE($3, city),
|
city = COALESCE($3, city),
|
||||||
state = COALESCE($4, state),
|
state = COALESCE($4, state),
|
||||||
address = COALESCE($5, address),
|
address1 = COALESCE($5, address1),
|
||||||
zip = COALESCE($6, zip),
|
address2 = COALESCE($6, address2),
|
||||||
phone = COALESCE($7, phone),
|
zipcode = COALESCE($7, zipcode),
|
||||||
website = COALESCE($8, website),
|
phone = COALESCE($8, phone),
|
||||||
menu_url = COALESCE($9, menu_url),
|
website = COALESCE($9, website),
|
||||||
menu_type = COALESCE($10, menu_type),
|
email = COALESCE($10, email),
|
||||||
platform = COALESCE($11, platform),
|
menu_url = COALESCE($11, menu_url),
|
||||||
platform_dispensary_id = COALESCE($12, platform_dispensary_id),
|
menu_type = COALESCE($12, menu_type),
|
||||||
latitude = COALESCE($13, latitude),
|
platform = COALESCE($13, platform),
|
||||||
longitude = COALESCE($14, longitude),
|
platform_dispensary_id = COALESCE($14, platform_dispensary_id),
|
||||||
|
c_name = COALESCE($15, c_name),
|
||||||
|
chain_slug = COALESCE($16, chain_slug),
|
||||||
|
enterprise_id = COALESCE($17, enterprise_id),
|
||||||
|
latitude = COALESCE($18, latitude),
|
||||||
|
longitude = COALESCE($19, longitude),
|
||||||
|
timezone = COALESCE($20, timezone),
|
||||||
|
description = COALESCE($21, description),
|
||||||
|
logo_image = COALESCE($22, logo_image),
|
||||||
|
banner_image = COALESCE($23, banner_image),
|
||||||
|
offer_pickup = COALESCE($24, offer_pickup),
|
||||||
|
offer_delivery = COALESCE($25, offer_delivery),
|
||||||
|
offer_curbside_pickup = COALESCE($26, offer_curbside_pickup),
|
||||||
|
is_medical = COALESCE($27, is_medical),
|
||||||
|
is_recreational = COALESCE($28, is_recreational),
|
||||||
|
status = COALESCE($29, status),
|
||||||
|
country = COALESCE($30, country),
|
||||||
updated_at = CURRENT_TIMESTAMP
|
updated_at = CURRENT_TIMESTAMP
|
||||||
WHERE id = $15
|
WHERE id = $31
|
||||||
RETURNING *
|
RETURNING *
|
||||||
`, [
|
`, [
|
||||||
name, slug, city, state, address, zip, phone, website,
|
name, slug, city, state, address1, address2, zipcode, phone, website, email,
|
||||||
menu_url, menu_type, platform, platform_dispensary_id,
|
menu_url, menu_type, platform, platform_dispensary_id, c_name, chain_slug, enterprise_id,
|
||||||
latitude, longitude, id
|
latitude, longitude, timezone, description, logo_image, banner_image,
|
||||||
|
offer_pickup, offer_delivery, offer_curbside_pickup, is_medical, is_recreational, status, country, id
|
   ]);

   if (result.rows.length === 0) {

@@ -320,28 +429,49 @@ router.delete('/:id', requireRole('superadmin'), async (req, res) => {
   }
 });

-// Get products for a store (uses dutchie_products table)
+// Get products for a store (uses store_products via v_products view with snapshot pricing)
 router.get('/:id/products', async (req, res) => {
   try {
     const { id } = req.params;

     const result = await pool.query(`
       SELECT
-        id,
-        name,
-        brand_name,
-        type,
-        subcategory,
-        stock_status,
-        thc_content,
-        cbd_content,
-        primary_image_url,
-        external_product_id,
-        created_at,
-        updated_at
-      FROM dutchie_products
-      WHERE dispensary_id = $1
-      ORDER BY name
+        p.id,
+        p.name,
+        p.brand_name,
+        p.type,
+        p.subcategory,
+        p.strain_type,
+        p.stock_status,
+        p.thc as thc_content,
+        p.cbd as cbd_content,
+        sp.description,
+        sp.total_quantity_available as quantity,
+        p.primary_image_url,
+        p.external_product_id,
+        p.created_at,
+        p.updated_at,
+        COALESCE(snap.rec_min_price_cents, 0)::numeric / 100.0 as regular_price,
+        CASE WHEN snap.rec_min_special_price_cents > 0
+             THEN snap.rec_min_special_price_cents::numeric / 100.0
+             ELSE NULL END as sale_price,
+        COALESCE(snap.med_min_price_cents, 0)::numeric / 100.0 as med_price,
+        CASE WHEN snap.med_min_special_price_cents > 0
+             THEN snap.med_min_special_price_cents::numeric / 100.0
+             ELSE NULL END as med_sale_price,
+        snap.special as on_special
+      FROM v_products p
+      JOIN store_products sp ON sp.id = p.id
+      LEFT JOIN LATERAL (
+        SELECT rec_min_price_cents, rec_min_special_price_cents,
+               med_min_price_cents, med_min_special_price_cents, special
+        FROM v_product_snapshots vps
+        WHERE vps.store_product_id = p.id
+        ORDER BY vps.crawled_at DESC
+        LIMIT 1
+      ) snap ON true
+      WHERE p.dispensary_id = $1
+      ORDER BY p.name
     `, [id]);

     res.json({ products: result.rows });
@@ -351,6 +481,55 @@ router.get('/:id/products', async (req, res) => {
   }
 });

+// Get specials for a store (products with sale prices or on_special flag)
+router.get('/:id/specials', async (req, res) => {
+  try {
+    const { id } = req.params;
+
+    const result = await pool.query(`
+      SELECT
+        p.id,
+        p.name,
+        p.brand_name,
+        p.type,
+        p.subcategory,
+        p.strain_type,
+        p.stock_status,
+        p.thc as thc_content,
+        p.cbd as cbd_content,
+        sp.description,
+        sp.total_quantity_available as quantity,
+        p.primary_image_url,
+        p.external_product_id,
+        p.created_at,
+        p.updated_at,
+        COALESCE(snap.rec_min_price_cents, 0)::numeric / 100.0 as regular_price,
+        snap.rec_min_special_price_cents::numeric / 100.0 as sale_price,
+        COALESCE(snap.med_min_price_cents, 0)::numeric / 100.0 as med_price,
+        snap.med_min_special_price_cents::numeric / 100.0 as med_sale_price,
+        true as on_special
+      FROM v_products p
+      JOIN store_products sp ON sp.id = p.id
+      INNER JOIN LATERAL (
+        SELECT rec_min_price_cents, rec_min_special_price_cents,
+               med_min_price_cents, med_min_special_price_cents, special
+        FROM v_product_snapshots vps
+        WHERE vps.store_product_id = p.id
+          AND (vps.special = true OR vps.rec_min_special_price_cents > 0 OR vps.med_min_special_price_cents > 0)
+        ORDER BY vps.crawled_at DESC
+        LIMIT 1
+      ) snap ON true
+      WHERE p.dispensary_id = $1
+      ORDER BY p.name
+    `, [id]);
+
+    res.json({ specials: result.rows });
+  } catch (error) {
+    console.error('Error fetching store specials:', error);
+    res.status(500).json({ error: 'Failed to fetch specials' });
+  }
+});
+
 // Get brands for a store
 router.get('/:id/brands', async (req, res) => {
   try {
@@ -358,7 +537,7 @@ router.get('/:id/brands', async (req, res) => {

     const result = await pool.query(`
       SELECT DISTINCT brand_name as name, COUNT(*) as product_count
-      FROM dutchie_products
+      FROM v_products
       WHERE dispensary_id = $1 AND brand_name IS NOT NULL
       GROUP BY brand_name
       ORDER BY product_count DESC, brand_name
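For orientation, a minimal client-side sketch of how the snapshot-derived pricing might be consumed. The field names come from the SQL aliases in the hunk above; the /api/dispensaries mount path and the fetch-based client are assumptions, not part of this diff.

// Hedged sketch: read snapshot-derived prices from the new specials endpoint.
interface StoreSpecial {
  id: number;
  name: string;
  regular_price: number;      // dollars, derived from rec_min_price_cents / 100
  sale_price: number | null;  // dollars, from rec_min_special_price_cents
  on_special: boolean;
}

async function fetchSpecials(storeId: number): Promise<StoreSpecial[]> {
  // Assumes the dispensaries router is mounted at /api/dispensaries.
  const res = await fetch(`/api/dispensaries/${storeId}/specials`);
  if (!res.ok) throw new Error(`Failed to fetch specials: ${res.status}`);
  const body = await res.json();
  return body.specials as StoreSpecial[];
}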
backend/src/routes/tasks.ts (new file, 595 lines)
@@ -0,0 +1,595 @@
/**
 * Task Queue API Routes
 *
 * Endpoints for managing worker tasks, viewing capacity metrics,
 * and generating batch tasks.
 */

import { Router, Request, Response } from 'express';
import {
  taskService,
  TaskRole,
  TaskStatus,
  TaskFilter,
} from '../tasks/task-service';
import { pool } from '../db/pool';

const router = Router();

/**
 * GET /api/tasks
 * List tasks with optional filters
 *
 * Query params:
 * - role: Filter by role
 * - status: Filter by status (comma-separated for multiple)
 * - dispensary_id: Filter by dispensary
 * - worker_id: Filter by worker
 * - limit: Max results (default 100)
 * - offset: Pagination offset
 */
router.get('/', async (req: Request, res: Response) => {
  try {
    const filter: TaskFilter = {};

    if (req.query.role) {
      filter.role = req.query.role as TaskRole;
    }

    if (req.query.status) {
      const statuses = (req.query.status as string).split(',') as TaskStatus[];
      filter.status = statuses.length === 1 ? statuses[0] : statuses;
    }

    if (req.query.dispensary_id) {
      filter.dispensary_id = parseInt(req.query.dispensary_id as string, 10);
    }

    if (req.query.worker_id) {
      filter.worker_id = req.query.worker_id as string;
    }

    if (req.query.limit) {
      filter.limit = parseInt(req.query.limit as string, 10);
    }

    if (req.query.offset) {
      filter.offset = parseInt(req.query.offset as string, 10);
    }

    const tasks = await taskService.listTasks(filter);
    res.json({ tasks, count: tasks.length });
  } catch (error: unknown) {
    console.error('Error listing tasks:', error);
    res.status(500).json({ error: 'Failed to list tasks' });
  }
});

/**
 * GET /api/tasks/counts
 * Get task counts by status
 */
router.get('/counts', async (_req: Request, res: Response) => {
  try {
    const counts = await taskService.getTaskCounts();
    res.json(counts);
  } catch (error: unknown) {
    console.error('Error getting task counts:', error);
    res.status(500).json({ error: 'Failed to get task counts' });
  }
});

/**
 * GET /api/tasks/capacity
 * Get capacity metrics for all roles
 */
router.get('/capacity', async (_req: Request, res: Response) => {
  try {
    const metrics = await taskService.getCapacityMetrics();
    res.json({ metrics });
  } catch (error: unknown) {
    console.error('Error getting capacity metrics:', error);
    res.status(500).json({ error: 'Failed to get capacity metrics' });
  }
});

/**
 * GET /api/tasks/capacity/:role
 * Get capacity metrics for a specific role
 */
router.get('/capacity/:role', async (req: Request, res: Response) => {
  try {
    const role = req.params.role as TaskRole;
    const capacity = await taskService.getRoleCapacity(role);

    if (!capacity) {
      return res.status(404).json({ error: 'Role not found or no data' });
    }

    // Calculate workers needed for different SLAs
    const workersFor1Hour = await taskService.calculateWorkersNeeded(role, 1);
    const workersFor4Hours = await taskService.calculateWorkersNeeded(role, 4);
    const workersFor8Hours = await taskService.calculateWorkersNeeded(role, 8);

    res.json({
      ...capacity,
      workers_needed: {
        for_1_hour: workersFor1Hour,
        for_4_hours: workersFor4Hours,
        for_8_hours: workersFor8Hours,
      },
    });
  } catch (error: unknown) {
    console.error('Error getting role capacity:', error);
    res.status(500).json({ error: 'Failed to get role capacity' });
  }
});

/**
 * GET /api/tasks/:id
 * Get a specific task by ID
 */
router.get('/:id', async (req: Request, res: Response) => {
  try {
    const taskId = parseInt(req.params.id, 10);
    const task = await taskService.getTask(taskId);

    if (!task) {
      return res.status(404).json({ error: 'Task not found' });
    }

    res.json(task);
  } catch (error: unknown) {
    console.error('Error getting task:', error);
    res.status(500).json({ error: 'Failed to get task' });
  }
});

/**
 * DELETE /api/tasks/:id
 * Delete a specific task by ID
 * Only allows deletion of failed, completed, or pending tasks (not running)
 */
router.delete('/:id', async (req: Request, res: Response) => {
  try {
    const taskId = parseInt(req.params.id, 10);

    // First check if task exists and its status
    const task = await taskService.getTask(taskId);
    if (!task) {
      return res.status(404).json({ error: 'Task not found' });
    }

    // Don't allow deleting running tasks
    if (task.status === 'running' || task.status === 'claimed') {
      return res.status(400).json({ error: 'Cannot delete a running or claimed task' });
    }

    // Delete the task
    await pool.query('DELETE FROM worker_tasks WHERE id = $1', [taskId]);

    res.json({ success: true, message: `Task ${taskId} deleted` });
  } catch (error: unknown) {
    console.error('Error deleting task:', error);
    res.status(500).json({ error: 'Failed to delete task' });
  }
});

/**
 * POST /api/tasks
 * Create a new task
 *
 * Body:
 * - role: TaskRole (required)
 * - dispensary_id: number (optional)
 * - platform: string (optional)
 * - priority: number (optional, default 0)
 * - scheduled_for: ISO date string (optional)
 */
router.post('/', async (req: Request, res: Response) => {
  try {
    const { role, dispensary_id, platform, priority, scheduled_for } = req.body;

    if (!role) {
      return res.status(400).json({ error: 'Role is required' });
    }

    // Check if store already has an active task
    if (dispensary_id) {
      const hasActive = await taskService.hasActiveTask(dispensary_id);
      if (hasActive) {
        return res.status(409).json({
          error: 'Store already has an active task',
          dispensary_id,
        });
      }
    }

    const task = await taskService.createTask({
      role,
      dispensary_id,
      platform,
      priority,
      scheduled_for: scheduled_for ? new Date(scheduled_for) : undefined,
    });

    res.status(201).json(task);
  } catch (error: unknown) {
    console.error('Error creating task:', error);
    res.status(500).json({ error: 'Failed to create task' });
  }
});

/**
 * POST /api/tasks/generate/resync
 * Generate daily resync tasks for all active stores
 *
 * Body:
 * - batches_per_day: number (optional, default 6 = every 4 hours)
 * - date: ISO date string (optional, default today)
 */
router.post('/generate/resync', async (req: Request, res: Response) => {
  try {
    const { batches_per_day, date } = req.body;
    const batchesPerDay = batches_per_day ?? 6;
    const targetDate = date ? new Date(date) : new Date();

    const createdCount = await taskService.generateDailyResyncTasks(
      batchesPerDay,
      targetDate
    );

    res.json({
      success: true,
      tasks_created: createdCount,
      batches_per_day: batchesPerDay,
      date: targetDate.toISOString().split('T')[0],
    });
  } catch (error: unknown) {
    console.error('Error generating resync tasks:', error);
    res.status(500).json({ error: 'Failed to generate resync tasks' });
  }
});

/**
 * POST /api/tasks/generate/discovery
 * Generate store discovery tasks for a platform
 *
 * Body:
 * - platform: string (required, e.g., 'dutchie')
 * - state_code: string (optional, e.g., 'AZ')
 * - priority: number (optional)
 */
router.post('/generate/discovery', async (req: Request, res: Response) => {
  try {
    const { platform, state_code, priority } = req.body;

    if (!platform) {
      return res.status(400).json({ error: 'Platform is required' });
    }

    const task = await taskService.createStoreDiscoveryTask(
      platform,
      state_code,
      priority ?? 0
    );

    res.status(201).json(task);
  } catch (error: unknown) {
    console.error('Error creating discovery task:', error);
    res.status(500).json({ error: 'Failed to create discovery task' });
  }
});

/**
 * POST /api/tasks/recover-stale
 * Recover stale tasks from dead workers
 *
 * Body:
 * - threshold_minutes: number (optional, default 10)
 */
router.post('/recover-stale', async (req: Request, res: Response) => {
  try {
    const { threshold_minutes } = req.body;
    const recovered = await taskService.recoverStaleTasks(threshold_minutes ?? 10);

    res.json({
      success: true,
      tasks_recovered: recovered,
    });
  } catch (error: unknown) {
    console.error('Error recovering stale tasks:', error);
    res.status(500).json({ error: 'Failed to recover stale tasks' });
  }
});

/**
 * GET /api/tasks/role/:role/last-completion
 * Get the last completion time for a role
 */
router.get('/role/:role/last-completion', async (req: Request, res: Response) => {
  try {
    const role = req.params.role as TaskRole;
    const lastCompletion = await taskService.getLastCompletion(role);

    res.json({
      role,
      last_completion: lastCompletion?.toISOString() ?? null,
      time_since: lastCompletion
        ? Math.floor((Date.now() - lastCompletion.getTime()) / 1000)
        : null,
    });
  } catch (error: unknown) {
    console.error('Error getting last completion:', error);
    res.status(500).json({ error: 'Failed to get last completion' });
  }
});

/**
 * GET /api/tasks/role/:role/recent
 * Get recent completions for a role
 */
router.get('/role/:role/recent', async (req: Request, res: Response) => {
  try {
    const role = req.params.role as TaskRole;
    const limit = parseInt(req.query.limit as string, 10) || 10;

    const tasks = await taskService.getRecentCompletions(role, limit);
    res.json({ tasks });
  } catch (error: unknown) {
    console.error('Error getting recent completions:', error);
    res.status(500).json({ error: 'Failed to get recent completions' });
  }
});

/**
 * GET /api/tasks/store/:dispensaryId/active
 * Check if a store has an active task
 */
router.get('/store/:dispensaryId/active', async (req: Request, res: Response) => {
  try {
    const dispensaryId = parseInt(req.params.dispensaryId, 10);
    const hasActive = await taskService.hasActiveTask(dispensaryId);

    res.json({
      dispensary_id: dispensaryId,
      has_active_task: hasActive,
    });
  } catch (error: unknown) {
    console.error('Error checking active task:', error);
    res.status(500).json({ error: 'Failed to check active task' });
  }
});

// ============================================================
// MIGRATION ROUTES - Disable old job systems
// ============================================================

/**
 * GET /api/tasks/migration/status
 * Get status of old job systems vs new task queue
 */
router.get('/migration/status', async (_req: Request, res: Response) => {
  try {
    // Get old job system counts
    const [schedules, crawlJobs, rawPayloads, taskCounts] = await Promise.all([
      pool.query(`
        SELECT
          COUNT(*) as total,
          COUNT(*) FILTER (WHERE enabled = true) as enabled
        FROM job_schedules
      `),
      pool.query(`
        SELECT
          COUNT(*) as total,
          COUNT(*) FILTER (WHERE status = 'pending') as pending,
          COUNT(*) FILTER (WHERE status = 'running') as running
        FROM dispensary_crawl_jobs
      `),
      pool.query(`
        SELECT
          COUNT(*) as total,
          COUNT(*) FILTER (WHERE processed = false) as unprocessed
        FROM raw_payloads
      `),
      taskService.getTaskCounts(),
    ]);

    res.json({
      old_systems: {
        job_schedules: {
          total: parseInt(schedules.rows[0].total) || 0,
          enabled: parseInt(schedules.rows[0].enabled) || 0,
        },
        dispensary_crawl_jobs: {
          total: parseInt(crawlJobs.rows[0].total) || 0,
          pending: parseInt(crawlJobs.rows[0].pending) || 0,
          running: parseInt(crawlJobs.rows[0].running) || 0,
        },
        raw_payloads: {
          total: parseInt(rawPayloads.rows[0].total) || 0,
          unprocessed: parseInt(rawPayloads.rows[0].unprocessed) || 0,
        },
      },
      new_task_queue: taskCounts,
      recommendation: schedules.rows[0].enabled > 0
        ? 'Disable old job schedules before switching to new task queue'
        : 'Ready to use new task queue',
    });
  } catch (error: unknown) {
    console.error('Error getting migration status:', error);
    res.status(500).json({ error: 'Failed to get migration status' });
  }
});

/**
 * POST /api/tasks/migration/disable-old-schedules
 * Disable all old job schedules to prepare for new task queue
 */
router.post('/migration/disable-old-schedules', async (_req: Request, res: Response) => {
  try {
    const result = await pool.query(`
      UPDATE job_schedules
      SET enabled = false,
          updated_at = NOW()
      WHERE enabled = true
      RETURNING id, job_name
    `);

    res.json({
      success: true,
      disabled_count: result.rowCount,
      disabled_schedules: result.rows.map(r => ({ id: r.id, job_name: r.job_name })),
    });
  } catch (error: unknown) {
    console.error('Error disabling old schedules:', error);
    res.status(500).json({ error: 'Failed to disable old schedules' });
  }
});

/**
 * POST /api/tasks/migration/cancel-pending-crawl-jobs
 * Cancel all pending crawl jobs from the old system
 */
router.post('/migration/cancel-pending-crawl-jobs', async (_req: Request, res: Response) => {
  try {
    const result = await pool.query(`
      UPDATE dispensary_crawl_jobs
      SET status = 'cancelled',
          completed_at = NOW(),
          updated_at = NOW()
      WHERE status = 'pending'
      RETURNING id
    `);

    res.json({
      success: true,
      cancelled_count: result.rowCount,
    });
  } catch (error: unknown) {
    console.error('Error cancelling pending crawl jobs:', error);
    res.status(500).json({ error: 'Failed to cancel pending crawl jobs' });
  }
});

/**
 * POST /api/tasks/migration/create-resync-tasks
 * Create product_refresh tasks for all crawl-enabled dispensaries
 */
router.post('/migration/create-resync-tasks', async (req: Request, res: Response) => {
  try {
    const { priority = 0, state_code } = req.body;

    let query = `
      SELECT id, name FROM dispensaries
      WHERE crawl_enabled = true
        AND platform_dispensary_id IS NOT NULL
    `;
    const params: any[] = [];

    if (state_code) {
      query += `
        AND state_id = (SELECT id FROM states WHERE code = $1)
      `;
      params.push(state_code.toUpperCase());
    }

    query += ` ORDER BY id`;

    const dispensaries = await pool.query(query, params);
    let created = 0;

    for (const disp of dispensaries.rows) {
      // Check if already has pending/running task
      const hasActive = await taskService.hasActiveTask(disp.id);
      if (!hasActive) {
        await taskService.createTask({
          role: 'product_refresh',
          dispensary_id: disp.id,
          platform: 'dutchie',
          priority,
        });
        created++;
      }
    }

    res.json({
      success: true,
      tasks_created: created,
      dispensaries_checked: dispensaries.rows.length,
      state_filter: state_code || 'all',
    });
  } catch (error: unknown) {
    console.error('Error creating resync tasks:', error);
    res.status(500).json({ error: 'Failed to create resync tasks' });
  }
});

/**
 * POST /api/tasks/migration/full-migrate
 * One-click migration: disable old systems, create new tasks
 */
router.post('/migration/full-migrate', async (req: Request, res: Response) => {
  try {
    const results: any = {
      success: true,
      steps: [],
    };

    // Step 1: Disable old job schedules
    const disableResult = await pool.query(`
      UPDATE job_schedules
      SET enabled = false, updated_at = NOW()
      WHERE enabled = true
      RETURNING id
    `);
    results.steps.push({
      step: 'disable_job_schedules',
      count: disableResult.rowCount,
    });

    // Step 2: Cancel pending crawl jobs
    const cancelResult = await pool.query(`
      UPDATE dispensary_crawl_jobs
      SET status = 'cancelled', completed_at = NOW(), updated_at = NOW()
      WHERE status = 'pending'
      RETURNING id
    `);
    results.steps.push({
      step: 'cancel_pending_crawl_jobs',
      count: cancelResult.rowCount,
    });

    // Step 3: Generate initial resync tasks
    const resyncCount = await taskService.generateDailyResyncTasks(6);
    results.steps.push({
      step: 'generate_resync_tasks',
      count: resyncCount,
    });

    // Step 4: Create store discovery task
    const discoveryTask = await taskService.createStoreDiscoveryTask('dutchie', undefined, 0);
    results.steps.push({
      step: 'create_discovery_task',
      task_id: discoveryTask.id,
    });

    // Step 5: Create analytics refresh task
    const analyticsTask = await taskService.createTask({
      role: 'analytics_refresh',
      priority: 0,
    });
    results.steps.push({
      step: 'create_analytics_task',
      task_id: analyticsTask.id,
    });

    results.message = 'Migration complete. New task workers will pick up tasks.';
    res.json(results);
  } catch (error: unknown) {
    console.error('Error during full migration:', error);
    res.status(500).json({ error: 'Failed to complete migration' });
  }
});

export default router;
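A short usage sketch for the queue API above. The /api/tasks paths and the 409 behaviour come from the route comments; the fetch client, relative URLs, and the assumption that the created task JSON carries an id are mine.

// Hedged sketch: enqueue a product_refresh task for a store, then read it back.
async function enqueueRefresh(dispensaryId: number): Promise<void> {
  const createRes = await fetch('/api/tasks', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ role: 'product_refresh', dispensary_id: dispensaryId, platform: 'dutchie' }),
  });
  if (createRes.status === 409) {
    console.log('Store already has an active task; skipping');
    return;
  }
  const task = await createRes.json();           // assumed to include the new task id
  const statusRes = await fetch(`/api/tasks/${task.id}`);
  console.log('Task status:', (await statusRes.json()).status);
}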
@@ -14,23 +14,36 @@ router.get('/', async (req: AuthRequest, res) => {
   try {
     const { search, domain } = req.query;

-    let query = `
-      SELECT id, email, role, first_name, last_name, phone, domain, created_at, updated_at
-      FROM users
-      WHERE 1=1
-    `;
+    // Check which columns exist (schema-tolerant)
+    const columnsResult = await pool.query(`
+      SELECT column_name FROM information_schema.columns
+      WHERE table_name = 'users' AND column_name IN ('first_name', 'last_name', 'phone', 'domain')
+    `);
+    const existingColumns = new Set(columnsResult.rows.map((r: any) => r.column_name));
+
+    // Build column list based on what exists
+    const selectCols = ['id', 'email', 'role', 'created_at', 'updated_at'];
+    if (existingColumns.has('first_name')) selectCols.push('first_name');
+    if (existingColumns.has('last_name')) selectCols.push('last_name');
+    if (existingColumns.has('phone')) selectCols.push('phone');
+    if (existingColumns.has('domain')) selectCols.push('domain');
+
+    let query = `SELECT ${selectCols.join(', ')} FROM users WHERE 1=1`;
     const params: any[] = [];
     let paramIndex = 1;

-    // Search by email, first_name, or last_name
+    // Search by email (and optionally first_name, last_name if they exist)
     if (search && typeof search === 'string') {
-      query += ` AND (email ILIKE $${paramIndex} OR first_name ILIKE $${paramIndex} OR last_name ILIKE $${paramIndex})`;
+      const searchClauses = ['email ILIKE $' + paramIndex];
+      if (existingColumns.has('first_name')) searchClauses.push('first_name ILIKE $' + paramIndex);
+      if (existingColumns.has('last_name')) searchClauses.push('last_name ILIKE $' + paramIndex);
+      query += ` AND (${searchClauses.join(' OR ')})`;
       params.push(`%${search}%`);
       paramIndex++;
     }

-    // Filter by domain
-    if (domain && typeof domain === 'string') {
+    // Filter by domain (if column exists)
+    if (domain && typeof domain === 'string' && existingColumns.has('domain')) {
       query += ` AND domain = $${paramIndex}`;
       params.push(domain);
       paramIndex++;
@@ -50,8 +63,22 @@ router.get('/', async (req: AuthRequest, res) => {
 router.get('/:id', async (req: AuthRequest, res) => {
   try {
     const { id } = req.params;

+    // Check which columns exist (schema-tolerant)
+    const columnsResult = await pool.query(`
+      SELECT column_name FROM information_schema.columns
+      WHERE table_name = 'users' AND column_name IN ('first_name', 'last_name', 'phone', 'domain')
+    `);
+    const existingColumns = new Set(columnsResult.rows.map((r: any) => r.column_name));
+
+    const selectCols = ['id', 'email', 'role', 'created_at', 'updated_at'];
+    if (existingColumns.has('first_name')) selectCols.push('first_name');
+    if (existingColumns.has('last_name')) selectCols.push('last_name');
+    if (existingColumns.has('phone')) selectCols.push('phone');
+    if (existingColumns.has('domain')) selectCols.push('domain');
+
     const result = await pool.query(`
-      SELECT id, email, role, first_name, last_name, phone, domain, created_at, updated_at
+      SELECT ${selectCols.join(', ')}
       FROM users
       WHERE id = $1
     `, [id]);
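The same information_schema probe now appears in both user handlers; a shared helper along these lines (hypothetical, not in the diff) would keep the schema-tolerant behaviour while removing the duplication.

// Hypothetical helper mirroring the column check used in both routes above.
async function getExistingUserColumns(): Promise<Set<string>> {
  const result = await pool.query(`
    SELECT column_name FROM information_schema.columns
    WHERE table_name = 'users' AND column_name IN ('first_name', 'last_name', 'phone', 'domain')
  `);
  return new Set(result.rows.map((r: { column_name: string }) => r.column_name));
}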
@@ -1,18 +1,32 @@
 import { Router, Request, Response } from 'express';
+import { readFileSync } from 'fs';
+import { join } from 'path';

 const router = Router();

+// Read package.json version at startup
+let packageVersion = 'unknown';
+try {
+  const packageJson = JSON.parse(readFileSync(join(__dirname, '../../package.json'), 'utf-8'));
+  packageVersion = packageJson.version || 'unknown';
+} catch {
+  // Fallback if package.json not found
+}
+
 /**
  * GET /api/version
  * Returns build version information for display in admin UI
  */
 router.get('/', async (req: Request, res: Response) => {
   try {
+    const gitSha = process.env.APP_GIT_SHA || 'unknown';
     const versionInfo = {
-      build_version: process.env.APP_BUILD_VERSION || 'dev',
-      git_sha: process.env.APP_GIT_SHA || 'local',
-      build_time: process.env.APP_BUILD_TIME || new Date().toISOString(),
-      image_tag: process.env.CONTAINER_IMAGE_TAG || 'local',
+      version: packageVersion,
+      build_version: process.env.APP_BUILD_VERSION?.slice(0, 8) || 'dev',
+      git_sha: gitSha.slice(0, 8) || 'unknown',
+      git_sha_full: gitSha,
+      build_time: process.env.APP_BUILD_TIME || 'unknown',
+      image_tag: process.env.CONTAINER_IMAGE_TAG?.slice(0, 8) || 'local',
     };

     res.json(versionInfo);
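For illustration only, the reworked endpoint would return a payload shaped roughly like this; the field names come from the route above, every value is invented.

// Example /api/version response shape; values are placeholders, not real build metadata.
const exampleVersionResponse = {
  version: '1.4.2',                   // from package.json
  build_version: 'dev',               // APP_BUILD_VERSION, truncated to 8 chars
  git_sha: 'f0e1d2c3',                // short SHA for display
  git_sha_full: 'f0e1d2c3',           // full APP_GIT_SHA (shortened here)
  build_time: '2024-06-11T18:04:00Z', // APP_BUILD_TIME
  image_tag: 'local',                 // CONTAINER_IMAGE_TAG, truncated
};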
backend/src/routes/worker-registry.ts (new file, 675 lines)
@@ -0,0 +1,675 @@
/**
 * Worker Registry API Routes
 *
 * Dynamic worker management - workers register on startup, get assigned names,
 * and report heartbeats. Everything is API-driven, no hardcoding.
 *
 * Endpoints:
 * POST /api/worker-registry/register - Worker reports for duty
 * POST /api/worker-registry/heartbeat - Worker heartbeat
 * POST /api/worker-registry/deregister - Worker signing off
 * GET /api/worker-registry/workers - List all workers (for dashboard)
 * GET /api/worker-registry/workers/:id - Get specific worker
 * POST /api/worker-registry/cleanup - Mark stale workers offline
 *
 * GET /api/worker-registry/names - List all names in pool
 * POST /api/worker-registry/names - Add names to pool
 * DELETE /api/worker-registry/names/:name - Remove name from pool
 *
 * GET /api/worker-registry/roles - List available task roles
 * POST /api/worker-registry/roles - Add a new role (future)
 */

import { Router, Request, Response } from 'express';
import { pool } from '../db/pool';
import os from 'os';

const router = Router();

// ============================================================
// WORKER REGISTRATION
// ============================================================

/**
 * POST /api/worker-registry/register
 * Worker reports for duty - gets assigned a friendly name
 *
 * Body:
 * - role: string (optional) - task role, or null for role-agnostic workers
 * - worker_id: string (optional) - custom ID, auto-generated if not provided
 * - pod_name: string (optional) - k8s pod name
 * - hostname: string (optional) - machine hostname
 * - metadata: object (optional) - additional worker info
 *
 * Returns:
 * - worker_id: assigned worker ID
 * - friendly_name: assigned name from pool
 * - role: confirmed role (or null if agnostic)
 * - message: welcome message
 */
router.post('/register', async (req: Request, res: Response) => {
  try {
    const {
      role = null, // Role is now optional - null means agnostic
      worker_id,
      pod_name,
      hostname,
      ip_address,
      metadata = {}
    } = req.body;

    // Generate worker_id if not provided
    const finalWorkerId = worker_id || `worker-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
    const finalHostname = hostname || os.hostname();
    const clientIp = ip_address || req.ip || req.socket.remoteAddress;

    // Check if worker already registered
    const existing = await pool.query(
      'SELECT id, friendly_name, status FROM worker_registry WHERE worker_id = $1',
      [finalWorkerId]
    );

    if (existing.rows.length > 0) {
      // Re-activate existing worker
      const { rows } = await pool.query(`
        UPDATE worker_registry
        SET status = 'active',
            role = $1,
            pod_name = $2,
            hostname = $3,
            ip_address = $4,
            last_heartbeat_at = NOW(),
            started_at = NOW(),
            metadata = $5,
            updated_at = NOW()
        WHERE worker_id = $6
        RETURNING id, worker_id, friendly_name, role
      `, [role, pod_name, finalHostname, clientIp, metadata, finalWorkerId]);

      const worker = rows[0];
      const roleMsg = role ? `for ${role}` : 'as role-agnostic';
      console.log(`[WorkerRegistry] Worker "${worker.friendly_name}" (${finalWorkerId}) re-registered ${roleMsg}`);

      return res.json({
        success: true,
        worker_id: worker.worker_id,
        friendly_name: worker.friendly_name,
        role: worker.role,
        message: role
          ? `Welcome back, ${worker.friendly_name}! You are assigned to ${role}.`
          : `Welcome back, ${worker.friendly_name}! You are ready to take any task.`
      });
    }

    // Assign a friendly name
    const nameResult = await pool.query('SELECT assign_worker_name($1) as name', [finalWorkerId]);
    const friendlyName = nameResult.rows[0].name;

    // Register the worker
    const { rows } = await pool.query(`
      INSERT INTO worker_registry (
        worker_id, friendly_name, role, pod_name, hostname, ip_address, status, metadata
      ) VALUES ($1, $2, $3, $4, $5, $6, 'active', $7)
      RETURNING id, worker_id, friendly_name, role
    `, [finalWorkerId, friendlyName, role, pod_name, finalHostname, clientIp, metadata]);

    const worker = rows[0];
    const roleMsg = role ? `for ${role}` : 'as role-agnostic';
    console.log(`[WorkerRegistry] New worker "${friendlyName}" (${finalWorkerId}) reporting for duty ${roleMsg}`);

    res.json({
      success: true,
      worker_id: worker.worker_id,
      friendly_name: worker.friendly_name,
      role: worker.role,
      message: role
        ? `Hello ${friendlyName}! You are now registered for ${role}. Ready for work!`
        : `Hello ${friendlyName}! You are ready to take any task from the pool.`
    });
  } catch (error: any) {
    console.error('[WorkerRegistry] Registration error:', error);
    res.status(500).json({ success: false, error: error.message });
  }
});

/**
 * POST /api/worker-registry/heartbeat
 * Worker sends heartbeat to stay alive
 *
 * Body:
 * - worker_id: string (required)
 * - current_task_id: number (optional) - task currently being processed
 * - status: string (optional) - 'active', 'idle'
 */
router.post('/heartbeat', async (req: Request, res: Response) => {
  try {
    const { worker_id, current_task_id, status = 'active', resources } = req.body;

    if (!worker_id) {
      return res.status(400).json({ success: false, error: 'worker_id is required' });
    }

    // Store resources in metadata jsonb column
    const { rows } = await pool.query(`
      UPDATE worker_registry
      SET last_heartbeat_at = NOW(),
          current_task_id = $1,
          status = $2,
          metadata = COALESCE(metadata, '{}'::jsonb) || COALESCE($4::jsonb, '{}'::jsonb),
          updated_at = NOW()
      WHERE worker_id = $3
      RETURNING id, friendly_name, status
    `, [current_task_id || null, status, worker_id, resources ? JSON.stringify(resources) : null]);

    if (rows.length === 0) {
      return res.status(404).json({ success: false, error: 'Worker not found - please register first' });
    }

    res.json({
      success: true,
      worker: rows[0]
    });
  } catch (error: any) {
    console.error('[WorkerRegistry] Heartbeat error:', error);
    res.status(500).json({ success: false, error: error.message });
  }
});

/**
 * POST /api/worker-registry/task-completed
 * Worker reports task completion
 *
 * Body:
 * - worker_id: string (required)
 * - success: boolean (required)
 */
router.post('/task-completed', async (req: Request, res: Response) => {
  try {
    const { worker_id, success } = req.body;

    if (!worker_id) {
      return res.status(400).json({ success: false, error: 'worker_id is required' });
    }

    const incrementField = success ? 'tasks_completed' : 'tasks_failed';

    const { rows } = await pool.query(`
      UPDATE worker_registry
      SET ${incrementField} = ${incrementField} + 1,
          last_task_at = NOW(),
          current_task_id = NULL,
          status = 'idle',
          updated_at = NOW()
      WHERE worker_id = $1
      RETURNING id, friendly_name, tasks_completed, tasks_failed
    `, [worker_id]);

    if (rows.length === 0) {
      return res.status(404).json({ success: false, error: 'Worker not found' });
    }

    res.json({ success: true, worker: rows[0] });
  } catch (error: any) {
    res.status(500).json({ success: false, error: error.message });
  }
});

/**
 * POST /api/worker-registry/deregister
 * Worker signing off (graceful shutdown)
 *
 * Body:
 * - worker_id: string (required)
 */
router.post('/deregister', async (req: Request, res: Response) => {
  try {
    const { worker_id } = req.body;

    if (!worker_id) {
      return res.status(400).json({ success: false, error: 'worker_id is required' });
    }

    // Release the name back to the pool
    await pool.query('SELECT release_worker_name($1)', [worker_id]);

    // Mark as terminated
    const { rows } = await pool.query(`
      UPDATE worker_registry
      SET status = 'terminated',
          current_task_id = NULL,
          updated_at = NOW()
      WHERE worker_id = $1
      RETURNING id, friendly_name
    `, [worker_id]);

    if (rows.length === 0) {
      return res.status(404).json({ success: false, error: 'Worker not found' });
    }

    console.log(`[WorkerRegistry] Worker "${rows[0].friendly_name}" (${worker_id}) signed off`);

    res.json({
      success: true,
      message: `Goodbye ${rows[0].friendly_name}! Thanks for your work.`
    });
  } catch (error: any) {
    console.error('[WorkerRegistry] Deregister error:', error);
    res.status(500).json({ success: false, error: error.message });
  }
});

// ============================================================
// WORKER LISTING (for Dashboard)
// ============================================================

/**
 * GET /api/worker-registry/workers
 * List all workers (for dashboard)
 *
 * Query params:
 * - status: filter by status (active, idle, offline, all)
 * - role: filter by role
 * - include_terminated: include terminated workers (default: false)
 */
router.get('/workers', async (req: Request, res: Response) => {
  try {
    // Check if worker_registry table exists
    const tableCheck = await pool.query(`
      SELECT EXISTS (
        SELECT FROM information_schema.tables
        WHERE table_name = 'worker_registry'
      ) as exists
    `);

    if (!tableCheck.rows[0].exists) {
      // Return empty result if table doesn't exist yet
      return res.json({
        success: true,
        workers: [],
        summary: {
          active_count: 0,
          idle_count: 0,
          offline_count: 0,
          total_count: 0,
          active_roles: 0
        }
      });
    }

    const { status, role, include_terminated = 'false' } = req.query;

    let whereClause = include_terminated === 'true' ? 'WHERE 1=1' : "WHERE status != 'terminated'";
    const params: any[] = [];
    let paramIndex = 1;

    if (status && status !== 'all') {
      whereClause += ` AND status = $${paramIndex}`;
      params.push(status);
      paramIndex++;
    }

    if (role) {
      whereClause += ` AND role = $${paramIndex}`;
      params.push(role);
      paramIndex++;
    }

    const { rows } = await pool.query(`
      SELECT
        id,
        worker_id,
        friendly_name,
        role,
        pod_name,
        hostname,
        ip_address,
        status,
        started_at,
        last_heartbeat_at,
        last_task_at,
        tasks_completed,
        tasks_failed,
        current_task_id,
        metadata,
        EXTRACT(EPOCH FROM (NOW() - last_heartbeat_at)) as seconds_since_heartbeat,
        CASE
          WHEN status = 'offline' OR status = 'terminated' THEN status
          WHEN last_heartbeat_at < NOW() - INTERVAL '2 minutes' THEN 'stale'
          WHEN current_task_id IS NOT NULL THEN 'busy'
          ELSE 'ready'
        END as health_status,
        created_at
      FROM worker_registry
      ${whereClause}
      ORDER BY
        CASE status
          WHEN 'active' THEN 1
          WHEN 'idle' THEN 2
          WHEN 'offline' THEN 3
          ELSE 4
        END,
        last_heartbeat_at DESC
    `, params);

    // Get summary counts
    const { rows: summary } = await pool.query(`
      SELECT
        COUNT(*) FILTER (WHERE status = 'active') as active_count,
        COUNT(*) FILTER (WHERE status = 'idle') as idle_count,
        COUNT(*) FILTER (WHERE status = 'offline') as offline_count,
        COUNT(*) FILTER (WHERE status != 'terminated') as total_count,
        COUNT(DISTINCT role) FILTER (WHERE status IN ('active', 'idle')) as active_roles
      FROM worker_registry
    `);

    res.json({
      success: true,
      workers: rows,
      summary: summary[0]
    });
  } catch (error: any) {
    console.error('[WorkerRegistry] List workers error:', error);
    res.status(500).json({ success: false, error: error.message });
  }
});

/**
 * GET /api/worker-registry/workers/:workerId
 * Get specific worker details
 */
router.get('/workers/:workerId', async (req: Request, res: Response) => {
  try {
    const { workerId } = req.params;

    const { rows } = await pool.query(`
      SELECT * FROM worker_registry WHERE worker_id = $1
    `, [workerId]);

    if (rows.length === 0) {
      return res.status(404).json({ success: false, error: 'Worker not found' });
    }

    res.json({ success: true, worker: rows[0] });
  } catch (error: any) {
    res.status(500).json({ success: false, error: error.message });
  }
});

/**
 * DELETE /api/worker-registry/workers/:workerId
 * Remove a worker (admin action)
 */
router.delete('/workers/:workerId', async (req: Request, res: Response) => {
  try {
    const { workerId } = req.params;

    // Release name
    await pool.query('SELECT release_worker_name($1)', [workerId]);

    // Delete worker
    const { rows } = await pool.query(`
      DELETE FROM worker_registry WHERE worker_id = $1 RETURNING friendly_name
    `, [workerId]);

    if (rows.length === 0) {
      return res.status(404).json({ success: false, error: 'Worker not found' });
    }

    res.json({ success: true, message: `Worker ${rows[0].friendly_name} removed` });
  } catch (error: any) {
    res.status(500).json({ success: false, error: error.message });
  }
});

/**
 * POST /api/worker-registry/cleanup
 * Mark stale workers as offline
 *
 * Body:
 * - stale_threshold_minutes: number (default: 5)
 */
router.post('/cleanup', async (req: Request, res: Response) => {
  try {
    const { stale_threshold_minutes = 5 } = req.body;

    const { rows } = await pool.query(
      'SELECT mark_stale_workers($1) as count',
      [stale_threshold_minutes]
    );

    res.json({
      success: true,
      stale_workers_marked: rows[0].count,
      message: `Marked ${rows[0].count} stale workers as offline`
    });
  } catch (error: any) {
    res.status(500).json({ success: false, error: error.message });
  }
});

// ============================================================
// NAME POOL MANAGEMENT
// ============================================================

/**
 * GET /api/worker-registry/names
 * List all names in the pool
 */
router.get('/names', async (_req: Request, res: Response) => {
  try {
    const { rows } = await pool.query(`
      SELECT
        id,
        name,
        in_use,
        assigned_to,
        assigned_at
      FROM worker_name_pool
      ORDER BY in_use DESC, name ASC
    `);

    const { rows: summary } = await pool.query(`
      SELECT
        COUNT(*) as total,
        COUNT(*) FILTER (WHERE in_use = true) as in_use,
        COUNT(*) FILTER (WHERE in_use = false) as available
      FROM worker_name_pool
    `);

    res.json({
      success: true,
      names: rows,
      summary: summary[0]
    });
  } catch (error: any) {
    res.status(500).json({ success: false, error: error.message });
  }
});

/**
 * POST /api/worker-registry/names
 * Add names to the pool
 *
 * Body:
 * - names: string[] (required) - array of names to add
 */
router.post('/names', async (req: Request, res: Response) => {
  try {
    const { names } = req.body;

    if (!names || !Array.isArray(names) || names.length === 0) {
      return res.status(400).json({ success: false, error: 'names array is required' });
    }

    const values = names.map(n => `('${n.replace(/'/g, "''")}')`).join(', ');

    const { rowCount } = await pool.query(`
      INSERT INTO worker_name_pool (name)
      VALUES ${values}
      ON CONFLICT (name) DO NOTHING
    `);

    res.json({
      success: true,
      added: rowCount,
      message: `Added ${rowCount} new names to the pool`
    });
  } catch (error: any) {
    res.status(500).json({ success: false, error: error.message });
  }
});

/**
 * DELETE /api/worker-registry/names/:name
 * Remove a name from the pool (only if not in use)
 */
router.delete('/names/:name', async (req: Request, res: Response) => {
  try {
    const { name } = req.params;

    const { rows } = await pool.query(`
      DELETE FROM worker_name_pool
      WHERE name = $1 AND in_use = false
      RETURNING name
    `, [name]);

    if (rows.length === 0) {
      return res.status(400).json({
        success: false,
        error: 'Name not found or currently in use'
      });
    }

    res.json({ success: true, message: `Name "${name}" removed from pool` });
  } catch (error: any) {
    res.status(500).json({ success: false, error: error.message });
  }
});

// ============================================================
// ROLE MANAGEMENT
// ============================================================

/**
 * GET /api/worker-registry/roles
 * List available task roles
 */
router.get('/roles', async (_req: Request, res: Response) => {
  // These are the roles the task handlers support
  const roles = [
    {
      id: 'product_refresh',
      name: 'Product Refresh',
      description: 'Re-crawl dispensary products for price/stock changes',
      handler: 'handleProductRefresh'
    },
    {
      id: 'product_discovery',
      name: 'Product Discovery',
      description: 'Initial product discovery for new dispensaries',
      handler: 'handleProductDiscovery'
    },
    {
      id: 'store_discovery',
      name: 'Store Discovery',
      description: 'Discover new dispensary locations',
      handler: 'handleStoreDiscovery'
    },
    {
      id: 'entry_point_discovery',
      name: 'Entry Point Discovery',
      description: 'Resolve platform IDs from menu URLs',
      handler: 'handleEntryPointDiscovery'
    },
    {
      id: 'analytics_refresh',
      name: 'Analytics Refresh',
      description: 'Refresh materialized views and analytics',
      handler: 'handleAnalyticsRefresh'
    }
  ];

  // Get active worker counts per role
  try {
    const { rows } = await pool.query(`
      SELECT role, COUNT(*) as worker_count
      FROM worker_registry
      WHERE status IN ('active', 'idle')
      GROUP BY role
    `);

    const countMap = new Map(rows.map(r => [r.role, parseInt(r.worker_count)]));

    const rolesWithCounts = roles.map(r => ({
      ...r,
      active_workers: countMap.get(r.id) || 0
    }));

    res.json({ success: true, roles: rolesWithCounts });
  } catch {
    // If table doesn't exist yet, just return roles without counts
    res.json({ success: true, roles: roles.map(r => ({ ...r, active_workers: 0 })) });
  }
});

/**
 * GET /api/worker-registry/capacity
 * Get capacity planning info
 */
router.get('/capacity', async (_req: Request, res: Response) => {
  try {
    // Get worker counts by role
    const { rows: workerCounts } = await pool.query(`
      SELECT role, COUNT(*) as count
      FROM worker_registry
      WHERE status IN ('active', 'idle')
      GROUP BY role
    `);

    // Get pending task counts by role (if worker_tasks exists)
    let taskCounts: any[] = [];
    try {
      const result = await pool.query(`
        SELECT role, COUNT(*) as pending_count
        FROM worker_tasks
        WHERE status = 'pending'
        GROUP BY role
      `);
      taskCounts = result.rows;
    } catch {
      // worker_tasks might not exist yet
    }

    // Get crawl-enabled store count
    const storeCountResult = await pool.query(`
      SELECT COUNT(*) as count
      FROM dispensaries
      WHERE crawl_enabled = true AND platform_dispensary_id IS NOT NULL
    `);
    const totalStores = parseInt(storeCountResult.rows[0].count);

    const workerMap = new Map(workerCounts.map(r => [r.role, parseInt(r.count)]));
    const taskMap = new Map(taskCounts.map(r => [r.role, parseInt(r.pending_count)]));

    const roles = ['product_refresh', 'product_discovery', 'store_discovery', 'entry_point_discovery', 'analytics_refresh'];

    const capacity = roles.map(role => ({
      role,
      active_workers: workerMap.get(role) || 0,
      pending_tasks: taskMap.get(role) || 0,
      // Rough estimate: 20 seconds per task, 4-hour cycle
      tasks_per_worker_per_cycle: 720,
      workers_needed_for_all_stores: Math.ceil(totalStores / 720)
    }));

    res.json({
      success: true,
      total_stores: totalStores,
      capacity
    });
  } catch (error: any) {
    res.status(500).json({ success: false, error: error.message });
  }
});

export default router;
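Seen from the worker side, the registry protocol is register, then heartbeat, then deregister on shutdown. A hedged lifecycle sketch follows; the endpoint paths come from the routes above, while the base URL, the 60-second interval, and the SIGTERM handling are assumptions.

// Hedged sketch of a worker's lifecycle against the registry API.
async function runWorker(baseUrl: string): Promise<void> {
  const post = (path: string, body: unknown) =>
    fetch(`${baseUrl}${path}`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(body),
    }).then(r => r.json());

  // 1. Register (role omitted -> role-agnostic worker) and keep the assigned id.
  const reg = await post('/api/worker-registry/register', { hostname: 'crawler-01' });
  const workerId = reg.worker_id;
  console.log(reg.message);

  // 2. Heartbeat every 60s so the cleanup job never marks this worker stale.
  const timer = setInterval(() => {
    post('/api/worker-registry/heartbeat', { worker_id: workerId, status: 'active' })
      .catch(err => console.error('heartbeat failed', err));
  }, 60_000);

  // 3. Deregister on graceful shutdown, releasing the friendly name.
  process.on('SIGTERM', async () => {
    clearInterval(timer);
    await post('/api/worker-registry/deregister', { worker_id: workerId });
    process.exit(0);
  });
}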
@@ -24,6 +24,95 @@ import { pool } from '../db/pool';
const router = Router();

// ============================================================
// STATIC ROUTES (must come before parameterized routes)
// ============================================================

/**
 * GET /api/workers/roles - List available worker roles
 */
router.get('/roles', async (_req: Request, res: Response) => {
  const roles = [
    { id: 'product_sync', name: 'Product Sync', description: 'Crawls products from dispensary menus' },
    { id: 'store_discovery', name: 'Store Discovery', description: 'Discovers new dispensary locations' },
    { id: 'entry_point_finder', name: 'Entry Point Finder', description: 'Detects menu providers and resolves platform IDs' },
    { id: 'analytics_refresh', name: 'Analytics Refresh', description: 'Refreshes materialized views and analytics' },
    { id: 'price_monitor', name: 'Price Monitor', description: 'Monitors price changes and triggers alerts' },
    { id: 'inventory_sync', name: 'Inventory Sync', description: 'Syncs inventory levels' },
    { id: 'image_processor', name: 'Image Processor', description: 'Downloads and processes product images' },
    { id: 'data_validator', name: 'Data Validator', description: 'Validates data integrity' },
    { id: 'custom', name: 'Custom', description: 'Custom worker role' },
  ];

  res.json({ success: true, roles });
});

/**
 * GET /api/workers/states - List available states for assignment
 */
router.get('/states', async (_req: Request, res: Response) => {
  try {
    const { rows } = await pool.query(`
      SELECT state_code, state_name, dispensary_count
      FROM states
      WHERE active = true
      ORDER BY state_name ASC
    `);
    res.json({ success: true, states: rows });
  } catch (error: any) {
    // Fallback if states table doesn't exist
    res.json({ success: true, states: [
      { state_code: 'AZ', state_name: 'Arizona', dispensary_count: 0 },
      { state_code: 'CA', state_name: 'California', dispensary_count: 0 },
      { state_code: 'CO', state_name: 'Colorado', dispensary_count: 0 },
      { state_code: 'MI', state_name: 'Michigan', dispensary_count: 0 },
      { state_code: 'NV', state_name: 'Nevada', dispensary_count: 0 },
    ]});
  }
});

/**
 * GET /api/workers/dispensaries - List dispensaries for assignment (paginated search)
 */
router.get('/dispensaries', async (req: Request, res: Response) => {
  try {
    const search = (req.query.search as string) || '';
    const limit = parseInt(req.query.limit as string) || 50;

    const { rows } = await pool.query(`
      SELECT id, name, city, state_code
      FROM dispensaries
      WHERE ($1 = '' OR name ILIKE $2)
      ORDER BY name ASC
      LIMIT $3
    `, [search, `%${search}%`, limit]);

    res.json({ success: true, dispensaries: rows });
  } catch (error: any) {
    console.error('[Workers] Error fetching dispensaries:', error);
    res.status(500).json({ success: false, error: error.message });
  }
});

/**
 * GET /api/workers/chains - List chains for assignment
 */
router.get('/chains', async (_req: Request, res: Response) => {
  try {
    const { rows } = await pool.query(`
      SELECT DISTINCT chain_id as id, chain_name as name, COUNT(*) as dispensary_count
      FROM dispensaries
      WHERE chain_id IS NOT NULL AND chain_name IS NOT NULL
      GROUP BY chain_id, chain_name
      ORDER BY chain_name ASC
    `);
    res.json({ success: true, chains: rows });
  } catch (error: any) {
    // Fallback if chain columns don't exist
    res.json({ success: true, chains: [] });
  }
});

// ============================================================
// WORKER TYPES
// ============================================================
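The "must come before parameterized routes" note matters because Express matches routes in registration order; a minimal sketch of the failure it prevents (handler names are placeholders, not from the source):

// If '/:id' were registered first, GET /api/workers/roles would be captured
// by the parameterized route with req.params.id === 'roles'.
router.get('/roles', listRoles);          // static routes first
router.get('/definitions', listDefinitions);
router.get('/:id', getWorkerById);        // parameterized routes last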
@@ -32,6 +121,7 @@ interface Worker {
  id: number;
  worker_name: string;
  run_role: string;
  job_name?: string;
  scope: string[];
  description: string;
  enabled: boolean;
@@ -40,6 +130,8 @@ interface Worker {
  next_run_at: string | null;
  last_run_at: string | null;
  last_status: string | null;
  last_error_message?: string | null;
  last_duration_ms?: number | null;
  last_seen: string | null;
  visibility_lost: number;
  visibility_restored: number;
@@ -124,15 +216,20 @@ router.get('/', async (_req: Request, res: Response) => {
        next_run_at,
        last_run_at,
        last_status,
        job_config
        last_error_message,
        last_duration_ms,
        job_config,
        worker_name,
        worker_role
      FROM job_schedules
      ORDER BY enabled DESC, last_run_at DESC NULLS LAST
    `);

    const workers: Worker[] = rows.map((row: any) => ({
      id: row.id,
      worker_name: extractWorkerName(row.job_name, row.job_config),
      worker_name: row.worker_name || extractWorkerName(row.job_name, row.job_config),
      run_role: extractRunRole(row.job_name, row.job_config),
      run_role: row.worker_role || extractRunRole(row.job_name, row.job_config),
      job_name: row.job_name,
      scope: parseScope(row.job_config),
      description: row.description || row.job_name,
      enabled: row.enabled,
@@ -141,6 +238,8 @@ router.get('/', async (_req: Request, res: Response) => {
      next_run_at: row.next_run_at?.toISOString() || null,
      last_run_at: row.last_run_at?.toISOString() || null,
      last_status: row.last_status,
      last_error_message: row.last_error_message,
      last_duration_ms: row.last_duration_ms,
      last_seen: row.last_run_at?.toISOString() || null,
      visibility_lost: 0,
      visibility_restored: 0,
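With the new columns mapped through, each entry returned by the worker listing now carries error and timing detail alongside the schedule fields; a hypothetical list entry (all values invented for illustration):

// Hypothetical example of one GET /api/workers list entry:
const example: Partial<Worker> = {
  id: 7,
  worker_name: 'AZ Product Refresh',
  run_role: 'product_refresh',
  last_status: 'failed',
  last_error_message: 'GraphQL request timed out',
  last_duration_ms: 18423,
  last_seen: '2024-05-01T12:00:00.000Z',
};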
@@ -619,4 +718,323 @@ router.get('/summary', async (req: Request, res: Response) => {
  }
});

// ============================================================
// WORKER CRUD ROUTES (using new workers table)
// ============================================================

/**
 * GET /api/workers/definitions - List all worker definitions from workers table
 */
router.get('/definitions', async (_req: Request, res: Response) => {
  try {
    const { rows } = await pool.query(`
      SELECT
        w.*,
        (SELECT COUNT(*) FROM dispensary_crawl_jobs j WHERE j.assigned_worker_id = w.id AND j.status = 'pending') as pending_jobs,
        (SELECT COUNT(*) FROM dispensary_crawl_jobs j WHERE j.assigned_worker_id = w.id AND j.status = 'running') as running_jobs
      FROM workers w
      ORDER BY w.enabled DESC, w.name ASC
    `);

    res.json({ success: true, workers: rows });
  } catch (error: any) {
    console.error('[Workers] Error listing worker definitions:', error);
    res.status(500).json({ success: false, error: error.message });
  }
});

/**
 * POST /api/workers/definitions - Create a new worker definition
 */
router.post('/definitions', async (req: Request, res: Response) => {
  try {
    const {
      name,
      role,
      description,
      enabled = true,
      schedule_type = 'interval',
      interval_minutes = 240,
      cron_expression,
      jitter_minutes = 30,
      assignment_type = 'all',
      assigned_state_codes,
      assigned_dispensary_ids,
      assigned_chain_ids,
      job_type = 'dutchie_product_crawl',
      job_config = {},
      priority = 0,
      max_concurrent = 1
    } = req.body;

    if (!name || !role) {
      return res.status(400).json({ success: false, error: 'name and role are required' });
    }

    const { rows } = await pool.query(`
      INSERT INTO workers (
        name, role, description, enabled,
        schedule_type, interval_minutes, cron_expression, jitter_minutes,
        assignment_type, assigned_state_codes, assigned_dispensary_ids, assigned_chain_ids,
        job_type, job_config, priority, max_concurrent
      ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16)
      RETURNING *
    `, [
      name, role, description, enabled,
      schedule_type, interval_minutes, cron_expression, jitter_minutes,
      assignment_type, assigned_state_codes, assigned_dispensary_ids, assigned_chain_ids,
      job_type, job_config, priority, max_concurrent
    ]);

    // Also create a job_schedule entry for backwards compatibility
    await pool.query(`
      INSERT INTO job_schedules (job_name, description, enabled, base_interval_minutes, jitter_minutes, worker_name, worker_role, job_config)
      VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
      ON CONFLICT (job_name) DO UPDATE SET
        description = EXCLUDED.description,
        enabled = EXCLUDED.enabled,
        base_interval_minutes = EXCLUDED.base_interval_minutes,
        jitter_minutes = EXCLUDED.jitter_minutes,
        worker_name = EXCLUDED.worker_name,
        worker_role = EXCLUDED.worker_role,
        updated_at = NOW()
    `, [
      `worker_${name.toLowerCase().replace(/\s+/g, '_')}`,
      description || `Worker: ${name}`,
      enabled,
      interval_minutes,
      jitter_minutes,
      name,
      role,
      job_config
    ]);

    res.json({ success: true, worker: rows[0], message: 'Worker created' });
  } catch (error: any) {
    console.error('[Workers] Error creating worker:', error);
    if (error.code === '23505') { // unique violation
      return res.status(400).json({ success: false, error: 'Worker name already exists' });
    }
    res.status(500).json({ success: false, error: error.message });
  }
});

/**
 * PUT /api/workers/definitions/:id - Update a worker definition
 */
router.put('/definitions/:id', async (req: Request, res: Response) => {
  try {
    const { id } = req.params;
    const {
      name,
      role,
      description,
      enabled,
      schedule_type,
      interval_minutes,
      cron_expression,
      jitter_minutes,
      assignment_type,
      assigned_state_codes,
      assigned_dispensary_ids,
      assigned_chain_ids,
      job_type,
      job_config,
      priority,
      max_concurrent
    } = req.body;

    const { rows } = await pool.query(`
      UPDATE workers SET
        name = COALESCE($1, name),
        role = COALESCE($2, role),
        description = COALESCE($3, description),
        enabled = COALESCE($4, enabled),
        schedule_type = COALESCE($5, schedule_type),
        interval_minutes = COALESCE($6, interval_minutes),
        cron_expression = COALESCE($7, cron_expression),
        jitter_minutes = COALESCE($8, jitter_minutes),
        assignment_type = COALESCE($9, assignment_type),
        assigned_state_codes = COALESCE($10, assigned_state_codes),
        assigned_dispensary_ids = COALESCE($11, assigned_dispensary_ids),
        assigned_chain_ids = COALESCE($12, assigned_chain_ids),
        job_type = COALESCE($13, job_type),
        job_config = COALESCE($14, job_config),
        priority = COALESCE($15, priority),
        max_concurrent = COALESCE($16, max_concurrent),
        updated_at = NOW()
      WHERE id = $17
      RETURNING *
    `, [
      name, role, description, enabled,
      schedule_type, interval_minutes, cron_expression, jitter_minutes,
      assignment_type, assigned_state_codes, assigned_dispensary_ids, assigned_chain_ids,
      job_type, job_config, priority, max_concurrent,
      id
    ]);

    if (rows.length === 0) {
      return res.status(404).json({ success: false, error: 'Worker not found' });
    }

    res.json({ success: true, worker: rows[0], message: 'Worker updated' });
  } catch (error: any) {
    console.error('[Workers] Error updating worker:', error);
    res.status(500).json({ success: false, error: error.message });
  }
});

/**
 * DELETE /api/workers/definitions/:id - Delete a worker definition
 */
router.delete('/definitions/:id', async (req: Request, res: Response) => {
  try {
    const { id } = req.params;

    const { rows } = await pool.query(`
      DELETE FROM workers WHERE id = $1 RETURNING name
    `, [id]);

    if (rows.length === 0) {
      return res.status(404).json({ success: false, error: 'Worker not found' });
    }

    res.json({ success: true, message: `Worker "${rows[0].name}" deleted` });
  } catch (error: any) {
    console.error('[Workers] Error deleting worker:', error);
    res.status(500).json({ success: false, error: error.message });
  }
});

/**
 * POST /api/workers/definitions/:id/toggle - Enable/disable worker
 */
router.post('/definitions/:id/toggle', async (req: Request, res: Response) => {
  try {
    const { id } = req.params;

    const { rows } = await pool.query(`
      UPDATE workers SET enabled = NOT enabled, updated_at = NOW()
      WHERE id = $1
      RETURNING id, name, enabled
    `, [id]);

    if (rows.length === 0) {
      return res.status(404).json({ success: false, error: 'Worker not found' });
    }

    res.json({ success: true, worker: rows[0], message: `Worker ${rows[0].enabled ? 'enabled' : 'disabled'}` });
  } catch (error: any) {
    res.status(500).json({ success: false, error: error.message });
  }
});

/**
 * POST /api/workers/definitions/:id/assign-dispensary - Assign dispensary to worker
 */
router.post('/definitions/:id/assign-dispensary', async (req: Request, res: Response) => {
  try {
    const { id } = req.params;
    const { dispensary_id } = req.body;

    if (!dispensary_id) {
      return res.status(400).json({ success: false, error: 'dispensary_id is required' });
    }

    const { rows } = await pool.query(`
      UPDATE workers SET
        assigned_dispensary_ids = array_append(
          COALESCE(assigned_dispensary_ids, ARRAY[]::integer[]),
          $1::integer
        ),
        assignment_type = 'dispensary',
        updated_at = NOW()
      WHERE id = $2 AND NOT ($1 = ANY(COALESCE(assigned_dispensary_ids, ARRAY[]::integer[])))
      RETURNING id, name, assigned_dispensary_ids
    `, [dispensary_id, id]);

    if (rows.length === 0) {
      // Check if dispensary was already assigned
      const existing = await pool.query(`
        SELECT assigned_dispensary_ids FROM workers WHERE id = $1
      `, [id]);

      if (existing.rows.length === 0) {
        return res.status(404).json({ success: false, error: 'Worker not found' });
      }

      return res.json({ success: true, message: 'Dispensary already assigned', worker: existing.rows[0] });
    }

    res.json({ success: true, worker: rows[0], message: 'Dispensary assigned to worker' });
  } catch (error: any) {
    res.status(500).json({ success: false, error: error.message });
  }
});

/**
 * DELETE /api/workers/definitions/:id/assign-dispensary/:dispensaryId - Remove dispensary from worker
 */
router.delete('/definitions/:id/assign-dispensary/:dispensaryId', async (req: Request, res: Response) => {
  try {
    const { id, dispensaryId } = req.params;

    const { rows } = await pool.query(`
      UPDATE workers SET
        assigned_dispensary_ids = array_remove(assigned_dispensary_ids, $1::integer),
        updated_at = NOW()
      WHERE id = $2
      RETURNING id, name, assigned_dispensary_ids
    `, [dispensaryId, id]);

    if (rows.length === 0) {
      return res.status(404).json({ success: false, error: 'Worker not found' });
    }

    res.json({ success: true, worker: rows[0], message: 'Dispensary removed from worker' });
  } catch (error: any) {
    res.status(500).json({ success: false, error: error.message });
  }
});

/**
 * PUT /api/workers/:id/schedule - Update worker schedule (for job_schedules table)
 */
router.put('/:id/schedule', async (req: Request, res: Response) => {
  try {
    const { id } = req.params;
    const {
      worker_name,
      worker_role,
      description,
      enabled,
      base_interval_minutes,
      jitter_minutes,
      job_config
    } = req.body;

    const { rows } = await pool.query(`
      UPDATE job_schedules SET
        worker_name = COALESCE($1, worker_name),
        worker_role = COALESCE($2, worker_role),
        description = COALESCE($3, description),
        enabled = COALESCE($4, enabled),
        base_interval_minutes = COALESCE($5, base_interval_minutes),
        jitter_minutes = COALESCE($6, jitter_minutes),
        job_config = COALESCE($7, job_config),
        updated_at = NOW()
      WHERE id = $8
      RETURNING *
    `, [worker_name, worker_role, description, enabled, base_interval_minutes, jitter_minutes, job_config, id]);

    if (rows.length === 0) {
      return res.status(404).json({ success: false, error: 'Schedule not found' });
    }

    res.json({ success: true, schedule: rows[0], message: 'Schedule updated' });
  } catch (error: any) {
    res.status(500).json({ success: false, error: error.message });
  }
});

export default router;
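A hypothetical client call against the POST /api/workers/definitions route added above (field values are illustrative, not taken from the source):

const res = await fetch('/api/workers/definitions', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({
    name: 'AZ Product Refresh',       // hypothetical worker
    role: 'product_sync',
    assignment_type: 'state',
    assigned_state_codes: ['AZ'],
    interval_minutes: 240,            // 4-hour cycle
  }),
});
const { success, worker } = await res.json();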
backend/src/scraper-v2/canonical-pipeline.ts (new file, 353 lines)
@@ -0,0 +1,353 @@
/**
 * Canonical Database Pipeline
 *
 * Writes scraped products to the canonical tables:
 * - store_products (current state)
 * - store_product_snapshots (historical)
 * - product_variants (per-weight pricing)
 * - product_variant_snapshots (variant history)
 *
 * This replaces the legacy DatabasePipeline that wrote to `products` table.
 */

import { ItemPipeline, Product } from './types';
import { logger } from '../services/logger';
import { pool } from '../db/pool';
import { v4 as uuidv4 } from 'uuid';

interface VariantData {
  option: string;
  priceRec: number | null;
  priceMed: number | null;
  priceRecSpecial: number | null;
  priceMedSpecial: number | null;
  quantity: number | null;
  inStock: boolean;
  isOnSpecial: boolean;
}

/**
 * Parse weight string like "1g", "3.5g", "1/8oz" into value and unit
 */
function parseWeight(option: string): { value: number | null; unit: string | null } {
  if (!option) return { value: null, unit: null };

  // Match patterns like "1g", "3.5g", "1/8oz", "100mg"
  const match = option.match(/^([\d.\/]+)\s*(g|oz|mg|ml|each|pk|ct)?$/i);
  if (!match) return { value: null, unit: null };

  let value: number | null = null;
  const rawValue = match[1];
  const unit = match[2]?.toLowerCase() || null;

  // Handle fractions like "1/8"
  if (rawValue.includes('/')) {
    const [num, denom] = rawValue.split('/');
    value = parseFloat(num) / parseFloat(denom);
  } else {
    value = parseFloat(rawValue);
  }

  if (isNaN(value)) value = null;

  return { value, unit };
}

/**
 * Canonical Database Pipeline - saves items to canonical tables
 *
 * TABLES:
 * - store_products: Current product state per store
 * - store_product_snapshots: Historical snapshot per crawl
 * - product_variants: Current variant state (per-weight pricing)
 * - product_variant_snapshots: Historical variant snapshots
 */
export class CanonicalDatabasePipeline implements ItemPipeline<Product> {
  name = 'CanonicalDatabasePipeline';
  priority = 10; // Low priority - runs last

  private crawlRunId: number | null = null;

  setCrawlRunId(id: number): void {
    this.crawlRunId = id;
  }

  async process(item: Product, spider: string): Promise<Product | null> {
    const client = await pool.connect();

    try {
      // Extract metadata set by spider
      const dispensaryId = (item as any).dispensaryId;
      const categoryName = (item as any).categoryName;
      const variants: VariantData[] = (item as any).variants || [];

      if (!dispensaryId) {
        logger.error('pipeline', `Missing dispensaryId for ${item.name}`);
        return null;
      }

      const externalProductId = item.dutchieProductId || null;
      const provider = 'dutchie';

      // Determine stock status
      const isInStock = (item as any).inStock !== false;
      const stockQuantity = (item as any).stockQuantity || null;

      // Extract pricing
      const priceRec = item.price || null;
      const priceMed = (item as any).priceMed || null;

      let storeProductId: number | null = null;
      let isNewProduct = false;

      // ============================================================
      // UPSERT store_products
      // ============================================================

      const upsertResult = await client.query(`
        INSERT INTO store_products (
          dispensary_id, provider, provider_product_id,
          name_raw, brand_name_raw, category_raw,
          price_rec, price_med,
          thc_percent, cbd_percent,
          is_in_stock, stock_quantity,
          image_url, source_url,
          raw_data,
          first_seen_at, last_seen_at,
          created_at, updated_at
        ) VALUES (
          $1, $2, $3,
          $4, $5, $6,
          $7, $8,
          $9, $10,
          $11, $12,
          $13, $14,
          $15,
          NOW(), NOW(),
          NOW(), NOW()
        )
        ON CONFLICT (dispensary_id, provider, provider_product_id)
        DO UPDATE SET
          name_raw = EXCLUDED.name_raw,
          brand_name_raw = EXCLUDED.brand_name_raw,
          category_raw = EXCLUDED.category_raw,
          price_rec = EXCLUDED.price_rec,
          price_med = EXCLUDED.price_med,
          thc_percent = EXCLUDED.thc_percent,
          cbd_percent = EXCLUDED.cbd_percent,
          is_in_stock = EXCLUDED.is_in_stock,
          stock_quantity = EXCLUDED.stock_quantity,
          image_url = COALESCE(EXCLUDED.image_url, store_products.image_url),
          source_url = EXCLUDED.source_url,
          raw_data = EXCLUDED.raw_data,
          last_seen_at = NOW(),
          updated_at = NOW()
        RETURNING id, (xmax = 0) as is_new
      `, [
        dispensaryId, provider, externalProductId,
        item.name, item.brand || null, categoryName || null,
        priceRec, priceMed,
        item.thcPercentage || null, item.cbdPercentage || null,
        isInStock, stockQuantity,
        item.imageUrl || null, item.dutchieUrl || null,
        JSON.stringify(item.metadata || {}),
      ]);

      storeProductId = upsertResult.rows[0].id;
      isNewProduct = upsertResult.rows[0].is_new;

      logger.debug('pipeline', `${isNewProduct ? 'Inserted' : 'Updated'} canonical product: ${item.name} (ID: ${storeProductId})`);

      // ============================================================
      // INSERT store_product_snapshots
      // ============================================================

      await client.query(`
        INSERT INTO store_product_snapshots (
          store_product_id, dispensary_id, crawl_run_id,
          price_rec, price_med,
          is_in_stock, stock_quantity,
          is_present_in_feed,
          captured_at, created_at
        ) VALUES (
          $1, $2, $3,
          $4, $5,
          $6, $7,
          TRUE,
          NOW(), NOW()
        )
        ON CONFLICT (store_product_id, crawl_run_id) WHERE crawl_run_id IS NOT NULL
        DO UPDATE SET
          price_rec = EXCLUDED.price_rec,
          price_med = EXCLUDED.price_med,
          is_in_stock = EXCLUDED.is_in_stock,
          stock_quantity = EXCLUDED.stock_quantity
      `, [
        storeProductId, dispensaryId, this.crawlRunId,
        priceRec, priceMed,
        isInStock, stockQuantity,
      ]);

      // ============================================================
      // UPSERT product_variants (if variants exist)
      // ============================================================

      if (variants.length > 0) {
        for (const variant of variants) {
          const { value: weightValue, unit: weightUnit } = parseWeight(variant.option);

          const variantResult = await client.query(`
            INSERT INTO product_variants (
              store_product_id, dispensary_id,
              option,
              price_rec, price_med, price_rec_special, price_med_special,
              quantity, quantity_available, in_stock, is_on_special,
              weight_value, weight_unit,
              first_seen_at, last_seen_at,
              created_at, updated_at
            ) VALUES (
              $1, $2,
              $3,
              $4, $5, $6, $7,
              $8, $8, $9, $10,
              $11, $12,
              NOW(), NOW(),
              NOW(), NOW()
            )
            ON CONFLICT (store_product_id, option)
            DO UPDATE SET
              price_rec = EXCLUDED.price_rec,
              price_med = EXCLUDED.price_med,
              price_rec_special = EXCLUDED.price_rec_special,
              price_med_special = EXCLUDED.price_med_special,
              quantity = EXCLUDED.quantity,
              quantity_available = EXCLUDED.quantity_available,
              in_stock = EXCLUDED.in_stock,
              is_on_special = EXCLUDED.is_on_special,
              weight_value = EXCLUDED.weight_value,
              weight_unit = EXCLUDED.weight_unit,
              last_seen_at = NOW(),
              last_price_change_at = CASE
                WHEN product_variants.price_rec IS DISTINCT FROM EXCLUDED.price_rec
                  OR product_variants.price_rec_special IS DISTINCT FROM EXCLUDED.price_rec_special
                THEN NOW()
                ELSE product_variants.last_price_change_at
              END,
              last_stock_change_at = CASE
                WHEN product_variants.in_stock IS DISTINCT FROM EXCLUDED.in_stock
                THEN NOW()
                ELSE product_variants.last_stock_change_at
              END,
              updated_at = NOW()
            RETURNING id
          `, [
            storeProductId, dispensaryId,
            variant.option,
            variant.priceRec, variant.priceMed, variant.priceRecSpecial, variant.priceMedSpecial,
            variant.quantity, variant.inStock, variant.isOnSpecial,
            weightValue, weightUnit,
          ]);

          const variantId = variantResult.rows[0].id;

          // Insert variant snapshot
          await client.query(`
            INSERT INTO product_variant_snapshots (
              product_variant_id, store_product_id, dispensary_id, crawl_run_id,
              option,
              price_rec, price_med, price_rec_special, price_med_special,
              quantity, in_stock, is_on_special,
              is_present_in_feed,
              captured_at, created_at
            ) VALUES (
              $1, $2, $3, $4,
              $5,
              $6, $7, $8, $9,
              $10, $11, $12,
              TRUE,
              NOW(), NOW()
            )
          `, [
            variantId, storeProductId, dispensaryId, this.crawlRunId,
            variant.option,
            variant.priceRec, variant.priceMed, variant.priceRecSpecial, variant.priceMedSpecial,
            variant.quantity, variant.inStock, variant.isOnSpecial,
          ]);
        }

        logger.debug('pipeline', `Upserted ${variants.length} variants for ${item.name}`);
      }

      // Attach metadata for stats tracking
      (item as any).isNewProduct = isNewProduct;
      (item as any).storeProductId = storeProductId;

      return item;

    } catch (error) {
      logger.error('pipeline', `Failed to save canonical product ${item.name}: ${error}`);
      return null;
    } finally {
      client.release();
    }
  }
}

/**
 * Create a crawl run record before starting crawl
 */
export async function createCrawlRun(
  dispensaryId: number,
  provider: string = 'dutchie',
  triggerType: string = 'manual'
): Promise<number> {
  const result = await pool.query(`
    INSERT INTO crawl_runs (
      dispensary_id, provider,
      started_at, status, trigger_type
    ) VALUES ($1, $2, NOW(), 'running', $3)
    RETURNING id
  `, [dispensaryId, provider, triggerType]);

  return result.rows[0].id;
}

/**
 * Complete a crawl run with stats
 */
export async function completeCrawlRun(
  crawlRunId: number,
  stats: {
    productsFound: number;
    productsNew: number;
    productsUpdated: number;
    snapshotsWritten: number;
    variantsUpserted?: number;
    status?: 'completed' | 'failed' | 'partial';
    error?: string;
  }
): Promise<void> {
  await pool.query(`
    UPDATE crawl_runs SET
      finished_at = NOW(),
      status = $2,
      products_found = $3,
      products_new = $4,
      products_updated = $5,
      snapshots_written = $6,
      metadata = jsonb_build_object(
        'variants_upserted', $7,
        'error', $8
      )
    WHERE id = $1
  `, [
    crawlRunId,
    stats.status || 'completed',
    stats.productsFound,
    stats.productsNew,
    stats.productsUpdated,
    stats.snapshotsWritten,
    stats.variantsUpserted || 0,
    stats.error || null,
  ]);
}
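A minimal sketch of how the three exports above are meant to fit together around one crawl; the surrounding engine wiring and the dispensaryId are assumed, and the stats numbers are purely illustrative:

const pipeline = new CanonicalDatabasePipeline();
const crawlRunId = await createCrawlRun(dispensaryId, 'dutchie', 'scheduled');
pipeline.setCrawlRunId(crawlRunId);   // snapshots written below get tagged with this run

// ... engine runs, calling pipeline.process(item, spider) once per product ...

await completeCrawlRun(crawlRunId, {
  productsFound: 812,                 // illustrative counts
  productsNew: 14,
  productsUpdated: 798,
  snapshotsWritten: 812,
  variantsUpserted: 2300,
});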
@@ -2,6 +2,7 @@ import { RequestScheduler } from './scheduler';
import { Downloader } from './downloader';
import { MiddlewareEngine, UserAgentMiddleware, ProxyMiddleware, RateLimitMiddleware, RetryMiddleware, BotDetectionMiddleware, StealthMiddleware } from './middlewares';
import { PipelineEngine, ValidationPipeline, SanitizationPipeline, DeduplicationPipeline, ImagePipeline, DatabasePipeline, StatsPipeline } from './pipelines';
import { CanonicalDatabasePipeline, createCrawlRun, completeCrawlRun } from './canonical-pipeline';
import { ScraperRequest, ScraperResponse, ParseResult, Product, ScraperStats } from './types';
import { logger } from '../services/logger';
import { pool } from '../db/pool';
@@ -65,6 +66,9 @@ export class ScraperEngine {
    this.pipelineEngine.use(new DeduplicationPipeline());
    this.pipelineEngine.use(new ImagePipeline());
    this.pipelineEngine.use(new StatsPipeline());
    // Use canonical pipeline for writing to store_products/product_variants
    this.pipelineEngine.use(new CanonicalDatabasePipeline());
    // Keep legacy pipeline for backwards compatibility with existing stores table
    this.pipelineEngine.use(new DatabasePipeline());
  }

@@ -39,6 +39,11 @@ export {
  DatabasePipeline,
  StatsPipeline
} from './pipelines';
export {
  CanonicalDatabasePipeline,
  createCrawlRun,
  completeCrawlRun
} from './canonical-pipeline';
export * from './types';

// Main API functions
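With the barrel export added in the last hunk, callers outside the scraper module can import the canonical pipeline from the module's entry point instead of the file path; the relative path below is assumed from the hunk context:

import { CanonicalDatabasePipeline, createCrawlRun, completeCrawlRun } from './scraper-v2';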
backend/src/scripts/crawl-single-store.ts (new file, 250 lines)
@@ -0,0 +1,250 @@
#!/usr/bin/env npx tsx
/**
 * Crawl Single Store - Verbose test showing each step
 *
 * Usage:
 *   DATABASE_URL="postgresql://dutchie:dutchie_local_pass@localhost:54320/dutchie_menus" \
 *     npx tsx src/scripts/crawl-single-store.ts <dispensaryId>
 *
 * Example:
 *   DATABASE_URL="..." npx tsx src/scripts/crawl-single-store.ts 112
 */

import { Pool } from 'pg';
import dotenv from 'dotenv';
import {
  executeGraphQL,
  startSession,
  endSession,
  getFingerprint,
  GRAPHQL_HASHES,
  DUTCHIE_CONFIG,
} from '../platforms/dutchie';

dotenv.config();

// ============================================================
// DATABASE CONNECTION
// ============================================================

function getConnectionString(): string {
  if (process.env.DATABASE_URL) {
    return process.env.DATABASE_URL;
  }
  if (process.env.CANNAIQ_DB_URL) {
    return process.env.CANNAIQ_DB_URL;
  }
  const host = process.env.CANNAIQ_DB_HOST || 'localhost';
  const port = process.env.CANNAIQ_DB_PORT || '54320';
  const name = process.env.CANNAIQ_DB_NAME || 'dutchie_menus';
  const user = process.env.CANNAIQ_DB_USER || 'dutchie';
  const pass = process.env.CANNAIQ_DB_PASS || 'dutchie_local_pass';
  return `postgresql://${user}:${pass}@${host}:${port}/${name}`;
}

const pool = new Pool({ connectionString: getConnectionString() });

// ============================================================
// MAIN
// ============================================================

async function main() {
  const dispensaryId = parseInt(process.argv[2], 10);

  if (!dispensaryId) {
    console.error('Usage: npx tsx src/scripts/crawl-single-store.ts <dispensaryId>');
    console.error('Example: npx tsx src/scripts/crawl-single-store.ts 112');
    process.exit(1);
  }

  console.log('');
  console.log('╔════════════════════════════════════════════════════════════╗');
  console.log('║            SINGLE STORE CRAWL - VERBOSE OUTPUT               ║');
  console.log('╚════════════════════════════════════════════════════════════╝');
  console.log('');

  try {
    // ============================================================
    // STEP 1: Get dispensary info from database
    // ============================================================
    console.log('┌─────────────────────────────────────────────────────────────┐');
    console.log('│ STEP 1: Load Dispensary Info from Database                   │');
    console.log('└─────────────────────────────────────────────────────────────┘');

    const dispResult = await pool.query(`
      SELECT
        id,
        name,
        platform_dispensary_id,
        menu_url,
        menu_type,
        city,
        state
      FROM dispensaries
      WHERE id = $1
    `, [dispensaryId]);

    if (dispResult.rows.length === 0) {
      throw new Error(`Dispensary ${dispensaryId} not found`);
    }

    const disp = dispResult.rows[0];
    console.log(`  Dispensary ID:   ${disp.id}`);
    console.log(`  Name:            ${disp.name}`);
    console.log(`  City, State:     ${disp.city}, ${disp.state}`);
    console.log(`  Menu Type:       ${disp.menu_type}`);
    console.log(`  Platform ID:     ${disp.platform_dispensary_id}`);
    console.log(`  Menu URL:        ${disp.menu_url}`);

    if (!disp.platform_dispensary_id) {
      throw new Error('Dispensary does not have a platform_dispensary_id - cannot crawl');
    }

    // Extract cName from menu_url
    const cNameMatch = disp.menu_url?.match(/\/(?:embedded-menu|dispensary)\/([^/?]+)/);
    const cName = cNameMatch ? cNameMatch[1] : 'dispensary';
    console.log(`  cName (derived): ${cName}`);
    console.log('');

    // ============================================================
    // STEP 2: Start stealth session
    // ============================================================
    console.log('┌─────────────────────────────────────────────────────────────┐');
    console.log('│ STEP 2: Start Stealth Session                                 │');
    console.log('└─────────────────────────────────────────────────────────────┘');

    // Use Arizona timezone for this store
    const session = startSession(disp.state || 'AZ', 'America/Phoenix');

    const fp = getFingerprint();
    console.log(`  Session ID:      ${session.sessionId}`);
    console.log(`  User-Agent:      ${fp.userAgent.slice(0, 60)}...`);
    console.log(`  Accept-Language: ${fp.acceptLanguage}`);
    console.log(`  Sec-CH-UA:       ${fp.secChUa || '(not set)'}`);
    console.log('');

    // ============================================================
    // STEP 3: Execute GraphQL query
    // ============================================================
    console.log('┌─────────────────────────────────────────────────────────────┐');
    console.log('│ STEP 3: Execute GraphQL Query (FilteredProducts)              │');
    console.log('└─────────────────────────────────────────────────────────────┘');

    const variables = {
      includeEnterpriseSpecials: false,
      productsFilter: {
        dispensaryId: disp.platform_dispensary_id,
        pricingType: 'rec',
        Status: 'Active',
        types: [],
        useCache: true,
        isDefaultSort: true,
        sortBy: 'popularSortIdx',
        sortDirection: 1,
        bypassOnlineThresholds: true,
        isKioskMenu: false,
        removeProductsBelowOptionThresholds: false,
      },
      page: 0,
      perPage: 100,
    };

    console.log(`  Endpoint:     ${DUTCHIE_CONFIG.graphqlEndpoint}`);
    console.log(`  Operation:    FilteredProducts`);
    console.log(`  Hash:         ${GRAPHQL_HASHES.FilteredProducts.slice(0, 20)}...`);
    console.log(`  dispensaryId: ${variables.productsFilter.dispensaryId}`);
    console.log(`  pricingType:  ${variables.productsFilter.pricingType}`);
    console.log(`  Status:       ${variables.productsFilter.Status}`);
    console.log(`  perPage:      ${variables.perPage}`);
    console.log('');
    console.log('  Sending request...');

    const startTime = Date.now();
    const result = await executeGraphQL(
      'FilteredProducts',
      variables,
      GRAPHQL_HASHES.FilteredProducts,
      { cName, maxRetries: 3 }
    );
    const elapsed = Date.now() - startTime;

    console.log(`  Response time: ${elapsed}ms`);
    console.log('');

    // ============================================================
    // STEP 4: Process response
    // ============================================================
    console.log('┌─────────────────────────────────────────────────────────────┐');
    console.log('│ STEP 4: Process Response                                      │');
    console.log('└─────────────────────────────────────────────────────────────┘');

    const data = result?.data?.filteredProducts;
    if (!data) {
      console.log('  ERROR: No data returned from GraphQL');
      console.log('  Raw result:', JSON.stringify(result, null, 2).slice(0, 500));
      endSession();
      return;
    }

    const products = data.products || [];
    const totalCount = data.queryInfo?.totalCount || 0;
    const totalPages = Math.ceil(totalCount / 100);

    console.log(`  Total products:   ${totalCount}`);
    console.log(`  Products in page: ${products.length}`);
    console.log(`  Total pages:      ${totalPages}`);
    console.log('');

    // Show first few products
    console.log('  First 5 products:');
    console.log('  ─────────────────────────────────────────────────────────');
    for (let i = 0; i < Math.min(5, products.length); i++) {
      const p = products[i];
      const name = (p.name || 'Unknown').slice(0, 40);
      const brand = (p.brand?.name || 'Unknown').slice(0, 15);
      const price = p.Prices?.[0]?.price || p.medPrice || p.recPrice || 'N/A';
      const category = p.type || p.category || 'N/A';
      console.log(`  ${i + 1}. ${name.padEnd(42)} | ${brand.padEnd(17)} | $${price}`);
    }
    console.log('');

    // ============================================================
    // STEP 5: End session
    // ============================================================
    console.log('┌─────────────────────────────────────────────────────────────┐');
    console.log('│ STEP 5: End Session                                           │');
    console.log('└─────────────────────────────────────────────────────────────┘');

    endSession();
    console.log('');

    // ============================================================
    // SUMMARY
    // ============================================================
    console.log('╔════════════════════════════════════════════════════════════╗');
    console.log('║                          SUMMARY                             ║');
    console.log('╠════════════════════════════════════════════════════════════╣');
    console.log(`║ Store:           ${disp.name.slice(0, 38).padEnd(38)}    ║`);
    console.log(`║ Products Found:  ${String(totalCount).padEnd(38)}    ║`);
    console.log(`║ Response Time:   ${(elapsed + 'ms').padEnd(38)}    ║`);
    console.log(`║ Status:          ${'SUCCESS'.padEnd(38)}    ║`);
    console.log('╚════════════════════════════════════════════════════════════╝');

  } catch (error: any) {
    console.error('');
    console.error('╔════════════════════════════════════════════════════════════╗');
    console.error('║                           ERROR                              ║');
    console.error('╚════════════════════════════════════════════════════════════╝');
    console.error(`  ${error.message}`);
    if (error.stack) {
      console.error('');
      console.error('Stack trace:');
      console.error(error.stack.split('\n').slice(0, 5).join('\n'));
    }
    process.exit(1);
  } finally {
    await pool.end();
  }
}

main();
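For reference, the cName derivation in STEP 1 just pulls the slug segment out of the stored menu URL; a quick illustration with a made-up URL:

// Hypothetical menu_url, for illustration only:
const menuUrl = 'https://dutchie.com/embedded-menu/example-dispensary-tempe?menuType=rec';
const m = menuUrl.match(/\/(?:embedded-menu|dispensary)\/([^/?]+)/);
console.log(m?.[1]); // -> 'example-dispensary-tempe'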
backend/src/scripts/discover-all-states.ts (new file, 385 lines)
@@ -0,0 +1,385 @@
#!/usr/bin/env npx tsx
/**
 * Discover All States - Sequential State-by-State Dutchie Discovery
 *
 * This script discovers all Dutchie dispensaries for every US state,
 * processing one state at a time with delays between states.
 *
 * Progress is automatically saved to /tmp/discovery-progress.json
 * so the script can resume from where it left off if interrupted.
 *
 * Usage:
 *   DATABASE_URL="..." npx tsx src/scripts/discover-all-states.ts
 *   DATABASE_URL="..." npx tsx src/scripts/discover-all-states.ts --dry-run
 *   DATABASE_URL="..." npx tsx src/scripts/discover-all-states.ts --start-from CA
 *   DATABASE_URL="..." npx tsx src/scripts/discover-all-states.ts --resume
 *   DATABASE_URL="..." npx tsx src/scripts/discover-all-states.ts --reset   # Clear progress, start fresh
 *
 * Options:
 *   --dry-run     Don't save to database, just show what would happen
 *   --start-from  Start from a specific state (skip earlier states)
 *   --states      Comma-separated list of specific states to run (e.g., AZ,CA,CO)
 *   --verbose     Show detailed output
 *   --resume      Auto-resume from last saved progress (default if progress file exists)
 *   --reset       Clear progress file and start fresh
 */

import { Pool } from 'pg';
import * as fs from 'fs';
import * as path from 'path';

const PROGRESS_FILE = '/tmp/discovery-progress.json';

interface ProgressData {
  lastCompletedState: string | null;
  lastCompletedIndex: number;
  startedAt: string;
  updatedAt: string;
  completedStates: string[];
}

function loadProgress(): ProgressData | null {
  try {
    if (fs.existsSync(PROGRESS_FILE)) {
      const data = JSON.parse(fs.readFileSync(PROGRESS_FILE, 'utf-8'));
      return data;
    }
  } catch (e) {
    console.warn('[Progress] Could not load progress file:', e);
  }
  return null;
}

function saveProgress(progress: ProgressData): void {
  try {
    progress.updatedAt = new Date().toISOString();
    fs.writeFileSync(PROGRESS_FILE, JSON.stringify(progress, null, 2));
  } catch (e) {
    console.warn('[Progress] Could not save progress:', e);
  }
}

function clearProgress(): void {
  try {
    if (fs.existsSync(PROGRESS_FILE)) {
      fs.unlinkSync(PROGRESS_FILE);
      console.log('[Progress] Cleared progress file');
    }
  } catch (e) {
    console.warn('[Progress] Could not clear progress:', e);
  }
}

import { discoverState } from '../discovery';

// US states with legal cannabis (medical or recreational)
// Ordered roughly by market size / likelihood of Dutchie presence
const US_STATES = [
  'AZ', // Arizona
  'CA', // California
  'CO', // Colorado
  'FL', // Florida
  'IL', // Illinois
  'MA', // Massachusetts
  'MI', // Michigan
  'NV', // Nevada
  'NJ', // New Jersey
  'NY', // New York
  'OH', // Ohio
  'OR', // Oregon
  'PA', // Pennsylvania
  'WA', // Washington
  'MD', // Maryland
  'MO', // Missouri
  'CT', // Connecticut
  'NM', // New Mexico
  'ME', // Maine
  'VT', // Vermont
  'MT', // Montana
  'AK', // Alaska
  'OK', // Oklahoma
  'AR', // Arkansas
  'ND', // North Dakota
  'SD', // South Dakota
  'MN', // Minnesota
  'NH', // New Hampshire
  'RI', // Rhode Island
  'DE', // Delaware
  'HI', // Hawaii
  'WV', // West Virginia
  'LA', // Louisiana
  'UT', // Utah
  'VA', // Virginia
  'DC', // District of Columbia
];

interface DiscoveryResult {
  stateCode: string;
  citiesCrawled: number;
  locationsFound: number;
  locationsUpserted: number;
  durationMs: number;
  errors: string[];
}

function parseArgs() {
  const args = process.argv.slice(2);
  const flags: Record<string, string | boolean> = {};

  for (let i = 0; i < args.length; i++) {
    const arg = args[i];
    if (arg.startsWith('--')) {
      const [key, value] = arg.slice(2).split('=');
      if (value !== undefined) {
        flags[key] = value;
      } else if (args[i + 1] && !args[i + 1].startsWith('--')) {
        flags[key] = args[i + 1];
        i++;
      } else {
        flags[key] = true;
      }
    }
  }

  return flags;
}

async function main() {
  const flags = parseArgs();
  const dryRun = Boolean(flags['dry-run']);
  const verbose = Boolean(flags.verbose);
  const reset = Boolean(flags.reset);
  const resume = Boolean(flags.resume);
  let startFrom = flags['start-from'] as string | undefined;
  const specificStates = flags.states
    ? (flags.states as string).split(',').map((s) => s.trim().toUpperCase())
    : null;

  // Handle reset flag
  if (reset) {
    clearProgress();
  }

  // Determine which states to process
  let statesToProcess = specificStates || US_STATES;

  // Check for saved progress (auto-resume unless --reset or --start-from specified)
  const savedProgress = loadProgress();
  if (savedProgress && !reset && !startFrom && !specificStates) {
    const nextIndex = savedProgress.lastCompletedIndex + 1;
    if (nextIndex < US_STATES.length) {
      startFrom = US_STATES[nextIndex];
      console.log(`[Progress] Resuming from saved progress`);
      console.log(`[Progress] Last completed: ${savedProgress.lastCompletedState} (${savedProgress.completedStates.length} states done)`);
      console.log(`[Progress] Started at: ${savedProgress.startedAt}`);
      console.log(`[Progress] Last update: ${savedProgress.updatedAt}`);
      console.log('');
    } else {
      console.log(`[Progress] All states already completed! Use --reset to start over.`);
      process.exit(0);
    }
  }

  if (startFrom) {
    const startIndex = statesToProcess.indexOf(startFrom.toUpperCase());
    if (startIndex === -1) {
      console.error(`ERROR: State ${startFrom} not found in list`);
      process.exit(1);
    }
    statesToProcess = statesToProcess.slice(startIndex);
    console.log(`Starting from ${startFrom}, ${statesToProcess.length} states remaining`);
  }

  // Initialize progress tracking
  let progress: ProgressData = savedProgress || {
    lastCompletedState: null,
    lastCompletedIndex: -1,
    startedAt: new Date().toISOString(),
    updatedAt: new Date().toISOString(),
    completedStates: [],
  };

  console.log('='.repeat(70));
  console.log('DUTCHIE ALL-STATES DISCOVERY');
  console.log('='.repeat(70));
  console.log(`Mode: ${dryRun ? 'DRY RUN' : 'LIVE'}`);
  console.log(`States to process: ${statesToProcess.length}`);
  console.log(`States: ${statesToProcess.join(', ')}`);
  console.log('');

  // Create database pool
  const connectionString = process.env.DATABASE_URL;
  if (!connectionString) {
    console.error('ERROR: DATABASE_URL environment variable is required');
    process.exit(1);
  }
  const pool = new Pool({ connectionString });

  const results: DiscoveryResult[] = [];
  const startTime = Date.now();

  try {
    for (let i = 0; i < statesToProcess.length; i++) {
      const stateCode = statesToProcess[i];

      console.log('');
      console.log('─'.repeat(70));
      console.log(`[${i + 1}/${statesToProcess.length}] Discovering ${stateCode}...`);
      console.log('─'.repeat(70));

      try {
        const result = await discoverState(pool, stateCode, {
          dryRun,
          verbose,
          cityLimit: 200, // Allow up to 200 cities per state
        });

        const discoveryResult: DiscoveryResult = {
          stateCode,
          citiesCrawled: result.locations.length,
          locationsFound: result.totalLocationsFound,
          locationsUpserted: result.totalLocationsUpserted,
          durationMs: result.durationMs,
          errors: [],
        };

        // Collect errors from city results
        result.locations.forEach((loc) => {
          if (loc.errors && loc.errors.length > 0) {
            discoveryResult.errors.push(...loc.errors);
          }
        });

        results.push(discoveryResult);

        // Save progress after each successful state
        const stateIndex = US_STATES.indexOf(stateCode);
        progress.lastCompletedState = stateCode;
        progress.lastCompletedIndex = stateIndex;
        if (!progress.completedStates.includes(stateCode)) {
          progress.completedStates.push(stateCode);
        }
        saveProgress(progress);

        console.log(`\n[${stateCode}] COMPLETE:`);
        console.log(`  Cities crawled:     ${discoveryResult.citiesCrawled}`);
        console.log(`  Locations found:    ${discoveryResult.locationsFound}`);
        console.log(`  Locations upserted: ${discoveryResult.locationsUpserted}`);
        console.log(`  Duration:           ${(discoveryResult.durationMs / 1000).toFixed(1)}s`);
        console.log(`  Progress saved (${progress.completedStates.length}/${US_STATES.length} states)`);

        if (discoveryResult.errors.length > 0) {
          console.log(`  Errors: ${discoveryResult.errors.length}`);
        }

        // Delay between states to avoid rate limiting
        if (i < statesToProcess.length - 1) {
          const delaySeconds = 5;
          console.log(`\n  Waiting ${delaySeconds}s before next state...`);
          await new Promise((r) => setTimeout(r, delaySeconds * 1000));
        }
      } catch (error: any) {
        console.error(`\n[${stateCode}] ERROR: ${error.message}`);
        results.push({
          stateCode,
          citiesCrawled: 0,
          locationsFound: 0,
          locationsUpserted: 0,
          durationMs: 0,
          errors: [error.message],
        });

        // Continue to next state even on error
        await new Promise((r) => setTimeout(r, 3000));
      }
    }

    // Print summary
    const totalDuration = Date.now() - startTime;
    const totalLocations = results.reduce((sum, r) => sum + r.locationsFound, 0);
    const totalUpserted = results.reduce((sum, r) => sum + r.locationsUpserted, 0);
    const totalCities = results.reduce((sum, r) => sum + r.citiesCrawled, 0);
    const statesWithErrors = results.filter((r) => r.errors.length > 0);

    console.log('');
    console.log('='.repeat(70));
    console.log('DISCOVERY COMPLETE - SUMMARY');
    console.log('='.repeat(70));
    console.log(`Total states processed:   ${results.length}`);
    console.log(`Total cities crawled:     ${totalCities}`);
    console.log(`Total locations found:    ${totalLocations}`);
    console.log(`Total locations upserted: ${totalUpserted}`);
    console.log(`Total duration:           ${(totalDuration / 1000 / 60).toFixed(1)} minutes`);
    console.log('');

    if (statesWithErrors.length > 0) {
      console.log('States with errors:');
      statesWithErrors.forEach((r) => {
        console.log(`  ${r.stateCode}: ${r.errors.length} error(s)`);
      });
      console.log('');
    }

    // Print per-state breakdown
    console.log('Per-state results:');
    console.log('-'.repeat(70));
    console.log('State\tCities\tFound\tUpserted\tDuration\tStatus');
    console.log('-'.repeat(70));

    results.forEach((r) => {
      const status = r.errors.length > 0 ? 'ERRORS' : 'OK';
      const duration = (r.durationMs / 1000).toFixed(1) + 's';
      console.log(
        `${r.stateCode}\t${r.citiesCrawled}\t${r.locationsFound}\t${r.locationsUpserted}\t\t${duration}\t\t${status}`
      );
    });

    // Final count from database
    console.log('');
    console.log('='.repeat(70));
    console.log('DATABASE TOTALS');
    console.log('='.repeat(70));

    const { rows: locationCounts } = await pool.query(`
      SELECT
        state_code,
        COUNT(*) as count,
        COUNT(CASE WHEN status = 'discovered' THEN 1 END) as discovered,
        COUNT(CASE WHEN status = 'promoted' THEN 1 END) as promoted
      FROM dutchie_discovery_locations
      WHERE active = TRUE
      GROUP BY state_code
      ORDER BY count DESC
    `);

    console.log('State\tTotal\tDiscovered\tPromoted');
    console.log('-'.repeat(50));
    locationCounts.forEach((row: any) => {
      console.log(`${row.state_code || 'N/A'}\t${row.count}\t${row.discovered}\t\t${row.promoted}`);
    });

    const { rows: totalRow } = await pool.query(`
      SELECT COUNT(*) as total FROM dutchie_discovery_locations WHERE active = TRUE
    `);
    console.log('-'.repeat(50));
    console.log(`TOTAL: ${totalRow[0].total} locations in discovery table`)
|
||||||
|
|
||||||
|
const { rows: dispRow } = await pool.query(`
|
||||||
|
SELECT COUNT(*) as total FROM dispensaries WHERE menu_type = 'dutchie'
|
||||||
|
`);
|
||||||
|
console.log(`DISPENSARIES: ${dispRow[0].total} Dutchie dispensaries in main table`);
|
||||||
|
|
||||||
|
// Clear progress file on successful completion of all states
|
||||||
|
if (results.length === US_STATES.length || (savedProgress && progress.completedStates.length === US_STATES.length)) {
|
||||||
|
clearProgress();
|
||||||
|
console.log('\n[Progress] All states completed! Progress file cleared.');
|
||||||
|
}
|
||||||
|
|
||||||
|
} finally {
|
||||||
|
await pool.end();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
main().catch((error) => {
|
||||||
|
console.error('Fatal error:', error);
|
||||||
|
process.exit(1);
|
||||||
|
});
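The resume logic above only depends on three small helpers around a JSON progress file. The block below is a minimal sketch of how such helpers could look; the actual implementations and the progress-file path in this repo may differ, and the `ProgressData` shape is copied from the initializer at the top of `main()`.

```typescript
import fs from 'fs';
import path from 'path';

// Shape implied by the initializer in main(); standalone copy for this sketch.
interface ProgressData {
  lastCompletedState: string | null;
  lastCompletedIndex: number;
  startedAt: string;
  updatedAt: string;
  completedStates: string[];
}

// Assumed location for the progress file; the real script may use another path.
const PROGRESS_FILE = path.join(process.cwd(), '.discovery-progress.json');

function loadProgress(): ProgressData | null {
  if (!fs.existsSync(PROGRESS_FILE)) return null;
  try {
    return JSON.parse(fs.readFileSync(PROGRESS_FILE, 'utf8')) as ProgressData;
  } catch {
    return null; // treat a corrupt file as "no saved progress"
  }
}

function saveProgress(progress: ProgressData): void {
  progress.updatedAt = new Date().toISOString();
  fs.writeFileSync(PROGRESS_FILE, JSON.stringify(progress, null, 2));
}

function clearProgress(): void {
  if (fs.existsSync(PROGRESS_FILE)) fs.unlinkSync(PROGRESS_FILE);
}
```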
173  backend/src/scripts/estimate-bandwidth.ts  Normal file
@@ -0,0 +1,173 @@
import axios from 'axios';
|
||||||
|
import { Pool } from 'pg';
|
||||||
|
|
||||||
|
const DUTCHIE_GRAPHQL_URL = 'https://dutchie.com/graphql';
|
||||||
|
|
||||||
|
const MENU_PRODUCTS_QUERY = `
|
||||||
|
query FilteredProducts($productsFilter: ProductFilterInput!) {
|
||||||
|
filteredProducts(productsFilter: $productsFilter) {
|
||||||
|
products {
|
||||||
|
id
|
||||||
|
name
|
||||||
|
brand
|
||||||
|
category
|
||||||
|
subcategory
|
||||||
|
strainType
|
||||||
|
description
|
||||||
|
image
|
||||||
|
images {
|
||||||
|
id
|
||||||
|
url
|
||||||
|
}
|
||||||
|
posId
|
||||||
|
potencyCbd {
|
||||||
|
formatted
|
||||||
|
range
|
||||||
|
unit
|
||||||
|
}
|
||||||
|
potencyThc {
|
||||||
|
formatted
|
||||||
|
range
|
||||||
|
unit
|
||||||
|
}
|
||||||
|
variants {
|
||||||
|
id
|
||||||
|
option
|
||||||
|
price
|
||||||
|
priceMed
|
||||||
|
priceRec
|
||||||
|
quantity
|
||||||
|
specialPrice
|
||||||
|
}
|
||||||
|
status
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
`;
|
||||||
|
|
||||||
|
function formatBytes(bytes: number): string {
|
||||||
|
if (bytes < 1024) return `${bytes} B`;
|
||||||
|
if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(2)} KB`;
|
||||||
|
if (bytes < 1024 * 1024 * 1024) return `${(bytes / (1024 * 1024)).toFixed(2)} MB`;
|
||||||
|
return `${(bytes / (1024 * 1024 * 1024)).toFixed(2)} GB`;
|
||||||
|
}
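For reference, `formatBytes` rounds to two decimals at each unit boundary:

```typescript
// Expected behaviour of formatBytes at each unit boundary.
console.log(formatBytes(512));             // "512 B"
console.log(formatBytes(2048));            // "2.00 KB"
console.log(formatBytes(5 * 1024 * 1024)); // "5.00 MB"
console.log(formatBytes(3 * 1024 ** 3));   // "3.00 GB"
```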
|
||||||
|
|
||||||
|
async function measureRequest(dispensaryId: string, mode: 'A' | 'B') {
|
||||||
|
const variables: any = {
|
||||||
|
productsFilter: {
|
||||||
|
dispensaryId,
|
||||||
|
pricingType: 'rec',
|
||||||
|
Status: mode === 'A' ? 'Active' : null,
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
const requestBody = JSON.stringify({
|
||||||
|
query: MENU_PRODUCTS_QUERY,
|
||||||
|
variables,
|
||||||
|
});
|
||||||
|
|
||||||
|
const requestSize = Buffer.byteLength(requestBody, 'utf8');
|
||||||
|
|
||||||
|
try {
|
||||||
|
const response = await axios.post(DUTCHIE_GRAPHQL_URL, requestBody, {
|
||||||
|
headers: {
|
||||||
|
'Content-Type': 'application/json',
|
||||||
|
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
|
||||||
|
'Origin': 'https://dutchie.com',
|
||||||
|
},
|
||||||
|
timeout: 30000,
|
||||||
|
});
|
||||||
|
|
||||||
|
const responseSize = Buffer.byteLength(JSON.stringify(response.data), 'utf8');
|
||||||
|
const productCount = response.data?.data?.filteredProducts?.products?.length || 0;
|
||||||
|
|
||||||
|
// Debug: show what we got
|
||||||
|
if (productCount === 0) {
|
||||||
|
console.log(` Response preview: ${JSON.stringify(response.data).slice(0, 300)}...`);
|
||||||
|
}
|
||||||
|
|
||||||
|
return { requestSize, responseSize, productCount };
|
||||||
|
} catch (error: any) {
|
||||||
|
console.error(` Error: ${error.message}`);
|
||||||
|
if (error.response) {
|
||||||
|
console.error(` Status: ${error.response.status}`);
|
||||||
|
console.error(` Data: ${JSON.stringify(error.response.data).slice(0, 200)}`);
|
||||||
|
}
|
||||||
|
return { requestSize, responseSize: 0, productCount: 0, error: error.message };
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async function main() {
|
||||||
|
const pool = new Pool({ connectionString: process.env.DATABASE_URL });
|
||||||
|
|
||||||
|
// Get one store with products (use a known good ID)
|
||||||
|
const { rows } = await pool.query(`
|
||||||
|
SELECT d.platform_dispensary_id, d.name, COUNT(sp.id) as product_count
|
||||||
|
FROM dispensaries d
|
||||||
|
LEFT JOIN store_products sp ON d.id = sp.dispensary_id
|
||||||
|
WHERE d.platform_dispensary_id IS NOT NULL
|
||||||
|
GROUP BY d.id
|
||||||
|
ORDER BY product_count DESC
|
||||||
|
LIMIT 1
|
||||||
|
`);
|
||||||
|
|
||||||
|
if (rows.length === 0) {
|
||||||
|
console.log('No crawlable stores found');
|
||||||
|
await pool.end();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
const store = rows[0];
|
||||||
|
console.log('=== Dutchie GraphQL Bandwidth for One Store ===\n');
|
||||||
|
console.log(`Store: ${store.name}`);
|
||||||
|
console.log(`Platform ID: ${store.platform_dispensary_id}`);
|
||||||
|
console.log(`Products in DB: ${store.product_count || 'unknown'}\n`);
|
||||||
|
|
||||||
|
// Mode A (Active products with pricing)
|
||||||
|
console.log('Fetching Mode A (Active products)...');
|
||||||
|
const modeA = await measureRequest(store.platform_dispensary_id, 'A');
|
||||||
|
|
||||||
|
// Mode B (All products)
|
||||||
|
console.log('Fetching Mode B (All products)...');
|
||||||
|
const modeB = await measureRequest(store.platform_dispensary_id, 'B');
|
||||||
|
|
||||||
|
console.log('\n=== Results for ONE STORE ===');
|
||||||
|
console.log('\nMode A (Active products with pricing):');
|
||||||
|
console.log(` Request size: ${formatBytes(modeA.requestSize)}`);
|
||||||
|
console.log(` Response size: ${formatBytes(modeA.responseSize)}`);
|
||||||
|
console.log(` Products: ${modeA.productCount}`);
|
||||||
|
if (modeA.productCount > 0) {
|
||||||
|
console.log(` Per product: ${formatBytes(modeA.responseSize / modeA.productCount)}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
console.log('\nMode B (All products incl. OOS):');
|
||||||
|
console.log(` Request size: ${formatBytes(modeB.requestSize)}`);
|
||||||
|
console.log(` Response size: ${formatBytes(modeB.responseSize)}`);
|
||||||
|
console.log(` Products: ${modeB.productCount}`);
|
||||||
|
if (modeB.productCount > 0) {
|
||||||
|
console.log(` Per product: ${formatBytes(modeB.responseSize / modeB.productCount)}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
console.log('\nDual-Mode Crawl (what we actually do):');
|
||||||
|
const totalRequest = modeA.requestSize + modeB.requestSize;
|
||||||
|
const totalResponse = modeA.responseSize + modeB.responseSize;
|
||||||
|
const totalBandwidth = totalRequest + totalResponse;
|
||||||
|
console.log(` Total request: ${formatBytes(totalRequest)}`);
|
||||||
|
console.log(` Total response: ${formatBytes(totalResponse)}`);
|
||||||
|
console.log(` TOTAL BANDWIDTH: ${formatBytes(totalBandwidth)}`);
|
||||||
|
|
||||||
|
// Per-product average
|
||||||
|
const avgProducts = Math.max(modeA.productCount, modeB.productCount);
|
||||||
|
const bytesPerProduct = avgProducts > 0 ? totalResponse / avgProducts : 0;
|
||||||
|
|
||||||
|
console.log('\n=== Quick Reference ===');
|
||||||
|
console.log(`Average bytes per product: ~${formatBytes(bytesPerProduct)}`);
|
||||||
|
console.log(`\nTypical store sizes:`);
|
||||||
|
console.log(` Small (100 products): ~${formatBytes(bytesPerProduct * 100 + totalRequest)}`);
|
||||||
|
console.log(` Medium (300 products): ~${formatBytes(bytesPerProduct * 300 + totalRequest)}`);
|
||||||
|
console.log(` Large (500 products): ~${formatBytes(bytesPerProduct * 500 + totalRequest)}`);
|
||||||
|
|
||||||
|
await pool.end();
|
||||||
|
}
|
||||||
|
|
||||||
|
main().catch(console.error);
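The per-store numbers printed above can be extrapolated to a whole fleet with simple arithmetic. The sketch below is illustrative only; the store count and crawl frequency are placeholder assumptions, not measured values.

```typescript
// Rough fleet-wide extrapolation from one measured store (illustrative numbers only).
function estimateFleetBandwidth(
  bytesPerStore: number, // total request + response bytes for one dual-mode crawl
  storeCount: number,    // e.g. number of Dutchie dispensaries in the main table
  crawlsPerDay: number   // how often each store is crawled
): { perDayBytes: number; perMonthBytes: number } {
  const perDayBytes = bytesPerStore * storeCount * crawlsPerDay;
  return { perDayBytes, perMonthBytes: perDayBytes * 30 };
}

// Example: ~2 MB per store, 500 stores, 4 crawls/day ≈ 3.9 GB/day, ~117 GB/month.
const est = estimateFleetBandwidth(2 * 1024 * 1024, 500, 4);
console.log((est.perDayBytes / 1024 ** 3).toFixed(2), 'GB/day');
```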
137  backend/src/scripts/retry-platform-ids.ts  Normal file
@@ -0,0 +1,137 @@
#!/usr/bin/env npx tsx
|
||||||
|
/**
|
||||||
|
* Retry resolving platform IDs for Dutchie stores that have menu_url but no platform_dispensary_id
|
||||||
|
*
|
||||||
|
* Usage:
|
||||||
|
* npx tsx src/scripts/retry-platform-ids.ts
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { Pool } from 'pg';
|
||||||
|
import dotenv from 'dotenv';
|
||||||
|
import { resolveDispensaryIdWithDetails } from '../platforms/dutchie/queries';
|
||||||
|
|
||||||
|
dotenv.config();
|
||||||
|
|
||||||
|
const pool = new Pool({
|
||||||
|
connectionString: process.env.DATABASE_URL ||
|
||||||
|
`postgresql://${process.env.CANNAIQ_DB_USER || 'dutchie'}:${process.env.CANNAIQ_DB_PASS || 'dutchie_local_pass'}@${process.env.CANNAIQ_DB_HOST || 'localhost'}:${process.env.CANNAIQ_DB_PORT || '54320'}/${process.env.CANNAIQ_DB_NAME || 'dutchie_menus'}`
|
||||||
|
});
|
||||||
|
|
||||||
|
interface DispensaryRow {
|
||||||
|
id: number;
|
||||||
|
name: string;
|
||||||
|
menu_url: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
function extractSlugFromUrl(menuUrl: string): string | null {
|
||||||
|
// Extract slug from Dutchie URLs like:
|
||||||
|
// https://dutchie.com/stores/Nirvana-North-Phoenix
|
||||||
|
// https://dutchie.com/dispensary/curaleaf-dispensary-peoria
|
||||||
|
// https://dutchie.com/embedded-menu/some-slug
|
||||||
|
|
||||||
|
const patterns = [
|
||||||
|
/dutchie\.com\/stores\/([^/?]+)/i,
|
||||||
|
/dutchie\.com\/dispensary\/([^/?]+)/i,
|
||||||
|
/dutchie\.com\/embedded-menu\/([^/?]+)/i,
|
||||||
|
];
|
||||||
|
|
||||||
|
for (const pattern of patterns) {
|
||||||
|
const match = menuUrl.match(pattern);
|
||||||
|
if (match) {
|
||||||
|
return match[1];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
async function main() {
|
||||||
|
console.log('='.repeat(60));
|
||||||
|
console.log('Retry Platform ID Resolution');
|
||||||
|
console.log('='.repeat(60));
|
||||||
|
console.log('');
|
||||||
|
|
||||||
|
// Get Dutchie dispensaries with menu_url but no platform_dispensary_id
|
||||||
|
const result = await pool.query<DispensaryRow>(`
|
||||||
|
SELECT id, name, menu_url
|
||||||
|
FROM dispensaries
|
||||||
|
WHERE menu_type = 'dutchie'
|
||||||
|
AND menu_url IS NOT NULL AND menu_url != ''
|
||||||
|
AND (platform_dispensary_id IS NULL OR platform_dispensary_id = '')
|
||||||
|
ORDER BY name
|
||||||
|
`);
|
||||||
|
|
||||||
|
console.log(`Found ${result.rows.length} stores to retry\n`);
|
||||||
|
|
||||||
|
if (result.rows.length === 0) {
|
||||||
|
console.log('No stores need platform ID resolution.');
|
||||||
|
await pool.end();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
const successes: { id: number; name: string; platformId: string }[] = [];
|
||||||
|
const failures: { id: number; name: string; slug: string | null; error: string }[] = [];
|
||||||
|
|
||||||
|
for (const row of result.rows) {
|
||||||
|
console.log(`\n[${row.id}] ${row.name}`);
|
||||||
|
console.log(` URL: ${row.menu_url}`);
|
||||||
|
|
||||||
|
const slug = extractSlugFromUrl(row.menu_url);
|
||||||
|
if (!slug) {
|
||||||
|
console.log(` ❌ Could not extract slug from URL`);
|
||||||
|
failures.push({ id: row.id, name: row.name, slug: null, error: 'Could not extract slug' });
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
console.log(` Slug: ${slug}`);
|
||||||
|
|
||||||
|
try {
|
||||||
|
const resolveResult = await resolveDispensaryIdWithDetails(slug);
|
||||||
|
|
||||||
|
if (resolveResult.dispensaryId) {
|
||||||
|
console.log(` ✅ Resolved: ${resolveResult.dispensaryId}`);
|
||||||
|
|
||||||
|
// Update database
|
||||||
|
await pool.query(
|
||||||
|
'UPDATE dispensaries SET platform_dispensary_id = $1 WHERE id = $2',
|
||||||
|
[resolveResult.dispensaryId, row.id]
|
||||||
|
);
|
||||||
|
console.log(` 💾 Updated database`);
|
||||||
|
|
||||||
|
successes.push({ id: row.id, name: row.name, platformId: resolveResult.dispensaryId });
|
||||||
|
} else {
|
||||||
|
const errorMsg = resolveResult.error || 'Unknown error';
|
||||||
|
console.log(` ❌ Failed: ${errorMsg}`);
|
||||||
|
failures.push({ id: row.id, name: row.name, slug, error: errorMsg });
|
||||||
|
}
|
||||||
|
} catch (error: any) {
|
||||||
|
console.log(` ❌ Error: ${error.message}`);
|
||||||
|
failures.push({ id: row.id, name: row.name, slug, error: error.message });
|
||||||
|
}
|
||||||
|
|
||||||
|
// Small delay between requests
|
||||||
|
await new Promise(r => setTimeout(r, 500));
|
||||||
|
}
|
||||||
|
|
||||||
|
console.log('\n' + '='.repeat(60));
|
||||||
|
console.log('SUMMARY');
|
||||||
|
console.log('='.repeat(60));
|
||||||
|
|
||||||
|
console.log(`\n✅ Successes (${successes.length}):`);
|
||||||
|
for (const s of successes) {
|
||||||
|
console.log(` [${s.id}] ${s.name} -> ${s.platformId}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
console.log(`\n❌ Failures (${failures.length}):`);
|
||||||
|
for (const f of failures) {
|
||||||
|
console.log(` [${f.id}] ${f.name} (slug: ${f.slug || 'N/A'})`);
|
||||||
|
console.log(` ${f.error}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
await pool.end();
|
||||||
|
}
|
||||||
|
|
||||||
|
main().catch(e => {
|
||||||
|
console.error('Fatal error:', e);
|
||||||
|
process.exit(1);
|
||||||
|
});
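As a quick sanity check, `extractSlugFromUrl` behaves as follows for the URL shapes listed in its comment (the final URL is a hypothetical non-Dutchie example):

```typescript
console.log(extractSlugFromUrl('https://dutchie.com/stores/Nirvana-North-Phoenix'));
// -> "Nirvana-North-Phoenix"
console.log(extractSlugFromUrl('https://dutchie.com/dispensary/curaleaf-dispensary-peoria?param=1'));
// -> "curaleaf-dispensary-peoria" (query string is excluded by [^/?]+)
console.log(extractSlugFromUrl('https://dutchie.com/embedded-menu/some-slug/menu'));
// -> "some-slug" (trailing path segments are excluded)
console.log(extractSlugFromUrl('https://example.com/not-dutchie'));
// -> null
```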
|
||||||
@@ -30,8 +30,8 @@ import {
   discoverState,
   getDiscoveryStats,
   seedKnownCities,
-  ARIZONA_CITIES,
 } from '../discovery';
+import { getCitiesForState } from '../discovery/location-discovery';

 // Parse command line arguments
 function parseArgs() {

@@ -204,16 +204,22 @@ async function main() {
     process.exit(1);
   }

-  let cities: any[] = [];
-  if (stateCode.toUpperCase() === 'AZ') {
-    cities = ARIZONA_CITIES;
-  } else {
-    console.error(`No predefined cities for state: ${stateCode}`);
-    console.error('Add cities to city-discovery.ts ARIZONA_CITIES array (or add new state arrays)');
+  // Dynamically fetch cities from Dutchie
+  console.log(`\nFetching cities for ${stateCode} from Dutchie...\n`);
+  const cityNames = await getCitiesForState(stateCode.toUpperCase());
+
+  if (cityNames.length === 0) {
+    console.error(`No cities found for state: ${stateCode}`);
     process.exit(1);
   }

-  console.log(`\nSeeding ${cities.length} cities for ${stateCode}...\n`);
+  const cities = cityNames.map(name => ({
+    name,
+    slug: name.toLowerCase().replace(/\s+/g, '-').replace(/[^a-z0-9-]/g, ''),
+    stateCode: stateCode.toUpperCase(),
+  }));
+
+  console.log(`Seeding ${cities.length} cities for ${stateCode}...\n`);
   const result = await seedKnownCities(pool, cities);
   console.log(`Created: ${result.created} new cities`);
   console.log(`Updated: ${result.updated} existing cities`);
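The slug derivation added in this hunk is a plain lowercase/hyphenate/strip transform; for example (city names are illustrative):

```typescript
const toSlug = (name: string) =>
  name.toLowerCase().replace(/\s+/g, '-').replace(/[^a-z0-9-]/g, '');

console.log(toSlug('Fort Mohave'));      // "fort-mohave"
console.log(toSlug('Prescott Valley'));  // "prescott-valley"
console.log(toSlug('Queen Creek, AZ'));  // "queen-creek-az"
```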
277  backend/src/scripts/test-crawl-to-canonical.ts  Normal file
@@ -0,0 +1,277 @@
#!/usr/bin/env npx tsx
|
||||||
|
/**
|
||||||
|
* Test Script: Crawl a single dispensary and write to canonical tables
|
||||||
|
*
|
||||||
|
* This script:
|
||||||
|
* 1. Fetches products from Dutchie GraphQL
|
||||||
|
* 2. Normalizes via DutchieNormalizer
|
||||||
|
* 3. Writes to store_products, product_variants, snapshots via hydrateToCanonical
|
||||||
|
*
|
||||||
|
* Usage:
|
||||||
|
* npx tsx src/scripts/test-crawl-to-canonical.ts <dispensaryId>
|
||||||
|
* npx tsx src/scripts/test-crawl-to-canonical.ts 235
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { Pool } from 'pg';
|
||||||
|
import dotenv from 'dotenv';
|
||||||
|
import {
|
||||||
|
executeGraphQL,
|
||||||
|
GRAPHQL_HASHES,
|
||||||
|
DUTCHIE_CONFIG,
|
||||||
|
} from '../platforms/dutchie';
|
||||||
|
import {
|
||||||
|
DutchieNormalizer,
|
||||||
|
hydrateToCanonical,
|
||||||
|
} from '../hydration';
|
||||||
|
import { initializeImageStorage } from '../utils/image-storage';
|
||||||
|
|
||||||
|
dotenv.config();
|
||||||
|
|
||||||
|
// ============================================================
|
||||||
|
// DATABASE CONNECTION
|
||||||
|
// ============================================================
|
||||||
|
|
||||||
|
function getConnectionString(): string {
|
||||||
|
if (process.env.CANNAIQ_DB_URL) {
|
||||||
|
return process.env.CANNAIQ_DB_URL;
|
||||||
|
}
|
||||||
|
if (process.env.DATABASE_URL) {
|
||||||
|
return process.env.DATABASE_URL;
|
||||||
|
}
|
||||||
|
const host = process.env.CANNAIQ_DB_HOST || 'localhost';
|
||||||
|
const port = process.env.CANNAIQ_DB_PORT || '54320';
|
||||||
|
const name = process.env.CANNAIQ_DB_NAME || 'dutchie_menus';
|
||||||
|
const user = process.env.CANNAIQ_DB_USER || 'dutchie';
|
||||||
|
const pass = process.env.CANNAIQ_DB_PASS || 'dutchie_local_pass';
|
||||||
|
return `postgresql://${user}:${pass}@${host}:${port}/${name}`;
|
||||||
|
}
|
||||||
|
|
||||||
|
const pool = new Pool({ connectionString: getConnectionString() });
|
||||||
|
|
||||||
|
// ============================================================
|
||||||
|
// FETCH PRODUCTS FROM DUTCHIE
|
||||||
|
// ============================================================
|
||||||
|
|
||||||
|
interface FetchResult {
|
||||||
|
products: any[];
|
||||||
|
totalPages: number;
|
||||||
|
totalProducts: number;
|
||||||
|
}
|
||||||
|
|
||||||
|
async function fetchAllProducts(platformDispensaryId: string, cName: string): Promise<FetchResult> {
|
||||||
|
const allProducts: any[] = [];
|
||||||
|
let page = 0;
|
||||||
|
let totalPages = 1;
|
||||||
|
let totalProducts = 0;
|
||||||
|
|
||||||
|
console.log(`[Fetch] Starting fetch for ${platformDispensaryId} (cName: ${cName})`);
|
||||||
|
|
||||||
|
while (page < totalPages && page < DUTCHIE_CONFIG.maxPages) {
|
||||||
|
const variables = {
|
||||||
|
includeEnterpriseSpecials: false,
|
||||||
|
productsFilter: {
|
||||||
|
dispensaryId: platformDispensaryId,
|
||||||
|
pricingType: 'rec',
|
||||||
|
Status: 'Active', // 'Active' = in-stock products with pricing
|
||||||
|
types: [],
|
||||||
|
useCache: true,
|
||||||
|
isDefaultSort: true,
|
||||||
|
sortBy: 'popularSortIdx',
|
||||||
|
sortDirection: 1,
|
||||||
|
bypassOnlineThresholds: true,
|
||||||
|
isKioskMenu: false,
|
||||||
|
removeProductsBelowOptionThresholds: false,
|
||||||
|
},
|
||||||
|
page,
|
||||||
|
perPage: DUTCHIE_CONFIG.perPage,
|
||||||
|
};
|
||||||
|
|
||||||
|
try {
|
||||||
|
const result = await executeGraphQL(
|
||||||
|
'FilteredProducts',
|
||||||
|
variables,
|
||||||
|
GRAPHQL_HASHES.FilteredProducts,
|
||||||
|
{ cName, maxRetries: 3 }
|
||||||
|
);
|
||||||
|
|
||||||
|
const data = result?.data?.filteredProducts;
|
||||||
|
if (!data) {
|
||||||
|
console.error(`[Fetch] No data returned for page ${page}`);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
const products = data.products || [];
|
||||||
|
totalProducts = data.queryInfo?.totalCount || 0;
|
||||||
|
totalPages = Math.ceil(totalProducts / DUTCHIE_CONFIG.perPage);
|
||||||
|
|
||||||
|
allProducts.push(...products);
|
||||||
|
console.log(`[Fetch] Page ${page + 1}/${totalPages}: ${products.length} products (total so far: ${allProducts.length})`);
|
||||||
|
|
||||||
|
page++;
|
||||||
|
|
||||||
|
if (page < totalPages) {
|
||||||
|
await new Promise(r => setTimeout(r, DUTCHIE_CONFIG.pageDelayMs));
|
||||||
|
}
|
||||||
|
} catch (error: any) {
|
||||||
|
console.error(`[Fetch] Error on page ${page}: ${error.message}`);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return { products: allProducts, totalPages, totalProducts };
|
||||||
|
}
|
||||||
|
|
||||||
|
// ============================================================
|
||||||
|
// MAIN
|
||||||
|
// ============================================================
|
||||||
|
|
||||||
|
async function main() {
|
||||||
|
const dispensaryId = parseInt(process.argv[2], 10);
|
||||||
|
|
||||||
|
if (!dispensaryId) {
|
||||||
|
console.error('Usage: npx tsx src/scripts/test-crawl-to-canonical.ts <dispensaryId>');
|
||||||
|
console.error('Example: npx tsx src/scripts/test-crawl-to-canonical.ts 235');
|
||||||
|
process.exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
console.log('============================================================');
|
||||||
|
console.log(`Test Crawl to Canonical - Dispensary ${dispensaryId}`);
|
||||||
|
console.log('============================================================\n');
|
||||||
|
|
||||||
|
// Initialize image storage
|
||||||
|
console.log('[Init] Initializing image storage...');
|
||||||
|
await initializeImageStorage();
|
||||||
|
console.log(' Image storage ready\n');
|
||||||
|
|
||||||
|
try {
|
||||||
|
// Step 1: Get dispensary info
|
||||||
|
console.log('[Step 1] Getting dispensary info...');
|
||||||
|
const dispResult = await pool.query(`
|
||||||
|
SELECT id, name, platform_dispensary_id, menu_url
|
||||||
|
FROM dispensaries
|
||||||
|
WHERE id = $1
|
||||||
|
`, [dispensaryId]);
|
||||||
|
|
||||||
|
if (dispResult.rows.length === 0) {
|
||||||
|
throw new Error(`Dispensary ${dispensaryId} not found`);
|
||||||
|
}
|
||||||
|
|
||||||
|
const disp = dispResult.rows[0];
|
||||||
|
console.log(` Name: ${disp.name}`);
|
||||||
|
console.log(` Platform ID: ${disp.platform_dispensary_id}`);
|
||||||
|
console.log(` Menu URL: ${disp.menu_url}`);
|
||||||
|
|
||||||
|
if (!disp.platform_dispensary_id) {
|
||||||
|
throw new Error('Dispensary does not have a platform_dispensary_id');
|
||||||
|
}
|
||||||
|
|
||||||
|
// Extract cName from menu_url
|
||||||
|
const cNameMatch = disp.menu_url?.match(/\/(?:embedded-menu|dispensary)\/([^/?]+)/);
|
||||||
|
const cName = cNameMatch ? cNameMatch[1] : 'dispensary';
|
||||||
|
console.log(` cName: ${cName}\n`);
|
||||||
|
|
||||||
|
// Step 2: Fetch products from Dutchie
|
||||||
|
console.log('[Step 2] Fetching products from Dutchie GraphQL...');
|
||||||
|
const fetchResult = await fetchAllProducts(disp.platform_dispensary_id, cName);
|
||||||
|
console.log(` Total products fetched: ${fetchResult.products.length}\n`);
|
||||||
|
|
||||||
|
if (fetchResult.products.length === 0) {
|
||||||
|
console.log('No products fetched. Exiting.');
|
||||||
|
process.exit(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Step 3: Normalize
|
||||||
|
console.log('[Step 3] Normalizing products...');
|
||||||
|
const normalizer = new DutchieNormalizer();
|
||||||
|
|
||||||
|
// Construct a RawPayload structure that the normalizer expects
|
||||||
|
// The normalizer.normalize() expects: { raw_json, dispensary_id, ... }
|
||||||
|
const rawPayloadForValidation = {
|
||||||
|
products: fetchResult.products,
|
||||||
|
queryInfo: {
|
||||||
|
totalCount: fetchResult.totalProducts,
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
|
const validation = normalizer.validatePayload(rawPayloadForValidation);
|
||||||
|
if (!validation.valid) {
|
||||||
|
console.error(` Validation failed: ${validation.errors?.join(', ')}`);
|
||||||
|
process.exit(1);
|
||||||
|
}
|
||||||
|
console.log(` Validation: PASS`);
|
||||||
|
|
||||||
|
// Build proper RawPayload for normalize()
|
||||||
|
const rawPayload = {
|
||||||
|
id: `test-${Date.now()}`,
|
||||||
|
dispensary_id: dispensaryId,
|
||||||
|
crawl_run_id: null,
|
||||||
|
platform: 'dutchie',
|
||||||
|
payload_version: 1,
|
||||||
|
raw_json: rawPayloadForValidation,
|
||||||
|
product_count: fetchResult.totalProducts,
|
||||||
|
pricing_type: 'rec',
|
||||||
|
crawl_mode: 'active',
|
||||||
|
fetched_at: new Date(),
|
||||||
|
processed: false,
|
||||||
|
normalized_at: null,
|
||||||
|
hydration_error: null,
|
||||||
|
hydration_attempts: 0,
|
||||||
|
created_at: new Date(),
|
||||||
|
};
|
||||||
|
|
||||||
|
const normResult = normalizer.normalize(rawPayload);
|
||||||
|
console.log(` Normalized products: ${normResult.products.length}`);
|
||||||
|
console.log(` Brands extracted: ${normResult.brands.length}`);
|
||||||
|
console.log(` Sample product: ${normResult.products[0]?.name}\n`);
|
||||||
|
|
||||||
|
// Step 4: Write to canonical tables
|
||||||
|
console.log('[Step 4] Writing to canonical tables via hydrateToCanonical...');
|
||||||
|
const hydrateResult = await hydrateToCanonical(
|
||||||
|
pool,
|
||||||
|
dispensaryId,
|
||||||
|
normResult,
|
||||||
|
null // no crawl_run_id for this test
|
||||||
|
);
|
||||||
|
|
||||||
|
console.log(` Products upserted: ${hydrateResult.productsUpserted}`);
|
||||||
|
console.log(` Products new: ${hydrateResult.productsNew}`);
|
||||||
|
console.log(` Snapshots created: ${hydrateResult.snapshotsCreated}`);
|
||||||
|
console.log(` Variants upserted: ${hydrateResult.variantsUpserted}`);
|
||||||
|
console.log(` Brands created: ${hydrateResult.brandsCreated}\n`);
|
||||||
|
|
||||||
|
// Step 5: Verify
|
||||||
|
console.log('[Step 5] Verifying data in canonical tables...');
|
||||||
|
|
||||||
|
const productCount = await pool.query(`
|
||||||
|
SELECT COUNT(*) as count FROM store_products WHERE dispensary_id = $1
|
||||||
|
`, [dispensaryId]);
|
||||||
|
console.log(` store_products count: ${productCount.rows[0].count}`);
|
||||||
|
|
||||||
|
const variantCount = await pool.query(`
|
||||||
|
SELECT COUNT(*) as count FROM product_variants WHERE dispensary_id = $1
|
||||||
|
`, [dispensaryId]);
|
||||||
|
console.log(` product_variants count: ${variantCount.rows[0].count}`);
|
||||||
|
|
||||||
|
const snapshotCount = await pool.query(`
|
||||||
|
SELECT COUNT(*) as count FROM store_product_snapshots WHERE dispensary_id = $1
|
||||||
|
`, [dispensaryId]);
|
||||||
|
console.log(` store_product_snapshots count: ${snapshotCount.rows[0].count}`);
|
||||||
|
|
||||||
|
console.log('\n============================================================');
|
||||||
|
console.log('SUCCESS - Crawl and hydration complete!');
|
||||||
|
console.log('============================================================');
|
||||||
|
|
||||||
|
} catch (error: any) {
|
||||||
|
console.error('\n============================================================');
|
||||||
|
console.error('ERROR:', error.message);
|
||||||
|
console.error('============================================================');
|
||||||
|
if (error.stack) {
|
||||||
|
console.error(error.stack);
|
||||||
|
}
|
||||||
|
process.exit(1);
|
||||||
|
} finally {
|
||||||
|
await pool.end();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
main();
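For readers tracing the hydration path, the object literal built in Step 3 implies roughly the shape below. This is a sketch inferred from this script only; the canonical `RawPayload` type in the hydration module may differ.

```typescript
// Inferred from the rawPayload literal in this script; not the authoritative type.
interface RawPayloadSketch {
  id: string;
  dispensary_id: number;
  crawl_run_id: string | null;
  platform: string;     // 'dutchie' in this script
  payload_version: number;
  raw_json: { products: any[]; queryInfo: { totalCount: number } };
  product_count: number;
  pricing_type: string; // 'rec' in this script
  crawl_mode: string;   // 'active' in this script
  fetched_at: Date;
  processed: boolean;
  normalized_at: Date | null;
  hydration_error: string | null;
  hydration_attempts: number;
  created_at: Date;
}
```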
80  backend/src/scripts/test-image-proxy.ts  Normal file
@@ -0,0 +1,80 @@
#!/usr/bin/env npx tsx
|
||||||
|
/**
|
||||||
|
* Test Image Proxy - Standalone test without backend
|
||||||
|
*
|
||||||
|
* Usage:
|
||||||
|
* npx tsx src/scripts/test-image-proxy.ts
|
||||||
|
*/
|
||||||
|
|
||||||
|
import express from 'express';
|
||||||
|
import imageProxyRoutes from '../routes/image-proxy';
|
||||||
|
|
||||||
|
const app = express();
|
||||||
|
const PORT = 3099;
|
||||||
|
|
||||||
|
// Mount the image proxy
|
||||||
|
app.use('/img', imageProxyRoutes);
|
||||||
|
|
||||||
|
// Start server
|
||||||
|
app.listen(PORT, async () => {
|
||||||
|
console.log(`Test image proxy running on http://localhost:${PORT}`);
|
||||||
|
console.log('');
|
||||||
|
console.log('Testing image proxy...');
|
||||||
|
console.log('');
|
||||||
|
|
||||||
|
const axios = require('axios');
|
||||||
|
|
||||||
|
// Test cases
|
||||||
|
const tests = [
|
||||||
|
{
|
||||||
|
name: 'Original image',
|
||||||
|
url: '/img/products/az/az-deeply-rooted/clout-king/68b4b20a0f9ef3e90eb51e96/image-268a6e44.webp',
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: 'Resize to 200px width',
|
||||||
|
url: '/img/products/az/az-deeply-rooted/clout-king/68b4b20a0f9ef3e90eb51e96/image-268a6e44.webp?w=200',
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: 'Resize to 100x100 cover',
|
||||||
|
url: '/img/products/az/az-deeply-rooted/clout-king/68b4b20a0f9ef3e90eb51e96/image-268a6e44.webp?w=100&h=100&fit=cover',
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: 'Grayscale + blur',
|
||||||
|
url: '/img/products/az/az-deeply-rooted/clout-king/68b4b20a0f9ef3e90eb51e96/image-268a6e44.webp?w=200&gray=1&blur=2',
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: 'Convert to JPEG',
|
||||||
|
url: '/img/products/az/az-deeply-rooted/clout-king/68b4b20a0f9ef3e90eb51e96/image-268a6e44.webp?w=200&format=jpeg&q=70',
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: 'Non-existent image',
|
||||||
|
url: '/img/products/az/nonexistent/image.webp',
|
||||||
|
},
|
||||||
|
];
|
||||||
|
|
||||||
|
for (const test of tests) {
|
||||||
|
try {
|
||||||
|
const response = await axios.get(`http://localhost:${PORT}${test.url}`, {
|
||||||
|
responseType: 'arraybuffer',
|
||||||
|
validateStatus: () => true,
|
||||||
|
});
|
||||||
|
|
||||||
|
const contentType = response.headers['content-type'];
|
||||||
|
const size = response.data.length;
|
||||||
|
const status = response.status;
|
||||||
|
|
||||||
|
console.log(`${test.name}:`);
|
||||||
|
console.log(` URL: ${test.url.slice(0, 80)}${test.url.length > 80 ? '...' : ''}`);
|
||||||
|
console.log(` Status: ${status}`);
|
||||||
|
console.log(` Content-Type: ${contentType}`);
|
||||||
|
console.log(` Size: ${(size / 1024).toFixed(1)} KB`);
|
||||||
|
console.log('');
|
||||||
|
} catch (error: any) {
|
||||||
|
console.log(`${test.name}: ERROR - ${error.message}`);
|
||||||
|
console.log('');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
console.log('Tests complete!');
|
||||||
|
process.exit(0);
|
||||||
|
});
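The test cases above exercise the proxy's query parameters (`w`, `h`, `fit`, `gray`, `blur`, `format`, `q`). A small URL builder along these lines can keep callers consistent; the parameter names come from the tests, while the accepted values and defaults are assumptions.

```typescript
// Builds an /img proxy URL from transform options (parameter names per the tests above).
interface ImgOptions {
  w?: number;
  h?: number;
  fit?: 'cover' | 'contain';   // assumed values; 'cover' appears in the tests
  gray?: boolean;
  blur?: number;
  format?: 'webp' | 'jpeg' | 'png'; // assumed; 'jpeg' appears in the tests
  q?: number;                  // quality, assumed 1-100
}

function buildImgUrl(imagePath: string, opts: ImgOptions = {}): string {
  const params = new URLSearchParams();
  if (opts.w) params.set('w', String(opts.w));
  if (opts.h) params.set('h', String(opts.h));
  if (opts.fit) params.set('fit', opts.fit);
  if (opts.gray) params.set('gray', '1');
  if (opts.blur) params.set('blur', String(opts.blur));
  if (opts.format) params.set('format', opts.format);
  if (opts.q) params.set('q', String(opts.q));
  const qs = params.toString();
  return qs ? `/img/${imagePath}?${qs}` : `/img/${imagePath}`;
}

// e.g. buildImgUrl('products/az/store/brand/abc/image.webp', { w: 200, format: 'jpeg', q: 70 })
```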
117  backend/src/scripts/test-stealth-session.ts  Normal file
@@ -0,0 +1,117 @@
/**
|
||||||
|
* Test script for stealth session management
|
||||||
|
*
|
||||||
|
* Tests:
|
||||||
|
* 1. Per-session fingerprint rotation
|
||||||
|
* 2. Geographic consistency (timezone → Accept-Language)
|
||||||
|
* 3. Proxy location loading from database
|
||||||
|
*
|
||||||
|
* Usage:
|
||||||
|
* npx tsx src/scripts/test-stealth-session.ts
|
||||||
|
*/
|
||||||
|
|
||||||
|
import {
|
||||||
|
startSession,
|
||||||
|
endSession,
|
||||||
|
getCurrentSession,
|
||||||
|
getFingerprint,
|
||||||
|
getRandomFingerprint,
|
||||||
|
getLocaleForTimezone,
|
||||||
|
buildHeaders,
|
||||||
|
} from '../platforms/dutchie';
|
||||||
|
|
||||||
|
console.log('='.repeat(60));
|
||||||
|
console.log('STEALTH SESSION TEST');
|
||||||
|
console.log('='.repeat(60));
|
||||||
|
|
||||||
|
// Test 1: Timezone to Locale mapping
|
||||||
|
console.log('\n[Test 1] Timezone to Locale Mapping:');
|
||||||
|
const testTimezones = [
|
||||||
|
'America/Phoenix',
|
||||||
|
'America/Los_Angeles',
|
||||||
|
'America/New_York',
|
||||||
|
'America/Chicago',
|
||||||
|
undefined,
|
||||||
|
'Invalid/Timezone',
|
||||||
|
];
|
||||||
|
|
||||||
|
for (const tz of testTimezones) {
|
||||||
|
const locale = getLocaleForTimezone(tz);
|
||||||
|
console.log(` ${tz || '(undefined)'} → ${locale}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test 2: Random fingerprint selection
|
||||||
|
console.log('\n[Test 2] Random Fingerprint Selection (5 samples):');
|
||||||
|
for (let i = 0; i < 5; i++) {
|
||||||
|
const fp = getRandomFingerprint();
|
||||||
|
console.log(` ${i + 1}. ${fp.userAgent.slice(0, 60)}...`);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test 3: Session Management
|
||||||
|
console.log('\n[Test 3] Session Management:');
|
||||||
|
|
||||||
|
// Before session - should use default fingerprint
|
||||||
|
console.log(' Before session:');
|
||||||
|
const beforeFp = getFingerprint();
|
||||||
|
console.log(` getFingerprint(): ${beforeFp.userAgent.slice(0, 50)}...`);
|
||||||
|
console.log(` getCurrentSession(): ${getCurrentSession()}`);
|
||||||
|
|
||||||
|
// Start session with Arizona timezone
|
||||||
|
console.log('\n Starting session (AZ, America/Phoenix):');
|
||||||
|
const session1 = startSession('AZ', 'America/Phoenix');
|
||||||
|
console.log(` Session ID: ${session1.sessionId}`);
|
||||||
|
console.log(` Fingerprint UA: ${session1.fingerprint.userAgent.slice(0, 50)}...`);
|
||||||
|
console.log(` Accept-Language: ${session1.fingerprint.acceptLanguage}`);
|
||||||
|
console.log(` Timezone: ${session1.timezone}`);
|
||||||
|
|
||||||
|
// During session - should use session fingerprint
|
||||||
|
console.log('\n During session:');
|
||||||
|
const duringFp = getFingerprint();
|
||||||
|
console.log(` getFingerprint(): ${duringFp.userAgent.slice(0, 50)}...`);
|
||||||
|
console.log(` Same as session? ${duringFp.userAgent === session1.fingerprint.userAgent}`);
|
||||||
|
|
||||||
|
// Test buildHeaders with session
|
||||||
|
console.log('\n buildHeaders() during session:');
|
||||||
|
const headers = buildHeaders('/embedded-menu/test-store');
|
||||||
|
console.log(` User-Agent: ${headers['user-agent'].slice(0, 50)}...`);
|
||||||
|
console.log(` Accept-Language: ${headers['accept-language']}`);
|
||||||
|
console.log(` Origin: ${headers['origin']}`);
|
||||||
|
console.log(` Referer: ${headers['referer']}`);
|
||||||
|
|
||||||
|
// End session
|
||||||
|
console.log('\n Ending session:');
|
||||||
|
endSession();
|
||||||
|
console.log(` getCurrentSession(): ${getCurrentSession()}`);
|
||||||
|
|
||||||
|
// Test 4: Multiple sessions should have different fingerprints
|
||||||
|
console.log('\n[Test 4] Multiple Sessions (fingerprint variety):');
|
||||||
|
const fingerprints: string[] = [];
|
||||||
|
for (let i = 0; i < 10; i++) {
|
||||||
|
const session = startSession('CA', 'America/Los_Angeles');
|
||||||
|
fingerprints.push(session.fingerprint.userAgent);
|
||||||
|
endSession();
|
||||||
|
}
|
||||||
|
|
||||||
|
const uniqueCount = new Set(fingerprints).size;
|
||||||
|
console.log(` 10 sessions created, ${uniqueCount} unique fingerprints`);
|
||||||
|
console.log(` Variety: ${uniqueCount >= 3 ? '✅ Good' : '⚠️ Low - may need more fingerprint options'}`);
|
||||||
|
|
||||||
|
// Test 5: Geographic consistency check
|
||||||
|
console.log('\n[Test 5] Geographic Consistency:');
|
||||||
|
const geoTests = [
|
||||||
|
{ state: 'AZ', tz: 'America/Phoenix' },
|
||||||
|
{ state: 'CA', tz: 'America/Los_Angeles' },
|
||||||
|
{ state: 'NY', tz: 'America/New_York' },
|
||||||
|
{ state: 'IL', tz: 'America/Chicago' },
|
||||||
|
];
|
||||||
|
|
||||||
|
for (const { state, tz } of geoTests) {
|
||||||
|
const session = startSession(state, tz);
|
||||||
|
const consistent = session.fingerprint.acceptLanguage.includes('en-US');
|
||||||
|
console.log(` ${state} (${tz}): Accept-Language=${session.fingerprint.acceptLanguage} ${consistent ? '✅' : '❌'}`);
|
||||||
|
endSession();
|
||||||
|
}
|
||||||
|
|
||||||
|
console.log('\n' + '='.repeat(60));
|
||||||
|
console.log('TEST COMPLETE');
|
||||||
|
console.log('='.repeat(60));
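In crawler code, these helpers are meant to bracket each store's work so a single fingerprint is used per session. A minimal usage sketch, following the call signatures exercised above (the request body itself is elided):

```typescript
import { startSession, endSession, buildHeaders } from '../platforms/dutchie';

// Bracket one store's crawl with a per-session fingerprint (sketch only).
async function crawlWithSession(stateCode: string, timezone: string, menuPath: string) {
  const session = startSession(stateCode, timezone);
  try {
    const headers = buildHeaders(menuPath);
    console.log(`[${session.sessionId}] UA: ${headers['user-agent'].slice(0, 40)}...`);
    // ... perform the GraphQL requests for this store using `headers` ...
  } finally {
    endSession(); // always release the fingerprint, even on error
  }
}

// crawlWithSession('AZ', 'America/Phoenix', '/embedded-menu/example-store');
```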
521  backend/src/seo/settings.ts  Normal file
@@ -0,0 +1,521 @@
/**
|
||||||
|
* SEO Settings Helper Module
|
||||||
|
*
|
||||||
|
* Provides functions for managing SEO configuration stored in seo_settings table.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { getPool } from '../db/pool';
|
||||||
|
|
||||||
|
// Default settings - used when table is empty or for reset
|
||||||
|
export const DEFAULT_SETTINGS: Record<string, any> = {
|
||||||
|
// Section 1: Global Content Generation Settings
|
||||||
|
primary_prompt_template: `You are a cannabis industry content expert creating SEO-optimized content for {{page_type}} pages.
|
||||||
|
|
||||||
|
Topic: {{subject}}
|
||||||
|
Focus Areas: {{focus_areas}}
|
||||||
|
Tone: {{tone}}
|
||||||
|
Length: {{length}}
|
||||||
|
|
||||||
|
Generate engaging, informative content that:
|
||||||
|
1. Uses natural keyword placement
|
||||||
|
2. Provides value to cannabis consumers
|
||||||
|
3. Maintains compliance with industry standards
|
||||||
|
4. Includes relevant local market data
|
||||||
|
5. Avoids technical jargon about data collection
|
||||||
|
|
||||||
|
Write content that feels authentic and helpful, not automated.`,
|
||||||
|
|
||||||
|
regeneration_template: `You are improving existing SEO content for a {{page_type}} page.
|
||||||
|
|
||||||
|
=== ORIGINAL CONTENT ===
|
||||||
|
{{original_content}}
|
||||||
|
|
||||||
|
=== IMPROVEMENT AREAS ===
|
||||||
|
{{improvement_areas}}
|
||||||
|
|
||||||
|
=== FRESH DATA ===
|
||||||
|
{{fresh_data}}
|
||||||
|
|
||||||
|
=== REQUIREMENTS ===
|
||||||
|
- Tone: {{tone}}
|
||||||
|
- Length: {{length}}
|
||||||
|
- Preserve accurate information from original
|
||||||
|
- Update outdated statistics with fresh data
|
||||||
|
- Improve SEO keyword density naturally
|
||||||
|
- Enhance readability and engagement
|
||||||
|
- Maintain compliance with cannabis industry standards
|
||||||
|
- Keep the same content structure unless improvement is needed
|
||||||
|
|
||||||
|
Generate the improved version, preserving what works while addressing the improvement areas.`,
|
||||||
|
|
||||||
|
default_content_length: 'medium',
|
||||||
|
tone_voice: 'informational',
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// TEMPLATE LIBRARY - Complete Page Type Templates
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
state_page_template: `# {{state_name}} Dispensaries - Your Cannabis Guide
|
||||||
|
|
||||||
|
Explore **{{dispensary_count}} licensed dispensaries** across {{state_name}}. Our comprehensive directory features {{product_count}}+ products from {{brand_count}} trusted brands, with real-time menu updates and pricing.
|
||||||
|
|
||||||
|
## Why Shop Cannabis in {{state_name}}?
|
||||||
|
|
||||||
|
{{state_name}} offers a thriving cannabis market with diverse product selections and competitive pricing. Whether you're looking for premium flower, convenient vapes, or precisely dosed edibles, you'll find options to match your preferences.
|
||||||
|
|
||||||
|
## Top Cannabis Brands in {{state_name}}
|
||||||
|
|
||||||
|
{{top_brands}}
|
||||||
|
|
||||||
|
These brands are available at dispensaries across the state, known for quality, consistency, and consumer trust.
|
||||||
|
|
||||||
|
## Popular Product Categories
|
||||||
|
|
||||||
|
{{top_categories}}
|
||||||
|
|
||||||
|
Find everything from traditional flower to innovative concentrates and wellness-focused CBD products.
|
||||||
|
|
||||||
|
## {{state_name}} Cannabis Market Overview
|
||||||
|
|
||||||
|
| Metric | Value |
|
||||||
|
|--------|-------|
|
||||||
|
| Licensed Dispensaries | {{dispensary_count}} |
|
||||||
|
| Products Available | {{product_count}}+ |
|
||||||
|
| Active Brands | {{brand_count}} |
|
||||||
|
| Average Price | \${{avg_price}} |
|
||||||
|
|
||||||
|
## Finding the Right Dispensary
|
||||||
|
|
||||||
|
Use our search tools to filter by location, product availability, and store hours. Compare menus across dispensaries to find the best selection for your needs.
|
||||||
|
|
||||||
|
---
|
||||||
|
*Market data continuously updated. Last refresh: {{last_updated}}*`,
|
||||||
|
|
||||||
|
city_page_template: `# {{city_name}}, {{state_code}} Cannabis Dispensaries
|
||||||
|
|
||||||
|
Discover **{{dispensary_count}} dispensaries** in {{city_name}}, {{state_name}}. Browse {{product_count}} products from {{brand_count}} local and national brands.
|
||||||
|
|
||||||
|
## Cannabis Shopping in {{city_name}}
|
||||||
|
|
||||||
|
{{city_name}} offers convenient access to quality cannabis products through licensed retail locations. Our directory helps you find the perfect dispensary based on location, selection, and reviews.
|
||||||
|
|
||||||
|
## Featured Dispensaries in {{city_name}}
|
||||||
|
|
||||||
|
{{popular_dispensaries}}
|
||||||
|
|
||||||
|
## Explore Nearby Cities
|
||||||
|
|
||||||
|
Looking for more options? Check out dispensaries in these nearby areas:
|
||||||
|
|
||||||
|
{{nearby_cities}}
|
||||||
|
|
||||||
|
## {{city_name}} Market Snapshot
|
||||||
|
|
||||||
|
- **Local Stores**: {{dispensary_count}}
|
||||||
|
- **Products Available**: {{product_count}}
|
||||||
|
- **Average Price**: \${{avg_price}}
|
||||||
|
|
||||||
|
## What to Expect
|
||||||
|
|
||||||
|
{{city_name}} dispensaries offer a range of experiences from boutique shops to high-volume retail stores. First-time visitors should bring valid ID and check store hours before visiting.
|
||||||
|
|
||||||
|
---
|
||||||
|
*Find your local dispensary and start shopping today.*`,
|
||||||
|
|
||||||
|
category_page_template: `# {{category_name}} Products in {{state_name}}
|
||||||
|
|
||||||
|
Explore **{{product_count}} {{category_name}} products** from {{brand_count}} trusted brands across {{state_name}} dispensaries.
|
||||||
|
|
||||||
|
## About {{category_name}}
|
||||||
|
|
||||||
|
{{category_name}} remains one of the most popular cannabis product categories, offering options for every preference and experience level.
|
||||||
|
|
||||||
|
## Popular {{category_name}} Varieties
|
||||||
|
|
||||||
|
{{top_strains}}
|
||||||
|
|
||||||
|
## Browse by Type
|
||||||
|
|
||||||
|
{{subcategories}}
|
||||||
|
|
||||||
|
## {{category_name}} Pricing in {{state_name}}
|
||||||
|
|
||||||
|
- **Average Price**: \${{avg_price}}
|
||||||
|
- **Budget Options**: Starting under $25
|
||||||
|
- **Premium Selection**: $50+
|
||||||
|
|
||||||
|
## How to Choose {{category_name}}
|
||||||
|
|
||||||
|
Consider potency levels, terpene profiles, and intended effects when selecting {{category_name}} products. Our filters help you narrow down options by THC/CBD content, brand, and price range.
|
||||||
|
|
||||||
|
## Shop {{category_name}} Near You
|
||||||
|
|
||||||
|
Find {{category_name}} products at dispensaries across {{state_name}}. Use our location search to find stores with current inventory.`,
|
||||||
|
|
||||||
|
brand_page_template: `# {{brand_name}} - Cannabis Products & Store Locator
|
||||||
|
|
||||||
|
{{description}}
|
||||||
|
|
||||||
|
## Where to Find {{brand_name}}
|
||||||
|
|
||||||
|
{{brand_name}} products are available at **{{store_count}} dispensaries** across multiple states:
|
||||||
|
|
||||||
|
{{state_presence}}
|
||||||
|
|
||||||
|
## {{brand_name}} Product Categories
|
||||||
|
|
||||||
|
{{categories}}
|
||||||
|
|
||||||
|
## Brand Statistics
|
||||||
|
|
||||||
|
| Metric | Value |
|
||||||
|
|--------|-------|
|
||||||
|
| Total Products | {{product_count}} |
|
||||||
|
| Retail Partners | {{store_count}} |
|
||||||
|
| Average Price | \${{avg_price}} |
|
||||||
|
|
||||||
|
## Why Choose {{brand_name}}?
|
||||||
|
|
||||||
|
Consumers trust {{brand_name}} for consistent quality, transparent lab testing, and innovative product development. Whether you're a long-time fan or discovering them for the first time, explore their full lineup at dispensaries near you.
|
||||||
|
|
||||||
|
## Shop {{brand_name}} Products
|
||||||
|
|
||||||
|
Find {{brand_name}} at a dispensary near you. Compare prices and availability across stores to get the best deal.`,
|
||||||
|
|
||||||
|
product_page_template: `# {{product_name}}
|
||||||
|
|
||||||
|
**{{brand_name}}** | {{category}}
|
||||||
|
|
||||||
|
## Product Details
|
||||||
|
|
||||||
|
| Attribute | Value |
|
||||||
|
|-----------|-------|
|
||||||
|
| THC Content | {{thc_percent}}% |
|
||||||
|
| CBD Content | {{cbd_percent}}% |
|
||||||
|
| Category | {{category}} |
|
||||||
|
| Brand | {{brand_name}} |
|
||||||
|
|
||||||
|
## Availability
|
||||||
|
|
||||||
|
{{#if in_stock}}
|
||||||
|
**In Stock** at {{dispensary_name}}
|
||||||
|
{{else}}
|
||||||
|
**Currently Unavailable** at {{dispensary_name}}
|
||||||
|
{{/if}}
|
||||||
|
|
||||||
|
📍 {{dispensary_city}}, {{state_name}}
|
||||||
|
|
||||||
|
## Pricing
|
||||||
|
|
||||||
|
**\${{price}}**
|
||||||
|
|
||||||
|
*Prices may vary by location. Check dispensary menu for current pricing.*
|
||||||
|
|
||||||
|
## About This Product
|
||||||
|
|
||||||
|
{{product_name}} from {{brand_name}} offers a quality {{category}} experience. Visit {{dispensary_name}} to learn more about this product and explore similar options.
|
||||||
|
|
||||||
|
## Find More {{brand_name}} Products
|
||||||
|
|
||||||
|
Browse the complete {{brand_name}} lineup and find products at dispensaries across {{state_name}}.`,
|
||||||
|
|
||||||
|
search_results_template: `# Search Results: "{{query}}"
|
||||||
|
|
||||||
|
Found **{{result_count}} results** across {{state_name}} dispensaries.
|
||||||
|
|
||||||
|
## Results Overview
|
||||||
|
|
||||||
|
| Category | Count |
|
||||||
|
|----------|-------|
|
||||||
|
| Products | {{product_results}} |
|
||||||
|
| Dispensaries | {{dispensary_results}} |
|
||||||
|
| Brands | {{brand_results}} |
|
||||||
|
|
||||||
|
## Top Categories for "{{query}}"
|
||||||
|
|
||||||
|
{{top_categories}}
|
||||||
|
|
||||||
|
## Refine Your Search
|
||||||
|
|
||||||
|
Use our filters to narrow results by:
|
||||||
|
- **Category**: Flower, Vape, Edibles, Concentrates, and more
|
||||||
|
- **Price Range**: Budget-friendly to premium options
|
||||||
|
- **Brand**: Shop your favorite brands
|
||||||
|
- **Location**: Find nearby dispensaries
|
||||||
|
|
||||||
|
## Popular Related Searches
|
||||||
|
|
||||||
|
Explore related products and categories to find exactly what you're looking for.
|
||||||
|
|
||||||
|
---
|
||||||
|
*Can't find what you need? Try broadening your search terms or browse by category.*`,
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// Section 2: Automatic Refresh Rules
|
||||||
|
// ============================================================================
|
||||||
|
auto_refresh_interval: 'weekly',
|
||||||
|
trigger_pct_product_change: true,
|
||||||
|
trigger_pct_brand_change: true,
|
||||||
|
trigger_new_stores: true,
|
||||||
|
trigger_market_shift: false,
|
||||||
|
webhook_url: '',
|
||||||
|
notify_on_trigger: false,
|
||||||
|
|
||||||
|
// Section 3: Page-Level Defaults
|
||||||
|
default_title_template: '{{state_name}} Dispensaries | Find Cannabis Near You | CannaiQ',
|
||||||
|
default_meta_description_template: 'Discover the best dispensaries in {{state_name}}. Browse {{dispensary_count}}+ licensed retailers, compare prices, and find cannabis products near you.',
|
||||||
|
default_slug_template: 'dispensaries-{{state_code_lower}}',
|
||||||
|
default_og_image_template: '/images/seo/og-{{state_code_lower}}.jpg',
|
||||||
|
enable_ai_images: false,
|
||||||
|
|
||||||
|
// Section 4: Crawl / Dataset Configuration
|
||||||
|
primary_data_provider: 'cannaiq',
|
||||||
|
fallback_data_provider: 'dutchie',
|
||||||
|
min_data_freshness_hours: 24,
|
||||||
|
stale_data_behavior: 'allow_with_warning',
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get a single setting by key
|
||||||
|
*/
|
||||||
|
export async function getSetting(key: string): Promise<any> {
|
||||||
|
const pool = getPool();
|
||||||
|
|
||||||
|
try {
|
||||||
|
const result = await pool.query(
|
||||||
|
'SELECT value FROM seo_settings WHERE key = $1',
|
||||||
|
[key]
|
||||||
|
);
|
||||||
|
|
||||||
|
if (result.rows.length === 0) {
|
||||||
|
// Return default if not found
|
||||||
|
return DEFAULT_SETTINGS[key] ?? null;
|
||||||
|
}
|
||||||
|
|
||||||
|
return result.rows[0].value;
|
||||||
|
} catch (error: any) {
|
||||||
|
console.error(`[SEO Settings] Error getting setting "${key}":`, error.message);
|
||||||
|
// Return default on error
|
||||||
|
return DEFAULT_SETTINGS[key] ?? null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Set a single setting
|
||||||
|
*/
|
||||||
|
export async function setSetting(key: string, value: any): Promise<void> {
|
||||||
|
const pool = getPool();
|
||||||
|
|
||||||
|
try {
|
||||||
|
await pool.query(
|
||||||
|
`INSERT INTO seo_settings (key, value, updated_at)
|
||||||
|
VALUES ($1, $2, NOW())
|
||||||
|
ON CONFLICT (key) DO UPDATE SET
|
||||||
|
value = EXCLUDED.value,
|
||||||
|
updated_at = NOW()`,
|
||||||
|
[key, JSON.stringify(value)]
|
||||||
|
);
|
||||||
|
} catch (error: any) {
|
||||||
|
console.error(`[SEO Settings] Error setting "${key}":`, error.message);
|
||||||
|
throw error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get all settings as a key/value object
|
||||||
|
*/
|
||||||
|
export async function getAllSettings(): Promise<Record<string, any>> {
|
||||||
|
const pool = getPool();
|
||||||
|
|
||||||
|
try {
|
||||||
|
const result = await pool.query('SELECT key, value FROM seo_settings');
|
||||||
|
|
||||||
|
// Start with defaults
|
||||||
|
const settings: Record<string, any> = { ...DEFAULT_SETTINGS };
|
||||||
|
|
||||||
|
// Override with stored values
|
||||||
|
for (const row of result.rows) {
|
||||||
|
settings[row.key] = row.value;
|
||||||
|
}
|
||||||
|
|
||||||
|
return settings;
|
||||||
|
} catch (error: any) {
|
||||||
|
console.error('[SEO Settings] Error getting all settings:', error.message);
|
||||||
|
// Return defaults on error
|
||||||
|
return { ...DEFAULT_SETTINGS };
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Set multiple settings at once
|
||||||
|
*/
|
||||||
|
export async function setMultipleSettings(settings: Record<string, any>): Promise<void> {
  const pool = getPool();

  try {
    // Use a transaction for bulk updates
    await pool.query('BEGIN');

    for (const [key, value] of Object.entries(settings)) {
      await pool.query(
        `INSERT INTO seo_settings (key, value, updated_at)
         VALUES ($1, $2, NOW())
         ON CONFLICT (key) DO UPDATE SET
           value = EXCLUDED.value,
           updated_at = NOW()`,
        [key, JSON.stringify(value)]
      );
    }

    await pool.query('COMMIT');
  } catch (error: any) {
    await pool.query('ROLLBACK');
    console.error('[SEO Settings] Error setting multiple settings:', error.message);
    throw error;
  }
}

/**
 * Reset all settings to defaults
 */
export async function resetToDefaults(): Promise<Record<string, any>> {
  const pool = getPool();

  try {
    await pool.query('BEGIN');

    // Delete all existing settings
    await pool.query('DELETE FROM seo_settings');

    // Insert all defaults
    for (const [key, value] of Object.entries(DEFAULT_SETTINGS)) {
      await pool.query(
        `INSERT INTO seo_settings (key, value, created_at, updated_at)
         VALUES ($1, $2, NOW(), NOW())`,
        [key, JSON.stringify(value)]
      );
    }

    await pool.query('COMMIT');

    return { ...DEFAULT_SETTINGS };
  } catch (error: any) {
    await pool.query('ROLLBACK');
    console.error('[SEO Settings] Error resetting to defaults:', error.message);
    throw error;
  }
}

/**
 * Ensure settings table exists and has defaults
 * Call this on app startup
 */
export async function ensureSettingsExist(): Promise<void> {
  const pool = getPool();

  try {
    // Check if table exists
    const tableCheck = await pool.query(`
      SELECT EXISTS (
        SELECT FROM information_schema.tables
        WHERE table_name = 'seo_settings'
      )
    `);

    if (!tableCheck.rows[0].exists) {
      // Create table
      await pool.query(`
        CREATE TABLE IF NOT EXISTS seo_settings (
          id SERIAL PRIMARY KEY,
          key TEXT UNIQUE NOT NULL,
          value JSONB NOT NULL,
          created_at TIMESTAMP DEFAULT NOW(),
          updated_at TIMESTAMP DEFAULT NOW()
        )
      `);
    }

    // Check if settings exist
    const countResult = await pool.query('SELECT COUNT(*) FROM seo_settings');
    const count = parseInt(countResult.rows[0].count, 10);

    if (count === 0) {
      // Seed with defaults
      for (const [key, value] of Object.entries(DEFAULT_SETTINGS)) {
        await pool.query(
          `INSERT INTO seo_settings (key, value)
           VALUES ($1, $2)
           ON CONFLICT (key) DO NOTHING`,
          [key, JSON.stringify(value)]
        );
      }
      console.log('[SEO Settings] Seeded default settings');
    }
  } catch (error: any) {
    console.error('[SEO Settings] Error ensuring settings exist:', error.message);
  }
}

/**
 * Build a prompt using settings and template variables
 */
export function buildPrompt(
  template: string,
  variables: Record<string, string>
): string {
  let result = template;

  for (const [key, value] of Object.entries(variables)) {
    result = result.replace(new RegExp(`{{${key}}}`, 'g'), value);
  }

  return result;
}

/**
 * Get content generation settings as a structured object
 */
export async function getContentGenerationSettings(): Promise<{
  promptTemplate: string;
  regenerationTemplate: string;
  contentLength: 'short' | 'medium' | 'long';
  tone: 'neutral' | 'informational' | 'consumer' | 'authoritative';
}> {
  const settings = await getAllSettings();

  return {
    promptTemplate: settings.primary_prompt_template,
    regenerationTemplate: settings.regeneration_prompt_template,
    contentLength: settings.default_content_length,
    tone: settings.tone_voice,
  };
}

/**
 * Check if data is stale based on settings
 */
export async function checkDataFreshness(lastCrawlAt: Date | null): Promise<{
  isFresh: boolean;
  behavior: 'block_generation' | 'allow_with_warning' | 'auto_trigger_crawl';
  hoursStale: number;
}> {
  const settings = await getAllSettings();
  const maxHours = settings.min_data_freshness_hours || 24;
  const behavior = settings.stale_data_behavior || 'allow_with_warning';

  if (!lastCrawlAt) {
    return {
      isFresh: false,
      behavior,
      hoursStale: Infinity,
    };
  }

  const hoursStale = (Date.now() - lastCrawlAt.getTime()) / (1000 * 60 * 60);

  return {
    isFresh: hoursStale <= maxHours,
    behavior,
    hoursStale: Math.round(hoursStale),
  };
}
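// Illustrative usage sketch (not part of the diff): one way a caller might act on the
// checkDataFreshness() result above. The maybeGenerate() wrapper and the commented-out
// triggerCrawl() helper are hypothetical; only checkDataFreshness and its return shape
// come from this module.
async function maybeGenerate(lastCrawlAt: Date | null): Promise<void> {
  const freshness = await checkDataFreshness(lastCrawlAt);

  if (!freshness.isFresh) {
    switch (freshness.behavior) {
      case 'block_generation':
        throw new Error(`Data is ${freshness.hoursStale}h stale; generation blocked`);
      case 'auto_trigger_crawl':
        // await triggerCrawl(); // hypothetical helper that would refresh the data first
        break;
      case 'allow_with_warning':
        console.warn(`[SEO] Generating with ${freshness.hoursStale}h-old data`);
        break;
    }
  }
  // ...proceed with content generation
}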
backend/src/seo/template-engine.ts (new file, 369 lines)
@@ -0,0 +1,369 @@
/**
 * SEO Template Engine
 *
 * Handles template selection, variable injection, and content generation
 * for different page types (state, city, category, brand, product, search).
 */

import { getAllSettings, getSetting } from './settings';

// Page types supported by the template engine
export type PageType = 'state' | 'city' | 'category' | 'brand' | 'product' | 'search';

// Template keys mapping
export const TEMPLATE_KEYS: Record<PageType, string> = {
  state: 'state_page_template',
  city: 'city_page_template',
  category: 'category_page_template',
  brand: 'brand_page_template',
  product: 'product_page_template',
  search: 'search_results_template',
};

// Sample mock data for previews
export const MOCK_DATA: Record<PageType, Record<string, any>> = {
  state: {
    state_name: 'Arizona',
    state_code: 'AZ',
    state_code_lower: 'az',
    dispensary_count: 156,
    product_count: 12450,
    brand_count: 287,
    category_count: 8,
    top_brands: ['Raw Garden', 'Stiiizy', 'Select', 'Pax', 'Bloom'],
    top_categories: ['Flower', 'Vape', 'Edibles', 'Concentrate', 'Pre-rolls'],
    avg_price: 42.50,
    last_updated: new Date().toISOString().split('T')[0],
  },
  city: {
    city_name: 'Phoenix',
    state_name: 'Arizona',
    state_code: 'AZ',
    dispensary_count: 45,
    product_count: 3200,
    brand_count: 120,
    nearby_cities: ['Scottsdale', 'Tempe', 'Mesa', 'Glendale'],
    popular_dispensaries: ['Harvest', 'Curaleaf', 'Zen Leaf'],
    avg_price: 40.00,
  },
  category: {
    category_name: 'Flower',
    category_slug: 'flower',
    product_count: 4500,
    brand_count: 95,
    state_name: 'Arizona',
    avg_price: 35.00,
    top_strains: ['Blue Dream', 'OG Kush', 'Girl Scout Cookies'],
    subcategories: ['Indica', 'Sativa', 'Hybrid'],
  },
  brand: {
    brand_name: 'Raw Garden',
    brand_slug: 'raw-garden',
    product_count: 156,
    state_presence: ['AZ', 'CA', 'NV', 'CO'],
    store_count: 89,
    avg_price: 45.00,
    categories: ['Concentrate', 'Vape', 'Live Resin'],
    description: 'Premium cannabis products from California',
  },
  product: {
    product_name: 'Blue Dream Cartridge',
    brand_name: 'Select',
    category: 'Vape',
    thc_percent: 85.5,
    cbd_percent: 0.5,
    price: 45.00,
    dispensary_name: 'Harvest HOC',
    dispensary_city: 'Phoenix',
    state_name: 'Arizona',
    in_stock: true,
  },
  search: {
    query: 'live resin',
    result_count: 245,
    product_results: 180,
    dispensary_results: 45,
    brand_results: 20,
    state_name: 'Arizona',
    top_categories: ['Concentrate', 'Vape'],
  },
};

/**
 * Apply template variables to a template string
 * Replaces {{variable}} with values from data object
 *
 * Rules:
 * - Replace {{variable}} occurrences
 * - Leave unknown variables unchanged
 * - Prevent undefined values (replace with empty string)
 * - Support arrays by joining with comma
 */
export function applyTemplateVariables(
  template: string,
  data: Record<string, any>
): string {
  if (!template) return '';

  let result = template;

  // Find all {{variable}} patterns
  const variablePattern = /\{\{(\w+)\}\}/g;
  let match;

  while ((match = variablePattern.exec(template)) !== null) {
    const fullMatch = match[0];
    const variableName = match[1];

    if (variableName in data) {
      let value = data[variableName];

      // Handle different value types
      if (value === undefined || value === null) {
        value = '';
      } else if (Array.isArray(value)) {
        value = value.join(', ');
      } else if (typeof value === 'object') {
        value = JSON.stringify(value);
      } else {
        value = String(value);
      }

      // Replace all occurrences of this variable
      result = result.split(fullMatch).join(value);
    }
    // Leave unknown variables unchanged
  }

  return result;
}
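// Illustrative usage sketch of applyTemplateVariables() with the state mock data above.
// The template string here is invented for the example; the substitution rules it shows
// (numbers stringified, arrays joined with ", ", unknown variables left untouched) are
// the ones implemented in the function above.
const exampleTemplate =
  'Find {{dispensary_count}} dispensaries in {{state_name}} carrying {{top_brands}}. {{unknown_var}}';
const exampleOutput = applyTemplateVariables(exampleTemplate, MOCK_DATA.state);
// => "Find 156 dispensaries in Arizona carrying Raw Garden, Stiiizy, Select, Pax, Bloom. {{unknown_var}}"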

/**
 * Get the correct template for a page type
 * Uses case-insensitive matching
 */
export async function getTemplateForPageType(pageType: string): Promise<string> {
  const normalizedType = pageType.toLowerCase().trim() as PageType;
  const templateKey = TEMPLATE_KEYS[normalizedType];

  if (!templateKey) {
    console.warn(`[TemplateEngine] Unknown page type: ${pageType}, falling back to state template`);
    return getSetting('state_page_template');
  }

  return getSetting(templateKey);
}

/**
 * Get regeneration template
 */
export async function getRegenerationTemplate(): Promise<string> {
  return getSetting('regeneration_template');
}

/**
 * Generate content for a page using the appropriate template
 */
export async function generatePageContent(
  pageType: string,
  data: Record<string, any>
): Promise<{
  content: string;
  templateUsed: string;
  variablesApplied: string[];
}> {
  const template = await getTemplateForPageType(pageType);
  const content = applyTemplateVariables(template, data);

  // Extract which variables were actually used
  const variablePattern = /\{\{(\w+)\}\}/g;
  const variablesInTemplate: string[] = [];
  let match;
  while ((match = variablePattern.exec(template)) !== null) {
    if (!variablesInTemplate.includes(match[1])) {
      variablesInTemplate.push(match[1]);
    }
  }

  const variablesApplied = variablesInTemplate.filter(v => v in data);

  return {
    content,
    templateUsed: TEMPLATE_KEYS[pageType.toLowerCase() as PageType] || 'state_page_template',
    variablesApplied,
  };
}

/**
 * Generate a preview with mock data
 */
export async function generatePreview(
  pageType: string,
  customTemplate?: string
): Promise<{
  preview: string;
  template: string;
  mockData: Record<string, any>;
  availableVariables: string[];
}> {
  const normalizedType = (pageType?.toLowerCase().trim() || 'state') as PageType;
  const template = customTemplate || await getTemplateForPageType(normalizedType);
  const mockData = MOCK_DATA[normalizedType] || MOCK_DATA.state;

  const preview = applyTemplateVariables(template, mockData);

  return {
    preview,
    template,
    mockData,
    availableVariables: Object.keys(mockData),
  };
}

/**
 * Regenerate content using regeneration template
 */
export async function regenerateContent(
  pageType: string,
  originalContent: string,
  newData: Record<string, any>,
  improvementAreas?: string[]
): Promise<{
  content: string;
  regenerationPrompt: string;
}> {
  const regenerationTemplate = await getRegenerationTemplate();
  const settings = await getAllSettings();

  // Build regeneration context
  const regenerationData = {
    ...newData,
    original_content: originalContent,
    page_type: pageType,
    improvement_areas: improvementAreas?.join(', ') || 'SEO keywords, local relevance, data freshness',
    tone: settings.tone_voice || 'informational',
    length: settings.default_content_length || 'medium',
  };

  const regenerationPrompt = applyTemplateVariables(regenerationTemplate, regenerationData);

  // Generate new content using the page template
  const pageTemplate = await getTemplateForPageType(pageType);
  const content = applyTemplateVariables(pageTemplate, newData);

  return {
    content,
    regenerationPrompt,
  };
}

/**
 * Get all available templates and their metadata
 */
export async function getAllTemplates(): Promise<Record<string, {
  key: string;
  template: string;
  description: string;
  availableVariables: string[];
}>> {
  const settings = await getAllSettings();

  return {
    state: {
      key: 'state_page_template',
      template: settings.state_page_template || '',
      description: 'Template for state landing pages (e.g., "Arizona Dispensaries")',
      availableVariables: Object.keys(MOCK_DATA.state),
    },
    city: {
      key: 'city_page_template',
      template: settings.city_page_template || '',
      description: 'Template for city landing pages (e.g., "Phoenix Dispensaries")',
      availableVariables: Object.keys(MOCK_DATA.city),
    },
    category: {
      key: 'category_page_template',
      template: settings.category_page_template || '',
      description: 'Template for category pages (e.g., "Flower", "Edibles")',
      availableVariables: Object.keys(MOCK_DATA.category),
    },
    brand: {
      key: 'brand_page_template',
      template: settings.brand_page_template || '',
      description: 'Template for brand pages (e.g., "Raw Garden Products")',
      availableVariables: Object.keys(MOCK_DATA.brand),
    },
    product: {
      key: 'product_page_template',
      template: settings.product_page_template || '',
      description: 'Template for individual product pages',
      availableVariables: Object.keys(MOCK_DATA.product),
    },
    search: {
      key: 'search_results_template',
      template: settings.search_results_template || '',
      description: 'Template for search results pages',
      availableVariables: Object.keys(MOCK_DATA.search),
    },
    regeneration: {
      key: 'regeneration_template',
      template: settings.regeneration_template || '',
      description: 'Template used when regenerating/improving existing content',
      availableVariables: ['original_content', 'page_type', 'improvement_areas', 'tone', 'length', '...page-specific variables'],
    },
  };
}

/**
 * Validate a template string
 */
export function validateTemplate(template: string): {
  valid: boolean;
  variables: string[];
  unknownVariables: string[];
  errors: string[];
} {
  const errors: string[] = [];
  const variables: string[] = [];

  // Find all variables
  const variablePattern = /\{\{(\w+)\}\}/g;
  let match;
  while ((match = variablePattern.exec(template)) !== null) {
    if (!variables.includes(match[1])) {
      variables.push(match[1]);
    }
  }

  // Check for unclosed brackets
  const openBrackets = (template.match(/\{\{/g) || []).length;
  const closeBrackets = (template.match(/\}\}/g) || []).length;
  if (openBrackets !== closeBrackets) {
    errors.push('Mismatched template brackets: {{ and }} counts do not match');
  }

  // Check for empty variable names
  if (template.includes('{{}}')) {
    errors.push('Empty variable name found: {{}}');
  }

  // Get all known variables
  const allKnownVariables = new Set<string>();
  Object.values(MOCK_DATA).forEach(data => {
    Object.keys(data).forEach(key => allKnownVariables.add(key));
  });
  allKnownVariables.add('original_content');
  allKnownVariables.add('page_type');
  allKnownVariables.add('improvement_areas');
  allKnownVariables.add('tone');
  allKnownVariables.add('length');

  const unknownVariables = variables.filter(v => !allKnownVariables.has(v));

  return {
    valid: errors.length === 0,
    variables,
    unknownVariables,
    errors,
  };
}
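// Illustrative usage sketch of validateTemplate(); the template literal is invented for
// the example, and the commented results follow from the checks implemented above.
const check = validateTemplate('{{state_name}} has {{dispensary_count}} stores and {{made_up_field}}');
// check.valid            === true  (brackets balanced, no empty variable names)
// check.variables        === ['state_name', 'dispensary_count', 'made_up_field']
// check.unknownVariables === ['made_up_field']  (not present in any MOCK_DATA page type)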
@@ -115,7 +115,7 @@ export class LegalStateService {
   }

   /**
-   * Get all states with dispensary counts
+   * Get all states with dispensary counts (active/crawlable dispensaries only)
    */
   async getAllStatesWithDispensaryCounts(): Promise<StateWithDispensaryCount[]> {
     const { rows } = await this.pool.query<StateWithDispensaryCount>(`
@@ -127,6 +127,8 @@ export class LegalStateService {
         SELECT state_id, COUNT(*) AS cnt
         FROM dispensaries
         WHERE state_id IS NOT NULL
+          AND menu_type = 'dutchie'
+          AND platform_dispensary_id IS NOT NULL
         GROUP BY state_id
       ) d ON d.state_id = s.id
       ORDER BY s.name ASC
@@ -324,6 +326,8 @@ export class LegalStateService {
         SELECT state_id, COUNT(*) AS cnt
         FROM dispensaries
         WHERE state_id IS NOT NULL
+          AND menu_type = 'dutchie'
+          AND platform_dispensary_id IS NOT NULL
         GROUP BY state_id
       ) d ON d.state_id = s.id
       ORDER BY s.name ASC
@@ -26,6 +26,8 @@ import {
   PenetrationDataPoint,
   BrandMarketPosition,
   BrandRecVsMedFootprint,
+  BrandPromotionalSummary,
+  BrandPromotionalEvent,
 } from './types';

 export class BrandPenetrationService {
@@ -44,16 +46,17 @@ export class BrandPenetrationService {
     // Get current brand presence
     const currentResult = await this.pool.query(`
       SELECT
-        sp.brand_name,
+        sp.brand_name_raw AS brand_name,
         COUNT(DISTINCT sp.dispensary_id) AS total_dispensaries,
         COUNT(*) AS total_skus,
         ROUND(COUNT(*)::NUMERIC / NULLIF(COUNT(DISTINCT sp.dispensary_id), 0), 2) AS avg_skus_per_dispensary,
         ARRAY_AGG(DISTINCT s.code) FILTER (WHERE s.code IS NOT NULL) AS states_present
       FROM store_products sp
-      LEFT JOIN states s ON s.id = sp.state_id
-      WHERE sp.brand_name = $1
+      JOIN dispensaries d ON d.id = sp.dispensary_id
+      LEFT JOIN states s ON s.id = d.state_id
+      WHERE sp.brand_name_raw = $1
         AND sp.is_in_stock = TRUE
-      GROUP BY sp.brand_name
+      GROUP BY sp.brand_name_raw
     `, [brandName]);

     if (currentResult.rows.length === 0) {
@@ -72,7 +75,7 @@ export class BrandPenetrationService {
         DATE(sps.captured_at) AS date,
         COUNT(DISTINCT sps.dispensary_id) AS dispensary_count
       FROM store_product_snapshots sps
-      WHERE sps.brand_name = $1
+      WHERE sps.brand_name_raw = $1
         AND sps.captured_at >= $2
         AND sps.captured_at <= $3
         AND sps.is_in_stock = TRUE
@@ -123,8 +126,9 @@ export class BrandPenetrationService {
         COUNT(DISTINCT sp.dispensary_id) AS dispensary_count,
         COUNT(*) AS sku_count
       FROM store_products sp
-      JOIN states s ON s.id = sp.state_id
-      WHERE sp.brand_name = $1
+      JOIN dispensaries d ON d.id = sp.dispensary_id
+      JOIN states s ON s.id = d.state_id
+      WHERE sp.brand_name_raw = $1
         AND sp.is_in_stock = TRUE
       GROUP BY s.code, s.name, s.recreational_legal, s.medical_legal
     ),
@@ -133,7 +137,8 @@ export class BrandPenetrationService {
         s.code AS state_code,
         COUNT(DISTINCT sp.dispensary_id) AS total_dispensaries
       FROM store_products sp
-      JOIN states s ON s.id = sp.state_id
+      JOIN dispensaries d ON d.id = sp.dispensary_id
+      JOIN states s ON s.id = d.state_id
       WHERE sp.is_in_stock = TRUE
       GROUP BY s.code
     )
@@ -169,7 +174,7 @@ export class BrandPenetrationService {
     let filters = '';

     if (options.category) {
-      filters += ` AND sp.category = $${paramIdx}`;
+      filters += ` AND sp.category_raw = $${paramIdx}`;
       params.push(options.category);
       paramIdx++;
     }
@@ -183,31 +188,33 @@ export class BrandPenetrationService {
     const result = await this.pool.query(`
       WITH brand_metrics AS (
         SELECT
-          sp.brand_name,
-          sp.category,
+          sp.brand_name_raw AS brand_name,
+          sp.category_raw AS category,
           s.code AS state_code,
           COUNT(*) AS sku_count,
           COUNT(DISTINCT sp.dispensary_id) AS dispensary_count,
           AVG(sp.price_rec) AS avg_price
         FROM store_products sp
-        JOIN states s ON s.id = sp.state_id
-        WHERE sp.brand_name = $1
+        JOIN dispensaries d ON d.id = sp.dispensary_id
+        JOIN states s ON s.id = d.state_id
+        WHERE sp.brand_name_raw = $1
          AND sp.is_in_stock = TRUE
-          AND sp.category IS NOT NULL
+          AND sp.category_raw IS NOT NULL
           ${filters}
-        GROUP BY sp.brand_name, sp.category, s.code
+        GROUP BY sp.brand_name_raw, sp.category_raw, s.code
       ),
       category_totals AS (
         SELECT
-          sp.category,
+          sp.category_raw AS category,
           s.code AS state_code,
           COUNT(*) AS total_skus,
           AVG(sp.price_rec) AS category_avg_price
         FROM store_products sp
-        JOIN states s ON s.id = sp.state_id
+        JOIN dispensaries d ON d.id = sp.dispensary_id
+        JOIN states s ON s.id = d.state_id
         WHERE sp.is_in_stock = TRUE
-          AND sp.category IS NOT NULL
-        GROUP BY sp.category, s.code
+          AND sp.category_raw IS NOT NULL
+        GROUP BY sp.category_raw, s.code
       )
       SELECT
         bm.*,
@@ -243,8 +250,9 @@ export class BrandPenetrationService {
         COUNT(DISTINCT sp.dispensary_id) AS dispensary_count,
         ROUND(COUNT(*)::NUMERIC / NULLIF(COUNT(DISTINCT sp.dispensary_id), 0), 2) AS avg_skus
       FROM store_products sp
-      JOIN states s ON s.id = sp.state_id
-      WHERE sp.brand_name = $1
+      JOIN dispensaries d ON d.id = sp.dispensary_id
+      JOIN states s ON s.id = d.state_id
+      WHERE sp.brand_name_raw = $1
         AND sp.is_in_stock = TRUE
         AND s.recreational_legal = TRUE
     ),
@@ -255,8 +263,9 @@ export class BrandPenetrationService {
         COUNT(DISTINCT sp.dispensary_id) AS dispensary_count,
         ROUND(COUNT(*)::NUMERIC / NULLIF(COUNT(DISTINCT sp.dispensary_id), 0), 2) AS avg_skus
       FROM store_products sp
-      JOIN states s ON s.id = sp.state_id
-      WHERE sp.brand_name = $1
+      JOIN dispensaries d ON d.id = sp.dispensary_id
+      JOIN states s ON s.id = d.state_id
+      WHERE sp.brand_name_raw = $1
         AND sp.is_in_stock = TRUE
         AND s.medical_legal = TRUE
         AND (s.recreational_legal = FALSE OR s.recreational_legal IS NULL)
@@ -311,23 +320,24 @@ export class BrandPenetrationService {
     }

     if (category) {
-      filters += ` AND sp.category = $${paramIdx}`;
+      filters += ` AND sp.category_raw = $${paramIdx}`;
       params.push(category);
       paramIdx++;
     }

     const result = await this.pool.query(`
       SELECT
-        sp.brand_name,
+        sp.brand_name_raw AS brand_name,
         COUNT(DISTINCT sp.dispensary_id) AS dispensary_count,
         COUNT(*) AS sku_count,
         COUNT(DISTINCT s.code) AS state_count
       FROM store_products sp
-      LEFT JOIN states s ON s.id = sp.state_id
-      WHERE sp.brand_name IS NOT NULL
+      JOIN dispensaries d ON d.id = sp.dispensary_id
+      LEFT JOIN states s ON s.id = d.state_id
+      WHERE sp.brand_name_raw IS NOT NULL
         AND sp.is_in_stock = TRUE
         ${filters}
-      GROUP BY sp.brand_name
+      GROUP BY sp.brand_name_raw
       ORDER BY dispensary_count DESC, sku_count DESC
       LIMIT $1
     `, params);
@@ -358,23 +368,23 @@ export class BrandPenetrationService {
     const result = await this.pool.query(`
       WITH start_counts AS (
         SELECT
-          brand_name,
+          brand_name_raw AS brand_name,
           COUNT(DISTINCT dispensary_id) AS dispensary_count
         FROM store_product_snapshots
         WHERE captured_at >= $1 AND captured_at < $1 + INTERVAL '1 day'
-          AND brand_name IS NOT NULL
+          AND brand_name_raw IS NOT NULL
           AND is_in_stock = TRUE
-        GROUP BY brand_name
+        GROUP BY brand_name_raw
       ),
       end_counts AS (
         SELECT
-          brand_name,
+          brand_name_raw AS brand_name,
           COUNT(DISTINCT dispensary_id) AS dispensary_count
         FROM store_product_snapshots
         WHERE captured_at >= $2 - INTERVAL '1 day' AND captured_at <= $2
-          AND brand_name IS NOT NULL
+          AND brand_name_raw IS NOT NULL
           AND is_in_stock = TRUE
-        GROUP BY brand_name
+        GROUP BY brand_name_raw
       )
       SELECT
         COALESCE(sc.brand_name, ec.brand_name) AS brand_name,
@@ -401,6 +411,225 @@ export class BrandPenetrationService {
       change_percent: row.change_percent ? parseFloat(row.change_percent) : 0,
     }));
   }
+
+  /**
+   * Get brand promotional history
+   *
+   * Tracks when products went on special, how long, what discount,
+   * and estimated quantity sold during the promotion.
+   */
+  async getBrandPromotionalHistory(
+    brandName: string,
+    options: { window?: TimeWindow; customRange?: DateRange; stateCode?: string; category?: string } = {}
+  ): Promise<BrandPromotionalSummary> {
+    const { window = '90d', customRange, stateCode, category } = options;
+    const { start, end } = getDateRangeFromWindow(window, customRange);
+
+    // Build filters
+    const params: any[] = [brandName, start, end];
+    let paramIdx = 4;
+    let filters = '';
+
+    if (stateCode) {
+      filters += ` AND s.code = $${paramIdx}`;
+      params.push(stateCode);
+      paramIdx++;
+    }
+
+    if (category) {
+      filters += ` AND sp.category_raw = $${paramIdx}`;
+      params.push(category);
+      paramIdx++;
+    }
+
+    // Find promotional events by detecting when is_on_special transitions to TRUE
+    // and tracking until it transitions back to FALSE
+    const eventsResult = await this.pool.query(`
+      WITH snapshot_with_lag AS (
+        SELECT
+          sps.id,
+          sps.store_product_id,
+          sps.dispensary_id,
+          sps.brand_name_raw,
+          sps.name_raw,
+          sps.category_raw,
+          sps.is_on_special,
+          sps.price_rec,
+          sps.price_rec_special,
+          sps.stock_quantity,
+          sps.captured_at,
+          LAG(sps.is_on_special) OVER (
+            PARTITION BY sps.store_product_id
+            ORDER BY sps.captured_at
+          ) AS prev_is_on_special,
+          LAG(sps.stock_quantity) OVER (
+            PARTITION BY sps.store_product_id
+            ORDER BY sps.captured_at
+          ) AS prev_stock_quantity
+        FROM store_product_snapshots sps
+        JOIN store_products sp ON sp.id = sps.store_product_id
+        JOIN dispensaries dd ON dd.id = sp.dispensary_id
+        LEFT JOIN states s ON s.id = dd.state_id
+        WHERE sps.brand_name_raw = $1
+          AND sps.captured_at >= $2
+          AND sps.captured_at <= $3
+          ${filters}
+      ),
+      special_starts AS (
+        -- Find when specials START (transition from not-on-special to on-special)
+        SELECT
+          store_product_id,
+          dispensary_id,
+          name_raw,
+          category_raw,
+          captured_at AS special_start,
+          price_rec AS regular_price,
+          price_rec_special AS special_price,
+          stock_quantity AS quantity_at_start
+        FROM snapshot_with_lag
+        WHERE is_on_special = TRUE
+          AND (prev_is_on_special = FALSE OR prev_is_on_special IS NULL)
+          AND price_rec_special IS NOT NULL
+          AND price_rec IS NOT NULL
+      ),
+      special_ends AS (
+        -- Find when specials END (transition from on-special to not-on-special)
+        SELECT
+          store_product_id,
+          captured_at AS special_end,
+          prev_stock_quantity AS quantity_at_end
+        FROM snapshot_with_lag
+        WHERE is_on_special = FALSE
+          AND prev_is_on_special = TRUE
+      ),
+      matched_events AS (
+        SELECT
+          ss.store_product_id,
+          ss.dispensary_id,
+          ss.name_raw AS product_name,
+          ss.category_raw AS category,
+          ss.special_start,
+          se.special_end,
+          ss.regular_price,
+          ss.special_price,
+          ss.quantity_at_start,
+          COALESCE(se.quantity_at_end, ss.quantity_at_start) AS quantity_at_end
+        FROM special_starts ss
+        LEFT JOIN special_ends se ON se.store_product_id = ss.store_product_id
+          AND se.special_end > ss.special_start
+          AND se.special_end = (
+            SELECT MIN(se2.special_end)
+            FROM special_ends se2
+            WHERE se2.store_product_id = ss.store_product_id
+              AND se2.special_end > ss.special_start
+          )
+      )
+      SELECT
+        me.store_product_id,
+        me.dispensary_id,
+        d.name AS dispensary_name,
+        s.code AS state_code,
+        me.product_name,
+        me.category,
+        me.special_start,
+        me.special_end,
+        EXTRACT(DAY FROM COALESCE(me.special_end, NOW()) - me.special_start)::INT AS duration_days,
+        me.regular_price,
+        me.special_price,
+        ROUND(((me.regular_price - me.special_price) / NULLIF(me.regular_price, 0)) * 100, 1) AS discount_percent,
+        me.quantity_at_start,
+        me.quantity_at_end,
+        GREATEST(0, COALESCE(me.quantity_at_start, 0) - COALESCE(me.quantity_at_end, 0)) AS quantity_sold_estimate
+      FROM matched_events me
+      JOIN dispensaries d ON d.id = me.dispensary_id
+      LEFT JOIN states s ON s.id = d.state_id
+      ORDER BY me.special_start DESC
+    `, params);
+
+    const events: BrandPromotionalEvent[] = eventsResult.rows.map((row: any) => ({
+      product_name: row.product_name,
+      store_product_id: parseInt(row.store_product_id),
+      dispensary_id: parseInt(row.dispensary_id),
+      dispensary_name: row.dispensary_name,
+      state_code: row.state_code || 'Unknown',
+      category: row.category,
+      special_start: row.special_start.toISOString().split('T')[0],
+      special_end: row.special_end ? row.special_end.toISOString().split('T')[0] : null,
+      duration_days: row.duration_days ? parseInt(row.duration_days) : null,
+      regular_price: parseFloat(row.regular_price) || 0,
+      special_price: parseFloat(row.special_price) || 0,
+      discount_percent: parseFloat(row.discount_percent) || 0,
+      quantity_at_start: row.quantity_at_start ? parseInt(row.quantity_at_start) : null,
+      quantity_at_end: row.quantity_at_end ? parseInt(row.quantity_at_end) : null,
+      quantity_sold_estimate: row.quantity_sold_estimate ? parseInt(row.quantity_sold_estimate) : null,
+    }));
+
+    // Calculate summary stats
+    const totalEvents = events.length;
+    const uniqueProducts = new Set(events.map(e => e.store_product_id)).size;
+    const uniqueDispensaries = new Set(events.map(e => e.dispensary_id)).size;
+    const uniqueStates = [...new Set(events.map(e => e.state_code))];
+
+    const avgDiscount = totalEvents > 0
+      ? events.reduce((sum, e) => sum + e.discount_percent, 0) / totalEvents
+      : 0;
+
+    const durations = events.filter(e => e.duration_days !== null).map(e => e.duration_days!);
+    const avgDuration = durations.length > 0
+      ? durations.reduce((sum, d) => sum + d, 0) / durations.length
+      : null;
+
+    const totalQuantitySold = events
+      .filter(e => e.quantity_sold_estimate !== null)
+      .reduce((sum, e) => sum + (e.quantity_sold_estimate || 0), 0);
+
+    // Calculate frequency
+    const windowDays = Math.ceil((end.getTime() - start.getTime()) / (1000 * 60 * 60 * 24));
+    const weeklyAvg = windowDays > 0 ? (totalEvents / windowDays) * 7 : 0;
+    const monthlyAvg = windowDays > 0 ? (totalEvents / windowDays) * 30 : 0;
+
+    // Group by category
+    const categoryMap = new Map<string, { count: number; discounts: number[]; quantity: number }>();
+    for (const event of events) {
+      const cat = event.category || 'Uncategorized';
+      if (!categoryMap.has(cat)) {
+        categoryMap.set(cat, { count: 0, discounts: [], quantity: 0 });
+      }
+      const entry = categoryMap.get(cat)!;
+      entry.count++;
+      entry.discounts.push(event.discount_percent);
+      if (event.quantity_sold_estimate !== null) {
+        entry.quantity += event.quantity_sold_estimate;
+      }
+    }
+
+    const byCategory = Array.from(categoryMap.entries()).map(([category, data]) => ({
+      category,
+      event_count: data.count,
+      avg_discount_percent: data.discounts.length > 0
+        ? Math.round((data.discounts.reduce((a, b) => a + b, 0) / data.discounts.length) * 10) / 10
+        : 0,
+      quantity_sold_estimate: data.quantity > 0 ? data.quantity : null,
+    })).sort((a, b) => b.event_count - a.event_count);
+
+    return {
+      brand_name: brandName,
+      window,
+      total_promotional_events: totalEvents,
+      total_products_on_special: uniqueProducts,
+      total_dispensaries_with_specials: uniqueDispensaries,
+      states_with_specials: uniqueStates,
+      avg_discount_percent: Math.round(avgDiscount * 10) / 10,
+      avg_duration_days: avgDuration !== null ? Math.round(avgDuration * 10) / 10 : null,
+      total_quantity_sold_estimate: totalQuantitySold > 0 ? totalQuantitySold : null,
+      promotional_frequency: {
+        weekly_avg: Math.round(weeklyAvg * 10) / 10,
+        monthly_avg: Math.round(monthlyAvg * 10) / 10,
+      },
+      by_category: byCategory,
+      events,
+    };
+  }
 }

 export default BrandPenetrationService;
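// Illustrative sketch (not part of the service): the same "special started / special ended"
// transition detection that the LAG(...) window functions above express in SQL, written over
// an in-memory snapshot array. The Snapshot type and findPromoWindows name are invented for
// the example; field names mirror store_product_snapshots.
interface Snapshot { capturedAt: Date; isOnSpecial: boolean; stockQuantity: number | null; }

function findPromoWindows(snapshots: Snapshot[]): Array<{ start: Date; end: Date | null }> {
  const ordered = [...snapshots].sort((a, b) => a.capturedAt.getTime() - b.capturedAt.getTime());
  const windows: Array<{ start: Date; end: Date | null }> = [];
  let open: { start: Date; end: Date | null } | null = null;

  for (let i = 0; i < ordered.length; i++) {
    const prev = i > 0 ? ordered[i - 1] : null;
    const cur = ordered[i];
    if (cur.isOnSpecial && (!prev || !prev.isOnSpecial)) {
      open = { start: cur.capturedAt, end: null };   // OFF -> ON: a promotional event starts
      windows.push(open);
    } else if (!cur.isOnSpecial && prev?.isOnSpecial && open) {
      open.end = cur.capturedAt;                     // ON -> OFF: the event ends
      open = null;
    }
  }
  return windows; // an ongoing special keeps end = null, like special_end above
}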
@@ -259,6 +259,122 @@ export class StoreAnalyticsService {
     }));
   }
+
+  /**
+   * Get quantity changes for a store (increases/decreases)
+   * Useful for estimating sales (decreases) or restocks (increases)
+   *
+   * @param direction - 'decrease' for likely sales, 'increase' for restocks, 'all' for both
+   */
+  async getQuantityChanges(
+    dispensaryId: number,
+    options: {
+      window?: TimeWindow;
+      customRange?: DateRange;
+      direction?: 'increase' | 'decrease' | 'all';
+      limit?: number;
+    } = {}
+  ): Promise<{
+    dispensary_id: number;
+    window: TimeWindow;
+    direction: string;
+    total_changes: number;
+    total_units_decreased: number;
+    total_units_increased: number;
+    changes: Array<{
+      store_product_id: number;
+      product_name: string;
+      brand_name: string | null;
+      category: string | null;
+      old_quantity: number;
+      new_quantity: number;
+      quantity_delta: number;
+      direction: 'increase' | 'decrease';
+      captured_at: string;
+    }>;
+  }> {
+    const { window = '7d', customRange, direction = 'all', limit = 100 } = options;
+    const { start, end } = getDateRangeFromWindow(window, customRange);
+
+    // Build direction filter
+    let directionFilter = '';
+    if (direction === 'decrease') {
+      directionFilter = 'AND qty_delta < 0';
+    } else if (direction === 'increase') {
+      directionFilter = 'AND qty_delta > 0';
+    }
+
+    const result = await this.pool.query(`
+      WITH qty_changes AS (
+        SELECT
+          sps.store_product_id,
+          sp.name_raw AS product_name,
+          sp.brand_name_raw AS brand_name,
+          sp.category_raw AS category,
+          LAG(sps.stock_quantity) OVER w AS old_quantity,
+          sps.stock_quantity AS new_quantity,
+          sps.stock_quantity - LAG(sps.stock_quantity) OVER w AS qty_delta,
+          sps.captured_at
+        FROM store_product_snapshots sps
+        JOIN store_products sp ON sp.id = sps.store_product_id
+        WHERE sps.dispensary_id = $1
+          AND sps.captured_at >= $2
+          AND sps.captured_at <= $3
+          AND sps.stock_quantity IS NOT NULL
+        WINDOW w AS (PARTITION BY sps.store_product_id ORDER BY sps.captured_at)
+      )
+      SELECT *
+      FROM qty_changes
+      WHERE old_quantity IS NOT NULL
+        AND qty_delta != 0
+        ${directionFilter}
+      ORDER BY captured_at DESC
+      LIMIT $4
+    `, [dispensaryId, start, end, limit]);
+
+    // Calculate totals
+    const totalsResult = await this.pool.query(`
+      WITH qty_changes AS (
+        SELECT
+          sps.stock_quantity - LAG(sps.stock_quantity) OVER w AS qty_delta
+        FROM store_product_snapshots sps
+        WHERE sps.dispensary_id = $1
+          AND sps.captured_at >= $2
+          AND sps.captured_at <= $3
+          AND sps.stock_quantity IS NOT NULL
+          AND sps.store_product_id IS NOT NULL
+        WINDOW w AS (PARTITION BY sps.store_product_id ORDER BY sps.captured_at)
+      )
+      SELECT
+        COUNT(*) FILTER (WHERE qty_delta != 0) AS total_changes,
+        COALESCE(SUM(ABS(qty_delta)) FILTER (WHERE qty_delta < 0), 0) AS units_decreased,
+        COALESCE(SUM(qty_delta) FILTER (WHERE qty_delta > 0), 0) AS units_increased
+      FROM qty_changes
+      WHERE qty_delta IS NOT NULL
+    `, [dispensaryId, start, end]);
+
+    const totals = totalsResult.rows[0] || {};
+
+    return {
+      dispensary_id: dispensaryId,
+      window,
+      direction,
+      total_changes: parseInt(totals.total_changes) || 0,
+      total_units_decreased: parseInt(totals.units_decreased) || 0,
+      total_units_increased: parseInt(totals.units_increased) || 0,
+      changes: result.rows.map((row: any) => ({
+        store_product_id: row.store_product_id,
+        product_name: row.product_name,
+        brand_name: row.brand_name,
+        category: row.category,
+        old_quantity: row.old_quantity,
+        new_quantity: row.new_quantity,
+        quantity_delta: row.qty_delta,
+        direction: row.qty_delta > 0 ? 'increase' : 'decrease',
+        captured_at: row.captured_at?.toISOString() || null,
+      })),
+    };
+  }

   /**
    * Get store inventory composition (categories and brands breakdown)
    */
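// Illustrative usage sketch of getQuantityChanges() as a rough sales estimate. The
// estimateWeeklySales() wrapper and its wiring are hypothetical; only the method and its
// return shape come from the class above. Quantity decreases approximate sales, increases
// approximate restocks, and total_changes counts movements in both directions.
async function estimateWeeklySales(analytics: StoreAnalyticsService, dispensaryId: number) {
  const report = await analytics.getQuantityChanges(dispensaryId, {
    window: '7d',
    direction: 'decrease',
    limit: 50,
  });

  console.log(
    `Store ${report.dispensary_id}: ~${report.total_units_decreased} units sold ` +
    `(${report.total_changes} quantity changes recorded in the last 7 days)`
  );
  return report.changes; // per-product deltas, newest first
}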
@@ -322,3 +322,48 @@ export interface RecVsMedPriceComparison {
   };
   price_diff_percent: number | null;
 }
+
+// ============================================================
+// BRAND PROMOTIONAL ANALYTICS TYPES
+// ============================================================
+
+export interface BrandPromotionalEvent {
+  product_name: string;
+  store_product_id: number;
+  dispensary_id: number;
+  dispensary_name: string;
+  state_code: string;
+  category: string | null;
+  special_start: string;               // ISO date when special started
+  special_end: string | null;          // ISO date when special ended (null if ongoing)
+  duration_days: number | null;
+  regular_price: number;
+  special_price: number;
+  discount_percent: number;
+  quantity_at_start: number | null;
+  quantity_at_end: number | null;
+  quantity_sold_estimate: number | null;  // quantity_at_start - quantity_at_end
+}
+
+export interface BrandPromotionalSummary {
+  brand_name: string;
+  window: TimeWindow;
+  total_promotional_events: number;
+  total_products_on_special: number;
+  total_dispensaries_with_specials: number;
+  states_with_specials: string[];
+  avg_discount_percent: number;
+  avg_duration_days: number | null;
+  total_quantity_sold_estimate: number | null;
+  promotional_frequency: {
+    weekly_avg: number;
+    monthly_avg: number;
+  };
+  by_category: Array<{
+    category: string;
+    event_count: number;
+    avg_discount_percent: number;
+    quantity_sold_estimate: number | null;
+  }>;
+  events: BrandPromotionalEvent[];
+}
@@ -61,6 +61,13 @@ export interface Proxy {
   failureCount: number;
   successCount: number;
   avgResponseTimeMs: number | null;
+  maxConnections: number; // Number of concurrent connections allowed (for rotating proxies)
+  // Location info (if known)
+  city?: string;
+  state?: string;
+  country?: string;
+  countryCode?: string;
+  timezone?: string;
 }

 export interface ProxyStats {
@@ -109,18 +116,27 @@ export class ProxyRotator {
         username,
         password,
         protocol,
-        is_active as "isActive",
-        last_used_at as "lastUsedAt",
+        active as "isActive",
+        last_tested_at as "lastUsedAt",
         failure_count as "failureCount",
-        success_count as "successCount",
-        avg_response_time_ms as "avgResponseTimeMs"
+        0 as "successCount",
+        response_time_ms as "avgResponseTimeMs",
+        COALESCE(max_connections, 1) as "maxConnections",
+        city,
+        state,
+        country,
+        country_code as "countryCode",
+        timezone
       FROM proxies
-      WHERE is_active = true
-      ORDER BY failure_count ASC, last_used_at ASC NULLS FIRST
+      WHERE active = true
+      ORDER BY failure_count ASC, last_tested_at ASC NULLS FIRST
     `);

     this.proxies = result.rows;
-    console.log(`[ProxyRotator] Loaded ${this.proxies.length} active proxies`);
+
+    // Calculate total concurrent capacity
+    const totalCapacity = this.proxies.reduce((sum, p) => sum + p.maxConnections, 0);
+    console.log(`[ProxyRotator] Loaded ${this.proxies.length} active proxies (${totalCapacity} max concurrent connections)`);
   } catch (error) {
     // Table might not exist - that's okay
     console.warn(`[ProxyRotator] Could not load proxies: ${error}`);
@@ -192,11 +208,11 @@ export class ProxyRotator {
       UPDATE proxies
       SET
         failure_count = failure_count + 1,
-        last_failure_at = NOW(),
-        last_error = $2,
-        is_active = CASE WHEN failure_count >= 4 THEN false ELSE is_active END
+        updated_at = NOW(),
+        test_result = $2,
+        active = CASE WHEN failure_count >= 4 THEN false ELSE active END
       WHERE id = $1
-    `, [proxyId, error || null]);
+    `, [proxyId, error || 'failed']);
   } catch (err) {
     console.error(`[ProxyRotator] Failed to update proxy ${proxyId}:`, err);
   }
@@ -226,12 +242,13 @@ export class ProxyRotator {
     await this.pool.query(`
       UPDATE proxies
       SET
-        success_count = success_count + 1,
-        last_used_at = NOW(),
-        avg_response_time_ms = CASE
-          WHEN avg_response_time_ms IS NULL THEN $2
-          ELSE (avg_response_time_ms * 0.8) + ($2 * 0.2)
-        END
+        last_tested_at = NOW(),
+        test_result = 'success',
+        response_time_ms = CASE
+          WHEN response_time_ms IS NULL THEN $2
+          ELSE (response_time_ms * 0.8 + $2 * 0.2)::integer
+        END,
+        updated_at = NOW()
       WHERE id = $1
     `, [proxyId, responseTimeMs || null]);
   } catch (err) {
@@ -255,7 +272,7 @@ export class ProxyRotator {
    */
   getStats(): ProxyStats {
     const totalProxies = this.proxies.length;
-    const activeProxies = this.proxies.filter(p => p.isActive).length;
+    const activeProxies = this.proxies.reduce((sum, p) => sum + p.maxConnections, 0); // Total concurrent capacity
     const blockedProxies = this.proxies.filter(p => p.failureCount >= 5).length;

     const successRates = this.proxies
@@ -268,7 +285,7 @@ export class ProxyRotator {

     return {
       totalProxies,
-      activeProxies,
+      activeProxies, // Total concurrent capacity across all proxies
       blockedProxies,
       avgSuccessRate,
     };
@@ -402,6 +419,26 @@ export class CrawlRotator {
       await this.proxy.markFailed(current.id, error);
     }
   }
+
+  /**
+   * Get current proxy location info (for reporting)
+   * Note: For rotating proxies (like IPRoyal), the actual exit location varies per request
+   */
+  getProxyLocation(): { city?: string; state?: string; country?: string; timezone?: string; isRotating: boolean } | null {
+    const current = this.proxy.getCurrent();
+    if (!current) return null;
+
+    // Check if this is a rotating proxy (max_connections > 1 usually indicates rotating)
+    const isRotating = current.maxConnections > 1;
+
+    return {
+      city: current.city,
+      state: current.state,
+      country: current.country,
+      timezone: current.timezone,
+      isRotating
+    };
+  }
 }

 // ============================================================
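// Illustrative sketch of the smoothing applied to response_time_ms above: an exponential
// moving average with alpha = 0.2 (new = old * 0.8 + sample * 0.2), so one slow request
// nudges the stored value instead of replacing it. The smoothResponseTime() helper is
// invented for the example; the arithmetic mirrors the SQL CASE expression.
function smoothResponseTime(previousMs: number | null, sampleMs: number): number {
  if (previousMs === null) return Math.round(sampleMs);      // first measurement seeds the average
  return Math.round(previousMs * 0.8 + sampleMs * 0.2);      // mirrors the ::integer cast in SQL
}

// smoothResponseTime(null, 420) === 420
// smoothResponseTime(420, 1200) === 576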
backend/src/services/ip2location.ts (new file, 134 lines)
@@ -0,0 +1,134 @@
/**
 * IP2Location Service
 *
 * Uses local IP2Location LITE DB3 database for IP geolocation.
 * No external API calls, no rate limits.
 *
 * Database: IP2Location LITE DB3 (free, monthly updates)
 * Fields: country, region, city, latitude, longitude
 */

import path from 'path';
import fs from 'fs';

// @ts-ignore - no types for ip2location-nodejs
const { IP2Location } = require('ip2location-nodejs');

const DB_PATH = process.env.IP2LOCATION_DB_PATH ||
  path.join(__dirname, '../../data/ip2location/IP2LOCATION-LITE-DB5.BIN');

let ip2location: any = null;
let dbLoaded = false;

/**
 * Initialize IP2Location database
 */
export function initIP2Location(): boolean {
  if (dbLoaded) return true;

  try {
    if (!fs.existsSync(DB_PATH)) {
      console.warn(`IP2Location database not found at: ${DB_PATH}`);
      console.warn('Run: ./scripts/download-ip2location.sh to download');
      return false;
    }

    ip2location = new IP2Location();
    ip2location.open(DB_PATH);
    dbLoaded = true;
    console.log('IP2Location database loaded successfully');
    return true;
  } catch (err) {
    console.error('Failed to load IP2Location database:', err);
    return false;
  }
}

/**
 * Close IP2Location database
 */
export function closeIP2Location(): void {
  if (ip2location) {
    ip2location.close();
    ip2location = null;
    dbLoaded = false;
  }
}

export interface GeoLocation {
  city: string | null;
  state: string | null;
  stateCode: string | null;
  country: string | null;
  countryCode: string | null;
  lat: number | null;
  lng: number | null;
}

/**
 * Lookup IP address location
 *
 * @param ip - IPv4 or IPv6 address
 * @returns Location data or null if not found
 */
export function lookupIP(ip: string): GeoLocation | null {
  // Skip private/localhost IPs
  if (!ip || ip === '127.0.0.1' || ip === '::1' ||
      ip.startsWith('192.168.') || ip.startsWith('10.') ||
      ip.startsWith('172.16.') || ip.startsWith('172.17.') ||
      ip.startsWith('::ffff:127.') || ip.startsWith('::ffff:192.168.') ||
      ip.startsWith('::ffff:10.')) {
    return null;
  }

  // Strip IPv6 prefix if present
  const cleanIP = ip.replace(/^::ffff:/, '');

  // Initialize on first use if not already loaded
  if (!dbLoaded) {
    if (!initIP2Location()) {
      return null;
    }
  }

  try {
    const result = ip2location.getAll(cleanIP);

    if (!result || result.ip === '?' || result.countryShort === '-') {
      return null;
    }

    // DB3 LITE doesn't include lat/lng - would need DB5+ for that
    const lat = typeof result.latitude === 'number' && result.latitude !== 0 ? result.latitude : null;
    const lng = typeof result.longitude === 'number' && result.longitude !== 0 ? result.longitude : null;

    return {
      city: result.city !== '-' ? result.city : null,
      state: result.region !== '-' ? result.region : null,
      stateCode: null, // DB3 doesn't include state codes
      country: result.countryLong !== '-' ? result.countryLong : null,
      countryCode: result.countryShort !== '-' ? result.countryShort : null,
      lat,
      lng,
    };
  } catch (err) {
    console.error('IP2Location lookup error:', err);
    return null;
  }
}

/**
 * Check if IP2Location database is available
 */
export function isIP2LocationAvailable(): boolean {
  if (dbLoaded) return true;
  return fs.existsSync(DB_PATH);
}

// Export singleton-style interface
export default {
  init: initIP2Location,
  close: closeIP2Location,
  lookup: lookupIP,
  isAvailable: isIP2LocationAvailable,
};
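// Illustrative usage sketch of the geolocation helper above, e.g. for logging a crawler's
// or visitor's exit location. describeClientLocation() and the relative import path are
// hypothetical; lookupIP(), isIP2LocationAvailable() and the GeoLocation shape come from
// this file. The concrete result depends on which LITE database build is installed.
import geo from './ip2location';

function describeClientLocation(remoteAddress: string): string {
  if (!geo.isAvailable()) return 'geolocation database not installed';

  const location = geo.lookup(remoteAddress); // returns null for private/unknown IPs
  if (!location) return 'unknown location';

  return [location.city, location.state, location.countryCode].filter(Boolean).join(', ');
}

// describeClientLocation('8.8.8.8') -> e.g. "Mountain View, California, US"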
@@ -276,7 +276,6 @@ export async function addProxiesFromList(proxies: Array<{
|
|||||||
await pool.query(`
|
await pool.query(`
|
||||||
INSERT INTO proxies (host, port, protocol, username, password, active)
|
INSERT INTO proxies (host, port, protocol, username, password, active)
|
||||||
VALUES ($1, $2, $3, $4, $5, false)
|
VALUES ($1, $2, $3, $4, $5, false)
|
||||||
ON CONFLICT (host, port, protocol) DO NOTHING
|
|
||||||
`, [
|
`, [
|
||||||
proxy.host,
|
proxy.host,
|
||||||
proxy.port,
|
proxy.port,
|
||||||
@@ -285,27 +284,9 @@ export async function addProxiesFromList(proxies: Array<{
         proxy.password
       ]);

-      // Check if it was actually inserted
-      const result = await pool.query(`
-        SELECT id FROM proxies
-        WHERE host = $1 AND port = $2 AND protocol = $3
-      `, [proxy.host, proxy.port, proxy.protocol]);
-
-      if (result.rows.length > 0) {
-        // Check if it was just inserted (no last_tested_at means new)
-        const checkResult = await pool.query(`
-          SELECT last_tested_at FROM proxies
-          WHERE host = $1 AND port = $2 AND protocol = $3
-        `, [proxy.host, proxy.port, proxy.protocol]);
-
-        if (checkResult.rows[0].last_tested_at === null) {
-          added++;
-          if (added % 100 === 0) {
-            console.log(`📥 Imported ${added} proxies...`);
-          }
-        } else {
-          duplicates++;
-        }
-      }
+      added++;
+      if (added % 100 === 0) {
+        console.log(`📥 Imported ${added} proxies...`);
+      }
     } catch (error: any) {
       failed++;
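Net effect of these two hunks: the import loop no longer re-queries the table to tell new rows from duplicates, and with the ON CONFLICT clause gone a duplicate proxy should now raise a unique-constraint error and land in the failed count. If duplicate counting is ever needed again, one lighter option (a sketch only, not what this change does) is to keep ON CONFLICT ... DO NOTHING and read the driver's rowCount:

// Sketch: rowCount is 1 when a row was inserted, 0 when the conflict clause skipped it.
const insertResult = await pool.query(`
  INSERT INTO proxies (host, port, protocol, username, password, active)
  VALUES ($1, $2, $3, $4, $5, false)
  ON CONFLICT (host, port, protocol) DO NOTHING
`, [proxy.host, proxy.port, proxy.protocol, proxy.username, proxy.password]);

if (insertResult.rowCount === 1) {
  added++;
} else {
  duplicates++;
}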
@@ -8,8 +8,12 @@ interface ProxyTestJob {
   tested_proxies: number;
   passed_proxies: number;
   failed_proxies: number;
+  mode?: string; // 'all' | 'failed' | 'inactive'
 }

+// Concurrency settings
+const DEFAULT_CONCURRENCY = 10; // Test 10 proxies at a time
+
 // Simple in-memory queue - could be replaced with Bull/Bee-Queue for production
 const activeJobs = new Map<number, { cancelled: boolean }>();
@@ -33,18 +37,40 @@ export async function cleanupOrphanedJobs(): Promise<void> {
   }
 }

-export async function createProxyTestJob(): Promise<number> {
+export type ProxyTestMode = 'all' | 'failed' | 'inactive';
+
+export interface CreateJobResult {
+  jobId: number;
+  totalProxies: number;
+}
+
+export async function createProxyTestJob(mode: ProxyTestMode = 'all', concurrency: number = DEFAULT_CONCURRENCY): Promise<CreateJobResult> {
   // Check for existing running jobs first
   const existingJob = await getActiveProxyTestJob();
   if (existingJob) {
     throw new Error('A proxy test job is already running. Please cancel it first.');
   }
-  const result = await pool.query(`
-    SELECT COUNT(*) as count FROM proxies
-  `);
+
+  // Get count based on mode
+  let countQuery: string;
+  switch (mode) {
+    case 'failed':
+      countQuery = `SELECT COUNT(*) as count FROM proxies WHERE test_result = 'failed' OR active = false`;
+      break;
+    case 'inactive':
+      countQuery = `SELECT COUNT(*) as count FROM proxies WHERE active = false`;
+      break;
+    default:
+      countQuery = `SELECT COUNT(*) as count FROM proxies`;
+  }
+
+  const result = await pool.query(countQuery);
   const totalProxies = parseInt(result.rows[0].count);
+
+  if (totalProxies === 0) {
+    throw new Error(`No proxies to test with mode '${mode}'`);
+  }
+
   const jobResult = await pool.query(`
     INSERT INTO proxy_test_jobs (status, total_proxies)
     VALUES ('pending', $1)
@@ -53,12 +79,12 @@ export async function createProxyTestJob(): Promise<number> {

   const jobId = jobResult.rows[0].id;

-  // Start job in background
-  runProxyTestJob(jobId).catch(err => {
+  // Start job in background with mode and concurrency
+  runProxyTestJob(jobId, mode, concurrency).catch(err => {
     console.error(`❌ Proxy test job ${jobId} failed:`, err);
   });

-  return jobId;
+  return { jobId, totalProxies };
 }

 export async function getProxyTestJob(jobId: number): Promise<ProxyTestJob | null> {
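A minimal caller sketch for the new signature (the Express route, module path, and status codes are assumptions for illustration; the call shape and the { jobId, totalProxies } result come from the hunks above):

// Hypothetical route wiring - paths and status codes are illustrative only.
import { Router } from 'express';
import { createProxyTestJob, ProxyTestMode } from '../services/proxy-test-job';

const router = Router();

router.post('/proxy-tests', async (req, res) => {
  try {
    const mode = (req.body?.mode ?? 'all') as ProxyTestMode; // 'all' | 'failed' | 'inactive'
    const concurrency = Number(req.body?.concurrency) || 10;
    const { jobId, totalProxies } = await createProxyTestJob(mode, concurrency);
    res.status(202).json({ jobId, totalProxies });
  } catch (err: any) {
    res.status(409).json({ error: err.message }); // e.g. a job is already running, or nothing to test
  }
});

export default router;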
@@ -111,7 +137,7 @@ export async function cancelProxyTestJob(jobId: number): Promise<boolean> {
   return result.rows.length > 0;
 }

-async function runProxyTestJob(jobId: number): Promise<void> {
+async function runProxyTestJob(jobId: number, mode: ProxyTestMode = 'all', concurrency: number = DEFAULT_CONCURRENCY): Promise<void> {
   // Register job as active
   activeJobs.set(jobId, { cancelled: false });

@@ -125,20 +151,30 @@ async function runProxyTestJob(jobId: number): Promise<void> {
     WHERE id = $1
   `, [jobId]);

-  console.log(`🔍 Starting proxy test job ${jobId}...`);
+  console.log(`🔍 Starting proxy test job ${jobId} (mode: ${mode}, concurrency: ${concurrency})...`);

-  // Get all proxies
-  const result = await pool.query(`
-    SELECT id, host, port, protocol, username, password
-    FROM proxies
-    ORDER BY id
-  `);
+  // Get proxies based on mode
+  let query: string;
+  switch (mode) {
+    case 'failed':
+      query = `SELECT id, host, port, protocol, username, password FROM proxies WHERE test_result = 'failed' OR active = false ORDER BY id`;
+      break;
+    case 'inactive':
+      query = `SELECT id, host, port, protocol, username, password FROM proxies WHERE active = false ORDER BY id`;
+      break;
+    default:
+      query = `SELECT id, host, port, protocol, username, password FROM proxies ORDER BY id`;
+  }
+
+  const result = await pool.query(query);
+  const proxies = result.rows;

   let tested = 0;
   let passed = 0;
   let failed = 0;

-  for (const proxy of result.rows) {
+  // Process proxies in batches for parallel testing
+  for (let i = 0; i < proxies.length; i += concurrency) {
     // Check if job was cancelled
     const jobControl = activeJobs.get(jobId);
     if (jobControl?.cancelled) {
@@ -146,23 +182,34 @@ async function runProxyTestJob(jobId: number): Promise<void> {
       break;
     }

-    // Test the proxy
-    const testResult = await testProxy(
-      proxy.host,
-      proxy.port,
-      proxy.protocol,
-      proxy.username,
-      proxy.password
-    );
-
-    // Save result
-    await saveProxyTestResult(proxy.id, testResult);
-
-    tested++;
-    if (testResult.success) {
-      passed++;
-    } else {
-      failed++;
+    const batch = proxies.slice(i, i + concurrency);
+
+    // Test batch in parallel
+    const batchResults = await Promise.all(
+      batch.map(async (proxy) => {
+        const testResult = await testProxy(
+          proxy.host,
+          proxy.port,
+          proxy.protocol,
+          proxy.username,
+          proxy.password
+        );
+
+        // Save result
+        await saveProxyTestResult(proxy.id, testResult);
+
+        return testResult.success;
+      })
+    );
+
+    // Count results
+    for (const success of batchResults) {
+      tested++;
+      if (success) {
+        passed++;
+      } else {
+        failed++;
+      }
     }

     // Update job progress
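One property of this chunked Promise.all approach worth noting: each batch waits for its slowest proxy before the next batch starts, so a handful of slow timeouts can leave most of the `concurrency` slots idle. A sliding-window pool keeps every slot busy; a sketch of that alternative (not what this diff implements), assuming the per-proxy work is wrapped in an async worker:

// Sketch: bounded worker pool that keeps `limit` tasks in flight until items run out.
async function runWithConcurrency<T>(
  items: T[],
  limit: number,
  worker: (item: T) => Promise<void>,
): Promise<void> {
  let next = 0;
  const runners = Array.from({ length: Math.min(limit, items.length) }, async () => {
    while (next < items.length) {
      const item = items[next++]; // index is claimed synchronously, so runners never overlap
      await worker(item);
    }
  });
  await Promise.all(runners);
}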
@@ -175,10 +222,8 @@ async function runProxyTestJob(jobId: number): Promise<void> {
       WHERE id = $4
     `, [tested, passed, failed, jobId]);

-    // Log progress every 10 proxies
-    if (tested % 10 === 0) {
-      console.log(`📊 Job ${jobId}: ${tested}/${result.rows.length} proxies tested (${passed} passed, ${failed} failed)`);
-    }
+    // Log progress
+    console.log(`📊 Job ${jobId}: ${tested}/${proxies.length} proxies tested (${passed} passed, ${failed} failed)`);
   }

   // Mark job as completed
@@ -3,7 +3,7 @@ import StealthPlugin from 'puppeteer-extra-plugin-stealth';
 import { Browser, Page } from 'puppeteer';
 import { SocksProxyAgent } from 'socks-proxy-agent';
 import { pool } from '../db/pool';
-import { uploadImageFromUrl, getImageUrl } from '../utils/minio';
+import { downloadProductImageLegacy } from '../utils/image-storage';
 import { logger } from './logger';
 import { registerScraper, updateScraperStats, completeScraper } from '../routes/scraper-monitor';
 import { incrementProxyFailure, getActiveProxy, isBotDetectionError, putProxyInTimeout } from './proxy';

@@ -767,7 +767,8 @@ export async function saveProducts(storeId: number, categoryId: number, products

       if (product.imageUrl && !localImagePath) {
         try {
-          localImagePath = await uploadImageFromUrl(product.imageUrl, productId);
+          const result = await downloadProductImageLegacy(product.imageUrl, 0, productId);
+          localImagePath = result.urls?.original || null;
           await client.query(`
             UPDATE products
             SET local_image_path = $1
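Based on this (truncated) hunk, downloadProductImageLegacy appears to return an object whose urls.original, when present, becomes the stored local_image_path. A small defensive wrapper sketch (the helper name and logging are assumptions; the call arguments mirror the hunk):

// Hypothetical wrapper: normalizes the legacy download result to a nullable path.
async function resolveLocalImagePath(imageUrl: string, productId: number): Promise<string | null> {
  try {
    const result = await downloadProductImageLegacy(imageUrl, 0, productId);
    return result.urls?.original ?? null;
  } catch (err) {
    console.warn(`Image download failed for product ${productId}:`, err);
    return null;
  }
}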
Some files were not shown because too many files have changed in this diff.