Compare commits
1 Commit
feat/ui-po
...
feat/steal
| Author | SHA1 | Date | |
|---|---|---|---|
| | 74981fd399 | | |
@@ -1,3 +1,6 @@
|
||||
when:
|
||||
- event: [push, pull_request]
|
||||
|
||||
steps:
|
||||
# ===========================================
|
||||
# PR VALIDATION: Parallel type checks (PRs only)
|
||||
@@ -42,31 +45,6 @@ steps:
|
||||
when:
|
||||
event: pull_request
|
||||
|
||||
# ===========================================
|
||||
# AUTO-MERGE: Merge PR after all checks pass
|
||||
# ===========================================
|
||||
auto-merge:
|
||||
image: alpine:latest
|
||||
environment:
|
||||
GITEA_TOKEN:
|
||||
from_secret: gitea_token
|
||||
commands:
|
||||
- apk add --no-cache curl
|
||||
- |
|
||||
echo "Merging PR #${CI_COMMIT_PULL_REQUEST}..."
|
||||
curl -s -X POST \
|
||||
-H "Authorization: token $GITEA_TOKEN" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"Do":"merge"}' \
|
||||
"https://code.cannabrands.app/api/v1/repos/Creationshop/dispensary-scraper/pulls/${CI_COMMIT_PULL_REQUEST}/merge"
|
||||
depends_on:
|
||||
- typecheck-backend
|
||||
- typecheck-cannaiq
|
||||
- typecheck-findadispo
|
||||
- typecheck-findagram
|
||||
when:
|
||||
event: pull_request
|
||||
|
||||
# ===========================================
|
||||
# MASTER DEPLOY: Parallel Docker builds
|
||||
# ===========================================
|
||||
@@ -86,15 +64,11 @@ steps:
|
||||
from_secret: registry_password
|
||||
platforms: linux/amd64
|
||||
provenance: false
|
||||
cache_from:
|
||||
- "type=registry,ref=code.cannabrands.app/creationshop/dispensary-scraper:cache"
|
||||
cache_to:
|
||||
- "type=registry,ref=code.cannabrands.app/creationshop/dispensary-scraper:cache,mode=max"
|
||||
build_args:
|
||||
APP_BUILD_VERSION: ${CI_COMMIT_SHA:0:8}
|
||||
APP_GIT_SHA: ${CI_COMMIT_SHA}
|
||||
APP_BUILD_TIME: ${CI_PIPELINE_CREATED}
|
||||
CONTAINER_IMAGE_TAG: ${CI_COMMIT_SHA:0:8}
|
||||
- APP_BUILD_VERSION=${CI_COMMIT_SHA}
|
||||
- APP_GIT_SHA=${CI_COMMIT_SHA}
|
||||
- APP_BUILD_TIME=${CI_PIPELINE_CREATED}
|
||||
- CONTAINER_IMAGE_TAG=${CI_COMMIT_SHA:0:8}
|
||||
depends_on: []
|
||||
when:
|
||||
branch: master
|
||||
@@ -116,10 +90,6 @@ steps:
|
||||
from_secret: registry_password
|
||||
platforms: linux/amd64
|
||||
provenance: false
|
||||
cache_from:
|
||||
- "type=registry,ref=code.cannabrands.app/creationshop/cannaiq-frontend:cache"
|
||||
cache_to:
|
||||
- "type=registry,ref=code.cannabrands.app/creationshop/cannaiq-frontend:cache,mode=max"
|
||||
depends_on: []
|
||||
when:
|
||||
branch: master
|
||||
@@ -141,10 +111,6 @@ steps:
|
||||
from_secret: registry_password
|
||||
platforms: linux/amd64
|
||||
provenance: false
|
||||
cache_from:
|
||||
- "type=registry,ref=code.cannabrands.app/creationshop/findadispo-frontend:cache"
|
||||
cache_to:
|
||||
- "type=registry,ref=code.cannabrands.app/creationshop/findadispo-frontend:cache,mode=max"
|
||||
depends_on: []
|
||||
when:
|
||||
branch: master
|
||||
@@ -166,17 +132,13 @@ steps:
|
||||
from_secret: registry_password
|
||||
platforms: linux/amd64
|
||||
provenance: false
|
||||
cache_from:
|
||||
- "type=registry,ref=code.cannabrands.app/creationshop/findagram-frontend:cache"
|
||||
cache_to:
|
||||
- "type=registry,ref=code.cannabrands.app/creationshop/findagram-frontend:cache,mode=max"
|
||||
depends_on: []
|
||||
when:
|
||||
branch: master
|
||||
event: push
|
||||
|
||||
# ===========================================
|
||||
# STAGE 3: Deploy and Run Migrations
|
||||
# STAGE 3: Deploy (after Docker builds)
|
||||
# ===========================================
|
||||
deploy:
|
||||
image: bitnami/kubectl:latest
|
||||
@@ -187,17 +149,12 @@ steps:
|
||||
- mkdir -p ~/.kube
|
||||
- echo "$KUBECONFIG_CONTENT" | tr -d '[:space:]' | base64 -d > ~/.kube/config
|
||||
- chmod 600 ~/.kube/config
|
||||
# Deploy backend first
|
||||
- kubectl set image deployment/scraper scraper=code.cannabrands.app/creationshop/dispensary-scraper:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
|
||||
- kubectl rollout status deployment/scraper -n dispensary-scraper --timeout=300s
|
||||
# Note: Migrations run automatically at startup via auto-migrate
|
||||
# Deploy remaining services
|
||||
# Resilience: ensure workers are scaled up if at 0
|
||||
- REPLICAS=$(kubectl get deployment scraper-worker -n dispensary-scraper -o jsonpath='{.spec.replicas}'); if [ "$REPLICAS" = "0" ]; then echo "Scaling workers from 0 to 5"; kubectl scale deployment/scraper-worker --replicas=5 -n dispensary-scraper; fi
|
||||
- kubectl set image deployment/scraper-worker worker=code.cannabrands.app/creationshop/dispensary-scraper:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
|
||||
- kubectl set image deployment/cannaiq-frontend cannaiq-frontend=code.cannabrands.app/creationshop/cannaiq-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
|
||||
- kubectl set image deployment/findadispo-frontend findadispo-frontend=code.cannabrands.app/creationshop/findadispo-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
|
||||
- kubectl set image deployment/findagram-frontend findagram-frontend=code.cannabrands.app/creationshop/findagram-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
|
||||
- kubectl rollout status deployment/scraper -n dispensary-scraper --timeout=300s
|
||||
- kubectl rollout status deployment/cannaiq-frontend -n dispensary-scraper --timeout=120s
|
||||
depends_on:
|
||||
- docker-backend
|
||||
@@ -1,191 +0,0 @@
|
||||
steps:
|
||||
# ===========================================
|
||||
# PR VALIDATION: Only typecheck changed projects
|
||||
# ===========================================
|
||||
typecheck-backend:
|
||||
image: code.cannabrands.app/creationshop/node:20
|
||||
commands:
|
||||
- npm config set cache /npm-cache/backend --global
|
||||
- cd backend
|
||||
- npm ci --prefer-offline
|
||||
- npx tsc --noEmit
|
||||
volumes:
|
||||
- npm-cache:/npm-cache
|
||||
depends_on: []
|
||||
when:
|
||||
event: pull_request
|
||||
path:
|
||||
include: ['backend/**']
|
||||
|
||||
typecheck-cannaiq:
|
||||
image: code.cannabrands.app/creationshop/node:20
|
||||
commands:
|
||||
- npm config set cache /npm-cache/cannaiq --global
|
||||
- cd cannaiq
|
||||
- npm ci --prefer-offline
|
||||
- npx tsc --noEmit
|
||||
volumes:
|
||||
- npm-cache:/npm-cache
|
||||
depends_on: []
|
||||
when:
|
||||
event: pull_request
|
||||
path:
|
||||
include: ['cannaiq/**']
|
||||
|
||||
# findadispo/findagram typechecks are skipped - their commands were run with `|| true`, so they could never fail the pipeline anyway
|
||||
|
||||
# ===========================================
|
||||
# AUTO-MERGE: Merge PR after all checks pass
|
||||
# ===========================================
|
||||
auto-merge:
|
||||
image: alpine:latest
|
||||
environment:
|
||||
GITEA_TOKEN:
|
||||
from_secret: gitea_token
|
||||
commands:
|
||||
- apk add --no-cache curl
|
||||
- |
|
||||
echo "Merging PR #${CI_COMMIT_PULL_REQUEST}..."
|
||||
curl -s -X POST \
|
||||
-H "Authorization: token $GITEA_TOKEN" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"Do":"merge"}' \
|
||||
"https://code.cannabrands.app/api/v1/repos/Creationshop/dispensary-scraper/pulls/${CI_COMMIT_PULL_REQUEST}/merge"
|
||||
depends_on:
|
||||
- typecheck-backend
|
||||
- typecheck-cannaiq
|
||||
when:
|
||||
event: pull_request
|
||||
|
||||
# ===========================================
|
||||
# MASTER DEPLOY: Parallel Docker builds
|
||||
# ===========================================
|
||||
docker-backend:
|
||||
image: woodpeckerci/plugin-docker-buildx
|
||||
settings:
|
||||
registry: code.cannabrands.app
|
||||
repo: code.cannabrands.app/creationshop/dispensary-scraper
|
||||
tags:
|
||||
- latest
|
||||
- ${CI_COMMIT_SHA:0:8}
|
||||
dockerfile: backend/Dockerfile
|
||||
context: backend
|
||||
username:
|
||||
from_secret: registry_username
|
||||
password:
|
||||
from_secret: registry_password
|
||||
platforms: linux/amd64
|
||||
provenance: false
|
||||
cache_from: type=registry,ref=code.cannabrands.app/creationshop/dispensary-scraper:cache
|
||||
cache_to: type=registry,ref=code.cannabrands.app/creationshop/dispensary-scraper:cache,mode=max
|
||||
build_args:
|
||||
APP_BUILD_VERSION: ${CI_COMMIT_SHA:0:8}
|
||||
APP_GIT_SHA: ${CI_COMMIT_SHA}
|
||||
APP_BUILD_TIME: ${CI_PIPELINE_CREATED}
|
||||
CONTAINER_IMAGE_TAG: ${CI_COMMIT_SHA:0:8}
|
||||
depends_on: []
|
||||
when:
|
||||
branch: master
|
||||
event: push
|
||||
|
||||
docker-cannaiq:
|
||||
image: woodpeckerci/plugin-docker-buildx
|
||||
settings:
|
||||
registry: code.cannabrands.app
|
||||
repo: code.cannabrands.app/creationshop/cannaiq-frontend
|
||||
tags:
|
||||
- latest
|
||||
- ${CI_COMMIT_SHA:0:8}
|
||||
dockerfile: cannaiq/Dockerfile
|
||||
context: cannaiq
|
||||
username:
|
||||
from_secret: registry_username
|
||||
password:
|
||||
from_secret: registry_password
|
||||
platforms: linux/amd64
|
||||
provenance: false
|
||||
cache_from: type=registry,ref=code.cannabrands.app/creationshop/cannaiq-frontend:cache
|
||||
cache_to: type=registry,ref=code.cannabrands.app/creationshop/cannaiq-frontend:cache,mode=max
|
||||
depends_on: []
|
||||
when:
|
||||
branch: master
|
||||
event: push
|
||||
|
||||
docker-findadispo:
|
||||
image: woodpeckerci/plugin-docker-buildx
|
||||
settings:
|
||||
registry: code.cannabrands.app
|
||||
repo: code.cannabrands.app/creationshop/findadispo-frontend
|
||||
tags:
|
||||
- latest
|
||||
- ${CI_COMMIT_SHA:0:8}
|
||||
dockerfile: findadispo/frontend/Dockerfile
|
||||
context: findadispo/frontend
|
||||
username:
|
||||
from_secret: registry_username
|
||||
password:
|
||||
from_secret: registry_password
|
||||
platforms: linux/amd64
|
||||
provenance: false
|
||||
cache_from: type=registry,ref=code.cannabrands.app/creationshop/findadispo-frontend:cache
|
||||
cache_to: type=registry,ref=code.cannabrands.app/creationshop/findadispo-frontend:cache,mode=max
|
||||
depends_on: []
|
||||
when:
|
||||
branch: master
|
||||
event: push
|
||||
|
||||
docker-findagram:
|
||||
image: woodpeckerci/plugin-docker-buildx
|
||||
settings:
|
||||
registry: code.cannabrands.app
|
||||
repo: code.cannabrands.app/creationshop/findagram-frontend
|
||||
tags:
|
||||
- latest
|
||||
- ${CI_COMMIT_SHA:0:8}
|
||||
dockerfile: findagram/frontend/Dockerfile
|
||||
context: findagram/frontend
|
||||
username:
|
||||
from_secret: registry_username
|
||||
password:
|
||||
from_secret: registry_password
|
||||
platforms: linux/amd64
|
||||
provenance: false
|
||||
cache_from: type=registry,ref=code.cannabrands.app/creationshop/findagram-frontend:cache
|
||||
cache_to: type=registry,ref=code.cannabrands.app/creationshop/findagram-frontend:cache,mode=max
|
||||
depends_on: []
|
||||
when:
|
||||
branch: master
|
||||
event: push
|
||||
|
||||
# ===========================================
|
||||
# STAGE 3: Deploy and Run Migrations
|
||||
# ===========================================
|
||||
deploy:
|
||||
image: bitnami/kubectl:latest
|
||||
environment:
|
||||
KUBECONFIG_CONTENT:
|
||||
from_secret: kubeconfig_data
|
||||
commands:
|
||||
- mkdir -p ~/.kube
|
||||
- echo "$KUBECONFIG_CONTENT" | tr -d '[:space:]' | base64 -d > ~/.kube/config
|
||||
- chmod 600 ~/.kube/config
|
||||
# Deploy backend first
|
||||
- kubectl set image deployment/scraper scraper=code.cannabrands.app/creationshop/dispensary-scraper:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
|
||||
- kubectl rollout status deployment/scraper -n dispensary-scraper --timeout=300s
|
||||
# Note: Migrations run automatically at startup via auto-migrate
|
||||
# Deploy remaining services
|
||||
# Resilience: ensure workers are scaled up if at 0
|
||||
- REPLICAS=$(kubectl get deployment scraper-worker -n dispensary-scraper -o jsonpath='{.spec.replicas}'); if [ "$REPLICAS" = "0" ]; then echo "Scaling workers from 0 to 5"; kubectl scale deployment/scraper-worker --replicas=5 -n dispensary-scraper; fi
|
||||
- kubectl set image deployment/scraper-worker worker=code.cannabrands.app/creationshop/dispensary-scraper:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
|
||||
- kubectl set image deployment/cannaiq-frontend cannaiq-frontend=code.cannabrands.app/creationshop/cannaiq-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
|
||||
- kubectl set image deployment/findadispo-frontend findadispo-frontend=code.cannabrands.app/creationshop/findadispo-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
|
||||
- kubectl set image deployment/findagram-frontend findagram-frontend=code.cannabrands.app/creationshop/findagram-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
|
||||
- kubectl rollout status deployment/cannaiq-frontend -n dispensary-scraper --timeout=120s
|
||||
depends_on:
|
||||
- docker-backend
|
||||
- docker-cannaiq
|
||||
- docker-findadispo
|
||||
- docker-findagram
|
||||
when:
|
||||
branch: master
|
||||
event: push
|
||||
41
CLAUDE.md
41
CLAUDE.md
@@ -119,42 +119,7 @@ npx tsx src/db/migrate.ts
|
||||
- Importing it at runtime crashes the app on startup if any environment variable is missing or invalid
|
||||
- `pool.ts` uses lazy initialization - only validates when first query is made
|
||||
|
||||
### 6. ALL API ROUTES REQUIRE AUTHENTICATION — NO EXCEPTIONS
|
||||
|
||||
**Every API router MUST apply `authMiddleware` at the router level.**
|
||||
|
||||
```typescript
|
||||
import { authMiddleware } from '../auth/middleware';
|
||||
|
||||
const router = Router();
|
||||
router.use(authMiddleware); // REQUIRED - first line after router creation
|
||||
```
|
||||
|
||||
**Authentication flow (see `src/auth/middleware.ts`):**
|
||||
1. Check Bearer token (JWT or API token) → grant access if valid
|
||||
2. Check trusted origins (cannaiq.co, findadispo.com, localhost, etc.) → grant access if matched
|
||||
3. Check trusted IPs (127.0.0.1, ::1, internal pod IPs) → grant access if matched
|
||||
4. **Return 401 Unauthorized** if none of the above
|
||||
|
||||
**NEVER create API routes without auth middleware:**
|
||||
- No "public" endpoints that bypass authentication
|
||||
- No "read-only" exceptions
|
||||
- No "analytics-only" exceptions
|
||||
- If an endpoint exists under `/api/*`, it MUST be protected
|
||||
|
||||
**When creating new route files:**
|
||||
1. Import `authMiddleware` from `../auth/middleware`
|
||||
2. Add `router.use(authMiddleware)` immediately after creating the router
|
||||
3. Document security requirements in file header comments
|
||||
|
||||
**Trusted origins (defined in middleware):**
|
||||
- `https://cannaiq.co`
|
||||
- `https://findadispo.com`
|
||||
- `https://findagram.co`
|
||||
- `*.cannabrands.app` domains
|
||||
- `localhost:*` for development
|
||||
|
||||
### 7. LOCAL DEVELOPMENT BY DEFAULT
|
||||
### 6. LOCAL DEVELOPMENT BY DEFAULT
|
||||
|
||||
**Quick Start:**
|
||||
```bash
|
||||
@@ -487,7 +452,6 @@ const result = await pool.query(`
|
||||
16. **Running `lsof -ti:PORT | xargs kill`** or similar process-killing commands
|
||||
17. **Using hardcoded database names** in code or comments
|
||||
18. **Creating or connecting to a second database**
|
||||
19. **Creating API routes without authMiddleware** (all `/api/*` routes MUST be protected)
|
||||
|
||||
---
|
||||
|
||||
@@ -939,8 +903,7 @@ export default defineConfig({
|
||||
|
||||
20) **Crawler Architecture**
|
||||
- **Scraper pod (1 replica)**: Runs the Express API server + scheduler.
|
||||
- **Scraper-worker pods (25 replicas)**: Each runs `dist/tasks/task-worker.js`, polling the job queue.
|
||||
- **Worker naming**: Pods use fantasy names (Aethelgard, Xylos, Kryll, Coriolis, etc.) - see `k8s/scraper-worker.yaml` ConfigMap. Worker IDs: `{PodName}-worker-{n}`
|
||||
- **Scraper-worker pods (5 replicas)**: Each worker runs `dist/dutchie-az/services/worker.js`, polling the job queue.
|
||||
- **Job types**: `menu_detection`, `menu_detection_single`, `dutchie_product_crawl`
|
||||
- **Job schedules** (managed in `job_schedules` table):
|
||||
- `dutchie_az_menu_detection`: Runs daily with 60-min jitter
|
||||
|
||||
@@ -5,7 +5,7 @@ FROM code.cannabrands.app/creationshop/node:20-slim AS builder
|
||||
WORKDIR /app
|
||||
|
||||
COPY package*.json ./
|
||||
RUN npm install
|
||||
RUN npm ci
|
||||
|
||||
COPY . .
|
||||
RUN npm run build
|
||||
@@ -25,9 +25,8 @@ ENV APP_GIT_SHA=${APP_GIT_SHA}
|
||||
ENV APP_BUILD_TIME=${APP_BUILD_TIME}
|
||||
ENV CONTAINER_IMAGE_TAG=${CONTAINER_IMAGE_TAG}
|
||||
|
||||
# Install Chromium dependencies and curl for HTTP requests
|
||||
# Install Chromium dependencies
|
||||
RUN apt-get update && apt-get install -y \
|
||||
curl \
|
||||
chromium \
|
||||
fonts-liberation \
|
||||
libnss3 \
|
||||
@@ -44,13 +43,10 @@ ENV PUPPETEER_EXECUTABLE_PATH=/usr/bin/chromium
|
||||
WORKDIR /app
|
||||
|
||||
COPY package*.json ./
|
||||
RUN npm install --omit=dev
|
||||
RUN npm ci --omit=dev
|
||||
|
||||
COPY --from=builder /app/dist ./dist
|
||||
|
||||
# Copy migrations for auto-migrate on startup
|
||||
COPY migrations ./migrations
|
||||
|
||||
# Create local images directory for when MinIO is not configured
|
||||
RUN mkdir -p /app/public/images/products
|
||||
|
||||
|
||||
@@ -1,394 +0,0 @@
|
||||
# Brand Intelligence API
|
||||
|
||||
## Endpoint
|
||||
|
||||
```
|
||||
GET /api/analytics/v2/brand/:name/intelligence
|
||||
```
|
||||
|
||||
## Query Parameters
|
||||
|
||||
| Param | Type | Default | Description |
|
||||
|-------|------|---------|-------------|
|
||||
| `window` | `7d\|30d\|90d` | `30d` | Time window for trend calculations |
|
||||
| `state` | string | - | Filter by state code (e.g., `AZ`) |
|
||||
| `category` | string | - | Filter by category (e.g., `Flower`) |
|
||||
|
||||
## Response Payload Schema
|
||||
|
||||
```typescript
|
||||
interface BrandIntelligenceResult {
|
||||
brand_name: string;
|
||||
window: '7d' | '30d' | '90d';
|
||||
generated_at: string; // ISO timestamp when data was computed
|
||||
|
||||
performance_snapshot: PerformanceSnapshot;
|
||||
alerts: Alerts;
|
||||
sku_performance: SkuPerformance[];
|
||||
retail_footprint: RetailFootprint;
|
||||
competitive_landscape: CompetitiveLandscape;
|
||||
inventory_health: InventoryHealth;
|
||||
promo_performance: PromoPerformance;
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Section 1: Performance Snapshot
|
||||
|
||||
Summary cards with key brand metrics.
|
||||
|
||||
```typescript
|
||||
interface PerformanceSnapshot {
|
||||
active_skus: number; // Total products in catalog
|
||||
total_revenue_30d: number | null; // Estimated from qty × price
|
||||
total_stores: number; // Active retail partners
|
||||
new_stores_30d: number; // New distribution in window
|
||||
market_share: number | null; // % of category SKUs
|
||||
avg_wholesale_price: number | null;
|
||||
price_position: 'premium' | 'value' | 'competitive';
|
||||
}
|
||||
```
|
||||
|
||||
**UI Label Mapping:**
|
||||
| Field | User-Facing Label | Helper Text |
|
||||
|-------|-------------------|-------------|
|
||||
| `active_skus` | Active Products | X total in catalog |
|
||||
| `total_revenue_30d` | Monthly Revenue | Estimated from sales |
|
||||
| `total_stores` | Retail Distribution | Active retail partners |
|
||||
| `new_stores_30d` | New Opportunities | X new in last 30 days |
|
||||
| `market_share` | Category Position | % of category |
|
||||
| `avg_wholesale_price` | Avg Wholesale | Per unit |
|
||||
| `price_position` | Pricing Tier | Premium/Value/Market Rate |
|
||||
|
||||
---
|
||||
|
||||
## Section 2: Alerts
|
||||
|
||||
Issues requiring attention.
|
||||
|
||||
```typescript
|
||||
interface Alerts {
|
||||
lost_stores_30d_count: number;
|
||||
lost_skus_30d_count: number;
|
||||
competitor_takeover_count: number;
|
||||
avg_oos_duration_days: number | null;
|
||||
avg_reorder_lag_days: number | null;
|
||||
items: AlertItem[];
|
||||
}
|
||||
|
||||
interface AlertItem {
|
||||
type: 'lost_store' | 'delisted_sku' | 'shelf_loss' | 'extended_oos';
|
||||
severity: 'critical' | 'warning';
|
||||
store_name?: string;
|
||||
product_name?: string;
|
||||
competitor_brand?: string;
|
||||
days_since?: number;
|
||||
state_code?: string;
|
||||
}
|
||||
```
|
||||
|
||||
**UI Label Mapping:**
|
||||
| Field | User-Facing Label |
|
||||
|-------|-------------------|
|
||||
| `lost_stores_30d_count` | Accounts at Risk |
|
||||
| `lost_skus_30d_count` | Delisted SKUs |
|
||||
| `competitor_takeover_count` | Shelf Losses |
|
||||
| `avg_oos_duration_days` | Avg Stockout Length |
|
||||
| `avg_reorder_lag_days` | Avg Restock Time |
|
||||
| `severity: critical` | Urgent |
|
||||
| `severity: warning` | Watch |
|
||||
|
||||
---
|
||||
|
||||
## Section 3: SKU Performance (Product Velocity)
|
||||
|
||||
How fast each SKU sells.
|
||||
|
||||
```typescript
|
||||
interface SkuPerformance {
|
||||
store_product_id: number;
|
||||
product_name: string;
|
||||
category: string | null;
|
||||
daily_velocity: number; // Units/day estimate
|
||||
velocity_status: 'hot' | 'steady' | 'slow' | 'stale';
|
||||
retail_price: number | null;
|
||||
on_sale: boolean;
|
||||
stores_carrying: number;
|
||||
stock_status: 'in_stock' | 'low_stock' | 'out_of_stock';
|
||||
}
|
||||
```
|
||||
|
||||
**UI Label Mapping:**
|
||||
| Field | User-Facing Label |
|
||||
|-------|-------------------|
|
||||
| `daily_velocity` | Daily Rate |
|
||||
| `velocity_status` | Momentum |
|
||||
| `velocity_status: hot` | Hot |
|
||||
| `velocity_status: steady` | Steady |
|
||||
| `velocity_status: slow` | Slow |
|
||||
| `velocity_status: stale` | Stale |
|
||||
| `retail_price` | Retail Price |
|
||||
| `on_sale` | Promo (badge) |
|
||||
|
||||
**Velocity Thresholds:**
|
||||
- `hot`: >= 5 units/day
|
||||
- `steady`: >= 1 unit/day
|
||||
- `slow`: >= 0.1 units/day
|
||||
- `stale`: < 0.1 units/day
|
||||
|
||||
---
|
||||
|
||||
## Section 4: Retail Footprint
|
||||
|
||||
Store placement and coverage.
|
||||
|
||||
```typescript
|
||||
interface RetailFootprint {
|
||||
total_stores: number;
|
||||
in_stock_count: number;
|
||||
out_of_stock_count: number;
|
||||
penetration_by_region: RegionPenetration[];
|
||||
whitespace_stores: WhitespaceStore[];
|
||||
}
|
||||
|
||||
interface RegionPenetration {
|
||||
state_code: string;
|
||||
store_count: number;
|
||||
percent_reached: number; // % of state's dispensaries
|
||||
in_stock: number;
|
||||
out_of_stock: number;
|
||||
}
|
||||
|
||||
interface WhitespaceStore {
|
||||
store_id: number;
|
||||
store_name: string;
|
||||
state_code: string;
|
||||
city: string | null;
|
||||
category_fit: number; // How many competing brands they carry
|
||||
competitor_brands: string[];
|
||||
}
|
||||
```
|
||||
|
||||
**UI Label Mapping:**
|
||||
| Field | User-Facing Label |
|
||||
|-------|-------------------|
|
||||
| `penetration_by_region` | Market Coverage by Region |
|
||||
| `percent_reached` | X% reached |
|
||||
| `in_stock` | X stocked |
|
||||
| `out_of_stock` | X out |
|
||||
| `whitespace_stores` | Expansion Opportunities |
|
||||
| `category_fit` | X fit |
|
||||
|
||||
---
|
||||
|
||||
## Section 5: Competitive Landscape
|
||||
|
||||
Market positioning vs competitors.
|
||||
|
||||
```typescript
|
||||
interface CompetitiveLandscape {
|
||||
brand_price_position: 'premium' | 'value' | 'competitive';
|
||||
market_share_trend: MarketSharePoint[];
|
||||
competitors: Competitor[];
|
||||
head_to_head_skus: HeadToHead[];
|
||||
}
|
||||
|
||||
interface MarketSharePoint {
|
||||
date: string;
|
||||
share_percent: number;
|
||||
}
|
||||
|
||||
interface Competitor {
|
||||
brand_name: string;
|
||||
store_overlap_percent: number;
|
||||
price_position: 'premium' | 'value' | 'competitive';
|
||||
avg_price: number | null;
|
||||
sku_count: number;
|
||||
}
|
||||
|
||||
interface HeadToHead {
|
||||
product_name: string;
|
||||
brand_price: number;
|
||||
competitor_brand: string;
|
||||
competitor_price: number;
|
||||
price_diff_percent: number;
|
||||
}
|
||||
```
|
||||
|
||||
**UI Label Mapping:**
|
||||
| Field | User-Facing Label |
|
||||
|-------|-------------------|
|
||||
| `price_position: premium` | Premium Tier |
|
||||
| `price_position: value` | Value Leader |
|
||||
| `price_position: competitive` | Market Rate |
|
||||
| `market_share_trend` | Share of Shelf Trend |
|
||||
| `head_to_head_skus` | Price Comparison |
|
||||
| `store_overlap_percent` | X% store overlap |
|
||||
|
||||
---
|
||||
|
||||
## Section 6: Inventory Health
|
||||
|
||||
Stock projections and risk levels.
|
||||
|
||||
```typescript
|
||||
interface InventoryHealth {
|
||||
critical_count: number; // <7 days stock
|
||||
warning_count: number; // 7-14 days stock
|
||||
healthy_count: number; // 14-90 days stock
|
||||
overstocked_count: number; // >90 days stock
|
||||
skus: InventorySku[];
|
||||
overstock_alert: OverstockItem[];
|
||||
}
|
||||
|
||||
interface InventorySku {
|
||||
store_product_id: number;
|
||||
product_name: string;
|
||||
store_name: string;
|
||||
days_of_stock: number | null;
|
||||
risk_level: 'critical' | 'elevated' | 'moderate' | 'healthy';
|
||||
current_quantity: number | null;
|
||||
daily_sell_rate: number | null;
|
||||
}
|
||||
|
||||
interface OverstockItem {
|
||||
product_name: string;
|
||||
store_name: string;
|
||||
excess_units: number;
|
||||
days_of_stock: number;
|
||||
}
|
||||
```
|
||||
|
||||
**UI Label Mapping:**
|
||||
| Field | User-Facing Label |
|
||||
|-------|-------------------|
|
||||
| `risk_level: critical` | Reorder Now |
|
||||
| `risk_level: elevated` | Low Stock |
|
||||
| `risk_level: moderate` | Monitor |
|
||||
| `risk_level: healthy` | Healthy |
|
||||
| `critical_count` | Urgent (<7 days) |
|
||||
| `warning_count` | Low (7-14 days) |
|
||||
| `overstocked_count` | Excess (>90 days) |
|
||||
| `days_of_stock` | X days remaining |
|
||||
| `overstock_alert` | Overstock Alert |
|
||||
| `excess_units` | X excess units |
|
||||
|
||||
---
|
||||
|
||||
## Section 7: Promotion Effectiveness
|
||||
|
||||
How promotions impact sales.
|
||||
|
||||
```typescript
|
||||
interface PromoPerformance {
|
||||
avg_baseline_velocity: number | null;
|
||||
avg_promo_velocity: number | null;
|
||||
avg_velocity_lift: number | null; // % increase during promo
|
||||
avg_efficiency_score: number | null; // ROI proxy
|
||||
promotions: Promotion[];
|
||||
}
|
||||
|
||||
interface Promotion {
|
||||
product_name: string;
|
||||
store_name: string;
|
||||
status: 'active' | 'scheduled' | 'ended';
|
||||
start_date: string;
|
||||
end_date: string | null;
|
||||
regular_price: number;
|
||||
promo_price: number;
|
||||
discount_percent: number;
|
||||
baseline_velocity: number | null;
|
||||
promo_velocity: number | null;
|
||||
velocity_lift: number | null;
|
||||
efficiency_score: number | null;
|
||||
}
|
||||
```
|
||||
|
||||
**UI Label Mapping:**
|
||||
| Field | User-Facing Label |
|
||||
|-------|-------------------|
|
||||
| `avg_baseline_velocity` | Normal Rate |
|
||||
| `avg_promo_velocity` | During Promos |
|
||||
| `avg_velocity_lift` | Avg Sales Lift |
|
||||
| `avg_efficiency_score` | ROI Score |
|
||||
| `velocity_lift` | Sales Lift |
|
||||
| `efficiency_score` | ROI Score |
|
||||
| `status: active` | Live |
|
||||
| `status: scheduled` | Scheduled |
|
||||
| `status: ended` | Ended |
|
||||
|
||||
---
|
||||
|
||||
## Example Queries
|
||||
|
||||
### Get full payload
|
||||
```javascript
|
||||
const response = await fetch('/api/analytics/v2/brand/Wyld/intelligence?window=30d');
|
||||
const data = await response.json();
|
||||
```
|
||||
|
||||
### Extract summary cards (flattened)
|
||||
```javascript
|
||||
const { performance_snapshot: ps, alerts } = data;
|
||||
|
||||
const summaryCards = {
|
||||
activeProducts: ps.active_skus,
|
||||
monthlyRevenue: ps.total_revenue_30d,
|
||||
retailDistribution: ps.total_stores,
|
||||
newOpportunities: ps.new_stores_30d,
|
||||
categoryPosition: ps.market_share,
|
||||
avgWholesale: ps.avg_wholesale_price,
|
||||
pricingTier: ps.price_position,
|
||||
accountsAtRisk: alerts.lost_stores_30d_count,
|
||||
delistedSkus: alerts.lost_skus_30d_count,
|
||||
shelfLosses: alerts.competitor_takeover_count,
|
||||
};
|
||||
```
|
||||
|
||||
### Get top 10 fastest selling SKUs
|
||||
```javascript
|
||||
const topSkus = data.sku_performance
|
||||
.filter(sku => sku.velocity_status === 'hot' || sku.velocity_status === 'steady')
|
||||
.sort((a, b) => b.daily_velocity - a.daily_velocity)
|
||||
.slice(0, 10);
|
||||
```
|
||||
|
||||
### Get critical inventory alerts only
|
||||
```javascript
|
||||
const criticalInventory = data.inventory_health.skus
|
||||
.filter(sku => sku.risk_level === 'critical');
|
||||
```
|
||||
|
||||
### Get states with <50% penetration
|
||||
```javascript
|
||||
const underPenetrated = data.retail_footprint.penetration_by_region
|
||||
.filter(region => region.percent_reached < 50)
|
||||
.sort((a, b) => a.percent_reached - b.percent_reached);
|
||||
```
|
||||
|
||||
### Get active promotions with positive lift
|
||||
```javascript
|
||||
const effectivePromos = data.promo_performance.promotions
|
||||
.filter(p => p.status === 'active' && p.velocity_lift > 0)
|
||||
.sort((a, b) => b.velocity_lift - a.velocity_lift);
|
||||
```
|
||||
|
||||
### Build chart data for market share trend
|
||||
```javascript
|
||||
const chartData = data.competitive_landscape.market_share_trend.map(point => ({
|
||||
x: new Date(point.date),
|
||||
y: point.share_percent,
|
||||
}));
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Notes for Frontend Implementation
|
||||
|
||||
1. **All fields are snake_case** - transform to camelCase if needed
|
||||
2. **Null values are possible** - handle gracefully in UI
|
||||
3. **Arrays may be empty** - show appropriate empty states
|
||||
4. **Timestamps are ISO 8601 strings** - parse with `new Date(value)`
|
||||
5. **Percentages are already on a 0-100 scale** - no need to multiply by 100
|
||||
6. **The `window` parameter affects trend calculations** - accepts `7d`, `30d`, or `90d` (default `30d`)
|
||||
@@ -500,18 +500,17 @@ CREATE TABLE proxies (
|
||||
|
||||
Proxies are mandatory. There is no environment variable to disable them. Workers will refuse to start without active proxies in the database.
|
||||
|
||||
### User-Agent Generation
|
||||
### Fingerprints Available
|
||||
|
||||
See `workflow-12102025.md` for full specification.
|
||||
The client includes 6 browser fingerprints:
|
||||
- Chrome 131 on Windows
|
||||
- Chrome 131 on macOS
|
||||
- Chrome 120 on Windows
|
||||
- Firefox 133 on Windows
|
||||
- Safari 17.2 on macOS
|
||||
- Edge 131 on Windows
|
||||
|
||||
**Summary:**
|
||||
- Uses `intoli/user-agents` library (daily-updated market share data)
|
||||
- Device distribution: Mobile 62%, Desktop 36%, Tablet 2%
|
||||
- Browser whitelist: Chrome, Safari, Edge, Firefox only
|
||||
- UA sticks until IP rotates (403 or manual rotation)
|
||||
- Failure = alert admin + stop crawl (no fallback)
|
||||
|
||||
Each fingerprint includes proper `sec-ch-ua`, `sec-ch-ua-platform`, and `sec-ch-ua-mobile` headers.
|
||||
Each includes proper `sec-ch-ua`, `sec-ch-ua-platform`, and `sec-ch-ua-mobile` headers.
|
||||
|
||||
---
|
||||
|
||||
|
||||
@@ -1,584 +0,0 @@
|
||||
# Task Workflow Documentation
|
||||
**Date: 2024-12-10**
|
||||
|
||||
This document describes the complete task/job processing architecture after the 2024-12-10 rewrite.
|
||||
|
||||
---
|
||||
|
||||
## Complete Architecture
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────────────────────────────┐
|
||||
│ KUBERNETES CLUSTER │
|
||||
├─────────────────────────────────────────────────────────────────────────────────┤
|
||||
│ │
|
||||
│ ┌─────────────────────────────────────────────────────────────────────────┐ │
|
||||
│ │ API SERVER POD (scraper) │ │
|
||||
│ │ │ │
|
||||
│ │ ┌──────────────────┐ ┌────────────────────────────────────────┐ │ │
|
||||
│ │ │ Express API │ │ TaskScheduler │ │ │
|
||||
│ │ │ │ │ (src/services/task-scheduler.ts) │ │ │
|
||||
│ │ │ /api/job-queue │ │ │ │ │
|
||||
│ │ │ /api/tasks │ │ • Polls every 60s │ │ │
|
||||
│ │ │ /api/schedules │ │ • Checks task_schedules table │ │ │
|
||||
│ │ └────────┬─────────┘ │ • SELECT FOR UPDATE SKIP LOCKED │ │ │
|
||||
│ │ │ │ • Generates tasks when due │ │ │
|
||||
│ │ │ └──────────────────┬─────────────────────┘ │ │
|
||||
│ │ │ │ │ │
|
||||
│ └────────────┼──────────────────────────────────┼──────────────────────────┘ │
|
||||
│ │ │ │
|
||||
│ │ ┌────────────────────────┘ │
|
||||
│ │ │ │
|
||||
│ ▼ ▼ │
|
||||
│ ┌─────────────────────────────────────────────────────────────────────────┐ │
|
||||
│ │ POSTGRESQL DATABASE │ │
|
||||
│ │ │ │
|
||||
│ │ ┌─────────────────────┐ ┌─────────────────────┐ │ │
|
||||
│ │ │ task_schedules │ │ worker_tasks │ │ │
|
||||
│ │ │ │ │ │ │ │
|
||||
│ │ │ • product_refresh │───────►│ • pending tasks │ │ │
|
||||
│ │ │ • store_discovery │ create │ • claimed tasks │ │ │
|
||||
│ │ │ • analytics_refresh │ tasks │ • running tasks │ │ │
|
||||
│ │ │ │ │ • completed tasks │ │ │
|
||||
│ │ │ next_run_at │ │ │ │ │
|
||||
│ │ │ last_run_at │ │ role, dispensary_id │ │ │
|
||||
│ │ │ interval_hours │ │ priority, status │ │ │
|
||||
│ │ └─────────────────────┘ └──────────┬──────────┘ │ │
|
||||
│ │ │ │ │
|
||||
│ └─────────────────────────────────────────────┼────────────────────────────┘ │
|
||||
│ │ │
|
||||
│ ┌──────────────────────┘ │
|
||||
│ │ Workers poll for tasks │
|
||||
│ │ (SELECT FOR UPDATE SKIP LOCKED) │
|
||||
│ ▼ │
|
||||
│ ┌─────────────────────────────────────────────────────────────────────────┐ │
|
||||
│ │ WORKER PODS (StatefulSet: scraper-worker) │ │
|
||||
│ │ │ │
|
||||
│ │ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │ │
|
||||
│ │ │ Worker 0 │ │ Worker 1 │ │ Worker 2 │ │ Worker N │ │ │
|
||||
│ │ │ │ │ │ │ │ │ │ │ │
|
||||
│ │ │ task-worker │ │ task-worker │ │ task-worker │ │ task-worker │ │ │
|
||||
│ │ │ .ts │ │ .ts │ │ .ts │ │ .ts │ │ │
|
||||
│ │ └─────────────┘ └─────────────┘ └─────────────┘ └─────────────┘ │ │
|
||||
│ │ │ │
|
||||
│ └──────────────────────────────────────────────────────────────────────────┘ │
|
||||
│ │
|
||||
└──────────────────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Startup Sequence
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────────────────────────┐
|
||||
│ API SERVER STARTUP │
|
||||
├─────────────────────────────────────────────────────────────────────────────┤
|
||||
│ │
|
||||
│ 1. Express app initializes │
|
||||
│ │ │
|
||||
│ ▼ │
|
||||
│ 2. runAutoMigrations() │
|
||||
│ • Runs pending migrations (including 079_task_schedules.sql) │
|
||||
│ │ │
|
||||
│ ▼ │
|
||||
│ 3. initializeMinio() / initializeImageStorage() │
|
||||
│ │ │
|
||||
│ ▼ │
|
||||
│ 4. cleanupOrphanedJobs() │
|
||||
│ │ │
|
||||
│ ▼ │
|
||||
│ 5. taskScheduler.start() ◄─── NEW (per TASK_WORKFLOW_2024-12-10.md) │
|
||||
│ │ │
|
||||
│ ├── Recover stale tasks (workers that died) │
|
||||
│ ├── Ensure default schedules exist in task_schedules │
|
||||
│ ├── Check and run any due schedules immediately │
|
||||
│ └── Start 60-second poll interval │
|
||||
│ │ │
|
||||
│ ▼ │
|
||||
│ 6. app.listen(PORT) │
|
||||
│ │
|
||||
└─────────────────────────────────────────────────────────────────────────────┘
|
||||
|
||||
┌─────────────────────────────────────────────────────────────────────────────┐
|
||||
│ WORKER POD STARTUP │
|
||||
├─────────────────────────────────────────────────────────────────────────────┤
|
||||
│ │
|
||||
│ 1. K8s starts pod from StatefulSet │
|
||||
│ │ │
|
||||
│ ▼ │
|
||||
│ 2. TaskWorker.constructor() │
|
||||
│ • Create DB pool │
|
||||
│ • Create CrawlRotator │
|
||||
│ │ │
|
||||
│ ▼ │
|
||||
│ 3. initializeStealth() │
|
||||
│ • Load proxies from DB (REQUIRED - fails if none) │
|
||||
│ • Wire rotator to Dutchie client │
|
||||
│ │ │
|
||||
│ ▼ │
|
||||
│ 4. register() with API │
|
||||
│ • Optional - continues if fails │
|
||||
│ │ │
|
||||
│ ▼ │
|
||||
│ 5. startRegistryHeartbeat() every 30s │
|
||||
│ │ │
|
||||
│ ▼ │
|
||||
│ 6. processNextTask() loop │
|
||||
│ │ │
|
||||
│ ├── Poll for pending task (FOR UPDATE SKIP LOCKED) │
|
||||
│ ├── Claim task atomically │
|
||||
│ ├── Execute handler (product_refresh, store_discovery, etc.) │
|
||||
│ ├── Mark complete/failed │
|
||||
│ ├── Chain next task if applicable │
|
||||
│ └── Loop │
|
||||
│ │
|
||||
└─────────────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Schedule Flow
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────────────────────────┐
|
||||
│ SCHEDULER POLL (every 60 seconds) │
|
||||
├─────────────────────────────────────────────────────────────────────────────┤
|
||||
│ │
|
||||
│ BEGIN TRANSACTION │
|
||||
│ │ │
|
||||
│ ▼ │
|
||||
│ SELECT * FROM task_schedules │
|
||||
│ WHERE enabled = true AND next_run_at <= NOW() │
|
||||
│ FOR UPDATE SKIP LOCKED ◄─── Prevents duplicate execution across replicas │
|
||||
│ │ │
|
||||
│ ▼ │
|
||||
│ For each due schedule: │
|
||||
│ │ │
|
||||
│    ├── payload_fetch_all                                                    │
|
||||
│    │       └─► Query dispensaries needing crawl                             │
|
||||
│    │       └─► Create payload_fetch tasks in worker_tasks                   │
|
||||
│ │ │
|
||||
│ ├── store_discovery_dutchie │
|
||||
│ │ └─► Create single store_discovery task │
|
||||
│ │ │
|
||||
│ └── analytics_refresh │
|
||||
│ └─► Create single analytics_refresh task │
|
||||
│ │ │
|
||||
│ ▼ │
|
||||
│ UPDATE task_schedules SET │
|
||||
│ last_run_at = NOW(), │
|
||||
│ next_run_at = NOW() + interval_hours │
|
||||
│ │ │
|
||||
│ ▼ │
|
||||
│ COMMIT │
|
||||
│ │
|
||||
└─────────────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Task Lifecycle
|
||||
|
||||
```
|
||||
┌──────────┐
|
||||
│ SCHEDULE │
|
||||
│ DUE │
|
||||
└────┬─────┘
|
||||
│
|
||||
▼
|
||||
┌──────────────┐ claim ┌──────────────┐ start ┌──────────────┐
|
||||
│ PENDING │────────────►│ CLAIMED │────────────►│ RUNNING │
|
||||
└──────────────┘ └──────────────┘ └──────┬───────┘
|
||||
▲ │
|
||||
│ ┌──────────────┼──────────────┐
|
||||
│ retry │ │ │
|
||||
│ (if retries < max) ▼ ▼ ▼
|
||||
│ ┌──────────┐ ┌──────────┐ ┌──────────┐
|
||||
└──────────────────────────────────│ FAILED │ │ COMPLETED│ │ STALE │
|
||||
└──────────┘ └──────────┘ └────┬─────┘
|
||||
│
|
||||
recover_stale_tasks()
|
||||
│
|
||||
▼
|
||||
┌──────────┐
|
||||
│ PENDING │
|
||||
└──────────┘
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Database Tables
|
||||
|
||||
### task_schedules (NEW - migration 079)
|
||||
|
||||
Stores schedule definitions. Survives restarts.
|
||||
|
||||
```sql
|
||||
CREATE TABLE task_schedules (
|
||||
id SERIAL PRIMARY KEY,
|
||||
name VARCHAR(100) NOT NULL UNIQUE,
|
||||
role VARCHAR(50) NOT NULL, -- product_refresh, store_discovery, etc.
|
||||
enabled BOOLEAN DEFAULT TRUE,
|
||||
interval_hours INTEGER NOT NULL, -- How often to run
|
||||
priority INTEGER DEFAULT 0, -- Task priority when created
|
||||
state_code VARCHAR(2), -- Optional filter
|
||||
last_run_at TIMESTAMPTZ, -- When it last ran
|
||||
next_run_at TIMESTAMPTZ, -- When it's due next
|
||||
last_task_count INTEGER, -- Tasks created last run
|
||||
last_error TEXT -- Error message if failed
|
||||
);
|
||||
```
|
||||
|
||||
### worker_tasks (migration 074)
|
||||
|
||||
The task queue. Workers pull from here.
|
||||
|
||||
```sql
|
||||
CREATE TABLE worker_tasks (
|
||||
id SERIAL PRIMARY KEY,
|
||||
role task_role NOT NULL, -- What type of work
|
||||
dispensary_id INTEGER, -- Which store (if applicable)
|
||||
platform VARCHAR(50), -- Which platform
|
||||
status task_status DEFAULT 'pending',
|
||||
priority INTEGER DEFAULT 0, -- Higher = process first
|
||||
scheduled_for TIMESTAMP, -- Don't process before this time
|
||||
worker_id VARCHAR(100), -- Which worker claimed it
|
||||
claimed_at TIMESTAMP,
|
||||
started_at TIMESTAMP,
|
||||
completed_at TIMESTAMP,
|
||||
last_heartbeat_at TIMESTAMP, -- For stale detection
|
||||
result JSONB,
|
||||
error_message TEXT,
|
||||
retry_count INTEGER DEFAULT 0,
|
||||
max_retries INTEGER DEFAULT 3
|
||||
);
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Default Schedules
|
||||
|
||||
| Name | Role | Interval | Priority | Description |
|
||||
|------|------|----------|----------|-------------|
|
||||
| `payload_fetch_all` | payload_fetch | 4 hours | 0 | Fetch payloads from Dutchie API (chains to product_refresh) |
|
||||
| `store_discovery_dutchie` | store_discovery | 24 hours | 5 | Find new Dutchie stores |
|
||||
| `analytics_refresh` | analytics_refresh | 6 hours | 0 | Refresh MVs |
|
||||
|
||||
---
|
||||
|
||||
## Task Roles
|
||||
|
||||
| Role | Description | Creates Tasks For |
|
||||
|------|-------------|-------------------|
|
||||
| `payload_fetch` | **NEW** - Fetch from Dutchie API, save to disk | Each dispensary needing crawl |
|
||||
| `product_refresh` | **CHANGED** - Read local payload, normalize, upsert to DB | Chained from payload_fetch |
|
||||
| `store_discovery` | Find new dispensaries, returns newStoreIds[] | Single task per platform |
|
||||
| `entry_point_discovery` | **DEPRECATED** - Resolve platform IDs | No longer used |
|
||||
| `product_discovery` | Initial product fetch for new stores | Chained from store_discovery |
|
||||
| `analytics_refresh` | Refresh MVs | Single global task |
|
||||
|
||||
### Payload/Refresh Separation (2024-12-10)
|
||||
|
||||
The crawl workflow is now split into two phases:
|
||||
|
||||
```
|
||||
payload_fetch (scheduled every 4h)
|
||||
└─► Hit Dutchie GraphQL API
|
||||
└─► Save raw JSON to /storage/payloads/{year}/{month}/{day}/store_{id}_{ts}.json.gz
|
||||
└─► Record metadata in raw_crawl_payloads table
|
||||
└─► Queue product_refresh task with payload_id
|
||||
|
||||
product_refresh (chained from payload_fetch)
|
||||
└─► Load payload from filesystem (NOT from API)
|
||||
└─► Normalize via DutchieNormalizer
|
||||
└─► Upsert to store_products
|
||||
└─► Create snapshots
|
||||
└─► Track missing products
|
||||
└─► Download images
|
||||
```
|
||||
|
||||
**Benefits:**
|
||||
- **Retry-friendly**: If normalize fails, re-run product_refresh without re-crawling
|
||||
- **Replay-able**: Run product_refresh against any historical payload
|
||||
- **Faster refreshes**: Local file read vs network call
|
||||
- **Historical diffs**: Compare payloads to see what changed between crawls
|
||||
- **Less API pressure**: Only payload_fetch hits Dutchie
|
||||
|
||||
---
|
||||
|
||||
## Task Chaining
|
||||
|
||||
Tasks automatically queue follow-up tasks upon successful completion. This creates two main flows:
|
||||
|
||||
### Discovery Flow (New Stores)
|
||||
|
||||
When `store_discovery` finds new dispensaries, they automatically get their initial product data:
|
||||
|
||||
```
|
||||
store_discovery
|
||||
└─► Discovers new locations via Dutchie GraphQL
|
||||
└─► Auto-promotes valid locations to dispensaries table
|
||||
└─► Collects newDispensaryIds[] from promotions
|
||||
└─► Returns { newStoreIds: [...] } in result
|
||||
|
||||
chainNextTask() detects newStoreIds
|
||||
└─► Creates product_discovery task for each new store
|
||||
|
||||
product_discovery
|
||||
└─► Calls handlePayloadFetch() internally
|
||||
└─► payload_fetch hits Dutchie API
|
||||
└─► Saves raw JSON to /storage/payloads/
|
||||
└─► Queues product_refresh task with payload_id
|
||||
|
||||
product_refresh
|
||||
└─► Loads payload from filesystem
|
||||
└─► Normalizes and upserts to store_products
|
||||
└─► Creates snapshots, downloads images
|
||||
```
|
||||
|
||||
**Complete Discovery Chain:**
|
||||
```
|
||||
store_discovery → product_discovery → payload_fetch → product_refresh
|
||||
(internal call) (queues next)
|
||||
```
|
||||
|
||||
### Scheduled Flow (Existing Stores)
|
||||
|
||||
For existing stores, `payload_fetch_all` schedule runs every 4 hours:
|
||||
|
||||
```
|
||||
TaskScheduler (every 60s)
|
||||
└─► Checks task_schedules for due schedules
|
||||
└─► payload_fetch_all is due
|
||||
└─► Generates payload_fetch task for each dispensary
|
||||
|
||||
payload_fetch
|
||||
└─► Hits Dutchie GraphQL API
|
||||
└─► Saves raw JSON to /storage/payloads/
|
||||
└─► Queues product_refresh task with payload_id
|
||||
|
||||
product_refresh
|
||||
└─► Loads payload from filesystem (NOT API)
|
||||
└─► Normalizes via DutchieNormalizer
|
||||
└─► Upserts to store_products
|
||||
└─► Creates snapshots
|
||||
```
|
||||
|
||||
**Complete Scheduled Chain:**
|
||||
```
|
||||
payload_fetch → product_refresh
|
||||
(queues) (reads local)
|
||||
```
|
||||
|
||||
### Chaining Implementation
|
||||
|
||||
Task chaining is handled in three places:
|
||||
|
||||
1. **Internal chaining (handler calls handler):**
|
||||
- `product_discovery` calls `handlePayloadFetch()` directly
|
||||
|
||||
2. **External chaining (chainNextTask() in task-service.ts):**
|
||||
- Called after task completion
|
||||
- `store_discovery` → queues `product_discovery` for each newStoreId
|
||||
|
||||
3. **Queue-based chaining (taskService.createTask):**
|
||||
- `payload_fetch` queues `product_refresh` with `payload: { payload_id }`
|
||||
|
||||
---
|
||||
|
||||
## Payload API Endpoints
|
||||
|
||||
Raw crawl payloads can be accessed via the Payloads API:
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
|----------|--------|-------------|
|
||||
| `GET /api/payloads` | GET | List payload metadata (paginated) |
|
||||
| `GET /api/payloads/:id` | GET | Get payload metadata by ID |
|
||||
| `GET /api/payloads/:id/data` | GET | Get full payload JSON (decompressed) |
|
||||
| `GET /api/payloads/store/:dispensaryId` | GET | List payloads for a store |
|
||||
| `GET /api/payloads/store/:dispensaryId/latest` | GET | Get latest payload for a store |
|
||||
| `GET /api/payloads/store/:dispensaryId/diff` | GET | Diff two payloads for changes |
|
||||
|
||||
### Payload Diff Response
|
||||
|
||||
The diff endpoint returns:
|
||||
```json
|
||||
{
|
||||
"success": true,
|
||||
"from": { "id": 123, "fetchedAt": "...", "productCount": 100 },
|
||||
"to": { "id": 456, "fetchedAt": "...", "productCount": 105 },
|
||||
"diff": {
|
||||
"added": 10,
|
||||
"removed": 5,
|
||||
"priceChanges": 8,
|
||||
"stockChanges": 12
|
||||
},
|
||||
"details": {
|
||||
"added": [...],
|
||||
"removed": [...],
|
||||
"priceChanges": [...],
|
||||
"stockChanges": [...]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## API Endpoints
|
||||
|
||||
### Schedules (NEW)
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
|----------|--------|-------------|
|
||||
| `GET /api/schedules` | GET | List all schedules |
|
||||
| `PUT /api/schedules/:id` | PUT | Update schedule |
|
||||
| `POST /api/schedules/:id/trigger` | POST | Run schedule immediately |
|
||||
|
||||
### Task Creation (rewired 2024-12-10)
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
|----------|--------|-------------|
|
||||
| `POST /api/job-queue/enqueue` | POST | Create single task |
|
||||
| `POST /api/job-queue/enqueue-batch` | POST | Create batch tasks |
|
||||
| `POST /api/job-queue/enqueue-state` | POST | Create tasks for state |
|
||||
| `POST /api/tasks` | POST | Direct task creation |
|
||||
|
||||
### Task Management
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
|----------|--------|-------------|
|
||||
| `GET /api/tasks` | GET | List tasks |
|
||||
| `GET /api/tasks/:id` | GET | Get single task |
|
||||
| `GET /api/tasks/counts` | GET | Task counts by status |
|
||||
| `POST /api/tasks/recover-stale` | POST | Recover stale tasks |
|
||||
|
||||
---
|
||||
|
||||
## Key Files
|
||||
|
||||
| File | Purpose |
|
||||
|------|---------|
|
||||
| `src/services/task-scheduler.ts` | **NEW** - DB-driven scheduler |
|
||||
| `src/tasks/task-worker.ts` | Worker that processes tasks |
|
||||
| `src/tasks/task-service.ts` | Task CRUD operations |
|
||||
| `src/tasks/handlers/payload-fetch.ts` | **NEW** - Fetches from API, saves to disk |
|
||||
| `src/tasks/handlers/product-refresh.ts` | **CHANGED** - Reads from disk, processes to DB |
|
||||
| `src/utils/payload-storage.ts` | **NEW** - Payload save/load utilities |
|
||||
| `src/routes/tasks.ts` | Task API endpoints |
|
||||
| `src/routes/job-queue.ts` | Job Queue UI endpoints (rewired) |
|
||||
| `migrations/079_task_schedules.sql` | Schedule table |
|
||||
| `migrations/080_raw_crawl_payloads.sql` | Payload metadata table |
|
||||
| `migrations/081_payload_fetch_columns.sql` | payload, last_fetch_at columns |
|
||||
| `migrations/074_worker_task_queue.sql` | Task queue table |
|
||||
|
||||
---
|
||||
|
||||
## Legacy Code (DEPRECATED)
|
||||
|
||||
| File | Status | Replacement |
|
||||
|------|--------|-------------|
|
||||
| `src/services/scheduler.ts` | DEPRECATED | `task-scheduler.ts` |
|
||||
| `dispensary_crawl_jobs` table | ORPHANED | `worker_tasks` |
|
||||
| `job_schedules` table | LEGACY | `task_schedules` |
|
||||
|
||||
---
|
||||
|
||||
## Dashboard Integration
|
||||
|
||||
Both pages remain wired to the dashboard:
|
||||
|
||||
| Page | Data Source | Actions |
|
||||
|------|-------------|---------|
|
||||
| **Job Queue** | `worker_tasks`, `task_schedules` | Create tasks, view schedules |
|
||||
| **Task Queue** | `worker_tasks` | View tasks, recover stale |
|
||||
|
||||
---
|
||||
|
||||
## Multi-Replica Safety
|
||||
|
||||
The scheduler uses `SELECT FOR UPDATE SKIP LOCKED` to ensure:
|
||||
|
||||
1. **Only one replica** executes a schedule at a time
|
||||
2. **No duplicate tasks** created
|
||||
3. **Survives pod restarts** - state in DB, not memory
|
||||
4. **Self-healing** - recovers stale tasks on startup
|
||||
|
||||
```sql
|
||||
-- Run inside a transaction: row locks let exactly one API server replica claim each due schedule; other replicas skip locked rows
|
||||
SELECT * FROM task_schedules
|
||||
WHERE enabled = true AND next_run_at <= NOW()
|
||||
FOR UPDATE SKIP LOCKED
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Worker Scaling (K8s)
|
||||
|
||||
Workers run as a StatefulSet in Kubernetes. You can scale from the admin UI or CLI.
|
||||
|
||||
### From Admin UI
|
||||
|
||||
The Workers page (`/admin/workers`) provides:
|
||||
- Current replica count display
|
||||
- Scale up/down buttons
|
||||
- Target replica input
|
||||
|
||||
### API Endpoints
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
|----------|--------|-------------|
|
||||
| `GET /api/workers/k8s/replicas` | GET | Get current/desired replica counts |
|
||||
| `POST /api/workers/k8s/scale` | POST | Scale to N replicas (body: `{ replicas: N }`) |
|
||||
|
||||
### From CLI
|
||||
|
||||
```bash
|
||||
# View current replicas
|
||||
kubectl get statefulset scraper-worker -n dispensary-scraper
|
||||
|
||||
# Scale to 10 workers
|
||||
kubectl scale statefulset scraper-worker -n dispensary-scraper --replicas=10
|
||||
|
||||
# Scale down to 3 workers
|
||||
kubectl scale statefulset scraper-worker -n dispensary-scraper --replicas=3
|
||||
```
|
||||
|
||||
### Configuration
|
||||
|
||||
Environment variables for the API server:
|
||||
|
||||
| Variable | Default | Description |
|
||||
|----------|---------|-------------|
|
||||
| `K8S_NAMESPACE` | `dispensary-scraper` | Kubernetes namespace |
|
||||
| `K8S_WORKER_STATEFULSET` | `scraper-worker` | StatefulSet name |
|
||||
|
||||
### RBAC Requirements
|
||||
|
||||
The API server pod needs these K8s permissions:
|
||||
|
||||
```yaml
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: Role
|
||||
metadata:
|
||||
name: worker-scaler
|
||||
namespace: dispensary-scraper
|
||||
rules:
|
||||
- apiGroups: ["apps"]
|
||||
resources: ["statefulsets"]
|
||||
verbs: ["get", "patch"]
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: RoleBinding
|
||||
metadata:
|
||||
name: scraper-worker-scaler
|
||||
namespace: dispensary-scraper
|
||||
subjects:
|
||||
- kind: ServiceAccount
|
||||
name: default
|
||||
namespace: dispensary-scraper
|
||||
roleRef:
|
||||
kind: Role
|
||||
name: worker-scaler
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
```
|
||||
@@ -362,148 +362,6 @@ SET status = 'pending', retry_count = retry_count + 1
|
||||
WHERE status = 'failed' AND retry_count < max_retries;
|
||||
```
|
||||
|
||||
## Concurrent Task Processing (Added 2024-12)
|
||||
|
||||
Workers can now process multiple tasks concurrently within a single worker instance. This improves throughput by utilizing async I/O efficiently.
|
||||
|
||||
### Architecture
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────────┐
|
||||
│ Pod (K8s) │
|
||||
│ │
|
||||
│ ┌─────────────────────────────────────────────────────┐ │
|
||||
│ │ TaskWorker │ │
|
||||
│ │ │ │
|
||||
│ │ ┌─────────┐ ┌─────────┐ ┌─────────┐ │ │
|
||||
│ │ │ Task 1 │ │ Task 2 │ │ Task 3 │ (concurrent)│ │
|
||||
│ │ └─────────┘ └─────────┘ └─────────┘ │ │
|
||||
│ │ │ │
|
||||
│ │ Resource Monitor │ │
|
||||
│ │ ├── Memory: 65% (threshold: 85%) │ │
|
||||
│ │ ├── CPU: 45% (threshold: 90%) │ │
|
||||
│ │ └── Status: Normal │ │
|
||||
│ └─────────────────────────────────────────────────────┘ │
|
||||
└─────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
### Environment Variables
|
||||
|
||||
| Variable | Default | Description |
|
||||
|----------|---------|-------------|
|
||||
| `MAX_CONCURRENT_TASKS` | 3 | Maximum tasks a worker will run concurrently |
|
||||
| `MEMORY_BACKOFF_THRESHOLD` | 0.85 | Back off when heap memory exceeds 85% |
|
||||
| `CPU_BACKOFF_THRESHOLD` | 0.90 | Back off when CPU exceeds 90% |
|
||||
| `BACKOFF_DURATION_MS` | 10000 | How long to wait when backing off (10s) |
|
||||
|
||||
### How It Works
|
||||
|
||||
1. **Main Loop**: Worker continuously tries to fill up to `MAX_CONCURRENT_TASKS`
|
||||
2. **Resource Monitoring**: Before claiming a new task, worker checks memory and CPU
|
||||
3. **Backoff**: If resources exceed thresholds, worker pauses and stops claiming new tasks
|
||||
4. **Concurrent Execution**: Tasks run concurrently as independent `Promise`s - a task waiting on I/O does not block the others
|
||||
5. **Graceful Shutdown**: On SIGTERM/decommission, worker stops claiming new tasks but waits for active tasks to finish
|
||||
|
||||
### Resource Monitoring
|
||||
|
||||
```typescript
|
||||
// ResourceStats interface
|
||||
interface ResourceStats {
|
||||
memoryPercent: number; // Current heap usage as decimal (0.0-1.0)
|
||||
memoryMb: number; // Current heap used in MB
|
||||
memoryTotalMb: number; // Total heap available in MB
|
||||
cpuPercent: number; // CPU usage as percentage (0-100)
|
||||
isBackingOff: boolean; // True if worker is in backoff state
|
||||
backoffReason: string; // Why the worker is backing off
|
||||
}
|
||||
```
|
||||
|
||||
### Heartbeat Data
|
||||
|
||||
Workers report the following in their heartbeat:
|
||||
|
||||
```json
|
||||
{
|
||||
"worker_id": "worker-abc123",
|
||||
"current_task_id": 456,
|
||||
"current_task_ids": [456, 457, 458],
|
||||
"active_task_count": 3,
|
||||
"max_concurrent_tasks": 3,
|
||||
"status": "active",
|
||||
"resources": {
|
||||
"memory_mb": 256,
|
||||
"memory_total_mb": 512,
|
||||
"memory_rss_mb": 320,
|
||||
"memory_percent": 50,
|
||||
"cpu_user_ms": 12500,
|
||||
"cpu_system_ms": 3200,
|
||||
"cpu_percent": 45,
|
||||
"is_backing_off": false,
|
||||
"backoff_reason": null
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Backoff Behavior
|
||||
|
||||
When resources exceed thresholds:
|
||||
|
||||
1. Worker logs the backoff reason:
|
||||
```
|
||||
[TaskWorker] MyWorker backing off: Memory at 87.3% (threshold: 85%)
|
||||
```
|
||||
|
||||
2. Worker stops claiming new tasks but continues existing tasks
|
||||
|
||||
3. After `BACKOFF_DURATION_MS`, worker rechecks resources
|
||||
|
||||
4. When resources return to normal:
|
||||
```
|
||||
[TaskWorker] MyWorker resuming normal operation
|
||||
```
|
||||
|
||||
### UI Display
|
||||
|
||||
The Workers Dashboard shows:
|
||||
|
||||
- **Tasks Column**: `2/3 tasks` (active/max concurrent)
|
||||
- **Resources Column**: Memory % and CPU % with color coding
|
||||
- Green: < 50%
|
||||
- Yellow: 50-74%
|
||||
- Amber: 75-89%
|
||||
- Red: 90%+
|
||||
- **Backing Off**: Orange warning badge when worker is in backoff state
|
||||
|
||||
### Task Count Badge Details
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────┐
|
||||
│ Worker: "MyWorker" │
|
||||
│ Tasks: 2/3 tasks #456, #457 │
|
||||
│ Resources: 🧠 65% 💻 45% │
|
||||
│ Status: ● Active │
|
||||
└─────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
### Best Practices
|
||||
|
||||
1. **Start Conservative**: Use `MAX_CONCURRENT_TASKS=3` initially
|
||||
2. **Monitor Resources**: Watch for frequent backoffs in logs
|
||||
3. **Tune Per Workload**: I/O-bound tasks benefit from higher concurrency
|
||||
4. **Scale Horizontally**: Add more pods rather than cranking concurrency too high
|
||||
|
||||
### Code References
|
||||
|
||||
| File | Purpose |
|
||||
|------|---------|
|
||||
| `src/tasks/task-worker.ts:68-71` | Concurrency environment variables |
|
||||
| `src/tasks/task-worker.ts:104-111` | ResourceStats interface |
|
||||
| `src/tasks/task-worker.ts:149-179` | getResourceStats() method |
|
||||
| `src/tasks/task-worker.ts:184-196` | shouldBackOff() method |
|
||||
| `src/tasks/task-worker.ts:462-516` | mainLoop() with concurrent claiming |
|
||||
| `src/routes/worker-registry.ts:148-195` | Heartbeat endpoint handling |
|
||||
| `cannaiq/src/pages/WorkersDashboard.tsx:233-305` | UI components for resources |
|
||||
|
||||
## Monitoring
|
||||
|
||||
### Logs
|
||||
|
||||
@@ -1,27 +0,0 @@
|
||||
-- Migration: Worker Commands Table
|
||||
-- Purpose: Store commands for workers (decommission, etc.)
|
||||
-- Workers poll this table after each task to check for commands
|
||||
|
||||
CREATE TABLE IF NOT EXISTS worker_commands (
|
||||
id SERIAL PRIMARY KEY,
|
||||
worker_id TEXT NOT NULL,
|
||||
command TEXT NOT NULL, -- 'decommission', 'pause', 'resume'
|
||||
reason TEXT,
|
||||
issued_by TEXT,
|
||||
issued_at TIMESTAMPTZ DEFAULT NOW(),
|
||||
acknowledged_at TIMESTAMPTZ,
|
||||
executed_at TIMESTAMPTZ,
|
||||
status TEXT DEFAULT 'pending' -- 'pending', 'acknowledged', 'executed', 'cancelled'
|
||||
);
|
||||
|
||||
-- Index for worker lookups
|
||||
CREATE INDEX IF NOT EXISTS idx_worker_commands_worker_id ON worker_commands(worker_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_worker_commands_pending ON worker_commands(worker_id, status) WHERE status = 'pending';
|
||||
|
||||
-- Add decommission_requested column to worker_registry for quick checks
|
||||
ALTER TABLE worker_registry ADD COLUMN IF NOT EXISTS decommission_requested BOOLEAN DEFAULT FALSE;
|
||||
ALTER TABLE worker_registry ADD COLUMN IF NOT EXISTS decommission_reason TEXT;
|
||||
ALTER TABLE worker_registry ADD COLUMN IF NOT EXISTS decommission_requested_at TIMESTAMPTZ;
|
||||
|
||||
-- Comment
|
||||
COMMENT ON TABLE worker_commands IS 'Commands issued to workers (decommission after task, pause, etc.)';
|
||||
@@ -1,8 +0,0 @@
|
||||
-- Migration 078: Add consecutive_403_count to proxies table
|
||||
-- Per workflow-12102025.md: Track consecutive 403s per proxy
|
||||
-- After 3 consecutive 403s with different fingerprints → disable proxy
|
||||
|
||||
ALTER TABLE proxies ADD COLUMN IF NOT EXISTS consecutive_403_count INTEGER DEFAULT 0;
|
||||
|
||||
-- Add comment explaining the column
|
||||
COMMENT ON COLUMN proxies.consecutive_403_count IS 'Tracks consecutive 403 blocks. Reset to 0 on success. Proxy disabled at 3.';
|
||||
@@ -1,49 +0,0 @@
|
||||
-- Migration 079: Task Schedules for Database-Driven Scheduler
|
||||
-- Per TASK_WORKFLOW_2024-12-10.md: Replaces node-cron with DB-driven scheduling
|
||||
--
|
||||
-- 2024-12-10: Created for reliable, multi-replica-safe task scheduling
|
||||
|
||||
-- task_schedules: Stores schedule definitions and state
|
||||
CREATE TABLE IF NOT EXISTS task_schedules (
|
||||
id SERIAL PRIMARY KEY,
|
||||
name VARCHAR(100) NOT NULL UNIQUE,
|
||||
role VARCHAR(50) NOT NULL, -- TaskRole: product_refresh, store_discovery, etc.
|
||||
description TEXT,
|
||||
|
||||
-- Schedule configuration
|
||||
enabled BOOLEAN DEFAULT TRUE,
|
||||
interval_hours INTEGER NOT NULL DEFAULT 4,
|
||||
priority INTEGER DEFAULT 0,
|
||||
|
||||
-- Optional scope filters
|
||||
state_code VARCHAR(2), -- NULL = all states
|
||||
platform VARCHAR(50), -- NULL = all platforms
|
||||
|
||||
-- Execution state (updated by scheduler)
|
||||
last_run_at TIMESTAMPTZ,
|
||||
next_run_at TIMESTAMPTZ,
|
||||
last_task_count INTEGER DEFAULT 0,
|
||||
last_error TEXT,
|
||||
|
||||
created_at TIMESTAMPTZ DEFAULT NOW(),
|
||||
updated_at TIMESTAMPTZ DEFAULT NOW()
|
||||
);
|
||||
|
||||
-- Indexes for scheduler queries
|
||||
CREATE INDEX IF NOT EXISTS idx_task_schedules_enabled ON task_schedules(enabled) WHERE enabled = TRUE;
|
||||
CREATE INDEX IF NOT EXISTS idx_task_schedules_next_run ON task_schedules(next_run_at) WHERE enabled = TRUE;
|
||||
|
||||
-- Insert default schedules
|
||||
INSERT INTO task_schedules (name, role, interval_hours, priority, description, next_run_at)
|
||||
VALUES
|
||||
('product_refresh_all', 'product_refresh', 4, 0, 'Generate product refresh tasks for all crawl-enabled stores every 4 hours', NOW()),
|
||||
('store_discovery_dutchie', 'store_discovery', 24, 5, 'Discover new Dutchie stores daily', NOW()),
|
||||
('analytics_refresh', 'analytics_refresh', 6, 0, 'Refresh analytics materialized views every 6 hours', NOW())
|
||||
ON CONFLICT (name) DO NOTHING;
|
||||
|
||||
-- Comment for documentation
|
||||
COMMENT ON TABLE task_schedules IS 'Database-driven task scheduler configuration. Per TASK_WORKFLOW_2024-12-10.md:
|
||||
- Schedules persist in DB (survive restarts)
|
||||
- Uses SELECT FOR UPDATE SKIP LOCKED for multi-replica safety
|
||||
- Scheduler polls every 60s and executes due schedules
|
||||
- Creates tasks in worker_tasks for task-worker.ts to process';
|
||||
@@ -1,58 +0,0 @@
|
||||
-- Migration 080: Raw Crawl Payloads Metadata Table
|
||||
-- Per TASK_WORKFLOW_2024-12-10.md: Store full GraphQL payloads for historical analysis
|
||||
--
|
||||
-- Design Pattern: Metadata/Payload Separation
|
||||
-- - Metadata (this table): Small, indexed, queryable
|
||||
-- - Payload (filesystem): Gzipped JSON at storage_path
|
||||
--
|
||||
-- Benefits:
|
||||
-- - Compare any two crawls to see what changed
|
||||
-- - Replay/re-normalize historical data if logic changes
|
||||
-- - Debug issues by seeing exactly what the API returned
|
||||
-- - DB stays small, backups stay fast
|
||||
--
|
||||
-- Storage location: /storage/payloads/{year}/{month}/{day}/store_{id}_{timestamp}.json.gz
|
||||
-- Compression: ~90% reduction (1.5MB -> 150KB per crawl)
|
||||
|
||||
CREATE TABLE IF NOT EXISTS raw_crawl_payloads (
|
||||
id SERIAL PRIMARY KEY,
|
||||
|
||||
-- Links to crawl tracking
|
||||
crawl_run_id INTEGER REFERENCES crawl_runs(id) ON DELETE SET NULL,
|
||||
dispensary_id INTEGER NOT NULL REFERENCES dispensaries(id) ON DELETE CASCADE,
|
||||
|
||||
-- File location (gzipped JSON)
|
||||
storage_path TEXT NOT NULL,
|
||||
|
||||
-- Metadata for quick queries without loading file
|
||||
product_count INTEGER NOT NULL DEFAULT 0,
|
||||
size_bytes INTEGER, -- Compressed size
|
||||
size_bytes_raw INTEGER, -- Uncompressed size
|
||||
|
||||
-- Timestamps
|
||||
fetched_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
||||
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
||||
|
||||
-- Optional: checksum for integrity verification
|
||||
checksum_sha256 VARCHAR(64)
|
||||
);
|
||||
|
||||
-- Indexes for common queries
|
||||
CREATE INDEX IF NOT EXISTS idx_raw_crawl_payloads_dispensary
|
||||
ON raw_crawl_payloads(dispensary_id);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_raw_crawl_payloads_dispensary_fetched
|
||||
ON raw_crawl_payloads(dispensary_id, fetched_at DESC);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_raw_crawl_payloads_fetched
|
||||
ON raw_crawl_payloads(fetched_at DESC);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_raw_crawl_payloads_crawl_run
|
||||
ON raw_crawl_payloads(crawl_run_id)
|
||||
WHERE crawl_run_id IS NOT NULL;
|
||||
|
||||
-- Comments
|
||||
COMMENT ON TABLE raw_crawl_payloads IS 'Metadata for raw GraphQL payloads stored on filesystem. Per TASK_WORKFLOW_2024-12-10.md: Full payloads enable historical diffs and replay.';
|
||||
COMMENT ON COLUMN raw_crawl_payloads.storage_path IS 'Path to gzipped JSON file, e.g. /storage/payloads/2024/12/10/store_123_1702234567.json.gz';
|
||||
COMMENT ON COLUMN raw_crawl_payloads.size_bytes IS 'Compressed file size in bytes';
|
||||
COMMENT ON COLUMN raw_crawl_payloads.size_bytes_raw IS 'Uncompressed payload size in bytes';
|
||||
@@ -1,37 +0,0 @@
|
||||
-- Migration 081: Payload Fetch Columns
|
||||
-- Per TASK_WORKFLOW_2024-12-10.md: Separates API fetch from data processing
|
||||
--
|
||||
-- New architecture:
|
||||
-- - payload_fetch: Hits Dutchie API, saves raw payload to disk
|
||||
-- - product_refresh: Reads local payload, normalizes, upserts to DB
|
||||
--
|
||||
-- This migration adds:
|
||||
-- 1. payload column to worker_tasks (for task chaining data)
|
||||
-- 2. processed_at column to raw_crawl_payloads (track when payload was processed)
|
||||
-- 3. last_fetch_at column to dispensaries (track when last payload was fetched)
|
||||
|
||||
-- Add payload column to worker_tasks for task chaining
|
||||
-- Used by payload_fetch to pass payload_id to product_refresh
|
||||
ALTER TABLE worker_tasks
|
||||
ADD COLUMN IF NOT EXISTS payload JSONB DEFAULT NULL;
|
||||
|
||||
COMMENT ON COLUMN worker_tasks.payload IS 'Per TASK_WORKFLOW_2024-12-10.md: Task chaining data (e.g., payload_id from payload_fetch to product_refresh)';
|
||||
|
||||
-- Add processed_at to raw_crawl_payloads
|
||||
-- Tracks when the payload was processed by product_refresh
|
||||
ALTER TABLE raw_crawl_payloads
|
||||
ADD COLUMN IF NOT EXISTS processed_at TIMESTAMPTZ DEFAULT NULL;
|
||||
|
||||
COMMENT ON COLUMN raw_crawl_payloads.processed_at IS 'When this payload was processed by product_refresh handler';
|
||||
|
||||
-- Index for finding unprocessed payloads
|
||||
CREATE INDEX IF NOT EXISTS idx_raw_crawl_payloads_unprocessed
|
||||
ON raw_crawl_payloads(dispensary_id, fetched_at DESC)
|
||||
WHERE processed_at IS NULL;
|
||||
|
||||
-- Add last_fetch_at to dispensaries
|
||||
-- Tracks when the last payload was fetched (separate from last_crawl_at which is when processing completed)
|
||||
ALTER TABLE dispensaries
|
||||
ADD COLUMN IF NOT EXISTS last_fetch_at TIMESTAMPTZ DEFAULT NULL;
|
||||
|
||||
COMMENT ON COLUMN dispensaries.last_fetch_at IS 'Per TASK_WORKFLOW_2024-12-10.md: When last payload was fetched from API (separate from last_crawl_at which is when processing completed)';
|
||||
@@ -1,27 +0,0 @@
|
||||
-- Migration: 082_proxy_notification_trigger
|
||||
-- Date: 2024-12-11
|
||||
-- Description: Add PostgreSQL NOTIFY trigger to alert workers when proxies are added
|
||||
|
||||
-- Trigger function: wake workers when a proxy becomes usable.
--
-- Sends a NOTIFY on the 'proxy_added' channel with the proxy id (as text) as
-- payload. A notification is emitted only when:
--   * a row is INSERTed with active = true, or
--   * an UPDATE actually changes `active` to true (false/NULL -> true).
--
-- An UPDATE that lists `active` in its SET clause but leaves it true does not
-- re-notify: "UPDATE OF active" triggers fire whenever the column is named in
-- the statement, even if its value is unchanged.
--
-- Returns NEW unchanged; the return value is ignored for AFTER row triggers.
CREATE OR REPLACE FUNCTION notify_proxy_added()
RETURNS TRIGGER AS $$
BEGIN
    -- IS TRUE treats NULL as "not active", same as the previous "= true" test.
    IF NEW.active IS TRUE THEN
        IF TG_OP = 'INSERT' THEN
            -- Newly added active proxy.
            PERFORM pg_notify('proxy_added', NEW.id::text);
        ELSIF OLD.active IS DISTINCT FROM NEW.active THEN
            -- Existing proxy switched from inactive/NULL to active.
            -- OLD is only referenced on the non-INSERT path, where it is set.
            PERFORM pg_notify('proxy_added', NEW.id::text);
        END IF;
    END IF;
    RETURN NEW;
END;
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- Drop existing trigger if any
|
||||
DROP TRIGGER IF EXISTS proxy_added_trigger ON proxies;
|
||||
|
||||
-- Create trigger on insert and update of active column
|
||||
CREATE TRIGGER proxy_added_trigger
|
||||
AFTER INSERT OR UPDATE OF active ON proxies
|
||||
FOR EACH ROW
|
||||
EXECUTE FUNCTION notify_proxy_added();
|
||||
|
||||
COMMENT ON FUNCTION notify_proxy_added() IS
|
||||
'Sends PostgreSQL NOTIFY to proxy_added channel when an active proxy is added or activated. Workers LISTEN on this channel to wake up immediately.';
|
||||
@@ -1,88 +0,0 @@
|
||||
-- Migration 083: Discovery Run Tracking
|
||||
-- Tracks progress of store discovery runs step-by-step
|
||||
|
||||
-- Main discovery runs table
|
||||
CREATE TABLE IF NOT EXISTS discovery_runs (
|
||||
id SERIAL PRIMARY KEY,
|
||||
platform VARCHAR(50) NOT NULL DEFAULT 'dutchie',
|
||||
status VARCHAR(20) NOT NULL DEFAULT 'running', -- running, completed, failed
|
||||
started_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
||||
finished_at TIMESTAMPTZ,
|
||||
task_id INTEGER REFERENCES worker_task_queue(id),
|
||||
|
||||
-- Totals
|
||||
states_total INTEGER DEFAULT 0,
|
||||
states_completed INTEGER DEFAULT 0,
|
||||
locations_discovered INTEGER DEFAULT 0,
|
||||
locations_promoted INTEGER DEFAULT 0,
|
||||
new_store_ids INTEGER[] DEFAULT '{}',
|
||||
|
||||
-- Error info
|
||||
error_message TEXT,
|
||||
|
||||
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
|
||||
);
|
||||
|
||||
-- Per-state progress within a run
|
||||
CREATE TABLE IF NOT EXISTS discovery_run_states (
|
||||
id SERIAL PRIMARY KEY,
|
||||
run_id INTEGER NOT NULL REFERENCES discovery_runs(id) ON DELETE CASCADE,
|
||||
state_code VARCHAR(2) NOT NULL,
|
||||
status VARCHAR(20) NOT NULL DEFAULT 'pending', -- pending, running, completed, failed
|
||||
started_at TIMESTAMPTZ,
|
||||
finished_at TIMESTAMPTZ,
|
||||
|
||||
-- Results
|
||||
cities_found INTEGER DEFAULT 0,
|
||||
locations_found INTEGER DEFAULT 0,
|
||||
locations_upserted INTEGER DEFAULT 0,
|
||||
new_dispensary_ids INTEGER[] DEFAULT '{}',
|
||||
|
||||
-- Error info
|
||||
error_message TEXT,
|
||||
|
||||
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
||||
|
||||
UNIQUE(run_id, state_code)
|
||||
);
|
||||
|
||||
-- Step-by-step log for detailed progress tracking
|
||||
CREATE TABLE IF NOT EXISTS discovery_run_steps (
|
||||
id SERIAL PRIMARY KEY,
|
||||
run_id INTEGER NOT NULL REFERENCES discovery_runs(id) ON DELETE CASCADE,
|
||||
state_code VARCHAR(2),
|
||||
step_name VARCHAR(100) NOT NULL,
|
||||
status VARCHAR(20) NOT NULL DEFAULT 'started', -- started, completed, failed
|
||||
started_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
||||
finished_at TIMESTAMPTZ,
|
||||
|
||||
-- Details (JSON for flexibility)
|
||||
details JSONB DEFAULT '{}',
|
||||
|
||||
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
|
||||
);
|
||||
|
||||
-- Indexes for querying
|
||||
CREATE INDEX IF NOT EXISTS idx_discovery_runs_status ON discovery_runs(status);
|
||||
CREATE INDEX IF NOT EXISTS idx_discovery_runs_platform ON discovery_runs(platform);
|
||||
CREATE INDEX IF NOT EXISTS idx_discovery_runs_started_at ON discovery_runs(started_at DESC);
|
||||
CREATE INDEX IF NOT EXISTS idx_discovery_run_states_run_id ON discovery_run_states(run_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_discovery_run_steps_run_id ON discovery_run_steps(run_id);
|
||||
|
||||
-- View: the most recent discovery run for each platform (one row per platform).
--
-- "Most recent" is the row with the greatest started_at; id DESC breaks ties so
-- the chosen row is deterministic when two runs share a start timestamp.
--
-- Columns:
--   new_stores_count  number of entries in new_store_ids. array_length()
--                     returns NULL for an empty (or NULL) array, and the column
--                     defaults to '{}', so the result is coalesced to 0.
--   duration_seconds  elapsed seconds from started_at to finished_at, or to
--                     NOW() while the run has no finished_at yet.
CREATE OR REPLACE VIEW v_latest_discovery_runs AS
SELECT DISTINCT ON (platform)
    id,
    platform,
    status,
    started_at,
    finished_at,
    states_total,
    states_completed,
    locations_discovered,
    locations_promoted,
    COALESCE(array_length(new_store_ids, 1), 0) AS new_stores_count,
    error_message,
    EXTRACT(EPOCH FROM (COALESCE(finished_at, NOW()) - started_at)) AS duration_seconds
FROM discovery_runs
ORDER BY platform, started_at DESC, id DESC;
|
||||
286
backend/node_modules/.package-lock.json
generated
vendored
286
backend/node_modules/.package-lock.json
generated
vendored
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "dutchie-menus-backend",
|
||||
"version": "1.6.0",
|
||||
"version": "1.5.1",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
@@ -46,97 +46,6 @@
|
||||
"resolved": "https://registry.npmjs.org/@ioredis/commands/-/commands-1.4.0.tgz",
|
||||
"integrity": "sha512-aFT2yemJJo+TZCmieA7qnYGQooOS7QfNmYrzGtsYd3g9j5iDP8AimYYAesf79ohjbLG12XxC4nG5DyEnC88AsQ=="
|
||||
},
|
||||
"node_modules/@jsep-plugin/assignment": {
|
||||
"version": "1.3.0",
|
||||
"resolved": "https://registry.npmjs.org/@jsep-plugin/assignment/-/assignment-1.3.0.tgz",
|
||||
"integrity": "sha512-VVgV+CXrhbMI3aSusQyclHkenWSAm95WaiKrMxRFam3JSUiIaQjoMIw2sEs/OX4XifnqeQUN4DYbJjlA8EfktQ==",
|
||||
"engines": {
|
||||
"node": ">= 10.16.0"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"jsep": "^0.4.0||^1.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@jsep-plugin/regex": {
|
||||
"version": "1.0.4",
|
||||
"resolved": "https://registry.npmjs.org/@jsep-plugin/regex/-/regex-1.0.4.tgz",
|
||||
"integrity": "sha512-q7qL4Mgjs1vByCaTnDFcBnV9HS7GVPJX5vyVoCgZHNSC9rjwIlmbXG5sUuorR5ndfHAIlJ8pVStxvjXHbNvtUg==",
|
||||
"engines": {
|
||||
"node": ">= 10.16.0"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"jsep": "^0.4.0||^1.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@kubernetes/client-node": {
|
||||
"version": "1.4.0",
|
||||
"resolved": "https://registry.npmjs.org/@kubernetes/client-node/-/client-node-1.4.0.tgz",
|
||||
"integrity": "sha512-Zge3YvF7DJi264dU1b3wb/GmzR99JhUpqTvp+VGHfwZT+g7EOOYNScDJNZwXy9cszyIGPIs0VHr+kk8e95qqrA==",
|
||||
"dependencies": {
|
||||
"@types/js-yaml": "^4.0.1",
|
||||
"@types/node": "^24.0.0",
|
||||
"@types/node-fetch": "^2.6.13",
|
||||
"@types/stream-buffers": "^3.0.3",
|
||||
"form-data": "^4.0.0",
|
||||
"hpagent": "^1.2.0",
|
||||
"isomorphic-ws": "^5.0.0",
|
||||
"js-yaml": "^4.1.0",
|
||||
"jsonpath-plus": "^10.3.0",
|
||||
"node-fetch": "^2.7.0",
|
||||
"openid-client": "^6.1.3",
|
||||
"rfc4648": "^1.3.0",
|
||||
"socks-proxy-agent": "^8.0.4",
|
||||
"stream-buffers": "^3.0.2",
|
||||
"tar-fs": "^3.0.9",
|
||||
"ws": "^8.18.2"
|
||||
}
|
||||
},
|
||||
"node_modules/@kubernetes/client-node/node_modules/@types/node": {
|
||||
"version": "24.10.3",
|
||||
"resolved": "https://registry.npmjs.org/@types/node/-/node-24.10.3.tgz",
|
||||
"integrity": "sha512-gqkrWUsS8hcm0r44yn7/xZeV1ERva/nLgrLxFRUGb7aoNMIJfZJ3AC261zDQuOAKC7MiXai1WCpYc48jAHoShQ==",
|
||||
"dependencies": {
|
||||
"undici-types": "~7.16.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@kubernetes/client-node/node_modules/tar-fs": {
|
||||
"version": "3.1.1",
|
||||
"resolved": "https://registry.npmjs.org/tar-fs/-/tar-fs-3.1.1.tgz",
|
||||
"integrity": "sha512-LZA0oaPOc2fVo82Txf3gw+AkEd38szODlptMYejQUhndHMLQ9M059uXR+AfS7DNo0NpINvSqDsvyaCrBVkptWg==",
|
||||
"dependencies": {
|
||||
"pump": "^3.0.0",
|
||||
"tar-stream": "^3.1.5"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"bare-fs": "^4.0.1",
|
||||
"bare-path": "^3.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@kubernetes/client-node/node_modules/undici-types": {
|
||||
"version": "7.16.0",
|
||||
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.16.0.tgz",
|
||||
"integrity": "sha512-Zz+aZWSj8LE6zoxD+xrjh4VfkIG8Ya6LvYkZqtUQGJPZjYl53ypCaUwWqo7eI0x66KBGeRo+mlBEkMSeSZ38Nw=="
|
||||
},
|
||||
"node_modules/@kubernetes/client-node/node_modules/ws": {
|
||||
"version": "8.18.3",
|
||||
"resolved": "https://registry.npmjs.org/ws/-/ws-8.18.3.tgz",
|
||||
"integrity": "sha512-PEIGCY5tSlUt50cqyMXfCzX+oOPqN0vuGqWzbcJ2xvnkzkq46oOpz7dQaTDBdfICb4N14+GARUDw2XV2N4tvzg==",
|
||||
"engines": {
|
||||
"node": ">=10.0.0"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"bufferutil": "^4.0.1",
|
||||
"utf-8-validate": ">=5.0.2"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
"bufferutil": {
|
||||
"optional": true
|
||||
},
|
||||
"utf-8-validate": {
|
||||
"optional": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"node_modules/@mapbox/node-pre-gyp": {
|
||||
"version": "1.0.11",
|
||||
"resolved": "https://registry.npmjs.org/@mapbox/node-pre-gyp/-/node-pre-gyp-1.0.11.tgz",
|
||||
@@ -342,11 +251,6 @@
|
||||
"integrity": "sha512-r8Tayk8HJnX0FztbZN7oVqGccWgw98T/0neJphO91KkmOzug1KkofZURD4UaD5uH8AqcFLfdPErnBod0u71/qg==",
|
||||
"dev": true
|
||||
},
|
||||
"node_modules/@types/js-yaml": {
|
||||
"version": "4.0.9",
|
||||
"resolved": "https://registry.npmjs.org/@types/js-yaml/-/js-yaml-4.0.9.tgz",
|
||||
"integrity": "sha512-k4MGaQl5TGo/iipqb2UDG2UwjXziSWkh0uysQelTlJpX1qGlpUZYm8PnO4DxG1qBomtJUdYJ6qR6xdIah10JLg=="
|
||||
},
|
||||
"node_modules/@types/jsonwebtoken": {
|
||||
"version": "9.0.10",
|
||||
"resolved": "https://registry.npmjs.org/@types/jsonwebtoken/-/jsonwebtoken-9.0.10.tgz",
|
||||
@@ -372,6 +276,7 @@
|
||||
"version": "20.19.25",
|
||||
"resolved": "https://registry.npmjs.org/@types/node/-/node-20.19.25.tgz",
|
||||
"integrity": "sha512-ZsJzA5thDQMSQO788d7IocwwQbI8B5OPzmqNvpf3NY/+MHDAS759Wo0gd2WQeXYt5AAAQjzcrTVC6SKCuYgoCQ==",
|
||||
"devOptional": true,
|
||||
"dependencies": {
|
||||
"undici-types": "~6.21.0"
|
||||
}
|
||||
@@ -382,15 +287,6 @@
|
||||
"integrity": "sha512-0ikrnug3/IyneSHqCBeslAhlK2aBfYek1fGo4bP4QnZPmiqSGRK+Oy7ZMisLWkesffJvQ1cqAcBnJC+8+nxIAg==",
|
||||
"dev": true
|
||||
},
|
||||
"node_modules/@types/node-fetch": {
|
||||
"version": "2.6.13",
|
||||
"resolved": "https://registry.npmjs.org/@types/node-fetch/-/node-fetch-2.6.13.tgz",
|
||||
"integrity": "sha512-QGpRVpzSaUs30JBSGPjOg4Uveu384erbHBoT1zeONvyCfwQxIkUshLAOqN/k9EjGviPRmWTTe6aH2qySWKTVSw==",
|
||||
"dependencies": {
|
||||
"@types/node": "*",
|
||||
"form-data": "^4.0.4"
|
||||
}
|
||||
},
|
||||
"node_modules/@types/pg": {
|
||||
"version": "8.15.6",
|
||||
"resolved": "https://registry.npmjs.org/@types/pg/-/pg-8.15.6.tgz",
|
||||
@@ -444,14 +340,6 @@
|
||||
"@types/node": "*"
|
||||
}
|
||||
},
|
||||
"node_modules/@types/stream-buffers": {
|
||||
"version": "3.0.8",
|
||||
"resolved": "https://registry.npmjs.org/@types/stream-buffers/-/stream-buffers-3.0.8.tgz",
|
||||
"integrity": "sha512-J+7VaHKNvlNPJPEJXX/fKa9DZtR/xPMwuIbe+yNOwp1YB+ApUOBv2aUpEoBJEi8nJgbgs1x8e73ttg0r1rSUdw==",
|
||||
"dependencies": {
|
||||
"@types/node": "*"
|
||||
}
|
||||
},
|
||||
"node_modules/@types/uuid": {
|
||||
"version": "9.0.8",
|
||||
"resolved": "https://registry.npmjs.org/@types/uuid/-/uuid-9.0.8.tgz",
|
||||
@@ -632,78 +520,6 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
"node_modules/bare-fs": {
|
||||
"version": "4.5.2",
|
||||
"resolved": "https://registry.npmjs.org/bare-fs/-/bare-fs-4.5.2.tgz",
|
||||
"integrity": "sha512-veTnRzkb6aPHOvSKIOy60KzURfBdUflr5VReI+NSaPL6xf+XLdONQgZgpYvUuZLVQ8dCqxpBAudaOM1+KpAUxw==",
|
||||
"optional": true,
|
||||
"dependencies": {
|
||||
"bare-events": "^2.5.4",
|
||||
"bare-path": "^3.0.0",
|
||||
"bare-stream": "^2.6.4",
|
||||
"bare-url": "^2.2.2",
|
||||
"fast-fifo": "^1.3.2"
|
||||
},
|
||||
"engines": {
|
||||
"bare": ">=1.16.0"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"bare-buffer": "*"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
"bare-buffer": {
|
||||
"optional": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"node_modules/bare-os": {
|
||||
"version": "3.6.2",
|
||||
"resolved": "https://registry.npmjs.org/bare-os/-/bare-os-3.6.2.tgz",
|
||||
"integrity": "sha512-T+V1+1srU2qYNBmJCXZkUY5vQ0B4FSlL3QDROnKQYOqeiQR8UbjNHlPa+TIbM4cuidiN9GaTaOZgSEgsvPbh5A==",
|
||||
"optional": true,
|
||||
"engines": {
|
||||
"bare": ">=1.14.0"
|
||||
}
|
||||
},
|
||||
"node_modules/bare-path": {
|
||||
"version": "3.0.0",
|
||||
"resolved": "https://registry.npmjs.org/bare-path/-/bare-path-3.0.0.tgz",
|
||||
"integrity": "sha512-tyfW2cQcB5NN8Saijrhqn0Zh7AnFNsnczRcuWODH0eYAXBsJ5gVxAUuNr7tsHSC6IZ77cA0SitzT+s47kot8Mw==",
|
||||
"optional": true,
|
||||
"dependencies": {
|
||||
"bare-os": "^3.0.1"
|
||||
}
|
||||
},
|
||||
"node_modules/bare-stream": {
|
||||
"version": "2.7.0",
|
||||
"resolved": "https://registry.npmjs.org/bare-stream/-/bare-stream-2.7.0.tgz",
|
||||
"integrity": "sha512-oyXQNicV1y8nc2aKffH+BUHFRXmx6VrPzlnaEvMhram0nPBrKcEdcyBg5r08D0i8VxngHFAiVyn1QKXpSG0B8A==",
|
||||
"optional": true,
|
||||
"dependencies": {
|
||||
"streamx": "^2.21.0"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"bare-buffer": "*",
|
||||
"bare-events": "*"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
"bare-buffer": {
|
||||
"optional": true
|
||||
},
|
||||
"bare-events": {
|
||||
"optional": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"node_modules/bare-url": {
|
||||
"version": "2.3.2",
|
||||
"resolved": "https://registry.npmjs.org/bare-url/-/bare-url-2.3.2.tgz",
|
||||
"integrity": "sha512-ZMq4gd9ngV5aTMa5p9+UfY0b3skwhHELaDkhEHetMdX0LRkW9kzaym4oo/Eh+Ghm0CCDuMTsRIGM/ytUc1ZYmw==",
|
||||
"optional": true,
|
||||
"dependencies": {
|
||||
"bare-path": "^3.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/base64-js": {
|
||||
"version": "1.5.1",
|
||||
"resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz",
|
||||
@@ -2203,14 +2019,6 @@
|
||||
"node": ">=16.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/hpagent": {
|
||||
"version": "1.2.0",
|
||||
"resolved": "https://registry.npmjs.org/hpagent/-/hpagent-1.2.0.tgz",
|
||||
"integrity": "sha512-A91dYTeIB6NoXG+PxTQpCCDDnfHsW9kc06Lvpu1TEe9gnd6ZFeiBoRO9JvzEv6xK7EX97/dUE8g/vBMTqTS3CA==",
|
||||
"engines": {
|
||||
"node": ">=14"
|
||||
}
|
||||
},
|
||||
"node_modules/htmlparser2": {
|
||||
"version": "10.0.0",
|
||||
"resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-10.0.0.tgz",
|
||||
@@ -2574,22 +2382,6 @@
|
||||
"node": ">=0.10.0"
|
||||
}
|
||||
},
|
||||
"node_modules/isomorphic-ws": {
|
||||
"version": "5.0.0",
|
||||
"resolved": "https://registry.npmjs.org/isomorphic-ws/-/isomorphic-ws-5.0.0.tgz",
|
||||
"integrity": "sha512-muId7Zzn9ywDsyXgTIafTry2sV3nySZeUDe6YedVd1Hvuuep5AsIlqK+XefWpYTyJG5e503F2xIuT2lcU6rCSw==",
|
||||
"peerDependencies": {
|
||||
"ws": "*"
|
||||
}
|
||||
},
|
||||
"node_modules/jose": {
|
||||
"version": "6.1.3",
|
||||
"resolved": "https://registry.npmjs.org/jose/-/jose-6.1.3.tgz",
|
||||
"integrity": "sha512-0TpaTfihd4QMNwrz/ob2Bp7X04yuxJkjRGi4aKmOqwhov54i6u79oCv7T+C7lo70MKH6BesI3vscD1yb/yzKXQ==",
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/panva"
|
||||
}
|
||||
},
|
||||
"node_modules/js-tokens": {
|
||||
"version": "4.0.0",
|
||||
"resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz",
|
||||
@@ -2606,14 +2398,6 @@
|
||||
"js-yaml": "bin/js-yaml.js"
|
||||
}
|
||||
},
|
||||
"node_modules/jsep": {
|
||||
"version": "1.4.0",
|
||||
"resolved": "https://registry.npmjs.org/jsep/-/jsep-1.4.0.tgz",
|
||||
"integrity": "sha512-B7qPcEVE3NVkmSJbaYxvv4cHkVW7DQsZz13pUMrfS8z8Q/BuShN+gcTXrUlPiGqM2/t/EEaI030bpxMqY8gMlw==",
|
||||
"engines": {
|
||||
"node": ">= 10.16.0"
|
||||
}
|
||||
},
|
||||
"node_modules/json-parse-even-better-errors": {
|
||||
"version": "2.3.1",
|
||||
"resolved": "https://registry.npmjs.org/json-parse-even-better-errors/-/json-parse-even-better-errors-2.3.1.tgz",
|
||||
@@ -2635,23 +2419,6 @@
|
||||
"graceful-fs": "^4.1.6"
|
||||
}
|
||||
},
|
||||
"node_modules/jsonpath-plus": {
|
||||
"version": "10.3.0",
|
||||
"resolved": "https://registry.npmjs.org/jsonpath-plus/-/jsonpath-plus-10.3.0.tgz",
|
||||
"integrity": "sha512-8TNmfeTCk2Le33A3vRRwtuworG/L5RrgMvdjhKZxvyShO+mBu2fP50OWUjRLNtvw344DdDarFh9buFAZs5ujeA==",
|
||||
"dependencies": {
|
||||
"@jsep-plugin/assignment": "^1.3.0",
|
||||
"@jsep-plugin/regex": "^1.0.4",
|
||||
"jsep": "^1.4.0"
|
||||
},
|
||||
"bin": {
|
||||
"jsonpath": "bin/jsonpath-cli.js",
|
||||
"jsonpath-plus": "bin/jsonpath-cli.js"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=18.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/jsonwebtoken": {
|
||||
"version": "9.0.2",
|
||||
"resolved": "https://registry.npmjs.org/jsonwebtoken/-/jsonwebtoken-9.0.2.tgz",
|
||||
@@ -2726,11 +2493,6 @@
|
||||
"resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz",
|
||||
"integrity": "sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg=="
|
||||
},
|
||||
"node_modules/lodash.clonedeep": {
|
||||
"version": "4.5.0",
|
||||
"resolved": "https://registry.npmjs.org/lodash.clonedeep/-/lodash.clonedeep-4.5.0.tgz",
|
||||
"integrity": "sha512-H5ZhCF25riFd9uB5UCkVKo61m3S/xZk1x4wA6yp/L3RFP6Z/eHH1ymQcGLo7J3GMPfm0V/7m1tryHuGVxpqEBQ=="
|
||||
},
|
||||
"node_modules/lodash.defaults": {
|
||||
"version": "4.2.0",
|
||||
"resolved": "https://registry.npmjs.org/lodash.defaults/-/lodash.defaults-4.2.0.tgz",
|
||||
@@ -3180,14 +2942,6 @@
|
||||
"url": "https://github.com/fb55/nth-check?sponsor=1"
|
||||
}
|
||||
},
|
||||
"node_modules/oauth4webapi": {
|
||||
"version": "3.8.3",
|
||||
"resolved": "https://registry.npmjs.org/oauth4webapi/-/oauth4webapi-3.8.3.tgz",
|
||||
"integrity": "sha512-pQ5BsX3QRTgnt5HxgHwgunIRaDXBdkT23tf8dfzmtTIL2LTpdmxgbpbBm0VgFWAIDlezQvQCTgnVIUmHupXHxw==",
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/panva"
|
||||
}
|
||||
},
|
||||
"node_modules/object-assign": {
|
||||
"version": "4.1.1",
|
||||
"resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz",
|
||||
@@ -3226,18 +2980,6 @@
|
||||
"wrappy": "1"
|
||||
}
|
||||
},
|
||||
"node_modules/openid-client": {
|
||||
"version": "6.8.1",
|
||||
"resolved": "https://registry.npmjs.org/openid-client/-/openid-client-6.8.1.tgz",
|
||||
"integrity": "sha512-VoYT6enBo6Vj2j3Q5Ec0AezS+9YGzQo1f5Xc42lreMGlfP4ljiXPKVDvCADh+XHCV/bqPu/wWSiCVXbJKvrODw==",
|
||||
"dependencies": {
|
||||
"jose": "^6.1.0",
|
||||
"oauth4webapi": "^3.8.2"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/panva"
|
||||
}
|
||||
},
|
||||
"node_modules/pac-proxy-agent": {
|
||||
"version": "7.2.0",
|
||||
"resolved": "https://registry.npmjs.org/pac-proxy-agent/-/pac-proxy-agent-7.2.0.tgz",
|
||||
@@ -4141,11 +3883,6 @@
|
||||
"url": "https://github.com/privatenumber/resolve-pkg-maps?sponsor=1"
|
||||
}
|
||||
},
|
||||
"node_modules/rfc4648": {
|
||||
"version": "1.5.4",
|
||||
"resolved": "https://registry.npmjs.org/rfc4648/-/rfc4648-1.5.4.tgz",
|
||||
"integrity": "sha512-rRg/6Lb+IGfJqO05HZkN50UtY7K/JhxJag1kP23+zyMfrvoB0B7RWv06MbOzoc79RgCdNTiUaNsTT1AJZ7Z+cg=="
|
||||
},
|
||||
"node_modules/rimraf": {
|
||||
"version": "3.0.2",
|
||||
"resolved": "https://registry.npmjs.org/rimraf/-/rimraf-3.0.2.tgz",
|
||||
@@ -4576,14 +4313,6 @@
|
||||
"node": ">= 0.8"
|
||||
}
|
||||
},
|
||||
"node_modules/stream-buffers": {
|
||||
"version": "3.0.3",
|
||||
"resolved": "https://registry.npmjs.org/stream-buffers/-/stream-buffers-3.0.3.tgz",
|
||||
"integrity": "sha512-pqMqwQCso0PBJt2PQmDO0cFj0lyqmiwOMiMSkVtRokl7e+ZTRYgDHKnuZNbqjiJXgsg4nuqtD/zxuo9KqTp0Yw==",
|
||||
"engines": {
|
||||
"node": ">= 0.10.0"
|
||||
}
|
||||
},
|
||||
"node_modules/streamx": {
|
||||
"version": "2.23.0",
|
||||
"resolved": "https://registry.npmjs.org/streamx/-/streamx-2.23.0.tgz",
|
||||
@@ -4803,7 +4532,8 @@
|
||||
"node_modules/undici-types": {
|
||||
"version": "6.21.0",
|
||||
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.21.0.tgz",
|
||||
"integrity": "sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ=="
|
||||
"integrity": "sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ==",
|
||||
"devOptional": true
|
||||
},
|
||||
"node_modules/universalify": {
|
||||
"version": "2.0.1",
|
||||
@@ -4826,14 +4556,6 @@
|
||||
"resolved": "https://registry.npmjs.org/urlpattern-polyfill/-/urlpattern-polyfill-10.0.0.tgz",
|
||||
"integrity": "sha512-H/A06tKD7sS1O1X2SshBVeA5FLycRpjqiBeqGKmBwBDBy28EnRjORxTNe269KSSr5un5qyWi1iL61wLxpd+ZOg=="
|
||||
},
|
||||
"node_modules/user-agents": {
|
||||
"version": "1.1.669",
|
||||
"resolved": "https://registry.npmjs.org/user-agents/-/user-agents-1.1.669.tgz",
|
||||
"integrity": "sha512-pbIzG+AOqCaIpySKJ4IAm1l0VyE4jMnK4y1thV8lm8PYxI+7X5uWcppOK7zY79TCKKTAnJH3/4gaVIZHsjrmJA==",
|
||||
"dependencies": {
|
||||
"lodash.clonedeep": "^4.5.0"
|
||||
}
|
||||
},
|
||||
"node_modules/util": {
|
||||
"version": "0.12.5",
|
||||
"resolved": "https://registry.npmjs.org/util/-/util-0.12.5.tgz",
|
||||
|
||||
290
backend/package-lock.json
generated
290
backend/package-lock.json
generated
@@ -1,14 +1,13 @@
|
||||
{
|
||||
"name": "dutchie-menus-backend",
|
||||
"version": "1.6.0",
|
||||
"version": "1.5.1",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "dutchie-menus-backend",
|
||||
"version": "1.6.0",
|
||||
"version": "1.5.1",
|
||||
"dependencies": {
|
||||
"@kubernetes/client-node": "^1.4.0",
|
||||
"@types/bcryptjs": "^3.0.0",
|
||||
"axios": "^1.6.2",
|
||||
"bcrypt": "^5.1.1",
|
||||
@@ -35,7 +34,6 @@
|
||||
"puppeteer-extra-plugin-stealth": "^2.11.2",
|
||||
"sharp": "^0.32.0",
|
||||
"socks-proxy-agent": "^8.0.2",
|
||||
"user-agents": "^1.1.669",
|
||||
"uuid": "^9.0.1",
|
||||
"zod": "^3.22.4"
|
||||
},
|
||||
@@ -494,97 +492,6 @@
|
||||
"resolved": "https://registry.npmjs.org/@ioredis/commands/-/commands-1.4.0.tgz",
|
||||
"integrity": "sha512-aFT2yemJJo+TZCmieA7qnYGQooOS7QfNmYrzGtsYd3g9j5iDP8AimYYAesf79ohjbLG12XxC4nG5DyEnC88AsQ=="
|
||||
},
|
||||
"node_modules/@jsep-plugin/assignment": {
|
||||
"version": "1.3.0",
|
||||
"resolved": "https://registry.npmjs.org/@jsep-plugin/assignment/-/assignment-1.3.0.tgz",
|
||||
"integrity": "sha512-VVgV+CXrhbMI3aSusQyclHkenWSAm95WaiKrMxRFam3JSUiIaQjoMIw2sEs/OX4XifnqeQUN4DYbJjlA8EfktQ==",
|
||||
"engines": {
|
||||
"node": ">= 10.16.0"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"jsep": "^0.4.0||^1.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@jsep-plugin/regex": {
|
||||
"version": "1.0.4",
|
||||
"resolved": "https://registry.npmjs.org/@jsep-plugin/regex/-/regex-1.0.4.tgz",
|
||||
"integrity": "sha512-q7qL4Mgjs1vByCaTnDFcBnV9HS7GVPJX5vyVoCgZHNSC9rjwIlmbXG5sUuorR5ndfHAIlJ8pVStxvjXHbNvtUg==",
|
||||
"engines": {
|
||||
"node": ">= 10.16.0"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"jsep": "^0.4.0||^1.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@kubernetes/client-node": {
|
||||
"version": "1.4.0",
|
||||
"resolved": "https://registry.npmjs.org/@kubernetes/client-node/-/client-node-1.4.0.tgz",
|
||||
"integrity": "sha512-Zge3YvF7DJi264dU1b3wb/GmzR99JhUpqTvp+VGHfwZT+g7EOOYNScDJNZwXy9cszyIGPIs0VHr+kk8e95qqrA==",
|
||||
"dependencies": {
|
||||
"@types/js-yaml": "^4.0.1",
|
||||
"@types/node": "^24.0.0",
|
||||
"@types/node-fetch": "^2.6.13",
|
||||
"@types/stream-buffers": "^3.0.3",
|
||||
"form-data": "^4.0.0",
|
||||
"hpagent": "^1.2.0",
|
||||
"isomorphic-ws": "^5.0.0",
|
||||
"js-yaml": "^4.1.0",
|
||||
"jsonpath-plus": "^10.3.0",
|
||||
"node-fetch": "^2.7.0",
|
||||
"openid-client": "^6.1.3",
|
||||
"rfc4648": "^1.3.0",
|
||||
"socks-proxy-agent": "^8.0.4",
|
||||
"stream-buffers": "^3.0.2",
|
||||
"tar-fs": "^3.0.9",
|
||||
"ws": "^8.18.2"
|
||||
}
|
||||
},
|
||||
"node_modules/@kubernetes/client-node/node_modules/@types/node": {
|
||||
"version": "24.10.3",
|
||||
"resolved": "https://registry.npmjs.org/@types/node/-/node-24.10.3.tgz",
|
||||
"integrity": "sha512-gqkrWUsS8hcm0r44yn7/xZeV1ERva/nLgrLxFRUGb7aoNMIJfZJ3AC261zDQuOAKC7MiXai1WCpYc48jAHoShQ==",
|
||||
"dependencies": {
|
||||
"undici-types": "~7.16.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@kubernetes/client-node/node_modules/tar-fs": {
|
||||
"version": "3.1.1",
|
||||
"resolved": "https://registry.npmjs.org/tar-fs/-/tar-fs-3.1.1.tgz",
|
||||
"integrity": "sha512-LZA0oaPOc2fVo82Txf3gw+AkEd38szODlptMYejQUhndHMLQ9M059uXR+AfS7DNo0NpINvSqDsvyaCrBVkptWg==",
|
||||
"dependencies": {
|
||||
"pump": "^3.0.0",
|
||||
"tar-stream": "^3.1.5"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"bare-fs": "^4.0.1",
|
||||
"bare-path": "^3.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@kubernetes/client-node/node_modules/undici-types": {
|
||||
"version": "7.16.0",
|
||||
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.16.0.tgz",
|
||||
"integrity": "sha512-Zz+aZWSj8LE6zoxD+xrjh4VfkIG8Ya6LvYkZqtUQGJPZjYl53ypCaUwWqo7eI0x66KBGeRo+mlBEkMSeSZ38Nw=="
|
||||
},
|
||||
"node_modules/@kubernetes/client-node/node_modules/ws": {
|
||||
"version": "8.18.3",
|
||||
"resolved": "https://registry.npmjs.org/ws/-/ws-8.18.3.tgz",
|
||||
"integrity": "sha512-PEIGCY5tSlUt50cqyMXfCzX+oOPqN0vuGqWzbcJ2xvnkzkq46oOpz7dQaTDBdfICb4N14+GARUDw2XV2N4tvzg==",
|
||||
"engines": {
|
||||
"node": ">=10.0.0"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"bufferutil": "^4.0.1",
|
||||
"utf-8-validate": ">=5.0.2"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
"bufferutil": {
|
||||
"optional": true
|
||||
},
|
||||
"utf-8-validate": {
|
||||
"optional": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"node_modules/@mapbox/node-pre-gyp": {
|
||||
"version": "1.0.11",
|
||||
"resolved": "https://registry.npmjs.org/@mapbox/node-pre-gyp/-/node-pre-gyp-1.0.11.tgz",
|
||||
@@ -850,11 +757,6 @@
|
||||
"integrity": "sha512-r8Tayk8HJnX0FztbZN7oVqGccWgw98T/0neJphO91KkmOzug1KkofZURD4UaD5uH8AqcFLfdPErnBod0u71/qg==",
|
||||
"dev": true
|
||||
},
|
||||
"node_modules/@types/js-yaml": {
|
||||
"version": "4.0.9",
|
||||
"resolved": "https://registry.npmjs.org/@types/js-yaml/-/js-yaml-4.0.9.tgz",
|
||||
"integrity": "sha512-k4MGaQl5TGo/iipqb2UDG2UwjXziSWkh0uysQelTlJpX1qGlpUZYm8PnO4DxG1qBomtJUdYJ6qR6xdIah10JLg=="
|
||||
},
|
||||
"node_modules/@types/jsonwebtoken": {
|
||||
"version": "9.0.10",
|
||||
"resolved": "https://registry.npmjs.org/@types/jsonwebtoken/-/jsonwebtoken-9.0.10.tgz",
|
||||
@@ -880,6 +782,7 @@
|
||||
"version": "20.19.25",
|
||||
"resolved": "https://registry.npmjs.org/@types/node/-/node-20.19.25.tgz",
|
||||
"integrity": "sha512-ZsJzA5thDQMSQO788d7IocwwQbI8B5OPzmqNvpf3NY/+MHDAS759Wo0gd2WQeXYt5AAAQjzcrTVC6SKCuYgoCQ==",
|
||||
"devOptional": true,
|
||||
"dependencies": {
|
||||
"undici-types": "~6.21.0"
|
||||
}
|
||||
@@ -890,15 +793,6 @@
|
||||
"integrity": "sha512-0ikrnug3/IyneSHqCBeslAhlK2aBfYek1fGo4bP4QnZPmiqSGRK+Oy7ZMisLWkesffJvQ1cqAcBnJC+8+nxIAg==",
|
||||
"dev": true
|
||||
},
|
||||
"node_modules/@types/node-fetch": {
|
||||
"version": "2.6.13",
|
||||
"resolved": "https://registry.npmjs.org/@types/node-fetch/-/node-fetch-2.6.13.tgz",
|
||||
"integrity": "sha512-QGpRVpzSaUs30JBSGPjOg4Uveu384erbHBoT1zeONvyCfwQxIkUshLAOqN/k9EjGviPRmWTTe6aH2qySWKTVSw==",
|
||||
"dependencies": {
|
||||
"@types/node": "*",
|
||||
"form-data": "^4.0.4"
|
||||
}
|
||||
},
|
||||
"node_modules/@types/pg": {
|
||||
"version": "8.15.6",
|
||||
"resolved": "https://registry.npmjs.org/@types/pg/-/pg-8.15.6.tgz",
|
||||
@@ -952,14 +846,6 @@
|
||||
"@types/node": "*"
|
||||
}
|
||||
},
|
||||
"node_modules/@types/stream-buffers": {
|
||||
"version": "3.0.8",
|
||||
"resolved": "https://registry.npmjs.org/@types/stream-buffers/-/stream-buffers-3.0.8.tgz",
|
||||
"integrity": "sha512-J+7VaHKNvlNPJPEJXX/fKa9DZtR/xPMwuIbe+yNOwp1YB+ApUOBv2aUpEoBJEi8nJgbgs1x8e73ttg0r1rSUdw==",
|
||||
"dependencies": {
|
||||
"@types/node": "*"
|
||||
}
|
||||
},
|
||||
"node_modules/@types/uuid": {
|
||||
"version": "9.0.8",
|
||||
"resolved": "https://registry.npmjs.org/@types/uuid/-/uuid-9.0.8.tgz",
|
||||
@@ -1140,78 +1026,6 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
"node_modules/bare-fs": {
|
||||
"version": "4.5.2",
|
||||
"resolved": "https://registry.npmjs.org/bare-fs/-/bare-fs-4.5.2.tgz",
|
||||
"integrity": "sha512-veTnRzkb6aPHOvSKIOy60KzURfBdUflr5VReI+NSaPL6xf+XLdONQgZgpYvUuZLVQ8dCqxpBAudaOM1+KpAUxw==",
|
||||
"optional": true,
|
||||
"dependencies": {
|
||||
"bare-events": "^2.5.4",
|
||||
"bare-path": "^3.0.0",
|
||||
"bare-stream": "^2.6.4",
|
||||
"bare-url": "^2.2.2",
|
||||
"fast-fifo": "^1.3.2"
|
||||
},
|
||||
"engines": {
|
||||
"bare": ">=1.16.0"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"bare-buffer": "*"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
"bare-buffer": {
|
||||
"optional": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"node_modules/bare-os": {
|
||||
"version": "3.6.2",
|
||||
"resolved": "https://registry.npmjs.org/bare-os/-/bare-os-3.6.2.tgz",
|
||||
"integrity": "sha512-T+V1+1srU2qYNBmJCXZkUY5vQ0B4FSlL3QDROnKQYOqeiQR8UbjNHlPa+TIbM4cuidiN9GaTaOZgSEgsvPbh5A==",
|
||||
"optional": true,
|
||||
"engines": {
|
||||
"bare": ">=1.14.0"
|
||||
}
|
||||
},
|
||||
"node_modules/bare-path": {
|
||||
"version": "3.0.0",
|
||||
"resolved": "https://registry.npmjs.org/bare-path/-/bare-path-3.0.0.tgz",
|
||||
"integrity": "sha512-tyfW2cQcB5NN8Saijrhqn0Zh7AnFNsnczRcuWODH0eYAXBsJ5gVxAUuNr7tsHSC6IZ77cA0SitzT+s47kot8Mw==",
|
||||
"optional": true,
|
||||
"dependencies": {
|
||||
"bare-os": "^3.0.1"
|
||||
}
|
||||
},
|
||||
"node_modules/bare-stream": {
|
||||
"version": "2.7.0",
|
||||
"resolved": "https://registry.npmjs.org/bare-stream/-/bare-stream-2.7.0.tgz",
|
||||
"integrity": "sha512-oyXQNicV1y8nc2aKffH+BUHFRXmx6VrPzlnaEvMhram0nPBrKcEdcyBg5r08D0i8VxngHFAiVyn1QKXpSG0B8A==",
|
||||
"optional": true,
|
||||
"dependencies": {
|
||||
"streamx": "^2.21.0"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"bare-buffer": "*",
|
||||
"bare-events": "*"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
"bare-buffer": {
|
||||
"optional": true
|
||||
},
|
||||
"bare-events": {
|
||||
"optional": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"node_modules/bare-url": {
|
||||
"version": "2.3.2",
|
||||
"resolved": "https://registry.npmjs.org/bare-url/-/bare-url-2.3.2.tgz",
|
||||
"integrity": "sha512-ZMq4gd9ngV5aTMa5p9+UfY0b3skwhHELaDkhEHetMdX0LRkW9kzaym4oo/Eh+Ghm0CCDuMTsRIGM/ytUc1ZYmw==",
|
||||
"optional": true,
|
||||
"dependencies": {
|
||||
"bare-path": "^3.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/base64-js": {
|
||||
"version": "1.5.1",
|
||||
"resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz",
|
||||
@@ -2725,14 +2539,6 @@
|
||||
"node": ">=16.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/hpagent": {
|
||||
"version": "1.2.0",
|
||||
"resolved": "https://registry.npmjs.org/hpagent/-/hpagent-1.2.0.tgz",
|
||||
"integrity": "sha512-A91dYTeIB6NoXG+PxTQpCCDDnfHsW9kc06Lvpu1TEe9gnd6ZFeiBoRO9JvzEv6xK7EX97/dUE8g/vBMTqTS3CA==",
|
||||
"engines": {
|
||||
"node": ">=14"
|
||||
}
|
||||
},
|
||||
"node_modules/htmlparser2": {
|
||||
"version": "10.0.0",
|
||||
"resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-10.0.0.tgz",
|
||||
@@ -3096,22 +2902,6 @@
|
||||
"node": ">=0.10.0"
|
||||
}
|
||||
},
|
||||
"node_modules/isomorphic-ws": {
|
||||
"version": "5.0.0",
|
||||
"resolved": "https://registry.npmjs.org/isomorphic-ws/-/isomorphic-ws-5.0.0.tgz",
|
||||
"integrity": "sha512-muId7Zzn9ywDsyXgTIafTry2sV3nySZeUDe6YedVd1Hvuuep5AsIlqK+XefWpYTyJG5e503F2xIuT2lcU6rCSw==",
|
||||
"peerDependencies": {
|
||||
"ws": "*"
|
||||
}
|
||||
},
|
||||
"node_modules/jose": {
|
||||
"version": "6.1.3",
|
||||
"resolved": "https://registry.npmjs.org/jose/-/jose-6.1.3.tgz",
|
||||
"integrity": "sha512-0TpaTfihd4QMNwrz/ob2Bp7X04yuxJkjRGi4aKmOqwhov54i6u79oCv7T+C7lo70MKH6BesI3vscD1yb/yzKXQ==",
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/panva"
|
||||
}
|
||||
},
|
||||
"node_modules/js-tokens": {
|
||||
"version": "4.0.0",
|
||||
"resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz",
|
||||
@@ -3128,14 +2918,6 @@
|
||||
"js-yaml": "bin/js-yaml.js"
|
||||
}
|
||||
},
|
||||
"node_modules/jsep": {
|
||||
"version": "1.4.0",
|
||||
"resolved": "https://registry.npmjs.org/jsep/-/jsep-1.4.0.tgz",
|
||||
"integrity": "sha512-B7qPcEVE3NVkmSJbaYxvv4cHkVW7DQsZz13pUMrfS8z8Q/BuShN+gcTXrUlPiGqM2/t/EEaI030bpxMqY8gMlw==",
|
||||
"engines": {
|
||||
"node": ">= 10.16.0"
|
||||
}
|
||||
},
|
||||
"node_modules/json-parse-even-better-errors": {
|
||||
"version": "2.3.1",
|
||||
"resolved": "https://registry.npmjs.org/json-parse-even-better-errors/-/json-parse-even-better-errors-2.3.1.tgz",
|
||||
@@ -3157,23 +2939,6 @@
|
||||
"graceful-fs": "^4.1.6"
|
||||
}
|
||||
},
|
||||
"node_modules/jsonpath-plus": {
|
||||
"version": "10.3.0",
|
||||
"resolved": "https://registry.npmjs.org/jsonpath-plus/-/jsonpath-plus-10.3.0.tgz",
|
||||
"integrity": "sha512-8TNmfeTCk2Le33A3vRRwtuworG/L5RrgMvdjhKZxvyShO+mBu2fP50OWUjRLNtvw344DdDarFh9buFAZs5ujeA==",
|
||||
"dependencies": {
|
||||
"@jsep-plugin/assignment": "^1.3.0",
|
||||
"@jsep-plugin/regex": "^1.0.4",
|
||||
"jsep": "^1.4.0"
|
||||
},
|
||||
"bin": {
|
||||
"jsonpath": "bin/jsonpath-cli.js",
|
||||
"jsonpath-plus": "bin/jsonpath-cli.js"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=18.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/jsonwebtoken": {
|
||||
"version": "9.0.2",
|
||||
"resolved": "https://registry.npmjs.org/jsonwebtoken/-/jsonwebtoken-9.0.2.tgz",
|
||||
@@ -3248,11 +3013,6 @@
|
||||
"resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz",
|
||||
"integrity": "sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg=="
|
||||
},
|
||||
"node_modules/lodash.clonedeep": {
|
||||
"version": "4.5.0",
|
||||
"resolved": "https://registry.npmjs.org/lodash.clonedeep/-/lodash.clonedeep-4.5.0.tgz",
|
||||
"integrity": "sha512-H5ZhCF25riFd9uB5UCkVKo61m3S/xZk1x4wA6yp/L3RFP6Z/eHH1ymQcGLo7J3GMPfm0V/7m1tryHuGVxpqEBQ=="
|
||||
},
|
||||
"node_modules/lodash.defaults": {
|
||||
"version": "4.2.0",
|
||||
"resolved": "https://registry.npmjs.org/lodash.defaults/-/lodash.defaults-4.2.0.tgz",
|
||||
@@ -3702,14 +3462,6 @@
|
||||
"url": "https://github.com/fb55/nth-check?sponsor=1"
|
||||
}
|
||||
},
|
||||
"node_modules/oauth4webapi": {
|
||||
"version": "3.8.3",
|
||||
"resolved": "https://registry.npmjs.org/oauth4webapi/-/oauth4webapi-3.8.3.tgz",
|
||||
"integrity": "sha512-pQ5BsX3QRTgnt5HxgHwgunIRaDXBdkT23tf8dfzmtTIL2LTpdmxgbpbBm0VgFWAIDlezQvQCTgnVIUmHupXHxw==",
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/panva"
|
||||
}
|
||||
},
|
||||
"node_modules/object-assign": {
|
||||
"version": "4.1.1",
|
||||
"resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz",
|
||||
@@ -3748,18 +3500,6 @@
|
||||
"wrappy": "1"
|
||||
}
|
||||
},
|
||||
"node_modules/openid-client": {
|
||||
"version": "6.8.1",
|
||||
"resolved": "https://registry.npmjs.org/openid-client/-/openid-client-6.8.1.tgz",
|
||||
"integrity": "sha512-VoYT6enBo6Vj2j3Q5Ec0AezS+9YGzQo1f5Xc42lreMGlfP4ljiXPKVDvCADh+XHCV/bqPu/wWSiCVXbJKvrODw==",
|
||||
"dependencies": {
|
||||
"jose": "^6.1.0",
|
||||
"oauth4webapi": "^3.8.2"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/panva"
|
||||
}
|
||||
},
|
||||
"node_modules/pac-proxy-agent": {
|
||||
"version": "7.2.0",
|
||||
"resolved": "https://registry.npmjs.org/pac-proxy-agent/-/pac-proxy-agent-7.2.0.tgz",
|
||||
@@ -4676,11 +4416,6 @@
|
||||
"url": "https://github.com/privatenumber/resolve-pkg-maps?sponsor=1"
|
||||
}
|
||||
},
|
||||
"node_modules/rfc4648": {
|
||||
"version": "1.5.4",
|
||||
"resolved": "https://registry.npmjs.org/rfc4648/-/rfc4648-1.5.4.tgz",
|
||||
"integrity": "sha512-rRg/6Lb+IGfJqO05HZkN50UtY7K/JhxJag1kP23+zyMfrvoB0B7RWv06MbOzoc79RgCdNTiUaNsTT1AJZ7Z+cg=="
|
||||
},
|
||||
"node_modules/rimraf": {
|
||||
"version": "3.0.2",
|
||||
"resolved": "https://registry.npmjs.org/rimraf/-/rimraf-3.0.2.tgz",
|
||||
@@ -5111,14 +4846,6 @@
|
||||
"node": ">= 0.8"
|
||||
}
|
||||
},
|
||||
"node_modules/stream-buffers": {
|
||||
"version": "3.0.3",
|
||||
"resolved": "https://registry.npmjs.org/stream-buffers/-/stream-buffers-3.0.3.tgz",
|
||||
"integrity": "sha512-pqMqwQCso0PBJt2PQmDO0cFj0lyqmiwOMiMSkVtRokl7e+ZTRYgDHKnuZNbqjiJXgsg4nuqtD/zxuo9KqTp0Yw==",
|
||||
"engines": {
|
||||
"node": ">= 0.10.0"
|
||||
}
|
||||
},
|
||||
"node_modules/streamx": {
|
||||
"version": "2.23.0",
|
||||
"resolved": "https://registry.npmjs.org/streamx/-/streamx-2.23.0.tgz",
|
||||
@@ -5338,7 +5065,8 @@
|
||||
"node_modules/undici-types": {
|
||||
"version": "6.21.0",
|
||||
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.21.0.tgz",
|
||||
"integrity": "sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ=="
|
||||
"integrity": "sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ==",
|
||||
"devOptional": true
|
||||
},
|
||||
"node_modules/universalify": {
|
||||
"version": "2.0.1",
|
||||
@@ -5361,14 +5089,6 @@
|
||||
"resolved": "https://registry.npmjs.org/urlpattern-polyfill/-/urlpattern-polyfill-10.0.0.tgz",
|
||||
"integrity": "sha512-H/A06tKD7sS1O1X2SshBVeA5FLycRpjqiBeqGKmBwBDBy28EnRjORxTNe269KSSr5un5qyWi1iL61wLxpd+ZOg=="
|
||||
},
|
||||
"node_modules/user-agents": {
|
||||
"version": "1.1.669",
|
||||
"resolved": "https://registry.npmjs.org/user-agents/-/user-agents-1.1.669.tgz",
|
||||
"integrity": "sha512-pbIzG+AOqCaIpySKJ4IAm1l0VyE4jMnK4y1thV8lm8PYxI+7X5uWcppOK7zY79TCKKTAnJH3/4gaVIZHsjrmJA==",
|
||||
"dependencies": {
|
||||
"lodash.clonedeep": "^4.5.0"
|
||||
}
|
||||
},
|
||||
"node_modules/util": {
|
||||
"version": "0.12.5",
|
||||
"resolved": "https://registry.npmjs.org/util/-/util-0.12.5.tgz",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "dutchie-menus-backend",
|
||||
"version": "1.6.0",
|
||||
"version": "1.5.1",
|
||||
"description": "Backend API for Dutchie Menus scraper and management",
|
||||
"main": "dist/index.js",
|
||||
"scripts": {
|
||||
@@ -22,7 +22,6 @@
|
||||
"seed:dt:cities:bulk": "tsx src/scripts/seed-dt-cities-bulk.ts"
|
||||
},
|
||||
"dependencies": {
|
||||
"@kubernetes/client-node": "^1.4.0",
|
||||
"@types/bcryptjs": "^3.0.0",
|
||||
"axios": "^1.6.2",
|
||||
"bcrypt": "^5.1.1",
|
||||
@@ -49,7 +48,6 @@
|
||||
"puppeteer-extra-plugin-stealth": "^2.11.2",
|
||||
"sharp": "^0.32.0",
|
||||
"socks-proxy-agent": "^8.0.2",
|
||||
"user-agents": "^1.1.669",
|
||||
"uuid": "^9.0.1",
|
||||
"zod": "^3.22.4"
|
||||
},
|
||||
|
||||
Binary file not shown.
@@ -1 +0,0 @@
|
||||
cannaiq-menus-1.6.0.zip
|
||||
@@ -32,7 +32,6 @@ const TRUSTED_ORIGINS = [
|
||||
// Pattern-based trusted origins (wildcards)
|
||||
const TRUSTED_ORIGIN_PATTERNS = [
|
||||
/^https:\/\/.*\.cannabrands\.app$/, // *.cannabrands.app
|
||||
/^https:\/\/.*\.cannaiq\.co$/, // *.cannaiq.co
|
||||
];
|
||||
|
||||
// Trusted IPs for internal pod-to-pod communication
|
||||
@@ -153,53 +152,7 @@ export async function authenticateUser(email: string, password: string): Promise
|
||||
}
|
||||
|
||||
export async function authMiddleware(req: AuthRequest, res: Response, next: NextFunction) {
|
||||
const authHeader = req.headers.authorization;
|
||||
|
||||
// If a Bearer token is provided, always try to use it first (logged-in user)
|
||||
if (authHeader && authHeader.startsWith('Bearer ')) {
|
||||
const token = authHeader.substring(7);
|
||||
|
||||
// Try JWT first
|
||||
const jwtUser = verifyToken(token);
|
||||
|
||||
if (jwtUser) {
|
||||
req.user = jwtUser;
|
||||
return next();
|
||||
}
|
||||
|
||||
// If JWT fails, try API token
|
||||
try {
|
||||
const result = await pool.query(`
|
||||
SELECT id, name, rate_limit, active, expires_at, allowed_endpoints
|
||||
FROM api_tokens
|
||||
WHERE token = $1
|
||||
`, [token]);
|
||||
|
||||
if (result.rows.length > 0) {
|
||||
const apiToken = result.rows[0];
|
||||
if (!apiToken.active) {
|
||||
return res.status(401).json({ error: 'API token is inactive' });
|
||||
}
|
||||
if (apiToken.expires_at && new Date(apiToken.expires_at) < new Date()) {
|
||||
return res.status(401).json({ error: 'API token has expired' });
|
||||
}
|
||||
req.user = {
|
||||
id: 0,
|
||||
email: `api:${apiToken.name}`,
|
||||
role: 'api_token'
|
||||
};
|
||||
req.apiToken = apiToken;
|
||||
return next();
|
||||
}
|
||||
} catch (err) {
|
||||
console.error('API token lookup error:', err);
|
||||
}
|
||||
|
||||
// Token provided but invalid
|
||||
return res.status(401).json({ error: 'Invalid token' });
|
||||
}
|
||||
|
||||
// No token provided - check trusted origins for API access (WordPress, etc.)
|
||||
// Allow trusted origins/IPs to bypass auth (internal services, same-origin)
|
||||
if (isTrustedRequest(req)) {
|
||||
req.user = {
|
||||
id: 0,
|
||||
@@ -209,9 +162,79 @@ export async function authMiddleware(req: AuthRequest, res: Response, next: Next
|
||||
return next();
|
||||
}
|
||||
|
||||
return res.status(401).json({ error: 'No token provided' });
|
||||
}
|
||||
const authHeader = req.headers.authorization;
|
||||
|
||||
if (!authHeader || !authHeader.startsWith('Bearer ')) {
|
||||
return res.status(401).json({ error: 'No token provided' });
|
||||
}
|
||||
|
||||
const token = authHeader.substring(7);
|
||||
|
||||
// Try JWT first
|
||||
const jwtUser = verifyToken(token);
|
||||
|
||||
if (jwtUser) {
|
||||
req.user = jwtUser;
|
||||
return next();
|
||||
}
|
||||
|
||||
// If JWT fails, try API token
|
||||
try {
|
||||
const result = await pool.query(`
|
||||
SELECT id, name, rate_limit, active, expires_at, allowed_endpoints
|
||||
FROM api_tokens
|
||||
WHERE token = $1
|
||||
`, [token]);
|
||||
|
||||
if (result.rows.length === 0) {
|
||||
return res.status(401).json({ error: 'Invalid token' });
|
||||
}
|
||||
|
||||
const apiToken = result.rows[0];
|
||||
|
||||
// Check if token is active
|
||||
if (!apiToken.active) {
|
||||
return res.status(401).json({ error: 'Token is disabled' });
|
||||
}
|
||||
|
||||
// Check if token is expired
|
||||
if (apiToken.expires_at && new Date(apiToken.expires_at) < new Date()) {
|
||||
return res.status(401).json({ error: 'Token has expired' });
|
||||
}
|
||||
|
||||
// Check allowed endpoints
|
||||
if (apiToken.allowed_endpoints && apiToken.allowed_endpoints.length > 0) {
|
||||
const isAllowed = apiToken.allowed_endpoints.some((pattern: string) => {
|
||||
// Simple wildcard matching
|
||||
const regex = new RegExp('^' + pattern.replace('*', '.*') + '$');
|
||||
return regex.test(req.path);
|
||||
});
|
||||
|
||||
if (!isAllowed) {
|
||||
return res.status(403).json({ error: 'Endpoint not allowed for this token' });
|
||||
}
|
||||
}
|
||||
|
||||
// Set API token on request for tracking
|
||||
req.apiToken = {
|
||||
id: apiToken.id,
|
||||
name: apiToken.name,
|
||||
rate_limit: apiToken.rate_limit
|
||||
};
|
||||
|
||||
// Set a generic user for compatibility with existing code
|
||||
req.user = {
|
||||
id: apiToken.id,
|
||||
email: `api-token-${apiToken.id}@system`,
|
||||
role: 'api'
|
||||
};
|
||||
|
||||
next();
|
||||
} catch (error) {
|
||||
console.error('Error verifying API token:', error);
|
||||
return res.status(500).json({ error: 'Authentication failed' });
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Require specific role(s) to access endpoint.
|
||||
|
||||
@@ -172,9 +172,6 @@ export async function runFullDiscovery(
|
||||
console.log(`Errors: ${totalErrors}`);
|
||||
}
|
||||
|
||||
// Per TASK_WORKFLOW_2024-12-10.md: Track new dispensary IDs for task chaining
|
||||
let newDispensaryIds: number[] = [];
|
||||
|
||||
// Step 4: Auto-validate and promote discovered locations
|
||||
if (!dryRun && totalLocationsUpserted > 0) {
|
||||
console.log('\n[Discovery] Step 4: Auto-promoting discovered locations...');
|
||||
@@ -183,13 +180,6 @@ export async function runFullDiscovery(
|
||||
console.log(` Created: ${promotionResult.created} new dispensaries`);
|
||||
console.log(` Updated: ${promotionResult.updated} existing dispensaries`);
|
||||
console.log(` Rejected: ${promotionResult.rejected} (validation failed)`);
|
||||
|
||||
// Per TASK_WORKFLOW_2024-12-10.md: Capture new IDs for task chaining
|
||||
newDispensaryIds = promotionResult.newDispensaryIds;
|
||||
if (newDispensaryIds.length > 0) {
|
||||
console.log(` New store IDs for crawl: [${newDispensaryIds.join(', ')}]`);
|
||||
}
|
||||
|
||||
if (promotionResult.rejectedRecords.length > 0) {
|
||||
console.log(` Rejection reasons:`);
|
||||
promotionResult.rejectedRecords.slice(0, 5).forEach(r => {
|
||||
@@ -224,8 +214,6 @@ export async function runFullDiscovery(
|
||||
totalLocationsFound,
|
||||
totalLocationsUpserted,
|
||||
durationMs,
|
||||
// Per TASK_WORKFLOW_2024-12-10.md: Return new IDs for task chaining
|
||||
newDispensaryIds,
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
@@ -127,8 +127,6 @@ export interface PromotionSummary {
|
||||
errors: string[];
|
||||
}>;
|
||||
durationMs: number;
|
||||
// Per TASK_WORKFLOW_2024-12-10.md: Track new dispensary IDs for task chaining
|
||||
newDispensaryIds: number[];
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -471,8 +469,6 @@ export async function promoteDiscoveredLocations(
|
||||
|
||||
const results: PromotionResult[] = [];
|
||||
const rejectedRecords: PromotionSummary['rejectedRecords'] = [];
|
||||
// Per TASK_WORKFLOW_2024-12-10.md: Track new dispensary IDs for task chaining
|
||||
const newDispensaryIds: number[] = [];
|
||||
let created = 0;
|
||||
let updated = 0;
|
||||
let skipped = 0;
|
||||
@@ -529,8 +525,6 @@ export async function promoteDiscoveredLocations(
|
||||
|
||||
if (promotionResult.action === 'created') {
|
||||
created++;
|
||||
// Per TASK_WORKFLOW_2024-12-10.md: Track new IDs for task chaining
|
||||
newDispensaryIds.push(promotionResult.dispensaryId);
|
||||
} else {
|
||||
updated++;
|
||||
}
|
||||
@@ -554,8 +548,6 @@ export async function promoteDiscoveredLocations(
|
||||
results,
|
||||
rejectedRecords,
|
||||
durationMs: Date.now() - startTime,
|
||||
// Per TASK_WORKFLOW_2024-12-10.md: Return new IDs for task chaining
|
||||
newDispensaryIds,
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
@@ -211,8 +211,6 @@ export interface FullDiscoveryResult {
|
||||
totalLocationsFound: number;
|
||||
totalLocationsUpserted: number;
|
||||
durationMs: number;
|
||||
// Per TASK_WORKFLOW_2024-12-10.md: Track new dispensary IDs for task chaining
|
||||
newDispensaryIds?: number[];
|
||||
}
|
||||
|
||||
// ============================================================
|
||||
|
||||
@@ -90,7 +90,7 @@ export async function upsertStoreProducts(
|
||||
name_raw, brand_name_raw, category_raw, subcategory_raw,
|
||||
price_rec, price_med, price_rec_special, price_med_special,
|
||||
is_on_special, discount_percent,
|
||||
is_in_stock, stock_status, stock_quantity, total_quantity_available,
|
||||
is_in_stock, stock_status,
|
||||
thc_percent, cbd_percent,
|
||||
image_url,
|
||||
first_seen_at, last_seen_at, updated_at
|
||||
@@ -99,9 +99,9 @@ export async function upsertStoreProducts(
|
||||
$5, $6, $7, $8,
|
||||
$9, $10, $11, $12,
|
||||
$13, $14,
|
||||
$15, $16, $17, $17,
|
||||
$18, $19,
|
||||
$20,
|
||||
$15, $16,
|
||||
$17, $18,
|
||||
$19,
|
||||
NOW(), NOW(), NOW()
|
||||
)
|
||||
ON CONFLICT (dispensary_id, provider, provider_product_id)
|
||||
@@ -118,8 +118,6 @@ export async function upsertStoreProducts(
|
||||
discount_percent = EXCLUDED.discount_percent,
|
||||
is_in_stock = EXCLUDED.is_in_stock,
|
||||
stock_status = EXCLUDED.stock_status,
|
||||
stock_quantity = EXCLUDED.stock_quantity,
|
||||
total_quantity_available = EXCLUDED.total_quantity_available,
|
||||
thc_percent = EXCLUDED.thc_percent,
|
||||
cbd_percent = EXCLUDED.cbd_percent,
|
||||
image_url = EXCLUDED.image_url,
|
||||
@@ -143,7 +141,6 @@ export async function upsertStoreProducts(
|
||||
productPricing?.discountPercent,
|
||||
productAvailability?.inStock ?? true,
|
||||
productAvailability?.stockStatus || 'unknown',
|
||||
productAvailability?.quantity ?? null, // stock_quantity and total_quantity_available
|
||||
// Clamp THC/CBD to valid percentage range (0-100) - some products report mg as %
|
||||
product.thcPercent !== null && product.thcPercent <= 100 ? product.thcPercent : null,
|
||||
product.cbdPercent !== null && product.cbdPercent <= 100 ? product.cbdPercent : null,
|
||||
|
||||
@@ -6,8 +6,6 @@ import { initializeMinio, isMinioEnabled } from './utils/minio';
|
||||
import { initializeImageStorage } from './utils/image-storage';
|
||||
import { logger } from './services/logger';
|
||||
import { cleanupOrphanedJobs } from './services/proxyTestQueue';
|
||||
// Per TASK_WORKFLOW_2024-12-10.md: Database-driven task scheduler
|
||||
import { taskScheduler } from './services/task-scheduler';
|
||||
import { runAutoMigrations } from './db/auto-migrate';
|
||||
import { getPool } from './db/pool';
|
||||
import healthRoutes from './routes/health';
|
||||
@@ -131,6 +129,7 @@ import { createStatesRouter } from './routes/states';
|
||||
import { createAnalyticsV2Router } from './routes/analytics-v2';
|
||||
import { createDiscoveryRoutes } from './discovery';
|
||||
import pipelineRoutes from './routes/pipeline';
|
||||
import { getPool } from './db/pool';
|
||||
|
||||
// Consumer API routes (findadispo.com, findagram.co)
|
||||
import consumerAuthRoutes from './routes/consumer-auth';
|
||||
@@ -144,9 +143,6 @@ import seoRoutes from './routes/seo';
|
||||
import priceAnalyticsRoutes from './routes/price-analytics';
|
||||
import tasksRoutes from './routes/tasks';
|
||||
import workerRegistryRoutes from './routes/worker-registry';
|
||||
// Per TASK_WORKFLOW_2024-12-10.md: Raw payload access API
|
||||
import payloadsRoutes from './routes/payloads';
|
||||
import k8sRoutes from './routes/k8s';
|
||||
|
||||
// Mark requests from trusted domains (cannaiq.co, findagram.co, findadispo.com)
|
||||
// These domains can access the API without authentication
|
||||
@@ -227,14 +223,6 @@ console.log('[Tasks] Routes registered at /api/tasks');
|
||||
app.use('/api/worker-registry', workerRegistryRoutes);
|
||||
console.log('[WorkerRegistry] Routes registered at /api/worker-registry');
|
||||
|
||||
// Per TASK_WORKFLOW_2024-12-10.md: Raw payload access API
|
||||
app.use('/api/payloads', payloadsRoutes);
|
||||
console.log('[Payloads] Routes registered at /api/payloads');
|
||||
|
||||
// K8s control routes - worker scaling from admin UI
|
||||
app.use('/api/k8s', k8sRoutes);
|
||||
console.log('[K8s] Routes registered at /api/k8s');
|
||||
|
||||
// Phase 3: Analytics V2 - Enhanced analytics with rec/med state segmentation
|
||||
try {
|
||||
const analyticsV2Router = createAnalyticsV2Router(getPool());
|
||||
@@ -339,17 +327,6 @@ async function startServer() {
|
||||
// Clean up any orphaned proxy test jobs from previous server runs
|
||||
await cleanupOrphanedJobs();
|
||||
|
||||
// Per TASK_WORKFLOW_2024-12-10.md: Start database-driven task scheduler
|
||||
// This replaces node-cron - schedules are stored in DB and survive restarts
|
||||
// Uses SELECT FOR UPDATE SKIP LOCKED for multi-replica safety
|
||||
try {
|
||||
await taskScheduler.start();
|
||||
logger.info('system', 'Task scheduler started');
|
||||
} catch (err: any) {
|
||||
// Non-fatal - scheduler can recover on next poll
|
||||
logger.warn('system', `Task scheduler startup warning: ${err.message}`);
|
||||
}
|
||||
|
||||
app.listen(PORT, () => {
|
||||
logger.info('system', `Server running on port ${PORT}`);
|
||||
console.log(`🚀 Server running on port ${PORT}`);
|
||||
|
||||
@@ -5,8 +5,8 @@ import { Request, Response, NextFunction } from 'express';
|
||||
* These are our own frontends that should have unrestricted access.
|
||||
*/
|
||||
const TRUSTED_DOMAINS = [
|
||||
'*.cannaiq.co',
|
||||
'*.cannabrands.app',
|
||||
'cannaiq.co',
|
||||
'www.cannaiq.co',
|
||||
'findagram.co',
|
||||
'www.findagram.co',
|
||||
'findadispo.com',
|
||||
@@ -32,24 +32,6 @@ function extractDomain(header: string): string | null {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks if a domain matches any trusted domain (supports *.domain.com wildcards)
|
||||
*/
|
||||
function isTrustedDomain(domain: string): boolean {
|
||||
for (const trusted of TRUSTED_DOMAINS) {
|
||||
if (trusted.startsWith('*.')) {
|
||||
// Wildcard: *.example.com matches example.com and any subdomain
|
||||
const baseDomain = trusted.slice(2);
|
||||
if (domain === baseDomain || domain.endsWith('.' + baseDomain)) {
|
||||
return true;
|
||||
}
|
||||
} else if (domain === trusted) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks if the request comes from a trusted domain
|
||||
*/
|
||||
@@ -60,7 +42,7 @@ function isRequestFromTrustedDomain(req: Request): boolean {
|
||||
// Check Origin header first (preferred for CORS requests)
|
||||
if (origin) {
|
||||
const domain = extractDomain(origin);
|
||||
if (domain && isTrustedDomain(domain)) {
|
||||
if (domain && TRUSTED_DOMAINS.includes(domain)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
@@ -68,7 +50,7 @@ function isRequestFromTrustedDomain(req: Request): boolean {
|
||||
// Fallback to Referer header
|
||||
if (referer) {
|
||||
const domain = extractDomain(referer);
|
||||
if (domain && isTrustedDomain(domain)) {
|
||||
if (domain && TRUSTED_DOMAINS.includes(domain)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -702,10 +702,12 @@ export class StateQueryService {
|
||||
async getNationalSummary(): Promise<NationalSummary> {
|
||||
const stateMetrics = await this.getAllStateMetrics();
|
||||
|
||||
// Get all states count and aggregate metrics
|
||||
const result = await this.pool.query(`
|
||||
SELECT
|
||||
COUNT(DISTINCT s.code) AS total_states,
|
||||
COUNT(DISTINCT CASE WHEN EXISTS (
|
||||
SELECT 1 FROM dispensaries d WHERE d.state = s.code AND d.menu_type IS NOT NULL
|
||||
) THEN s.code END) AS active_states,
|
||||
(SELECT COUNT(*) FROM dispensaries WHERE state IS NOT NULL) AS total_stores,
|
||||
(SELECT COUNT(*) FROM store_products sp
|
||||
JOIN dispensaries d ON sp.dispensary_id = d.id
|
||||
@@ -723,7 +725,7 @@ export class StateQueryService {
|
||||
|
||||
return {
|
||||
totalStates: parseInt(data.total_states),
|
||||
activeStates: parseInt(data.total_states), // Same as totalStates - all states shown
|
||||
activeStates: parseInt(data.active_states),
|
||||
totalStores: parseInt(data.total_stores),
|
||||
totalProducts: parseInt(data.total_products),
|
||||
totalBrands: parseInt(data.total_brands),
|
||||
|
||||
@@ -5,35 +5,22 @@
|
||||
*
|
||||
* DO NOT MODIFY THIS FILE WITHOUT EXPLICIT AUTHORIZATION.
|
||||
*
|
||||
* Updated: 2025-12-10 per workflow-12102025.md
|
||||
*
|
||||
* KEY BEHAVIORS (per workflow-12102025.md):
|
||||
* 1. startSession() gets identity from PROXY LOCATION, not task params
|
||||
* 2. On 403: immediately get new IP + new fingerprint, then retry
|
||||
* 3. After 3 consecutive 403s on same proxy → disable it (burned)
|
||||
* 4. Language is always English (en-US)
|
||||
* This is the canonical HTTP client for all Dutchie communication.
|
||||
* All Dutchie workers (Alice, Bella, etc.) MUST use this client.
|
||||
*
|
||||
* IMPLEMENTATION:
|
||||
* - Uses curl via child_process.execSync (bypasses TLS fingerprinting)
|
||||
* - NO Puppeteer, NO axios, NO fetch
|
||||
* - Uses intoli/user-agents via CrawlRotator for realistic fingerprints
|
||||
* - Fingerprint rotation on 403
|
||||
* - Residential IP compatible
|
||||
*
|
||||
* USAGE:
|
||||
* import { curlPost, curlGet, executeGraphQL, startSession } from '@dutchie/client';
|
||||
* import { curlPost, curlGet, executeGraphQL } from '@dutchie/client';
|
||||
*
|
||||
* ============================================================
|
||||
*/
|
||||
|
||||
import { execSync } from 'child_process';
|
||||
import {
|
||||
buildOrderedHeaders,
|
||||
buildRefererFromMenuUrl,
|
||||
getCurlBinary,
|
||||
isCurlImpersonateAvailable,
|
||||
HeaderContext,
|
||||
BrowserType,
|
||||
} from '../../services/http-fingerprint';
|
||||
|
||||
// ============================================================
|
||||
// TYPES
|
||||
@@ -45,8 +32,6 @@ export interface CurlResponse {
|
||||
error?: string;
|
||||
}
|
||||
|
||||
// Per workflow-12102025.md: fingerprint comes from CrawlRotator's BrowserFingerprint
|
||||
// We keep a simplified interface here for header building
|
||||
export interface Fingerprint {
|
||||
userAgent: string;
|
||||
acceptLanguage: string;
|
||||
@@ -72,13 +57,15 @@ export const DUTCHIE_CONFIG = {
|
||||
|
||||
// ============================================================
|
||||
// PROXY SUPPORT
|
||||
// Per workflow-12102025.md:
|
||||
// - On 403: recordBlock() → increment consecutive_403_count
|
||||
// - After 3 consecutive 403s → proxy disabled
|
||||
// - Immediately rotate to new IP + new fingerprint on 403
|
||||
// ============================================================
|
||||
// Integrates with the CrawlRotator system from proxy-rotator.ts
|
||||
// On 403 errors:
|
||||
// 1. Record failure on current proxy
|
||||
// 2. Rotate to next proxy
|
||||
// 3. Retry with new proxy
|
||||
// ============================================================
|
||||
|
||||
import type { CrawlRotator, BrowserFingerprint } from '../../services/crawl-rotator';
|
||||
import type { CrawlRotator, Proxy } from '../../services/crawl-rotator';
|
||||
|
||||
let currentProxy: string | null = null;
|
||||
let crawlRotator: CrawlRotator | null = null;
|
||||
@@ -105,12 +92,13 @@ export function getProxy(): string | null {
|
||||
|
||||
/**
|
||||
* Set CrawlRotator for proxy rotation on 403s
|
||||
* Per workflow-12102025.md: enables automatic rotation when blocked
|
||||
* This enables automatic proxy rotation when blocked
|
||||
*/
|
||||
export function setCrawlRotator(rotator: CrawlRotator | null): void {
|
||||
crawlRotator = rotator;
|
||||
if (rotator) {
|
||||
console.log('[Dutchie Client] CrawlRotator attached - proxy rotation enabled');
|
||||
// Set initial proxy from rotator
|
||||
const proxy = rotator.proxy.getCurrent();
|
||||
if (proxy) {
|
||||
currentProxy = rotator.proxy.getProxyUrl(proxy);
|
||||
@@ -127,41 +115,30 @@ export function getCrawlRotator(): CrawlRotator | null {
|
||||
}
|
||||
|
||||
/**
|
||||
* Handle 403 block - per workflow-12102025.md:
|
||||
* 1. Record block on current proxy (increments consecutive_403_count)
|
||||
* 2. Immediately rotate to new proxy (new IP)
|
||||
* 3. Rotate fingerprint
|
||||
* Returns false if no more proxies available
|
||||
* Rotate to next proxy (called on 403)
|
||||
*/
|
||||
async function handle403Block(): Promise<boolean> {
|
||||
async function rotateProxyOn403(error?: string): Promise<boolean> {
|
||||
if (!crawlRotator) {
|
||||
console.warn('[Dutchie Client] No CrawlRotator - cannot handle 403');
|
||||
return false;
|
||||
}
|
||||
|
||||
// Per workflow-12102025.md: record block (tracks consecutive 403s)
|
||||
const wasDisabled = await crawlRotator.recordBlock();
|
||||
if (wasDisabled) {
|
||||
console.log('[Dutchie Client] Current proxy was disabled (3 consecutive 403s)');
|
||||
}
|
||||
|
||||
// Per workflow-12102025.md: immediately get new IP + new fingerprint
|
||||
const { proxy: nextProxy, fingerprint } = crawlRotator.rotateBoth();
|
||||
// Record failure on current proxy
|
||||
await crawlRotator.recordFailure(error || '403 Forbidden');
|
||||
|
||||
// Rotate to next proxy
|
||||
const nextProxy = crawlRotator.rotateProxy();
|
||||
if (nextProxy) {
|
||||
currentProxy = crawlRotator.proxy.getProxyUrl(nextProxy);
|
||||
console.log(`[Dutchie Client] Rotated to new proxy: ${currentProxy.replace(/:[^:@]+@/, ':***@')}`);
|
||||
console.log(`[Dutchie Client] New fingerprint: ${fingerprint.userAgent.slice(0, 50)}...`);
|
||||
console.log(`[Dutchie Client] Rotated proxy: ${currentProxy.replace(/:[^:@]+@/, ':***@')}`);
|
||||
return true;
|
||||
}
|
||||
|
||||
console.error('[Dutchie Client] No more proxies available!');
|
||||
console.warn('[Dutchie Client] No more proxies available');
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Record success on current proxy
|
||||
* Per workflow-12102025.md: resets consecutive_403_count
|
||||
*/
|
||||
async function recordProxySuccess(responseTimeMs?: number): Promise<void> {
|
||||
if (crawlRotator) {
|
||||
@@ -185,69 +162,163 @@ export const GRAPHQL_HASHES = {
|
||||
GetAllCitiesByState: 'ae547a0466ace5a48f91e55bf6699eacd87e3a42841560f0c0eabed5a0a920e6',
|
||||
};
|
||||
|
||||
// ============================================================
|
||||
// FINGERPRINTS - Browser profiles for anti-detect
|
||||
// ============================================================
|
||||
|
||||
// Pool of browser profiles used for request headers. Order matters to callers
// that index into it: entry 0 (Chrome on Windows) is the default profile.
// Firefox and Safari entries deliberately carry no sec-ch-ua* fields.
const FINGERPRINTS: Fingerprint[] = (() => {
  const ACCEPT_LANGUAGE_DEFAULT = 'en-US,en;q=0.9';
  const NOT_MOBILE = '?0';
  const PLATFORM_WINDOWS = '"Windows"';
  const PLATFORM_MACOS = '"macOS"';
  const CHROME_131_BRANDS = '"Google Chrome";v="131", "Chromium";v="131", "Not_A Brand";v="24"';

  // Builds a Chromium-family profile (Chrome / Edge), which sends client hints.
  const chromiumProfile = (userAgent: string, secChUa: string, secChUaPlatform: string) => ({
    userAgent,
    acceptLanguage: ACCEPT_LANGUAGE_DEFAULT,
    secChUa,
    secChUaPlatform,
    secChUaMobile: NOT_MOBILE,
  });

  return [
    // Chrome Windows (latest) - typical residential user, use first
    chromiumProfile(
      'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
      CHROME_131_BRANDS,
      PLATFORM_WINDOWS,
    ),
    // Chrome Mac (latest)
    chromiumProfile(
      'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
      CHROME_131_BRANDS,
      PLATFORM_MACOS,
    ),
    // Chrome Windows (120)
    chromiumProfile(
      'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
      '"Chromium";v="120", "Google Chrome";v="120", "Not-A.Brand";v="99"',
      PLATFORM_WINDOWS,
    ),
    // Firefox Windows
    {
      userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:133.0) Gecko/20100101 Firefox/133.0',
      acceptLanguage: 'en-US,en;q=0.5',
    },
    // Safari Mac
    {
      userAgent: 'Mozilla/5.0 (Macintosh; Intel Mac OS X 14_2) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Safari/605.1.15',
      acceptLanguage: ACCEPT_LANGUAGE_DEFAULT,
    },
    // Edge Windows
    chromiumProfile(
      'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 Edg/131.0.0.0',
      '"Microsoft Edge";v="131", "Chromium";v="131", "Not_A Brand";v="24"',
      PLATFORM_WINDOWS,
    ),
  ];
})();
|
||||
|
||||
let currentFingerprintIndex = 0;
|
||||
|
||||
// Module-level holder for the active crawl session; null while no session is
// active. Declared here, ahead of the CrawlSession interface defined later in
// the file, so the fingerprint helpers below can read it.
let currentSession: null | {
  sessionId: string;
  // Fingerprint returned by getFingerprint() while this session is active.
  fingerprint: Fingerprint;
  // Proxy URL associated with the session, or null when there is none.
  proxyUrl: string | null;
  stateCode?: string;
  timezone?: string;
  startedAt: Date;
} = null;
|
||||
|
||||
/**
 * Resolve the fingerprint to use for the next request.
 *
 * @returns the active session's fingerprint when a session exists; otherwise
 *          the pool entry at the current rotation index.
 */
export function getFingerprint(): Fingerprint {
  // An active session always wins over the rotation index.
  return currentSession
    ? currentSession.fingerprint
    : FINGERPRINTS[currentFingerprintIndex];
}
|
||||
|
||||
/**
 * Advance the rotation index to the next pool entry, wrapping at the end.
 *
 * NOTE(review): getFingerprint() prefers the active session's fingerprint, so
 * while a session is active this rotation does not appear to change what
 * getFingerprint() returns - confirm whether that is intended.
 *
 * @returns the fingerprint at the new rotation index.
 */
export function rotateFingerprint(): Fingerprint {
  const nextIndex = (currentFingerprintIndex + 1) % FINGERPRINTS.length;
  currentFingerprintIndex = nextIndex;

  const selected = FINGERPRINTS[nextIndex];
  // Only the first 50 characters of the user agent are logged.
  const userAgentPreview = selected.userAgent.slice(0, 50);
  console.log(`[Dutchie Client] Rotated to fingerprint: ${userAgentPreview}...`);

  return selected;
}
|
||||
|
||||
/**
 * Reset the rotation index so the first pool entry is selected again.
 */
export function resetFingerprint(): void {
  const FIRST_FINGERPRINT_INDEX = 0;
  currentFingerprintIndex = FIRST_FINGERPRINT_INDEX;
}
|
||||
|
||||
/**
 * Pick one fingerprint from the pool uniformly at random (via Math.random).
 * Does not touch the rotation index.
 *
 * @returns a randomly selected pool entry.
 */
export function getRandomFingerprint(): Fingerprint {
  const poolSize = FINGERPRINTS.length;
  return FINGERPRINTS[Math.floor(Math.random() * poolSize)];
}
|
||||
|
||||
// ============================================================
|
||||
// SESSION MANAGEMENT
|
||||
// Per workflow-12102025.md:
|
||||
// - Session identity comes from PROXY LOCATION
|
||||
// - NOT from task params (no stateCode/timezone params)
|
||||
// - Language is always English
|
||||
// Per-session fingerprint rotation for stealth
|
||||
// ============================================================
|
||||
|
||||
export interface CrawlSession {
|
||||
sessionId: string;
|
||||
fingerprint: BrowserFingerprint;
|
||||
fingerprint: Fingerprint;
|
||||
proxyUrl: string | null;
|
||||
proxyTimezone?: string;
|
||||
proxyState?: string;
|
||||
stateCode?: string;
|
||||
timezone?: string;
|
||||
startedAt: Date;
|
||||
// Per workflow-12102025.md: Dynamic Referer per dispensary
|
||||
menuUrl?: string;
|
||||
referer: string;
|
||||
}
|
||||
|
||||
let currentSession: CrawlSession | null = null;
|
||||
// Note: currentSession variable declared earlier in file for proper scoping
|
||||
|
||||
/**
|
||||
* Start a new crawl session
|
||||
*
|
||||
* Per workflow-12102025.md:
|
||||
* - NO state/timezone params - identity comes from proxy location
|
||||
* - Gets fingerprint from CrawlRotator (uses intoli/user-agents)
|
||||
* - Language is always English (en-US)
|
||||
* - Dynamic Referer per dispensary (from menuUrl)
|
||||
*
|
||||
* @param menuUrl - The dispensary's menu URL for dynamic Referer header
|
||||
* Timezone to Accept-Language mapping
|
||||
* US timezones all use en-US but this can be extended for international
|
||||
*/
|
||||
export function startSession(menuUrl?: string): CrawlSession {
|
||||
if (!crawlRotator) {
|
||||
throw new Error('[Dutchie Client] Cannot start session without CrawlRotator');
|
||||
}
|
||||
const TIMEZONE_TO_LOCALE: Record<string, string> = {
|
||||
'America/Phoenix': 'en-US,en;q=0.9',
|
||||
'America/Los_Angeles': 'en-US,en;q=0.9',
|
||||
'America/Denver': 'en-US,en;q=0.9',
|
||||
'America/Chicago': 'en-US,en;q=0.9',
|
||||
'America/New_York': 'en-US,en;q=0.9',
|
||||
'America/Detroit': 'en-US,en;q=0.9',
|
||||
'America/Anchorage': 'en-US,en;q=0.9',
|
||||
'Pacific/Honolulu': 'en-US,en;q=0.9',
|
||||
};
|
||||
|
||||
// Per workflow-12102025.md: get identity from proxy location
|
||||
const proxyLocation = crawlRotator.getProxyLocation();
|
||||
const fingerprint = crawlRotator.userAgent.getCurrent();
|
||||
/**
 * Get Accept-Language header for a given timezone
 *
 * @param timezone - key to look up in TIMEZONE_TO_LOCALE; may be omitted
 * @returns the mapped Accept-Language value, or 'en-US,en;q=0.9' when the
 *          timezone is missing, empty, or not present in the table
 */
export function getLocaleForTimezone(timezone?: string): string {
  const DEFAULT_LOCALE = 'en-US,en;q=0.9';

  if (!timezone) return DEFAULT_LOCALE;

  // Own-property check: a bare bracket lookup on an object literal also
  // resolves inherited Object.prototype members (e.g. 'constructor'), which
  // would hand a non-string value back to callers building headers.
  if (!Object.prototype.hasOwnProperty.call(TIMEZONE_TO_LOCALE, timezone)) {
    return DEFAULT_LOCALE;
  }

  return TIMEZONE_TO_LOCALE[timezone] || DEFAULT_LOCALE;
}
|
||||
|
||||
// Per workflow-12102025.md: Dynamic Referer per dispensary
|
||||
const referer = buildRefererFromMenuUrl(menuUrl);
|
||||
/**
|
||||
* Start a new crawl session with a random fingerprint
|
||||
* Call this before crawling a store to get a fresh identity
|
||||
*/
|
||||
export function startSession(stateCode?: string, timezone?: string): CrawlSession {
|
||||
const baseFp = getRandomFingerprint();
|
||||
|
||||
// Override Accept-Language based on timezone for geographic consistency
|
||||
const fingerprint: Fingerprint = {
|
||||
...baseFp,
|
||||
acceptLanguage: getLocaleForTimezone(timezone),
|
||||
};
|
||||
|
||||
currentSession = {
|
||||
sessionId: `session_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`,
|
||||
fingerprint,
|
||||
proxyUrl: currentProxy,
|
||||
proxyTimezone: proxyLocation?.timezone,
|
||||
proxyState: proxyLocation?.state,
|
||||
stateCode,
|
||||
timezone,
|
||||
startedAt: new Date(),
|
||||
menuUrl,
|
||||
referer,
|
||||
};
|
||||
|
||||
console.log(`[Dutchie Client] Started session ${currentSession.sessionId}`);
|
||||
console.log(`[Dutchie Client] Browser: ${fingerprint.browserName} (${fingerprint.deviceCategory})`);
|
||||
console.log(`[Dutchie Client] DNT: ${fingerprint.httpFingerprint.hasDNT ? 'enabled' : 'disabled'}`);
|
||||
console.log(`[Dutchie Client] TLS: ${fingerprint.httpFingerprint.curlImpersonateBinary}`);
|
||||
console.log(`[Dutchie Client] Referer: ${referer}`);
|
||||
if (proxyLocation?.timezone) {
|
||||
console.log(`[Dutchie Client] Proxy: ${proxyLocation.state || 'unknown'} (${proxyLocation.timezone})`);
|
||||
console.log(`[Dutchie Client] Fingerprint: ${fingerprint.userAgent.slice(0, 50)}...`);
|
||||
console.log(`[Dutchie Client] Accept-Language: ${fingerprint.acceptLanguage}`);
|
||||
if (timezone) {
|
||||
console.log(`[Dutchie Client] Timezone: ${timezone}`);
|
||||
}
|
||||
|
||||
return currentSession;
|
||||
@@ -276,80 +347,48 @@ export function getCurrentSession(): CrawlSession | null {
|
||||
// ============================================================
|
||||
|
||||
/**
|
||||
* Per workflow-12102025.md: Build headers using HTTP fingerprint system
|
||||
* Returns headers in browser-specific order with all natural variations
|
||||
* Build headers for Dutchie requests
|
||||
*/
|
||||
export function buildHeaders(isPost: boolean, contentLength?: number): { headers: Record<string, string>; orderedHeaders: string[] } {
|
||||
if (!currentSession || !crawlRotator) {
|
||||
throw new Error('[Dutchie Client] Cannot build headers without active session');
|
||||
}
|
||||
export function buildHeaders(refererPath: string, fingerprint?: Fingerprint): Record<string, string> {
|
||||
const fp = fingerprint || getFingerprint();
|
||||
const refererUrl = `https://dutchie.com${refererPath}`;
|
||||
|
||||
const fp = currentSession.fingerprint;
|
||||
const httpFp = fp.httpFingerprint;
|
||||
|
||||
// Per workflow-12102025.md: Build context for ordered headers
|
||||
const context: HeaderContext = {
|
||||
userAgent: fp.userAgent,
|
||||
secChUa: fp.secChUa,
|
||||
secChUaPlatform: fp.secChUaPlatform,
|
||||
secChUaMobile: fp.secChUaMobile,
|
||||
referer: currentSession.referer,
|
||||
isPost,
|
||||
contentLength,
|
||||
const headers: Record<string, string> = {
|
||||
'accept': 'application/json, text/plain, */*',
|
||||
'accept-language': fp.acceptLanguage,
|
||||
'content-type': 'application/json',
|
||||
'origin': 'https://dutchie.com',
|
||||
'referer': refererUrl,
|
||||
'user-agent': fp.userAgent,
|
||||
'apollographql-client-name': 'Marketplace (production)',
|
||||
};
|
||||
|
||||
// Per workflow-12102025.md: Get ordered headers from HTTP fingerprint service
|
||||
return buildOrderedHeaders(httpFp, context);
|
||||
if (fp.secChUa) {
|
||||
headers['sec-ch-ua'] = fp.secChUa;
|
||||
headers['sec-ch-ua-mobile'] = fp.secChUaMobile || '?0';
|
||||
headers['sec-ch-ua-platform'] = fp.secChUaPlatform || '"Windows"';
|
||||
headers['sec-fetch-dest'] = 'empty';
|
||||
headers['sec-fetch-mode'] = 'cors';
|
||||
headers['sec-fetch-site'] = 'same-site';
|
||||
}
|
||||
|
||||
return headers;
|
||||
}
|
||||
|
||||
/**
|
||||
* Per workflow-12102025.md: Get curl binary for current session's browser
|
||||
* Uses curl-impersonate for TLS fingerprint matching
|
||||
* Execute HTTP POST using curl (bypasses TLS fingerprinting)
|
||||
*/
|
||||
function getCurlBinaryForSession(): string {
  // Standard curl is used whenever an impersonating binary cannot be selected.
  const STANDARD_CURL = 'curl';

  // No active session: nothing to derive a browser type from.
  if (!currentSession) {
    return STANDARD_CURL;
  }

  const browserType = currentSession.fingerprint.browserName as BrowserType;

  // Per workflow-12102025.md: prefer curl-impersonate when it is available
  // for the session's browser; otherwise warn and fall back.
  if (!isCurlImpersonateAvailable(browserType)) {
    console.warn(`[Dutchie Client] curl-impersonate not available for ${browserType}, using standard curl`);
    return STANDARD_CURL;
  }

  return getCurlBinary(browserType);
}
|
||||
|
||||
/**
|
||||
* Per workflow-12102025.md: Execute HTTP POST using curl/curl-impersonate
|
||||
* - Uses browser-specific TLS fingerprint via curl-impersonate
|
||||
* - Headers sent in browser-specific order
|
||||
* - Dynamic Referer per dispensary
|
||||
*/
|
||||
export function curlPost(url: string, body: any, timeout = 30000): CurlResponse {
|
||||
const bodyJson = JSON.stringify(body);
|
||||
|
||||
// Per workflow-12102025.md: Build ordered headers for POST request
|
||||
const { headers, orderedHeaders } = buildHeaders(true, bodyJson.length);
|
||||
|
||||
// Per workflow-12102025.md: Build header args in browser-specific order
|
||||
const headerArgs = orderedHeaders
|
||||
.filter(h => h !== 'Host' && h !== 'Content-Length') // curl handles these
|
||||
.map(h => `-H '${h}: ${headers[h]}'`)
|
||||
export function curlPost(url: string, body: any, headers: Record<string, string>, timeout = 30000): CurlResponse {
|
||||
const filteredHeaders = Object.entries(headers)
|
||||
.filter(([k]) => k.toLowerCase() !== 'accept-encoding')
|
||||
.map(([k, v]) => `-H '${k}: ${v}'`)
|
||||
.join(' ');
|
||||
|
||||
const bodyEscaped = bodyJson.replace(/'/g, "'\\''");
|
||||
const bodyJson = JSON.stringify(body).replace(/'/g, "'\\''");
|
||||
const timeoutSec = Math.ceil(timeout / 1000);
|
||||
const separator = '___HTTP_STATUS___';
|
||||
const proxyArg = getProxyArg();
|
||||
|
||||
// Per workflow-12102025.md: Use curl-impersonate for TLS fingerprint matching
|
||||
const curlBinary = getCurlBinaryForSession();
|
||||
|
||||
const cmd = `${curlBinary} -s --compressed ${proxyArg} -w '${separator}%{http_code}' --max-time ${timeoutSec} ${headerArgs} -d '${bodyEscaped}' '${url}'`;
|
||||
const cmd = `curl -s --compressed ${proxyArg} -w '${separator}%{http_code}' --max-time ${timeoutSec} ${filteredHeaders} -d '${bodyJson}' '${url}'`;
|
||||
|
||||
try {
|
||||
const output = execSync(cmd, {
|
||||
@@ -388,29 +427,19 @@ export function curlPost(url: string, body: any, timeout = 30000): CurlResponse
|
||||
}
|
||||
|
||||
/**
|
||||
* Per workflow-12102025.md: Execute HTTP GET using curl/curl-impersonate
|
||||
* - Uses browser-specific TLS fingerprint via curl-impersonate
|
||||
* - Headers sent in browser-specific order
|
||||
* - Dynamic Referer per dispensary
|
||||
* Execute HTTP GET using curl (bypasses TLS fingerprinting)
|
||||
* Returns HTML or JSON depending on response content-type
|
||||
*/
|
||||
export function curlGet(url: string, timeout = 30000): CurlResponse {
|
||||
// Per workflow-12102025.md: Build ordered headers for GET request
|
||||
const { headers, orderedHeaders } = buildHeaders(false);
|
||||
|
||||
// Per workflow-12102025.md: Build header args in browser-specific order
|
||||
const headerArgs = orderedHeaders
|
||||
.filter(h => h !== 'Host' && h !== 'Content-Length') // curl handles these
|
||||
.map(h => `-H '${h}: ${headers[h]}'`)
|
||||
export function curlGet(url: string, headers: Record<string, string>, timeout = 30000): CurlResponse {
|
||||
const filteredHeaders = Object.entries(headers)
|
||||
.filter(([k]) => k.toLowerCase() !== 'accept-encoding')
|
||||
.map(([k, v]) => `-H '${k}: ${v}'`)
|
||||
.join(' ');
|
||||
|
||||
const timeoutSec = Math.ceil(timeout / 1000);
|
||||
const separator = '___HTTP_STATUS___';
|
||||
const proxyArg = getProxyArg();
|
||||
|
||||
// Per workflow-12102025.md: Use curl-impersonate for TLS fingerprint matching
|
||||
const curlBinary = getCurlBinaryForSession();
|
||||
|
||||
const cmd = `${curlBinary} -s --compressed ${proxyArg} -w '${separator}%{http_code}' --max-time ${timeoutSec} ${headerArgs} '${url}'`;
|
||||
const cmd = `curl -s --compressed ${proxyArg} -w '${separator}%{http_code}' --max-time ${timeoutSec} ${filteredHeaders} '${url}'`;
|
||||
|
||||
try {
|
||||
const output = execSync(cmd, {
|
||||
@@ -430,6 +459,7 @@ export function curlGet(url: string, timeout = 30000): CurlResponse {
|
||||
const responseBody = output.slice(0, separatorIndex);
|
||||
const statusCode = parseInt(output.slice(separatorIndex + separator.length).trim(), 10);
|
||||
|
||||
// Try to parse as JSON, otherwise return as string (HTML)
|
||||
try {
|
||||
return { status: statusCode, data: JSON.parse(responseBody) };
|
||||
} catch {
|
||||
@@ -446,22 +476,16 @@ export function curlGet(url: string, timeout = 30000): CurlResponse {
|
||||
|
||||
// ============================================================
|
||||
// GRAPHQL EXECUTION
|
||||
// Per workflow-12102025.md:
|
||||
// - On 403: immediately rotate IP + fingerprint (no delay first)
|
||||
// - Then retry
|
||||
// ============================================================
|
||||
|
||||
export interface ExecuteGraphQLOptions {
|
||||
maxRetries?: number;
|
||||
retryOn403?: boolean;
|
||||
cName?: string;
|
||||
cName?: string; // Optional - used for Referer header, defaults to 'cities'
|
||||
}
|
||||
|
||||
/**
|
||||
* Per workflow-12102025.md: Execute GraphQL query with curl/curl-impersonate
|
||||
* - Uses browser-specific TLS fingerprint
|
||||
* - Headers in browser-specific order
|
||||
* - On 403: immediately rotate IP + fingerprint, then retry
|
||||
* Execute GraphQL query with curl (bypasses TLS fingerprinting)
|
||||
*/
|
||||
export async function executeGraphQL(
|
||||
operationName: string,
|
||||
@@ -469,12 +493,7 @@ export async function executeGraphQL(
|
||||
hash: string,
|
||||
options: ExecuteGraphQLOptions
|
||||
): Promise<any> {
|
||||
const { maxRetries = 3, retryOn403 = true } = options;
|
||||
|
||||
// Per workflow-12102025.md: Session must be active for requests
|
||||
if (!currentSession) {
|
||||
throw new Error('[Dutchie Client] Cannot execute GraphQL without active session - call startSession() first');
|
||||
}
|
||||
const { maxRetries = 3, retryOn403 = true, cName = 'cities' } = options;
|
||||
|
||||
const body = {
|
||||
operationName,
|
||||
@@ -488,14 +507,14 @@ export async function executeGraphQL(
|
||||
let attempt = 0;
|
||||
|
||||
while (attempt <= maxRetries) {
|
||||
const fingerprint = getFingerprint();
|
||||
const headers = buildHeaders(`/embedded-menu/${cName}`, fingerprint);
|
||||
|
||||
console.log(`[Dutchie Client] curl POST ${operationName} (attempt ${attempt + 1}/${maxRetries + 1})`);
|
||||
|
||||
const startTime = Date.now();
|
||||
// Per workflow-12102025.md: curlPost now uses ordered headers and curl-impersonate
|
||||
const response = curlPost(DUTCHIE_CONFIG.graphqlEndpoint, body, DUTCHIE_CONFIG.timeout);
|
||||
const responseTime = Date.now() - startTime;
|
||||
const response = curlPost(DUTCHIE_CONFIG.graphqlEndpoint, body, headers, DUTCHIE_CONFIG.timeout);
|
||||
|
||||
console.log(`[Dutchie Client] Response status: ${response.status} (${responseTime}ms)`);
|
||||
console.log(`[Dutchie Client] Response status: ${response.status}`);
|
||||
|
||||
if (response.error) {
|
||||
console.error(`[Dutchie Client] curl error: ${response.error}`);
|
||||
@@ -508,9 +527,6 @@ export async function executeGraphQL(
|
||||
}
|
||||
|
||||
if (response.status === 200) {
|
||||
// Per workflow-12102025.md: success resets consecutive 403 count
|
||||
await recordProxySuccess(responseTime);
|
||||
|
||||
if (response.data?.errors?.length > 0) {
|
||||
console.warn(`[Dutchie Client] GraphQL errors: ${JSON.stringify(response.data.errors[0])}`);
|
||||
}
|
||||
@@ -518,20 +534,10 @@ export async function executeGraphQL(
|
||||
}
|
||||
|
||||
if (response.status === 403 && retryOn403) {
|
||||
// Per workflow-12102025.md: immediately rotate IP + fingerprint
|
||||
console.warn(`[Dutchie Client] 403 blocked - immediately rotating proxy + fingerprint...`);
|
||||
const hasMoreProxies = await handle403Block();
|
||||
|
||||
if (!hasMoreProxies) {
|
||||
throw new Error('All proxies exhausted - no more IPs available');
|
||||
}
|
||||
|
||||
// Per workflow-12102025.md: Update session referer after rotation
|
||||
currentSession.referer = buildRefererFromMenuUrl(currentSession.menuUrl);
|
||||
|
||||
console.warn(`[Dutchie Client] 403 blocked - rotating fingerprint...`);
|
||||
rotateFingerprint();
|
||||
attempt++;
|
||||
// Per workflow-12102025.md: small backoff after rotation
|
||||
await sleep(500);
|
||||
await sleep(1000 * attempt);
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -560,10 +566,8 @@ export interface FetchPageOptions {
|
||||
}
|
||||
|
||||
/**
|
||||
* Per workflow-12102025.md: Fetch HTML page from Dutchie
|
||||
* - Uses browser-specific TLS fingerprint
|
||||
* - Headers in browser-specific order
|
||||
* - Same 403 handling as GraphQL
|
||||
* Fetch HTML page from Dutchie (for city pages, dispensary pages, etc.)
|
||||
* Returns raw HTML string
|
||||
*/
|
||||
export async function fetchPage(
|
||||
path: string,
|
||||
@@ -572,22 +576,32 @@ export async function fetchPage(
|
||||
const { maxRetries = 3, retryOn403 = true } = options;
|
||||
const url = `${DUTCHIE_CONFIG.baseUrl}${path}`;
|
||||
|
||||
// Per workflow-12102025.md: Session must be active for requests
|
||||
if (!currentSession) {
|
||||
throw new Error('[Dutchie Client] Cannot fetch page without active session - call startSession() first');
|
||||
}
|
||||
|
||||
let attempt = 0;
|
||||
|
||||
while (attempt <= maxRetries) {
|
||||
// Per workflow-12102025.md: curlGet now uses ordered headers and curl-impersonate
|
||||
const fingerprint = getFingerprint();
|
||||
const headers: Record<string, string> = {
|
||||
'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',
|
||||
'accept-language': fingerprint.acceptLanguage,
|
||||
'user-agent': fingerprint.userAgent,
|
||||
};
|
||||
|
||||
if (fingerprint.secChUa) {
|
||||
headers['sec-ch-ua'] = fingerprint.secChUa;
|
||||
headers['sec-ch-ua-mobile'] = fingerprint.secChUaMobile || '?0';
|
||||
headers['sec-ch-ua-platform'] = fingerprint.secChUaPlatform || '"Windows"';
|
||||
headers['sec-fetch-dest'] = 'document';
|
||||
headers['sec-fetch-mode'] = 'navigate';
|
||||
headers['sec-fetch-site'] = 'none';
|
||||
headers['sec-fetch-user'] = '?1';
|
||||
headers['upgrade-insecure-requests'] = '1';
|
||||
}
|
||||
|
||||
console.log(`[Dutchie Client] curl GET ${path} (attempt ${attempt + 1}/${maxRetries + 1})`);
|
||||
|
||||
const startTime = Date.now();
|
||||
const response = curlGet(url, DUTCHIE_CONFIG.timeout);
|
||||
const responseTime = Date.now() - startTime;
|
||||
const response = curlGet(url, headers, DUTCHIE_CONFIG.timeout);
|
||||
|
||||
console.log(`[Dutchie Client] Response status: ${response.status} (${responseTime}ms)`);
|
||||
console.log(`[Dutchie Client] Response status: ${response.status}`);
|
||||
|
||||
if (response.error) {
|
||||
console.error(`[Dutchie Client] curl error: ${response.error}`);
|
||||
@@ -599,26 +613,14 @@ export async function fetchPage(
|
||||
}
|
||||
|
||||
if (response.status === 200) {
|
||||
// Per workflow-12102025.md: success resets consecutive 403 count
|
||||
await recordProxySuccess(responseTime);
|
||||
return { html: response.data, status: response.status };
|
||||
}
|
||||
|
||||
if (response.status === 403 && retryOn403) {
|
||||
// Per workflow-12102025.md: immediately rotate IP + fingerprint
|
||||
console.warn(`[Dutchie Client] 403 blocked - immediately rotating proxy + fingerprint...`);
|
||||
const hasMoreProxies = await handle403Block();
|
||||
|
||||
if (!hasMoreProxies) {
|
||||
throw new Error('All proxies exhausted - no more IPs available');
|
||||
}
|
||||
|
||||
// Per workflow-12102025.md: Update session after rotation
|
||||
currentSession.referer = buildRefererFromMenuUrl(currentSession.menuUrl);
|
||||
|
||||
console.warn(`[Dutchie Client] 403 blocked - rotating fingerprint...`);
|
||||
rotateFingerprint();
|
||||
attempt++;
|
||||
// Per workflow-12102025.md: small backoff after rotation
|
||||
await sleep(500);
|
||||
await sleep(1000 * attempt);
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
@@ -6,17 +6,22 @@
|
||||
*/
|
||||
|
||||
export {
|
||||
// HTTP Client (per workflow-12102025.md: uses curl-impersonate + ordered headers)
|
||||
// HTTP Client
|
||||
curlPost,
|
||||
curlGet,
|
||||
executeGraphQL,
|
||||
fetchPage,
|
||||
extractNextData,
|
||||
|
||||
// Headers (per workflow-12102025.md: browser-specific ordering)
|
||||
// Headers & Fingerprints
|
||||
buildHeaders,
|
||||
getFingerprint,
|
||||
rotateFingerprint,
|
||||
resetFingerprint,
|
||||
getRandomFingerprint,
|
||||
getLocaleForTimezone,
|
||||
|
||||
// Session Management (per workflow-12102025.md: menuUrl for dynamic Referer)
|
||||
// Session Management (per-store fingerprint rotation)
|
||||
startSession,
|
||||
endSession,
|
||||
getCurrentSession,
|
||||
|
||||
@@ -7,23 +7,15 @@
|
||||
* Routes are prefixed with /api/analytics/v2
|
||||
*
|
||||
* Phase 3: Analytics Engine + Rec/Med by State
|
||||
*
|
||||
* SECURITY: All routes require authentication via authMiddleware.
|
||||
* Access is granted to:
|
||||
* - Trusted origins (cannaiq.co, findadispo.com, etc.)
|
||||
* - Trusted IPs (localhost, internal pods)
|
||||
* - Valid JWT or API tokens
|
||||
*/
|
||||
|
||||
import { Router, Request, Response } from 'express';
|
||||
import { Pool } from 'pg';
|
||||
import { authMiddleware } from '../auth/middleware';
|
||||
import { PriceAnalyticsService } from '../services/analytics/PriceAnalyticsService';
|
||||
import { BrandPenetrationService } from '../services/analytics/BrandPenetrationService';
|
||||
import { CategoryAnalyticsService } from '../services/analytics/CategoryAnalyticsService';
|
||||
import { StoreAnalyticsService } from '../services/analytics/StoreAnalyticsService';
|
||||
import { StateAnalyticsService } from '../services/analytics/StateAnalyticsService';
|
||||
import { BrandIntelligenceService } from '../services/analytics/BrandIntelligenceService';
|
||||
import { TimeWindow, LegalType } from '../services/analytics/types';
|
||||
|
||||
function parseTimeWindow(window?: string): TimeWindow {
|
||||
@@ -43,17 +35,12 @@ function parseLegalType(legalType?: string): LegalType {
|
||||
export function createAnalyticsV2Router(pool: Pool): Router {
|
||||
const router = Router();
|
||||
|
||||
// SECURITY: Apply auth middleware to ALL routes
|
||||
// This gate ensures only authenticated requests can access analytics data
|
||||
router.use(authMiddleware);
|
||||
|
||||
// Initialize services
|
||||
const priceService = new PriceAnalyticsService(pool);
|
||||
const brandService = new BrandPenetrationService(pool);
|
||||
const categoryService = new CategoryAnalyticsService(pool);
|
||||
const storeService = new StoreAnalyticsService(pool);
|
||||
const stateService = new StateAnalyticsService(pool);
|
||||
const brandIntelligenceService = new BrandIntelligenceService(pool);
|
||||
|
||||
// ============================================================
|
||||
// PRICE ANALYTICS
|
||||
@@ -244,76 +231,6 @@ export function createAnalyticsV2Router(pool: Pool): Router {
|
||||
}
|
||||
});
|
||||
|
||||
/**
 * GET /brand/:name/promotions
 * Get brand promotional history - tracks specials, discounts, duration, and sales estimates
 *
 * Query params:
 * - window: 7d|30d|90d (default: 90d)
 * - state: state code filter (e.g., AZ)
 * - category: category filter (e.g., Flower)
 *
 * Responds with the service result as JSON, or 500 with an error payload when
 * the lookup throws.
 */
router.get('/brand/:name/promotions', async (req: Request, res: Response) => {
  try {
    // Express has already URL-decoded route params. Decoding a second time
    // throws URIError for brand names containing a literal '%' and corrupts
    // names that contain percent-escape-looking text, so use the value as-is.
    const brandName = req.params.name;
    // NOTE(review): the '90d' fallback only applies if parseTimeWindow returns
    // a falsy value; if it already substitutes its own default, the documented
    // 90d default is never used - confirm against parseTimeWindow.
    const window = parseTimeWindow(req.query.window as string) || '90d';
    const stateCode = req.query.state as string | undefined;
    const category = req.query.category as string | undefined;

    const result = await brandService.getBrandPromotionalHistory(brandName, {
      window,
      stateCode,
      category,
    });
    res.json(result);
  } catch (error) {
    console.error('[AnalyticsV2] Brand promotions error:', error);
    res.status(500).json({ error: 'Failed to fetch brand promotional history' });
  }
});
|
||||
|
||||
/**
 * GET /brand/:name/intelligence
 * Get comprehensive B2B brand intelligence dashboard data
 *
 * Returns all brand metrics in a single unified response:
 * - Performance Snapshot (active SKUs, revenue, stores, market share)
 * - Alerts/Slippage (lost stores, delisted SKUs, competitor takeovers)
 * - Product Velocity (daily rates, velocity status)
 * - Retail Footprint (penetration, whitespace opportunities)
 * - Competitive Landscape (price position, market share trend)
 * - Inventory Health (days of stock, risk levels)
 * - Promotion Effectiveness (baseline vs promo velocity, ROI)
 *
 * Query params:
 * - window: 7d|30d|90d (default: 30d)
 * - state: state code filter (e.g., AZ)
 * - category: category filter (e.g., Flower)
 *
 * Responds 404 when the service returns no result for the brand, and 500 with
 * an error payload when the lookup throws.
 */
router.get('/brand/:name/intelligence', async (req: Request, res: Response) => {
  try {
    // Express has already URL-decoded route params. Decoding a second time
    // throws URIError for brand names containing a literal '%' and corrupts
    // names that contain percent-escape-looking text, so use the value as-is.
    const brandName = req.params.name;
    const window = parseTimeWindow(req.query.window as string);
    const stateCode = req.query.state as string | undefined;
    const category = req.query.category as string | undefined;

    const result = await brandIntelligenceService.getBrandIntelligence(brandName, {
      window,
      stateCode,
      category,
    });

    if (!result) {
      return res.status(404).json({ error: 'Brand not found' });
    }

    res.json(result);
  } catch (error) {
    console.error('[AnalyticsV2] Brand intelligence error:', error);
    res.status(500).json({ error: 'Failed to fetch brand intelligence' });
  }
});
|
||||
|
||||
// ============================================================
|
||||
// CATEGORY ANALYTICS
|
||||
// ============================================================
|
||||
@@ -483,31 +400,6 @@ export function createAnalyticsV2Router(pool: Pool): Router {
|
||||
}
|
||||
});
|
||||
|
||||
/**
 * GET /store/:id/quantity-changes
 * Get quantity changes for a store (increases/decreases)
 * Useful for estimating sales (decreases) or restocks (increases)
 *
 * Query params:
 * - window: 7d|30d|90d (default: 7d)
 * - direction: increase|decrease|all (default: all; unrecognised values are treated as all)
 * - limit: number (default: 100; non-numeric or negative values fall back to 100)
 *
 * Responds 400 when :id is not an integer, and 500 with an error payload when
 * the lookup throws.
 */
router.get('/store/:id/quantity-changes', async (req: Request, res: Response) => {
  try {
    // Explicit radix, and reject a non-numeric id up front instead of
    // forwarding NaN to the service.
    const dispensaryId = parseInt(req.params.id, 10);
    if (Number.isNaN(dispensaryId)) {
      return res.status(400).json({ error: 'Invalid store id' });
    }

    const window = parseTimeWindow(req.query.window as string);

    // Only the two specific directions are honoured; anything else (missing,
    // repeated, or unknown values) means "all", so the value passed on really
    // is one of the three the type promises.
    const requestedDirection = req.query.direction;
    const direction: 'increase' | 'decrease' | 'all' =
      requestedDirection === 'increase' || requestedDirection === 'decrease'
        ? requestedDirection
        : 'all';

    // A missing, non-numeric, or negative limit falls back to the default.
    const DEFAULT_LIMIT = 100;
    const parsedLimit = parseInt(req.query.limit as string, 10);
    const limit = Number.isNaN(parsedLimit) || parsedLimit < 0 ? DEFAULT_LIMIT : parsedLimit;

    const result = await storeService.getQuantityChanges(dispensaryId, { window, direction, limit });
    res.json(result);
  } catch (error) {
    console.error('[AnalyticsV2] Store quantity changes error:', error);
    res.status(500).json({ error: 'Failed to fetch store quantity changes' });
  }
});
|
||||
|
||||
/**
|
||||
* GET /store/:id/inventory
|
||||
* Get store inventory composition
|
||||
|
||||
@@ -47,27 +47,4 @@ router.post('/refresh', authMiddleware, async (req: AuthRequest, res) => {
|
||||
res.json({ token });
|
||||
});
|
||||
|
||||
// Verify password for sensitive actions (requires current user to be authenticated).
// Responds { verified: true } when authenticateUser returns a user for the
// current user's email and the supplied password, 401 with verified: false
// when it does not, and 400 when the password is missing, empty, or not a string.
router.post('/verify-password', authMiddleware, async (req: AuthRequest, res) => {
  try {
    // Tolerate an absent body so the request is answered with 400 rather than 500.
    const { password } = req.body || {};

    // Only a non-empty string is a usable password; anything else is a client error.
    if (typeof password !== 'string' || !password) {
      return res.status(400).json({ error: 'Password required' });
    }

    // Re-authenticate the current user with the provided password
    const user = await authenticateUser(req.user!.email, password);

    if (!user) {
      return res.status(401).json({ error: 'Invalid password', verified: false });
    }

    res.json({ verified: true });
  } catch (error) {
    console.error('Password verification error:', error);
    res.status(500).json({ error: 'Internal server error' });
  }
});
|
||||
|
||||
export default router;
|
||||
|
||||
@@ -14,56 +14,35 @@ router.use(authMiddleware);
|
||||
/**
|
||||
* GET /api/admin/intelligence/brands
|
||||
* List all brands with state presence, store counts, and pricing
|
||||
* Query params:
|
||||
* - state: Filter by state (e.g., "AZ")
|
||||
* - limit: Max results (default 500)
|
||||
* - offset: Pagination offset
|
||||
*/
|
||||
router.get('/brands', async (req: Request, res: Response) => {
|
||||
try {
|
||||
const { limit = '500', offset = '0', state } = req.query;
|
||||
const { limit = '500', offset = '0' } = req.query;
|
||||
const limitNum = Math.min(parseInt(limit as string, 10), 1000);
|
||||
const offsetNum = parseInt(offset as string, 10);
|
||||
|
||||
// Build WHERE clause based on state filter
|
||||
let stateFilter = '';
|
||||
const params: any[] = [limitNum, offsetNum];
|
||||
if (state && state !== 'all') {
|
||||
stateFilter = 'AND d.state = $3';
|
||||
params.push(state);
|
||||
}
|
||||
|
||||
const { rows } = await pool.query(`
|
||||
SELECT
|
||||
sp.brand_name_raw as brand_name,
|
||||
array_agg(DISTINCT d.state) FILTER (WHERE d.state IS NOT NULL) as states,
|
||||
COUNT(DISTINCT d.id) as store_count,
|
||||
COUNT(DISTINCT sp.id) as sku_count,
|
||||
ROUND(AVG(sp.price_rec) FILTER (WHERE sp.price_rec > 0)::numeric, 2) as avg_price_rec,
|
||||
ROUND(AVG(sp.price_med) FILTER (WHERE sp.price_med > 0)::numeric, 2) as avg_price_med
|
||||
ROUND(AVG(sp.price_rec)::numeric, 2) FILTER (WHERE sp.price_rec > 0) as avg_price_rec,
|
||||
ROUND(AVG(sp.price_med)::numeric, 2) FILTER (WHERE sp.price_med > 0) as avg_price_med
|
||||
FROM store_products sp
|
||||
JOIN dispensaries d ON sp.dispensary_id = d.id
|
||||
WHERE sp.brand_name_raw IS NOT NULL AND sp.brand_name_raw != ''
|
||||
${stateFilter}
|
||||
GROUP BY sp.brand_name_raw
|
||||
ORDER BY store_count DESC, sku_count DESC
|
||||
LIMIT $1 OFFSET $2
|
||||
`, params);
|
||||
`, [limitNum, offsetNum]);
|
||||
|
||||
// Get total count with same state filter
|
||||
const countParams: any[] = [];
|
||||
let countStateFilter = '';
|
||||
if (state && state !== 'all') {
|
||||
countStateFilter = 'AND d.state = $1';
|
||||
countParams.push(state);
|
||||
}
|
||||
// Get total count
|
||||
const { rows: countRows } = await pool.query(`
|
||||
SELECT COUNT(DISTINCT sp.brand_name_raw) as total
|
||||
FROM store_products sp
|
||||
JOIN dispensaries d ON sp.dispensary_id = d.id
|
||||
WHERE sp.brand_name_raw IS NOT NULL AND sp.brand_name_raw != ''
|
||||
${countStateFilter}
|
||||
`, countParams);
|
||||
SELECT COUNT(DISTINCT brand_name_raw) as total
|
||||
FROM store_products
|
||||
WHERE brand_name_raw IS NOT NULL AND brand_name_raw != ''
|
||||
`);
|
||||
|
||||
res.json({
|
||||
brands: rows.map((r: any) => ({
|
||||
@@ -168,63 +147,29 @@ router.get('/brands/:brandName/penetration', async (req: Request, res: Response)
|
||||
/**
|
||||
* GET /api/admin/intelligence/pricing
|
||||
* Get pricing analytics by category
|
||||
* Query params:
|
||||
* - state: Filter by state (e.g., "AZ")
|
||||
*/
|
||||
router.get('/pricing', async (req: Request, res: Response) => {
|
||||
try {
|
||||
const { state } = req.query;
|
||||
const { rows: categoryRows } = await pool.query(`
|
||||
SELECT
|
||||
sp.category_raw as category,
|
||||
ROUND(AVG(sp.price_rec)::numeric, 2) as avg_price,
|
||||
MIN(sp.price_rec) FILTER (WHERE sp.price_rec > 0) as min_price,
|
||||
MAX(sp.price_rec) as max_price,
|
||||
ROUND(PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY sp.price_rec)::numeric, 2)
|
||||
FILTER (WHERE sp.price_rec > 0) as median_price,
|
||||
COUNT(*) as product_count
|
||||
FROM store_products sp
|
||||
WHERE sp.category_raw IS NOT NULL AND sp.price_rec > 0
|
||||
GROUP BY sp.category_raw
|
||||
ORDER BY product_count DESC
|
||||
`);
|
||||
|
||||
// Build WHERE clause based on state filter
|
||||
let stateFilter = '';
|
||||
const categoryParams: any[] = [];
|
||||
const stateQueryParams: any[] = [];
|
||||
const overallParams: any[] = [];
|
||||
|
||||
if (state && state !== 'all') {
|
||||
stateFilter = 'AND d.state = $1';
|
||||
categoryParams.push(state);
|
||||
overallParams.push(state);
|
||||
}
|
||||
|
||||
// Category pricing with optional state filter
|
||||
const categoryQuery = state && state !== 'all'
|
||||
? `
|
||||
SELECT
|
||||
sp.category_raw as category,
|
||||
ROUND(AVG(sp.price_rec)::numeric, 2) as avg_price,
|
||||
MIN(sp.price_rec) as min_price,
|
||||
MAX(sp.price_rec) as max_price,
|
||||
ROUND(PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY sp.price_rec)::numeric, 2) as median_price,
|
||||
COUNT(*) as product_count
|
||||
FROM store_products sp
|
||||
JOIN dispensaries d ON sp.dispensary_id = d.id
|
||||
WHERE sp.category_raw IS NOT NULL AND sp.price_rec > 0 ${stateFilter}
|
||||
GROUP BY sp.category_raw
|
||||
ORDER BY product_count DESC
|
||||
`
|
||||
: `
|
||||
SELECT
|
||||
sp.category_raw as category,
|
||||
ROUND(AVG(sp.price_rec)::numeric, 2) as avg_price,
|
||||
MIN(sp.price_rec) as min_price,
|
||||
MAX(sp.price_rec) as max_price,
|
||||
ROUND(PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY sp.price_rec)::numeric, 2) as median_price,
|
||||
COUNT(*) as product_count
|
||||
FROM store_products sp
|
||||
WHERE sp.category_raw IS NOT NULL AND sp.price_rec > 0
|
||||
GROUP BY sp.category_raw
|
||||
ORDER BY product_count DESC
|
||||
`;
|
||||
|
||||
const { rows: categoryRows } = await pool.query(categoryQuery, categoryParams);
|
||||
|
||||
// State pricing
|
||||
const { rows: stateRows } = await pool.query(`
|
||||
SELECT
|
||||
d.state,
|
||||
ROUND(AVG(sp.price_rec)::numeric, 2) as avg_price,
|
||||
MIN(sp.price_rec) as min_price,
|
||||
MIN(sp.price_rec) FILTER (WHERE sp.price_rec > 0) as min_price,
|
||||
MAX(sp.price_rec) as max_price,
|
||||
COUNT(DISTINCT sp.id) as product_count
|
||||
FROM store_products sp
|
||||
@@ -234,31 +179,6 @@ router.get('/pricing', async (req: Request, res: Response) => {
|
||||
ORDER BY avg_price DESC
|
||||
`);
|
||||
|
||||
// Overall stats with optional state filter
|
||||
const overallQuery = state && state !== 'all'
|
||||
? `
|
||||
SELECT
|
||||
ROUND(AVG(sp.price_rec)::numeric, 2) as avg_price,
|
||||
MIN(sp.price_rec) as min_price,
|
||||
MAX(sp.price_rec) as max_price,
|
||||
COUNT(*) as total_products
|
||||
FROM store_products sp
|
||||
JOIN dispensaries d ON sp.dispensary_id = d.id
|
||||
WHERE sp.price_rec > 0 ${stateFilter}
|
||||
`
|
||||
: `
|
||||
SELECT
|
||||
ROUND(AVG(sp.price_rec)::numeric, 2) as avg_price,
|
||||
MIN(sp.price_rec) as min_price,
|
||||
MAX(sp.price_rec) as max_price,
|
||||
COUNT(*) as total_products
|
||||
FROM store_products sp
|
||||
WHERE sp.price_rec > 0
|
||||
`;
|
||||
|
||||
const { rows: overallRows } = await pool.query(overallQuery, overallParams);
|
||||
const overall = overallRows[0];
|
||||
|
||||
res.json({
|
||||
byCategory: categoryRows.map((r: any) => ({
|
||||
category: r.category,
|
||||
@@ -275,12 +195,6 @@ router.get('/pricing', async (req: Request, res: Response) => {
|
||||
maxPrice: r.max_price ? parseFloat(r.max_price) : null,
|
||||
productCount: parseInt(r.product_count, 10),
|
||||
})),
|
||||
overall: {
|
||||
avgPrice: overall?.avg_price ? parseFloat(overall.avg_price) : null,
|
||||
minPrice: overall?.min_price ? parseFloat(overall.min_price) : null,
|
||||
maxPrice: overall?.max_price ? parseFloat(overall.max_price) : null,
|
||||
totalProducts: parseInt(overall?.total_products || '0', 10),
|
||||
},
|
||||
});
|
||||
} catch (error: any) {
|
||||
console.error('[Intelligence] Error fetching pricing:', error.message);
|
||||
@@ -291,23 +205,9 @@ router.get('/pricing', async (req: Request, res: Response) => {
|
||||
/**
|
||||
* GET /api/admin/intelligence/stores
|
||||
* Get store intelligence summary
|
||||
* Query params:
|
||||
* - state: Filter by state (e.g., "AZ")
|
||||
* - limit: Max results (default 200)
|
||||
*/
|
||||
router.get('/stores', async (req: Request, res: Response) => {
|
||||
try {
|
||||
const { state, limit = '200' } = req.query;
|
||||
const limitNum = Math.min(parseInt(limit as string, 10), 500);
|
||||
|
||||
// Build WHERE clause based on state filter
|
||||
let stateFilter = '';
|
||||
const params: any[] = [limitNum];
|
||||
if (state && state !== 'all') {
|
||||
stateFilter = 'AND d.state = $2';
|
||||
params.push(state);
|
||||
}
|
||||
|
||||
const { rows: storeRows } = await pool.query(`
|
||||
SELECT
|
||||
d.id,
|
||||
@@ -317,22 +217,17 @@ router.get('/stores', async (req: Request, res: Response) => {
|
||||
d.state,
|
||||
d.menu_type,
|
||||
d.crawl_enabled,
|
||||
c.name as chain_name,
|
||||
COUNT(DISTINCT sp.id) as sku_count,
|
||||
COUNT(DISTINCT sp.id) as product_count,
|
||||
COUNT(DISTINCT sp.brand_name_raw) as brand_count,
|
||||
ROUND(AVG(sp.price_rec)::numeric, 2) as avg_price,
|
||||
MAX(sp.updated_at) as last_crawl,
|
||||
(SELECT COUNT(*) FROM store_product_snapshots sps
|
||||
WHERE sps.store_product_id IN (SELECT id FROM store_products WHERE dispensary_id = d.id)) as snapshot_count
|
||||
MAX(sp.updated_at) as last_product_update
|
||||
FROM dispensaries d
|
||||
LEFT JOIN store_products sp ON sp.dispensary_id = d.id
|
||||
LEFT JOIN chains c ON d.chain_id = c.id
|
||||
WHERE d.state IS NOT NULL AND d.crawl_enabled = true
|
||||
${stateFilter}
|
||||
GROUP BY d.id, d.name, d.dba_name, d.city, d.state, d.menu_type, d.crawl_enabled, c.name
|
||||
ORDER BY sku_count DESC
|
||||
LIMIT $1
|
||||
`, params);
|
||||
WHERE d.state IS NOT NULL
|
||||
GROUP BY d.id, d.name, d.dba_name, d.city, d.state, d.menu_type, d.crawl_enabled
|
||||
ORDER BY product_count DESC
|
||||
LIMIT 200
|
||||
`);
|
||||
|
||||
res.json({
|
||||
stores: storeRows.map((r: any) => ({
|
||||
@@ -343,13 +238,10 @@ router.get('/stores', async (req: Request, res: Response) => {
|
||||
state: r.state,
|
||||
menuType: r.menu_type,
|
||||
crawlEnabled: r.crawl_enabled,
|
||||
chainName: r.chain_name || null,
|
||||
skuCount: parseInt(r.sku_count || '0', 10),
|
||||
snapshotCount: parseInt(r.snapshot_count || '0', 10),
|
||||
productCount: parseInt(r.product_count || '0', 10),
|
||||
brandCount: parseInt(r.brand_count || '0', 10),
|
||||
avgPrice: r.avg_price ? parseFloat(r.avg_price) : null,
|
||||
lastCrawl: r.last_crawl,
|
||||
crawlFrequencyHours: 4, // Default crawl frequency
|
||||
lastProductUpdate: r.last_product_update,
|
||||
})),
|
||||
total: storeRows.length,
|
||||
});
|
||||
|
||||
@@ -543,9 +543,6 @@ router.post('/bulk-priority', async (req: Request, res: Response) => {
|
||||
|
||||
/**
|
||||
* POST /api/job-queue/enqueue - Add a new job to the queue
|
||||
*
|
||||
* 2024-12-10: Rewired to use worker_tasks via taskService.
|
||||
* Legacy dispensary_crawl_jobs code commented out below.
|
||||
*/
|
||||
router.post('/enqueue', async (req: Request, res: Response) => {
|
||||
try {
|
||||
@@ -555,59 +552,6 @@ router.post('/enqueue', async (req: Request, res: Response) => {
|
||||
return res.status(400).json({ success: false, error: 'dispensary_id is required' });
|
||||
}
|
||||
|
||||
// 2024-12-10: Map legacy job_type to new task role
|
||||
const roleMap: Record<string, string> = {
|
||||
'dutchie_product_crawl': 'product_refresh',
|
||||
'menu_detection': 'entry_point_discovery',
|
||||
'menu_detection_single': 'entry_point_discovery',
|
||||
'product_discovery': 'product_discovery',
|
||||
'store_discovery': 'store_discovery',
|
||||
};
|
||||
const role = roleMap[job_type] || 'product_refresh';
|
||||
|
||||
// 2024-12-10: Use taskService to create task in worker_tasks table
|
||||
const { taskService } = await import('../tasks/task-service');
|
||||
|
||||
// Check if task already pending for this dispensary
|
||||
const existingTasks = await taskService.listTasks({
|
||||
dispensary_id,
|
||||
role: role as any,
|
||||
status: ['pending', 'claimed', 'running'],
|
||||
limit: 1,
|
||||
});
|
||||
|
||||
if (existingTasks.length > 0) {
|
||||
return res.json({
|
||||
success: true,
|
||||
task_id: existingTasks[0].id,
|
||||
message: 'Task already queued'
|
||||
});
|
||||
}
|
||||
|
||||
const task = await taskService.createTask({
|
||||
role: role as any,
|
||||
dispensary_id,
|
||||
priority,
|
||||
});
|
||||
|
||||
res.json({ success: true, task_id: task.id, message: 'Task enqueued' });
|
||||
} catch (error: any) {
|
||||
console.error('[JobQueue] Error enqueuing task:', error);
|
||||
res.status(500).json({ success: false, error: error.message });
|
||||
}
|
||||
});
|
||||
|
||||
/*
|
||||
* LEGACY CODE - 2024-12-10: Commented out, was using orphaned dispensary_crawl_jobs table
|
||||
*
|
||||
router.post('/enqueue', async (req: Request, res: Response) => {
|
||||
try {
|
||||
const { dispensary_id, job_type = 'dutchie_product_crawl', priority = 0 } = req.body;
|
||||
|
||||
if (!dispensary_id) {
|
||||
return res.status(400).json({ success: false, error: 'dispensary_id is required' });
|
||||
}
|
||||
|
||||
// Check if job already pending for this dispensary
|
||||
const existing = await pool.query(`
|
||||
SELECT id FROM dispensary_crawl_jobs
|
||||
@@ -641,7 +585,6 @@ router.post('/enqueue', async (req: Request, res: Response) => {
|
||||
res.status(500).json({ success: false, error: error.message });
|
||||
}
|
||||
});
|
||||
*/
|
||||
|
||||
/**
|
||||
* POST /api/job-queue/pause - Pause queue processing
|
||||
@@ -669,8 +612,6 @@ router.get('/paused', async (_req: Request, res: Response) => {
|
||||
/**
|
||||
* POST /api/job-queue/enqueue-batch - Queue multiple dispensaries at once
|
||||
* Body: { dispensary_ids: number[], job_type?: string, priority?: number }
|
||||
*
|
||||
* 2024-12-10: Rewired to use worker_tasks via taskService.
|
||||
*/
|
||||
router.post('/enqueue-batch', async (req: Request, res: Response) => {
|
||||
try {
|
||||
@@ -684,30 +625,35 @@ router.post('/enqueue-batch', async (req: Request, res: Response) => {
|
||||
return res.status(400).json({ success: false, error: 'Maximum 500 dispensaries per batch' });
|
||||
}
|
||||
|
||||
// 2024-12-10: Map legacy job_type to new task role
|
||||
const roleMap: Record<string, string> = {
|
||||
'dutchie_product_crawl': 'product_refresh',
|
||||
'menu_detection': 'entry_point_discovery',
|
||||
'product_discovery': 'product_discovery',
|
||||
};
|
||||
const role = roleMap[job_type] || 'product_refresh';
|
||||
|
||||
// 2024-12-10: Use taskService to create tasks in worker_tasks table
|
||||
const { taskService } = await import('../tasks/task-service');
|
||||
|
||||
const tasks = dispensary_ids.map(dispensary_id => ({
|
||||
role: role as any,
|
||||
dispensary_id,
|
||||
priority,
|
||||
}));
|
||||
|
||||
const createdCount = await taskService.createTasks(tasks);
|
||||
// Insert jobs, skipping duplicates
|
||||
const { rows } = await pool.query(`
|
||||
INSERT INTO dispensary_crawl_jobs (dispensary_id, job_type, priority, trigger_type, status, created_at)
|
||||
SELECT
|
||||
d.id,
|
||||
$2::text,
|
||||
$3::integer,
|
||||
'api_batch',
|
||||
'pending',
|
||||
NOW()
|
||||
FROM dispensaries d
|
||||
WHERE d.id = ANY($1::int[])
|
||||
AND d.crawl_enabled = true
|
||||
AND d.platform_dispensary_id IS NOT NULL
|
||||
AND NOT EXISTS (
|
||||
SELECT 1 FROM dispensary_crawl_jobs cj
|
||||
WHERE cj.dispensary_id = d.id
|
||||
AND cj.job_type = $2::text
|
||||
AND cj.status IN ('pending', 'running')
|
||||
)
|
||||
RETURNING id, dispensary_id
|
||||
`, [dispensary_ids, job_type, priority]);
|
||||
|
||||
res.json({
|
||||
success: true,
|
||||
queued: createdCount,
|
||||
queued: rows.length,
|
||||
requested: dispensary_ids.length,
|
||||
message: `Queued ${createdCount} of ${dispensary_ids.length} dispensaries`
|
||||
job_ids: rows.map(r => r.id),
|
||||
message: `Queued ${rows.length} of ${dispensary_ids.length} dispensaries`
|
||||
});
|
||||
} catch (error: any) {
|
||||
console.error('[JobQueue] Error batch enqueuing:', error);
|
||||
@@ -718,8 +664,6 @@ router.post('/enqueue-batch', async (req: Request, res: Response) => {
|
||||
/**
|
||||
* POST /api/job-queue/enqueue-state - Queue all crawl-enabled dispensaries for a state
|
||||
* Body: { state_code: string, job_type?: string, priority?: number, limit?: number }
|
||||
*
|
||||
* 2024-12-10: Rewired to use worker_tasks via taskService.
|
||||
*/
|
||||
router.post('/enqueue-state', async (req: Request, res: Response) => {
|
||||
try {
|
||||
@@ -729,55 +673,52 @@ router.post('/enqueue-state', async (req: Request, res: Response) => {
|
||||
return res.status(400).json({ success: false, error: 'state_code is required (e.g., "AZ")' });
|
||||
}
|
||||
|
||||
// 2024-12-10: Map legacy job_type to new task role
|
||||
const roleMap: Record<string, string> = {
|
||||
'dutchie_product_crawl': 'product_refresh',
|
||||
'menu_detection': 'entry_point_discovery',
|
||||
'product_discovery': 'product_discovery',
|
||||
};
|
||||
const role = roleMap[job_type] || 'product_refresh';
|
||||
|
||||
// Get dispensary IDs for the state
|
||||
const dispensaryResult = await pool.query(`
|
||||
SELECT d.id
|
||||
FROM dispensaries d
|
||||
JOIN states s ON s.id = d.state_id
|
||||
WHERE s.code = $1
|
||||
// Get state_id and queue jobs
|
||||
const { rows } = await pool.query(`
|
||||
WITH target_state AS (
|
||||
SELECT id FROM states WHERE code = $1
|
||||
)
|
||||
INSERT INTO dispensary_crawl_jobs (dispensary_id, job_type, priority, trigger_type, status, created_at)
|
||||
SELECT
|
||||
d.id,
|
||||
$2::text,
|
||||
$3::integer,
|
||||
'api_state',
|
||||
'pending',
|
||||
NOW()
|
||||
FROM dispensaries d, target_state
|
||||
WHERE d.state_id = target_state.id
|
||||
AND d.crawl_enabled = true
|
||||
AND d.platform_dispensary_id IS NOT NULL
|
||||
LIMIT $2
|
||||
`, [state_code.toUpperCase(), limit]);
|
||||
|
||||
const dispensary_ids = dispensaryResult.rows.map((r: any) => r.id);
|
||||
|
||||
// 2024-12-10: Use taskService to create tasks in worker_tasks table
|
||||
const { taskService } = await import('../tasks/task-service');
|
||||
|
||||
const tasks = dispensary_ids.map((dispensary_id: number) => ({
|
||||
role: role as any,
|
||||
dispensary_id,
|
||||
priority,
|
||||
}));
|
||||
|
||||
const createdCount = await taskService.createTasks(tasks);
|
||||
AND NOT EXISTS (
|
||||
SELECT 1 FROM dispensary_crawl_jobs cj
|
||||
WHERE cj.dispensary_id = d.id
|
||||
AND cj.job_type = $2::text
|
||||
AND cj.status IN ('pending', 'running')
|
||||
)
|
||||
LIMIT $4::integer
|
||||
RETURNING id, dispensary_id
|
||||
`, [state_code.toUpperCase(), job_type, priority, limit]);
|
||||
|
||||
// Get total available count
|
||||
const countResult = await pool.query(`
|
||||
WITH target_state AS (
|
||||
SELECT id FROM states WHERE code = $1
|
||||
)
|
||||
SELECT COUNT(*) as total
|
||||
FROM dispensaries d
|
||||
JOIN states s ON s.id = d.state_id
|
||||
WHERE s.code = $1
|
||||
FROM dispensaries d, target_state
|
||||
WHERE d.state_id = target_state.id
|
||||
AND d.crawl_enabled = true
|
||||
AND d.platform_dispensary_id IS NOT NULL
|
||||
`, [state_code.toUpperCase()]);
|
||||
|
||||
res.json({
|
||||
success: true,
|
||||
queued: createdCount,
|
||||
queued: rows.length,
|
||||
total_available: parseInt(countResult.rows[0].total),
|
||||
state: state_code.toUpperCase(),
|
||||
role,
|
||||
message: `Queued ${createdCount} dispensaries for ${state_code.toUpperCase()}`
|
||||
job_type,
|
||||
message: `Queued ${rows.length} dispensaries for ${state_code.toUpperCase()}`
|
||||
});
|
||||
} catch (error: any) {
|
||||
console.error('[JobQueue] Error enqueuing state:', error);
|
||||
|
||||
@@ -1,140 +0,0 @@
|
||||
/**
|
||||
* Kubernetes Control Routes
|
||||
*
|
||||
* Provides admin UI control over k8s resources like worker scaling.
|
||||
* Uses in-cluster config when running in k8s, or kubeconfig locally.
|
||||
*/
|
||||
|
||||
import { Router, Request, Response } from 'express';
|
||||
import * as k8s from '@kubernetes/client-node';
|
||||
|
||||
const router = Router();
|
||||
|
||||
// K8s client setup - lazy initialization
|
||||
let appsApi: k8s.AppsV1Api | null = null;
|
||||
let k8sError: string | null = null;
|
||||
|
||||
/**
 * Lazily creates the AppsV1Api client and caches it in `appsApi`.
 *
 * Returns the cached client when one exists. Returns null when `k8sError`
 * is already set (no retry is attempted), when neither the in-cluster nor
 * the default kubeconfig can be loaded, or when client creation throws.
 */
function getK8sClient(): k8s.AppsV1Api | null {
  if (appsApi) {
    return appsApi;
  }
  if (k8sError) {
    return null;
  }

  try {
    const kubeConfig = new k8s.KubeConfig();

    try {
      // Try in-cluster config first (when running in k8s)
      kubeConfig.loadFromCluster();
      console.log('[K8s] Loaded in-cluster config');
    } catch {
      try {
        // Fall back to default kubeconfig (local dev)
        kubeConfig.loadFromDefault();
        console.log('[K8s] Loaded default kubeconfig');
      } catch {
        k8sError = 'No k8s config available';
        console.log('[K8s] No config available - k8s routes disabled');
        return null;
      }
    }

    const client = kubeConfig.makeApiClient(k8s.AppsV1Api);
    appsApi = client;
    return client;
  } catch (err: any) {
    k8sError = err.message;
    console.error('[K8s] Failed to initialize client:', err.message);
    return null;
  }
}
|
||||
|
||||
const NAMESPACE = process.env.K8S_NAMESPACE || 'dispensary-scraper';
|
||||
const WORKER_DEPLOYMENT = 'scraper-worker';
|
||||
|
||||
/**
 * GET /api/k8s/workers
 * Report the worker deployment's replica counts.
 *
 * When no k8s client is available the response is still 200, with
 * available: false and zeroed counts; a failed deployment read yields 500.
 */
router.get('/workers', async (_req: Request, res: Response) => {
  const client = getK8sClient();

  if (!client) {
    const unavailable = {
      success: true,
      available: false,
      error: k8sError || 'K8s not available',
      replicas: 0,
      readyReplicas: 0,
    };
    return res.json(unavailable);
  }

  try {
    const workerDeployment = await client.readNamespacedDeployment({
      name: WORKER_DEPLOYMENT,
      namespace: NAMESPACE,
    });
    const desired = workerDeployment.spec;
    const observed = workerDeployment.status;

    // Absent counters are reported as 0.
    res.json({
      success: true,
      available: true,
      replicas: desired?.replicas || 0,
      readyReplicas: observed?.readyReplicas || 0,
      availableReplicas: observed?.availableReplicas || 0,
      updatedReplicas: observed?.updatedReplicas || 0,
    });
  } catch (err: any) {
    console.error('[K8s] Error getting deployment:', err.message);
    res.status(500).json({
      success: false,
      error: err.message,
    });
  }
});
|
||||
|
||||
/**
 * POST /api/k8s/workers/scale
 * Scale worker deployment
 * Body: { replicas: number }
 *
 * Responses:
 * - 200 { success, replicas, message } after the scale patch succeeds
 * - 400 when replicas is not an integer in the range 0..50
 * - 503 when no k8s client is available
 * - 500 when the patch call throws
 */
router.post('/workers/scale', async (req: Request, res: Response) => {
  const client = getK8sClient();

  if (!client) {
    return res.status(503).json({
      success: false,
      error: k8sError || 'K8s not available',
    });
  }

  // This runs outside the try block, so guard against an undefined body:
  // destructuring undefined would reject the handler with nothing to catch it.
  const { replicas } = req.body || {};

  // Replica counts must be whole numbers; Number.isInteger also rejects
  // non-number values, NaN and Infinity.
  if (!Number.isInteger(replicas) || replicas < 0 || replicas > 50) {
    return res.status(400).json({
      success: false,
      error: 'replicas must be an integer between 0 and 50',
    });
  }

  try {
    // Patch the deployment to set replicas
    await client.patchNamespacedDeploymentScale({
      name: WORKER_DEPLOYMENT,
      namespace: NAMESPACE,
      body: { spec: { replicas } },
    });

    console.log(`[K8s] Scaled ${WORKER_DEPLOYMENT} to ${replicas} replicas`);

    res.json({
      success: true,
      replicas,
      message: `Scaled to ${replicas} workers`,
    });
  } catch (e: any) {
    console.error('[K8s] Error scaling deployment:', e.message);
    res.status(500).json({
      success: false,
      error: e.message,
    });
  }
});
|
||||
|
||||
export default router;
|
||||
@@ -291,107 +291,6 @@ router.get('/stores/:id/summary', async (req: Request, res: Response) => {
|
||||
}
|
||||
});
|
||||
|
||||
/**
 * GET /api/markets/stores/:id/crawl-history
 * Get crawl history for a specific store
 *
 * Query params:
 * - limit: max history rows (default 50, capped at 100)
 *
 * Response: { dispensary, history, nextSchedule } where dispensary and
 * nextSchedule are null when no matching row exists.
 * Responds 400 when :id is not an integer and 500 when a query throws.
 */
router.get('/stores/:id/crawl-history', async (req: Request, res: Response) => {
  try {
    const { id } = req.params;
    const { limit = '50' } = req.query;

    // Reject ids that do not parse rather than binding NaN into the queries.
    const dispensaryId = parseInt(id, 10);
    if (Number.isNaN(dispensaryId)) {
      return res.status(400).json({ error: 'Invalid store id' });
    }

    // A non-numeric or negative limit falls back to the default of 50.
    const parsedLimit = parseInt(limit as string, 10);
    const limitNum = Number.isNaN(parsedLimit) || parsedLimit < 0
      ? 50
      : Math.min(parsedLimit, 100);

    // Get crawl history from crawl_orchestration_traces
    const { rows: historyRows } = await pool.query(`
      SELECT
        id,
        run_id,
        profile_key,
        crawler_module,
        state_at_start,
        state_at_end,
        total_steps,
        duration_ms,
        success,
        error_message,
        products_found,
        started_at,
        completed_at
      FROM crawl_orchestration_traces
      WHERE dispensary_id = $1
      ORDER BY started_at DESC
      LIMIT $2
    `, [dispensaryId, limitNum]);

    // Get next scheduled crawl if available
    const { rows: scheduleRows } = await pool.query(`
      SELECT
        js.id as schedule_id,
        js.job_name,
        js.enabled,
        js.base_interval_minutes,
        js.jitter_minutes,
        js.next_run_at,
        js.last_run_at,
        js.last_status
      FROM job_schedules js
      WHERE js.enabled = true
        AND js.job_config->>'dispensaryId' = $1::text
      ORDER BY js.next_run_at
      LIMIT 1
    `, [dispensaryId.toString()]);

    // Get dispensary info for slug
    const { rows: dispRows } = await pool.query(`
      SELECT
        id,
        name,
        dba_name,
        slug,
        state,
        city,
        menu_type,
        platform_dispensary_id,
        last_menu_scrape
      FROM dispensaries
      WHERE id = $1
    `, [dispensaryId]);

    res.json({
      dispensary: dispRows[0] || null,
      // Missing timestamps are emitted as null.
      history: historyRows.map(row => ({
        id: row.id,
        runId: row.run_id,
        profileKey: row.profile_key,
        crawlerModule: row.crawler_module,
        stateAtStart: row.state_at_start,
        stateAtEnd: row.state_at_end,
        totalSteps: row.total_steps,
        durationMs: row.duration_ms,
        success: row.success,
        errorMessage: row.error_message,
        productsFound: row.products_found,
        startedAt: row.started_at?.toISOString() || null,
        completedAt: row.completed_at?.toISOString() || null,
      })),
      nextSchedule: scheduleRows[0] ? {
        scheduleId: scheduleRows[0].schedule_id,
        jobName: scheduleRows[0].job_name,
        enabled: scheduleRows[0].enabled,
        baseIntervalMinutes: scheduleRows[0].base_interval_minutes,
        jitterMinutes: scheduleRows[0].jitter_minutes,
        nextRunAt: scheduleRows[0].next_run_at?.toISOString() || null,
        lastRunAt: scheduleRows[0].last_run_at?.toISOString() || null,
        lastStatus: scheduleRows[0].last_status,
      } : null,
    });
  } catch (error: any) {
    console.error('[Markets] Error fetching crawl history:', error.message);
    res.status(500).json({ error: error.message });
  }
});
|
||||
|
||||
/**
|
||||
* GET /api/markets/stores/:id/products
|
||||
* Get products for a store with filtering and pagination
|
||||
|
||||
@@ -78,14 +78,14 @@ router.get('/metrics', async (_req: Request, res: Response) => {
|
||||
|
||||
/**
|
||||
* GET /api/admin/orchestrator/states
|
||||
* Returns array of states with at least one crawl-enabled dispensary
|
||||
* Returns array of states with at least one known dispensary
|
||||
*/
|
||||
router.get('/states', async (_req: Request, res: Response) => {
|
||||
try {
|
||||
const { rows } = await pool.query(`
|
||||
SELECT DISTINCT state, COUNT(*) as store_count
|
||||
FROM dispensaries
|
||||
WHERE state IS NOT NULL AND crawl_enabled = true
|
||||
WHERE state IS NOT NULL
|
||||
GROUP BY state
|
||||
ORDER BY state
|
||||
`);
|
||||
|
||||
@@ -1,334 +0,0 @@
|
||||
/**
|
||||
* Payload Routes
|
||||
*
|
||||
* Per TASK_WORKFLOW_2024-12-10.md: API access to raw crawl payloads.
|
||||
*
|
||||
* Endpoints:
|
||||
* - GET /api/payloads - List payload metadata (paginated)
|
||||
* - GET /api/payloads/:id - Get payload metadata by ID
|
||||
* - GET /api/payloads/:id/data - Get full payload JSON
|
||||
* - GET /api/payloads/store/:dispensaryId - List payloads for a store
|
||||
* - GET /api/payloads/store/:dispensaryId/latest - Get latest payload for a store
|
||||
* - GET /api/payloads/store/:dispensaryId/diff - Diff two payloads
|
||||
*/
|
||||
|
||||
import { Router, Request, Response } from 'express';
|
||||
import { getPool } from '../db/pool';
|
||||
import {
|
||||
loadRawPayloadById,
|
||||
getLatestPayload,
|
||||
getRecentPayloads,
|
||||
listPayloadMetadata,
|
||||
} from '../utils/payload-storage';
|
||||
import { Pool } from 'pg';
|
||||
|
||||
const router = Router();
|
||||
|
||||
// Get pool instance for queries
|
||||
const getDbPool = (): Pool => getPool() as unknown as Pool;
|
||||
|
||||
/**
 * GET /api/payloads
 * Paginated listing of payload metadata.
 *
 * Query params read here: limit (default 50, capped at 100), offset
 * (default 0), and an optional dispensary_id filter.
 */
router.get('/', async (req: Request, res: Response) => {
  try {
    const pool = getDbPool();
    const query = req.query;

    const requestedLimit = parseInt(query.limit as string) || 50;
    const limit = Math.min(requestedLimit, 100);
    const offset = parseInt(query.offset as string) || 0;

    // The filter stays undefined unless the caller supplied dispensary_id.
    let dispensaryId: number | undefined = undefined;
    if (query.dispensary_id) {
      dispensaryId = parseInt(query.dispensary_id as string);
    }

    const payloads = await listPayloadMetadata(pool, { dispensaryId, limit, offset });

    res.json({ success: true, payloads, pagination: { limit, offset } });
  } catch (err: any) {
    console.error('[Payloads] List error:', err.message);
    res.status(500).json({ success: false, error: err.message });
  }
});
|
||||
|
||||
/**
 * GET /api/payloads/:id
 * Get payload metadata by ID
 *
 * Responses:
 * - 200 { success: true, payload } with the metadata row and dispensary_name
 * - 400 when :id is not an integer
 * - 404 when no row matches
 * - 500 when the query throws
 */
router.get('/:id', async (req: Request, res: Response) => {
  try {
    // Validate before touching the database: a non-numeric segment
    // (for example "/api/payloads/store") would otherwise bind NaN into the query.
    const id = parseInt(req.params.id, 10);
    if (Number.isNaN(id)) {
      return res.status(400).json({ success: false, error: 'Invalid payload id' });
    }

    const pool = getDbPool();

    const result = await pool.query(`
      SELECT
        p.id,
        p.dispensary_id,
        p.crawl_run_id,
        p.storage_path,
        p.product_count,
        p.size_bytes,
        p.size_bytes_raw,
        p.fetched_at,
        p.processed_at,
        p.checksum_sha256,
        d.name as dispensary_name
      FROM raw_crawl_payloads p
      LEFT JOIN dispensaries d ON d.id = p.dispensary_id
      WHERE p.id = $1
    `, [id]);

    if (result.rows.length === 0) {
      return res.status(404).json({ success: false, error: 'Payload not found' });
    }

    res.json({
      success: true,
      payload: result.rows[0],
    });
  } catch (error: any) {
    console.error('[Payloads] Get error:', error.message);
    res.status(500).json({ success: false, error: error.message });
  }
});
|
||||
|
||||
/**
 * GET /api/payloads/:id/data
 * Return a payload's metadata together with its full JSON body, as
 * provided by loadRawPayloadById; 404 when the loader yields nothing.
 */
router.get('/:id/data', async (req: Request, res: Response) => {
  try {
    const pool = getDbPool();
    const payloadId = parseInt(req.params.id);

    const loaded = await loadRawPayloadById(pool, payloadId);

    if (loaded) {
      const { metadata, payload } = loaded;
      res.json({ success: true, metadata, data: payload });
      return;
    }

    res.status(404).json({ success: false, error: 'Payload not found' });
  } catch (err: any) {
    console.error('[Payloads] Get data error:', err.message);
    res.status(500).json({ success: false, error: err.message });
  }
});
|
||||
|
||||
/**
 * GET /api/payloads/store/:dispensaryId
 * List payloads for a specific store.
 *
 * Query params:
 *  - limit: page size, default 20, clamped to 1..100
 *  - offset: rows to skip, default 0, never negative
 *
 * Responses:
 *  - 200 `{ success: true, dispensaryId, payloads, pagination }`
 *  - 400 when `:dispensaryId` is not numeric
 *  - 500 `{ success: false, error }` when listing throws
 */
router.get('/store/:dispensaryId', async (req: Request, res: Response) => {
  try {
    const pool = getDbPool();
    const dispensaryId = parseInt(req.params.dispensaryId, 10);

    if (Number.isNaN(dispensaryId)) {
      return res.status(400).json({ success: false, error: 'Invalid dispensary id' });
    }

    // Clamp both ends: the upper bound protects the server, the lower bound
    // keeps negative values from being passed on as LIMIT/OFFSET.
    const requestedLimit = parseInt(req.query.limit as string, 10) || 20;
    const limit = Math.min(Math.max(requestedLimit, 1), 100);
    const offset = Math.max(parseInt(req.query.offset as string, 10) || 0, 0);

    const payloads = await listPayloadMetadata(pool, {
      dispensaryId,
      limit,
      offset,
    });

    res.json({
      success: true,
      dispensaryId,
      payloads,
      pagination: { limit, offset },
    });
  } catch (error: any) {
    console.error('[Payloads] Store list error:', error.message);
    res.status(500).json({ success: false, error: error.message });
  }
});
|
||||
|
||||
/**
 * GET /api/payloads/store/:dispensaryId/latest
 * Get the latest payload for a store (with full data).
 *
 * Responses:
 *  - 200 `{ success: true, metadata, data }`
 *  - 400 when `:dispensaryId` is not numeric
 *  - 404 when the store has no payloads
 *  - 500 `{ success: false, error }` when loading throws
 */
router.get('/store/:dispensaryId/latest', async (req: Request, res: Response) => {
  try {
    const pool = getDbPool();
    const dispensaryId = parseInt(req.params.dispensaryId, 10);

    // Reject non-numeric ids instead of handing NaN to the lookup.
    if (Number.isNaN(dispensaryId)) {
      return res.status(400).json({ success: false, error: 'Invalid dispensary id' });
    }

    const result = await getLatestPayload(pool, dispensaryId);

    if (!result) {
      return res.status(404).json({
        success: false,
        error: `No payloads found for dispensary ${dispensaryId}`,
      });
    }

    res.json({
      success: true,
      metadata: result.metadata,
      data: result.payload,
    });
  } catch (error: any) {
    console.error('[Payloads] Latest error:', error.message);
    res.status(500).json({ success: false, error: error.message });
  }
});
|
||||
|
||||
/**
 * Index a payload's products by `_id` (falling back to `id`).
 * Products carrying neither identifier are skipped.
 */
function indexPayloadProductsById(products: any[] | undefined | null): Map<string, any> {
  const byId = new Map<string, any>();
  for (const p of products || []) {
    const id = p._id || p.id;
    if (id) byId.set(id, p);
  }
  return byId;
}

/** Compact product summary used in the `added` and `removed` lists. */
function summarizePayloadProduct(id: string, product: any) {
  return {
    id,
    name: product.name,
    brand: product.brand?.name,
    price: product.Prices?.[0]?.price,
  };
}

/**
 * GET /api/payloads/store/:dispensaryId/diff
 * Compare two payloads for a store.
 *
 * Query params:
 *  - from: payload ID (older)
 *  - to: payload ID (newer) - optional, defaults to the store's latest payload
 *
 * When neither param is given, the store's two most recent payloads are compared.
 *
 * Responses:
 *  - 200 `{ success, from, to, diff, details }` where `diff` holds counts and
 *    `details` holds the added / removed / priceChanges / stockChanges lists
 *  - 400 on non-numeric ids, on `to` without `from`, or when fewer than two
 *    payloads exist for the default comparison
 *  - 404 when a requested payload cannot be loaded
 *  - 500 `{ success: false, error }` on unexpected failure
 */
router.get('/store/:dispensaryId/diff', async (req: Request, res: Response) => {
  try {
    const pool = getDbPool();
    const dispensaryId = parseInt(req.params.dispensaryId, 10);
    const fromId = req.query.from ? parseInt(req.query.from as string, 10) : undefined;
    const toId = req.query.to ? parseInt(req.query.to as string, 10) : undefined;

    if (Number.isNaN(dispensaryId) || Number.isNaN(fromId) || Number.isNaN(toId)) {
      return res.status(400).json({
        success: false,
        error: 'dispensaryId, from and to must be numeric',
      });
    }

    // `to` alone has no defined meaning; fail loudly instead of silently
    // ignoring it and diffing unrelated payloads.
    if (fromId === undefined && toId !== undefined) {
      return res.status(400).json({
        success: false,
        error: '`from` is required when `to` is specified',
      });
    }

    let fromPayload: any;
    let toPayload: any;

    if (fromId !== undefined) {
      // Load the requested payload(s); `to` falls back to the store's latest.
      // NOTE(review): explicit IDs are not checked against dispensaryId here —
      // confirm whether cross-store diffs should be rejected.
      const [from, to] = await Promise.all([
        loadRawPayloadById(pool, fromId),
        toId !== undefined ? loadRawPayloadById(pool, toId) : getLatestPayload(pool, dispensaryId),
      ]);
      fromPayload = from;
      toPayload = to;
    } else {
      // Load two most recent
      const recent = await getRecentPayloads(pool, dispensaryId, 2);
      if (recent.length < 2) {
        return res.status(400).json({
          success: false,
          error: 'Need at least 2 payloads to diff. Only found ' + recent.length,
        });
      }
      toPayload = recent[0]; // Most recent
      fromPayload = recent[1]; // Previous
    }

    if (!fromPayload || !toPayload) {
      return res.status(404).json({ success: false, error: 'One or both payloads not found' });
    }

    // Build product maps by ID
    const fromProducts = indexPayloadProductsById(fromPayload.payload?.products);
    const toProducts = indexPayloadProductsById(toPayload.payload?.products);

    // Find differences
    const added: any[] = [];
    const removed: any[] = [];
    const priceChanges: any[] = [];
    const stockChanges: any[] = [];

    // Products in "from" but not in "to" = removed
    for (const [id, product] of fromProducts) {
      if (!toProducts.has(id)) {
        removed.push(summarizePayloadProduct(id, product));
      }
    }

    // Single pass over "to": absent from "from" = added, otherwise compare fields
    for (const [id, toProduct] of toProducts) {
      const fromProduct = fromProducts.get(id);
      if (!fromProduct) {
        added.push(summarizePayloadProduct(id, toProduct));
        continue;
      }

      const fromPrice = fromProduct.Prices?.[0]?.price;
      const toPrice = toProduct.Prices?.[0]?.price;

      if (fromPrice !== toPrice) {
        priceChanges.push({
          id,
          name: toProduct.name,
          brand: toProduct.brand?.name,
          oldPrice: fromPrice,
          newPrice: toPrice,
          // Null-check rather than truthiness so a price of 0 still yields a delta
          change: toPrice != null && fromPrice != null ? toPrice - fromPrice : null,
        });
      }

      const fromStock = fromProduct.Status || fromProduct.status;
      const toStock = toProduct.Status || toProduct.status;

      if (fromStock !== toStock) {
        stockChanges.push({
          id,
          name: toProduct.name,
          brand: toProduct.brand?.name,
          oldStatus: fromStock,
          newStatus: toStock,
        });
      }
    }

    res.json({
      success: true,
      from: {
        id: fromPayload.metadata.id,
        fetchedAt: fromPayload.metadata.fetchedAt,
        productCount: fromPayload.metadata.productCount,
      },
      to: {
        id: toPayload.metadata.id,
        fetchedAt: toPayload.metadata.fetchedAt,
        productCount: toPayload.metadata.productCount,
      },
      diff: {
        added: added.length,
        removed: removed.length,
        priceChanges: priceChanges.length,
        stockChanges: stockChanges.length,
      },
      details: {
        added,
        removed,
        priceChanges,
        stockChanges,
      },
    });
  } catch (error: any) {
    console.error('[Payloads] Diff error:', error.message);
    res.status(500).json({ success: false, error: error.message });
  }
});
|
||||
|
||||
export default router;
|
||||
@@ -183,8 +183,8 @@ router.post('/test-all', requireRole('superadmin', 'admin'), async (req, res) =>
|
||||
return res.status(400).json({ error: 'Concurrency must be between 1 and 50' });
|
||||
}
|
||||
|
||||
const { jobId, totalProxies } = await createProxyTestJob(mode, concurrency);
|
||||
res.json({ jobId, total: totalProxies, mode, concurrency, message: `Proxy test job started (mode: ${mode}, concurrency: ${concurrency})` });
|
||||
const jobId = await createProxyTestJob(mode, concurrency);
|
||||
res.json({ jobId, mode, concurrency, message: `Proxy test job started (mode: ${mode}, concurrency: ${concurrency})` });
|
||||
} catch (error: any) {
|
||||
console.error('Error starting proxy test job:', error);
|
||||
res.status(500).json({ error: error.message || 'Failed to start proxy test job' });
|
||||
@@ -195,8 +195,8 @@ router.post('/test-all', requireRole('superadmin', 'admin'), async (req, res) =>
|
||||
router.post('/test-failed', requireRole('superadmin', 'admin'), async (req, res) => {
|
||||
try {
|
||||
const concurrency = parseInt(req.query.concurrency as string) || 10;
|
||||
const { jobId, totalProxies } = await createProxyTestJob('failed', concurrency);
|
||||
res.json({ jobId, total: totalProxies, mode: 'failed', concurrency, message: 'Retesting failed proxies...' });
|
||||
const jobId = await createProxyTestJob('failed', concurrency);
|
||||
res.json({ jobId, mode: 'failed', concurrency, message: 'Retesting failed proxies...' });
|
||||
} catch (error: any) {
|
||||
console.error('Error starting failed proxy test:', error);
|
||||
res.status(500).json({ error: error.message || 'Failed to start proxy test job' });
|
||||
|
||||
@@ -130,12 +130,6 @@ const CONSUMER_TRUSTED_ORIGINS = [
|
||||
'http://localhost:3002',
|
||||
];
|
||||
|
||||
// Wildcard trusted origin patterns (*.domain.com)
|
||||
const CONSUMER_TRUSTED_PATTERNS = [
|
||||
/^https:\/\/([a-z0-9-]+\.)?cannaiq\.co$/,
|
||||
/^https:\/\/([a-z0-9-]+\.)?cannabrands\.app$/,
|
||||
];
|
||||
|
||||
// Trusted IPs for local development (bypass API key auth)
|
||||
const TRUSTED_IPS = ['127.0.0.1', '::1', '::ffff:127.0.0.1'];
|
||||
|
||||
@@ -156,17 +150,8 @@ function isConsumerTrustedRequest(req: Request): boolean {
|
||||
return true;
|
||||
}
|
||||
const origin = req.headers.origin;
|
||||
if (origin) {
|
||||
// Check exact matches
|
||||
if (CONSUMER_TRUSTED_ORIGINS.includes(origin)) {
|
||||
return true;
|
||||
}
|
||||
// Check wildcard patterns
|
||||
for (const pattern of CONSUMER_TRUSTED_PATTERNS) {
|
||||
if (pattern.test(origin)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
if (origin && CONSUMER_TRUSTED_ORIGINS.includes(origin)) {
|
||||
return true;
|
||||
}
|
||||
const referer = req.headers.referer;
|
||||
if (referer) {
|
||||
@@ -175,18 +160,6 @@ function isConsumerTrustedRequest(req: Request): boolean {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
// Check wildcard patterns against referer origin
|
||||
try {
|
||||
const refererUrl = new URL(referer);
|
||||
const refererOrigin = refererUrl.origin;
|
||||
for (const pattern of CONSUMER_TRUSTED_PATTERNS) {
|
||||
if (pattern.test(refererOrigin)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
} catch {
|
||||
// Invalid referer URL, ignore
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -13,12 +13,6 @@ import {
|
||||
TaskFilter,
|
||||
} from '../tasks/task-service';
|
||||
import { pool } from '../db/pool';
|
||||
import {
|
||||
isTaskPoolPaused,
|
||||
pauseTaskPool,
|
||||
resumeTaskPool,
|
||||
getTaskPoolStatus,
|
||||
} from '../tasks/task-pool-state';
|
||||
|
||||
const router = Router();
|
||||
|
||||
@@ -151,36 +145,6 @@ router.get('/:id', async (req: Request, res: Response) => {
|
||||
}
|
||||
});
|
||||
|
||||
/**
 * DELETE /api/tasks/:id
 * Delete a specific task by ID.
 * Only tasks that are not `running` or `claimed` may be deleted.
 *
 * Responses:
 *  - 200 `{ success: true, message }` once the row is deleted
 *  - 400 when `:id` is not numeric, or the task is running/claimed
 *  - 404 when the task does not exist
 *  - 500 `{ error }` on unexpected failure
 */
router.delete('/:id', async (req: Request, res: Response) => {
  try {
    const taskId = parseInt(req.params.id, 10);

    // Reject non-numeric ids before they reach the task lookup or the DELETE.
    if (Number.isNaN(taskId)) {
      return res.status(400).json({ error: 'Invalid task ID' });
    }

    // First check if task exists and its status
    const task = await taskService.getTask(taskId);
    if (!task) {
      return res.status(404).json({ error: 'Task not found' });
    }

    // Don't allow deleting running tasks
    if (task.status === 'running' || task.status === 'claimed') {
      return res.status(400).json({ error: 'Cannot delete a running or claimed task' });
    }

    // NOTE(review): the status check above and this DELETE are separate
    // statements, so a worker could claim the task in between — confirm whether
    // the DELETE should also be conditioned on the task's status.
    await pool.query('DELETE FROM worker_tasks WHERE id = $1', [taskId]);

    res.json({ success: true, message: `Task ${taskId} deleted` });
  } catch (error: unknown) {
    console.error('Error deleting task:', error);
    res.status(500).json({ error: 'Failed to delete task' });
  }
});
|
||||
|
||||
/**
|
||||
* POST /api/tasks
|
||||
* Create a new task
|
||||
@@ -598,42 +562,4 @@ router.post('/migration/full-migrate', async (req: Request, res: Response) => {
|
||||
}
|
||||
});
|
||||
|
||||
/**
 * GET /api/tasks/pool/status
 * Report the task pool state: the fields returned by `getTaskPoolStatus()`
 * merged into a `{ success: true }` envelope.
 */
router.get('/pool/status', async (_req: Request, res: Response) => {
  res.json({ success: true, ...getTaskPoolStatus() });
});
|
||||
|
||||
/**
 * POST /api/tasks/pool/pause
 * Pause the task pool so workers stop picking up new tasks.
 */
router.post('/pool/pause', async (_req: Request, res: Response) => {
  pauseTaskPool();

  const body = {
    success: true,
    paused: true,
    message: 'Task pool paused - workers will not pick up new tasks',
  };
  res.json(body);
});
|
||||
|
||||
/**
 * POST /api/tasks/pool/resume
 * Resume the task pool so workers pick up tasks again.
 */
router.post('/pool/resume', async (_req: Request, res: Response) => {
  resumeTaskPool();

  const body = {
    success: true,
    paused: false,
    message: 'Task pool resumed - workers will pick up new tasks',
  };
  res.json(body);
});
|
||||
|
||||
export default router;
|
||||
|
||||
@@ -14,36 +14,23 @@ router.get('/', async (req: AuthRequest, res) => {
|
||||
try {
|
||||
const { search, domain } = req.query;
|
||||
|
||||
// Check which columns exist (schema-tolerant)
|
||||
const columnsResult = await pool.query(`
|
||||
SELECT column_name FROM information_schema.columns
|
||||
WHERE table_name = 'users' AND column_name IN ('first_name', 'last_name', 'phone', 'domain')
|
||||
`);
|
||||
const existingColumns = new Set(columnsResult.rows.map((r: any) => r.column_name));
|
||||
|
||||
// Build column list based on what exists
|
||||
const selectCols = ['id', 'email', 'role', 'created_at', 'updated_at'];
|
||||
if (existingColumns.has('first_name')) selectCols.push('first_name');
|
||||
if (existingColumns.has('last_name')) selectCols.push('last_name');
|
||||
if (existingColumns.has('phone')) selectCols.push('phone');
|
||||
if (existingColumns.has('domain')) selectCols.push('domain');
|
||||
|
||||
let query = `SELECT ${selectCols.join(', ')} FROM users WHERE 1=1`;
|
||||
let query = `
|
||||
SELECT id, email, role, first_name, last_name, phone, domain, created_at, updated_at
|
||||
FROM users
|
||||
WHERE 1=1
|
||||
`;
|
||||
const params: any[] = [];
|
||||
let paramIndex = 1;
|
||||
|
||||
// Search by email (and optionally first_name, last_name if they exist)
|
||||
// Search by email, first_name, or last_name
|
||||
if (search && typeof search === 'string') {
|
||||
const searchClauses = ['email ILIKE $' + paramIndex];
|
||||
if (existingColumns.has('first_name')) searchClauses.push('first_name ILIKE $' + paramIndex);
|
||||
if (existingColumns.has('last_name')) searchClauses.push('last_name ILIKE $' + paramIndex);
|
||||
query += ` AND (${searchClauses.join(' OR ')})`;
|
||||
query += ` AND (email ILIKE $${paramIndex} OR first_name ILIKE $${paramIndex} OR last_name ILIKE $${paramIndex})`;
|
||||
params.push(`%${search}%`);
|
||||
paramIndex++;
|
||||
}
|
||||
|
||||
// Filter by domain (if column exists)
|
||||
if (domain && typeof domain === 'string' && existingColumns.has('domain')) {
|
||||
// Filter by domain
|
||||
if (domain && typeof domain === 'string') {
|
||||
query += ` AND domain = $${paramIndex}`;
|
||||
params.push(domain);
|
||||
paramIndex++;
|
||||
@@ -63,22 +50,8 @@ router.get('/', async (req: AuthRequest, res) => {
|
||||
router.get('/:id', async (req: AuthRequest, res) => {
|
||||
try {
|
||||
const { id } = req.params;
|
||||
|
||||
// Check which columns exist (schema-tolerant)
|
||||
const columnsResult = await pool.query(`
|
||||
SELECT column_name FROM information_schema.columns
|
||||
WHERE table_name = 'users' AND column_name IN ('first_name', 'last_name', 'phone', 'domain')
|
||||
`);
|
||||
const existingColumns = new Set(columnsResult.rows.map((r: any) => r.column_name));
|
||||
|
||||
const selectCols = ['id', 'email', 'role', 'created_at', 'updated_at'];
|
||||
if (existingColumns.has('first_name')) selectCols.push('first_name');
|
||||
if (existingColumns.has('last_name')) selectCols.push('last_name');
|
||||
if (existingColumns.has('phone')) selectCols.push('phone');
|
||||
if (existingColumns.has('domain')) selectCols.push('domain');
|
||||
|
||||
const result = await pool.query(`
|
||||
SELECT ${selectCols.join(', ')}
|
||||
SELECT id, email, role, first_name, last_name, phone, domain, created_at, updated_at
|
||||
FROM users
|
||||
WHERE id = $1
|
||||
`, [id]);
|
||||
|
||||
@@ -70,20 +70,21 @@ router.post('/register', async (req: Request, res: Response) => {
|
||||
);
|
||||
|
||||
if (existing.rows.length > 0) {
|
||||
// Re-activate existing worker - keep existing pod_name (fantasy name), don't overwrite with K8s name
|
||||
// Re-activate existing worker
|
||||
const { rows } = await pool.query(`
|
||||
UPDATE worker_registry
|
||||
SET status = 'active',
|
||||
role = $1,
|
||||
hostname = $2,
|
||||
ip_address = $3,
|
||||
pod_name = $2,
|
||||
hostname = $3,
|
||||
ip_address = $4,
|
||||
last_heartbeat_at = NOW(),
|
||||
started_at = NOW(),
|
||||
metadata = $4,
|
||||
metadata = $5,
|
||||
updated_at = NOW()
|
||||
WHERE worker_id = $5
|
||||
RETURNING id, worker_id, friendly_name, pod_name, role
|
||||
`, [role, finalHostname, clientIp, metadata, finalWorkerId]);
|
||||
WHERE worker_id = $6
|
||||
RETURNING id, worker_id, friendly_name, role
|
||||
`, [role, pod_name, finalHostname, clientIp, metadata, finalWorkerId]);
|
||||
|
||||
const worker = rows[0];
|
||||
const roleMsg = role ? `for ${role}` : 'as role-agnostic';
|
||||
@@ -104,13 +105,13 @@ router.post('/register', async (req: Request, res: Response) => {
|
||||
const nameResult = await pool.query('SELECT assign_worker_name($1) as name', [finalWorkerId]);
|
||||
const friendlyName = nameResult.rows[0].name;
|
||||
|
||||
// Register the worker - use friendlyName as pod_name (not K8s name)
|
||||
// Register the worker
|
||||
const { rows } = await pool.query(`
|
||||
INSERT INTO worker_registry (
|
||||
worker_id, friendly_name, role, pod_name, hostname, ip_address, status, metadata
|
||||
) VALUES ($1, $2, $3, $4, $5, $6, 'active', $7)
|
||||
RETURNING id, worker_id, friendly_name, pod_name, role
|
||||
`, [finalWorkerId, friendlyName, role, friendlyName, finalHostname, clientIp, metadata]);
|
||||
RETURNING id, worker_id, friendly_name, role
|
||||
`, [finalWorkerId, friendlyName, role, pod_name, finalHostname, clientIp, metadata]);
|
||||
|
||||
const worker = rows[0];
|
||||
const roleMsg = role ? `for ${role}` : 'as role-agnostic';
|
||||
@@ -137,36 +138,17 @@ router.post('/register', async (req: Request, res: Response) => {
|
||||
*
|
||||
* Body:
|
||||
* - worker_id: string (required)
|
||||
* - current_task_id: number (optional) - task currently being processed (primary task)
|
||||
* - current_task_ids: number[] (optional) - all tasks currently being processed (concurrent)
|
||||
* - active_task_count: number (optional) - number of tasks currently running
|
||||
* - max_concurrent_tasks: number (optional) - max concurrent tasks this worker can handle
|
||||
* - current_task_id: number (optional) - task currently being processed
|
||||
* - status: string (optional) - 'active', 'idle'
|
||||
* - resources: object (optional) - memory_mb, cpu_user_ms, cpu_system_ms, etc.
|
||||
*/
|
||||
router.post('/heartbeat', async (req: Request, res: Response) => {
|
||||
try {
|
||||
const {
|
||||
worker_id,
|
||||
current_task_id,
|
||||
current_task_ids,
|
||||
active_task_count,
|
||||
max_concurrent_tasks,
|
||||
status = 'active',
|
||||
resources
|
||||
} = req.body;
|
||||
const { worker_id, current_task_id, status = 'active', resources } = req.body;
|
||||
|
||||
if (!worker_id) {
|
||||
return res.status(400).json({ success: false, error: 'worker_id is required' });
|
||||
}
|
||||
|
||||
// Build metadata object with all the new fields
|
||||
const metadata: Record<string, unknown> = {};
|
||||
if (resources) Object.assign(metadata, resources);
|
||||
if (current_task_ids) metadata.current_task_ids = current_task_ids;
|
||||
if (active_task_count !== undefined) metadata.active_task_count = active_task_count;
|
||||
if (max_concurrent_tasks !== undefined) metadata.max_concurrent_tasks = max_concurrent_tasks;
|
||||
|
||||
// Store resources in metadata jsonb column
|
||||
const { rows } = await pool.query(`
|
||||
UPDATE worker_registry
|
||||
@@ -177,7 +159,7 @@ router.post('/heartbeat', async (req: Request, res: Response) => {
|
||||
updated_at = NOW()
|
||||
WHERE worker_id = $3
|
||||
RETURNING id, friendly_name, status
|
||||
`, [current_task_id || null, status, worker_id, Object.keys(metadata).length > 0 ? JSON.stringify(metadata) : null]);
|
||||
`, [current_task_id || null, status, worker_id, resources ? JSON.stringify(resources) : null]);
|
||||
|
||||
if (rows.length === 0) {
|
||||
return res.status(404).json({ success: false, error: 'Worker not found - please register first' });
|
||||
@@ -291,29 +273,6 @@ router.post('/deregister', async (req: Request, res: Response) => {
|
||||
*/
|
||||
router.get('/workers', async (req: Request, res: Response) => {
|
||||
try {
|
||||
// Check if worker_registry table exists
|
||||
const tableCheck = await pool.query(`
|
||||
SELECT EXISTS (
|
||||
SELECT FROM information_schema.tables
|
||||
WHERE table_name = 'worker_registry'
|
||||
) as exists
|
||||
`);
|
||||
|
||||
if (!tableCheck.rows[0].exists) {
|
||||
// Return empty result if table doesn't exist yet
|
||||
return res.json({
|
||||
success: true,
|
||||
workers: [],
|
||||
summary: {
|
||||
active_count: 0,
|
||||
idle_count: 0,
|
||||
offline_count: 0,
|
||||
total_count: 0,
|
||||
active_roles: 0
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
const { status, role, include_terminated = 'false' } = req.query;
|
||||
|
||||
let whereClause = include_terminated === 'true' ? 'WHERE 1=1' : "WHERE status != 'terminated'";
|
||||
@@ -348,21 +307,12 @@ router.get('/workers', async (req: Request, res: Response) => {
|
||||
tasks_completed,
|
||||
tasks_failed,
|
||||
current_task_id,
|
||||
-- Concurrent task fields from metadata
|
||||
(metadata->>'current_task_ids')::jsonb as current_task_ids,
|
||||
(metadata->>'active_task_count')::int as active_task_count,
|
||||
(metadata->>'max_concurrent_tasks')::int as max_concurrent_tasks,
|
||||
-- Decommission fields
|
||||
COALESCE(decommission_requested, false) as decommission_requested,
|
||||
decommission_reason,
|
||||
-- Full metadata for resources
|
||||
metadata,
|
||||
EXTRACT(EPOCH FROM (NOW() - last_heartbeat_at)) as seconds_since_heartbeat,
|
||||
CASE
|
||||
WHEN status = 'offline' OR status = 'terminated' THEN status
|
||||
WHEN last_heartbeat_at < NOW() - INTERVAL '2 minutes' THEN 'stale'
|
||||
WHEN current_task_id IS NOT NULL THEN 'busy'
|
||||
WHEN (metadata->>'active_task_count')::int > 0 THEN 'busy'
|
||||
ELSE 'ready'
|
||||
END as health_status,
|
||||
created_at
|
||||
@@ -699,163 +649,4 @@ router.get('/capacity', async (_req: Request, res: Response) => {
|
||||
}
|
||||
});
|
||||
|
||||
// ============================================================
|
||||
// WORKER LIFECYCLE MANAGEMENT
|
||||
// ============================================================
|
||||
|
||||
/**
 * POST /api/worker-registry/workers/:workerId/decommission
 * Request graceful decommission of a worker (will stop after current task).
 *
 * Body (all optional):
 *  - reason: string stored as `decommission_reason`
 *  - issued_by: string recorded in the `worker_commands` audit row (default 'admin')
 *
 * Responses:
 *  - 200 `{ success: true, message, worker }`
 *  - 404 when no worker has this `worker_id`
 *  - 500 `{ success: false, error }` when flagging the worker fails
 */
router.post('/workers/:workerId/decommission', async (req: Request, res: Response) => {
  try {
    const { workerId } = req.params;
    // Tolerate a missing body so a bare POST still uses the default reason/issuer.
    const { reason, issued_by } = req.body || {};

    // Update worker_registry to flag for decommission
    const result = await pool.query(
      `UPDATE worker_registry
       SET decommission_requested = true,
           decommission_reason = $2,
           decommission_requested_at = NOW()
       WHERE worker_id = $1
       RETURNING friendly_name, status, current_task_id`,
      [workerId, reason || 'Manual decommission from admin']
    );

    if (result.rows.length === 0) {
      return res.status(404).json({ success: false, error: 'Worker not found' });
    }

    const worker = result.rows[0];

    // Also log to worker_commands for audit trail. This stays best-effort (the
    // table might not exist yet), but the failure is logged so a broken audit
    // trail is visible instead of vanishing silently.
    await pool.query(
      `INSERT INTO worker_commands (worker_id, command, reason, issued_by)
       VALUES ($1, 'decommission', $2, $3)
       ON CONFLICT DO NOTHING`,
      [workerId, reason || 'Manual decommission', issued_by || 'admin']
    ).catch((auditError: any) => {
      console.warn('[WorkerRegistry] Decommission audit log failed:', auditError.message);
    });

    res.json({
      success: true,
      message: worker.current_task_id
        ? `Worker ${worker.friendly_name} will stop after completing task #${worker.current_task_id}`
        : `Worker ${worker.friendly_name} will stop on next poll`,
      worker: {
        friendly_name: worker.friendly_name,
        status: worker.status,
        current_task_id: worker.current_task_id,
        decommission_requested: true
      }
    });
  } catch (error: any) {
    console.error('[WorkerRegistry] Decommission error:', error.message);
    res.status(500).json({ success: false, error: error.message });
  }
});
|
||||
|
||||
/**
 * POST /api/worker-registry/workers/:workerId/cancel-decommission
 * Clear a pending decommission request: resets the flag, reason and timestamp
 * on the matching `worker_registry` row.
 *
 * Responds 404 when no worker matches, 500 with the error message on failure.
 */
router.post('/workers/:workerId/cancel-decommission', async (req: Request, res: Response) => {
  try {
    const workerId = req.params.workerId;

    const { rows } = await pool.query(
      `UPDATE worker_registry
       SET decommission_requested = false,
           decommission_reason = NULL,
           decommission_requested_at = NULL
       WHERE worker_id = $1
       RETURNING friendly_name`,
      [workerId]
    );

    const [updated] = rows;
    if (rows.length === 0) {
      return res.status(404).json({ success: false, error: 'Worker not found' });
    }

    const message = `Decommission cancelled for ${updated.friendly_name}`;
    res.json({ success: true, message });
  } catch (error: any) {
    res.status(500).json({ success: false, error: error.message });
  }
});
|
||||
|
||||
/**
 * POST /api/worker-registry/spawn
 * Placeholder for spawning a new worker in the current pod (multi-worker-per-pod
 * mode). Actual spawning requires a pod supervisor that does not exist yet, so
 * this always answers `success: false` with scaling instructions.
 *
 * The request body is currently ignored.
 */
router.post('/spawn', async (_req: Request, res: Response) => {
  try {
    // For now, we can't actually spawn workers from the API.
    // This would require a supervisor process in each pod that listens for spawn commands.
    // Instead, return instructions for how to scale.
    // NOTE(review): this answers HTTP 200 with success:false; a 501 may be more
    // accurate, but confirm no client relies on the 200 before changing it.
    res.json({
      success: false,
      error: 'Direct worker spawning not yet implemented',
      instructions: 'To add workers, scale the K8s deployment: kubectl scale deployment/scraper-worker --replicas=N'
    });
  } catch (error: any) {
    res.status(500).json({ success: false, error: error.message });
  }
});
|
||||
|
||||
/**
 * GET /api/worker-registry/pods
 * Get workers grouped by pod.
 *
 * Aggregates every `worker_registry` row whose status is neither 'offline' nor
 * 'terminated' into one entry per pod, with busy/idle counts, task totals,
 * summed `memory_rss_mb` from metadata, and the list of member workers.
 */
router.get('/pods', async (_req: Request, res: Response) => {
  try {
    const queryResult = await pool.query(`
      SELECT
        COALESCE(pod_name, 'Unknown') as pod_name,
        COUNT(*) as worker_count,
        COUNT(*) FILTER (WHERE current_task_id IS NOT NULL) as busy_count,
        COUNT(*) FILTER (WHERE current_task_id IS NULL) as idle_count,
        SUM(tasks_completed) as total_completed,
        SUM(tasks_failed) as total_failed,
        SUM((metadata->>'memory_rss_mb')::int) as total_memory_mb,
        array_agg(json_build_object(
          'worker_id', worker_id,
          'friendly_name', friendly_name,
          'status', status,
          'current_task_id', current_task_id,
          'tasks_completed', tasks_completed,
          'tasks_failed', tasks_failed,
          'decommission_requested', COALESCE(decommission_requested, false),
          'last_heartbeat_at', last_heartbeat_at
        )) as workers
      FROM worker_registry
      WHERE status NOT IN ('offline', 'terminated')
      GROUP BY pod_name
      ORDER BY pod_name
    `);

    // The aggregate columns are run through parseInt; the SUM columns fall
    // back to 0 when parseInt yields NaN (e.g. a null sum).
    const pods = [];
    for (const podRow of queryResult.rows) {
      pods.push({
        pod_name: podRow.pod_name,
        worker_count: parseInt(podRow.worker_count),
        busy_count: parseInt(podRow.busy_count),
        idle_count: parseInt(podRow.idle_count),
        total_completed: parseInt(podRow.total_completed) || 0,
        total_failed: parseInt(podRow.total_failed) || 0,
        total_memory_mb: parseInt(podRow.total_memory_mb) || 0,
        workers: podRow.workers
      });
    }

    res.json({ success: true, pods });
  } catch (error: any) {
    res.status(500).json({ success: false, error: error.message });
  }
});
|
||||
|
||||
export default router;
|
||||
|
||||
@@ -17,234 +17,13 @@
|
||||
* GET /api/monitor/jobs - Get recent job history
|
||||
* GET /api/monitor/active-jobs - Get currently running jobs
|
||||
* GET /api/monitor/summary - Get monitoring summary
|
||||
*
|
||||
* K8s Scaling (added 2024-12-10):
|
||||
* GET /api/workers/k8s/replicas - Get current replica count
|
||||
* POST /api/workers/k8s/scale - Scale worker replicas up/down
|
||||
*/
|
||||
|
||||
import { Router, Request, Response } from 'express';
|
||||
import { pool } from '../db/pool';
|
||||
import * as k8s from '@kubernetes/client-node';
|
||||
|
||||
const router = Router();
|
||||
|
||||
// ============================================================
|
||||
// K8S SCALING CONFIGURATION (added 2024-12-10)
|
||||
// Per TASK_WORKFLOW_2024-12-10.md: Admin can scale workers from UI
|
||||
// ============================================================
|
||||
|
||||
const K8S_NAMESPACE = process.env.K8S_NAMESPACE || 'dispensary-scraper';
|
||||
const K8S_DEPLOYMENT_NAME = process.env.K8S_WORKER_DEPLOYMENT || 'scraper-worker';
|
||||
|
||||
// Initialize K8s client - uses in-cluster config when running in K8s,
|
||||
// or kubeconfig when running locally
|
||||
let k8sAppsApi: k8s.AppsV1Api | null = null;
|
||||
|
||||
/**
 * Return the shared AppsV1Api client, creating it on first use.
 *
 * Config is loaded from the cluster first and from the default kubeconfig if
 * that throws. When no client can be built, a warning is logged and null is
 * returned; nothing is cached in that case, so the next call tries again.
 */
function getK8sClient(): k8s.AppsV1Api | null {
  if (!k8sAppsApi) {
    try {
      const kubeConfig = new k8s.KubeConfig();

      try {
        // In-cluster config (when running as a pod)
        kubeConfig.loadFromCluster();
      } catch {
        // Default kubeconfig (~/.kube/config) for local dev
        kubeConfig.loadFromDefault();
      }

      k8sAppsApi = kubeConfig.makeApiClient(k8s.AppsV1Api);
    } catch (err: any) {
      console.warn('[Workers] K8s client not available:', err.message);
      return null;
    }
  }

  return k8sAppsApi;
}
|
||||
|
||||
// ============================================================
|
||||
// K8S SCALING ROUTES (added 2024-12-10)
|
||||
// Per TASK_WORKFLOW_2024-12-10.md: Admin can scale workers from UI
|
||||
// ============================================================
|
||||
|
||||
/**
 * GET /api/workers/k8s/replicas - Get current worker replica count
 *
 * Reads the worker Deployment and reports its ready (`current`), desired,
 * available and updated replica counts, each defaulting to 0 when absent.
 * Responds 503 when no K8s client is available and 500 when the read fails.
 */
router.get('/k8s/replicas', async (_req: Request, res: Response) => {
  const client = getK8sClient();

  if (client === null) {
    return res.status(503).json({
      success: false,
      error: 'K8s client not available (not running in cluster or no kubeconfig)',
      replicas: null,
    });
  }

  try {
    const { status, spec } = await client.readNamespacedDeployment({
      name: K8S_DEPLOYMENT_NAME,
      namespace: K8S_NAMESPACE,
    });

    const replicas = {
      current: status?.readyReplicas || 0,
      desired: spec?.replicas || 0,
      available: status?.availableReplicas || 0,
      updated: status?.updatedReplicas || 0,
    };

    res.json({
      success: true,
      replicas,
      deployment: K8S_DEPLOYMENT_NAME,
      namespace: K8S_NAMESPACE,
    });
  } catch (err: any) {
    // Prefer the API server's message when the client exposes a response body
    const detail = err.body?.message || err.message;
    console.error('[Workers] K8s replicas error:', detail);
    res.status(500).json({
      success: false,
      error: detail,
    });
  }
});
|
||||
|
||||
/**
 * POST /api/workers/k8s/scale
 *
 * Sets the worker Deployment's replica count.
 *
 * Body: { replicas: number } - desired replica count, a whole number in 0..20.
 *
 * Responses:
 *   200 - { success: true, message, previous, desired, deployment, namespace }
 *   400 - `replicas` is missing, not an integer, or outside 0..20
 *   503 - no Kubernetes client could be created
 *   500 - the Kubernetes API call failed
 */
router.post('/k8s/scale', async (req: Request, res: Response) => {
  const client = getK8sClient();

  if (!client) {
    return res.status(503).json({
      success: false,
      error: 'K8s client not available (not running in cluster or no kubeconfig)',
    });
  }

  // Optional access: a request with no parsed body must yield a 400 below,
  // not an exception thrown outside the try block.
  const replicas = req.body?.replicas;

  // Number.isInteger rejects non-numbers, NaN, Infinity and fractional values.
  // A plain `typeof === 'number'` range check lets NaN and 1.5 through.
  if (!Number.isInteger(replicas) || replicas < 0 || replicas > 20) {
    return res.status(400).json({
      success: false,
      error: 'replicas must be an integer between 0 and 20',
    });
  }

  try {
    // Read the current scale first so the response can report the change.
    const currentResponse = await client.readNamespacedDeploymentScale({
      name: K8S_DEPLOYMENT_NAME,
      namespace: K8S_NAMESPACE,
    });
    const currentReplicas = currentResponse.spec?.replicas || 0;

    // Replace the Scale subresource with the requested replica count.
    await client.replaceNamespacedDeploymentScale({
      name: K8S_DEPLOYMENT_NAME,
      namespace: K8S_NAMESPACE,
      body: {
        apiVersion: 'autoscaling/v1',
        kind: 'Scale',
        metadata: {
          name: K8S_DEPLOYMENT_NAME,
          namespace: K8S_NAMESPACE,
        },
        spec: {
          replicas,
        },
      },
    });

    console.log(`[Workers] Scaled ${K8S_DEPLOYMENT_NAME} from ${currentReplicas} to ${replicas} replicas`);

    res.json({
      success: true,
      message: `Scaled from ${currentReplicas} to ${replicas} replicas`,
      previous: currentReplicas,
      desired: replicas,
      deployment: K8S_DEPLOYMENT_NAME,
      namespace: K8S_NAMESPACE,
    });
  } catch (err: any) {
    // Prefer the API server's message when the client attached a response body.
    const reason = err.body?.message || err.message;
    console.error('[Workers] K8s scale error:', reason);
    res.status(500).json({
      success: false,
      error: reason,
    });
  }
});
|
||||
|
||||
/**
 * POST /api/workers/k8s/scale-up
 *
 * Convenience endpoint that adds exactly one worker replica, refusing to go
 * past 20.
 *
 * Responses:
 *   200 - { success: true, message, previous, desired, deployment, namespace }
 *   400 - the Deployment is already at the 20-replica cap
 *   503 - no Kubernetes client could be created
 *   500 - the Kubernetes API call failed
 */
router.post('/k8s/scale-up', async (_req: Request, res: Response) => {
  const appsApi = getK8sClient();
  if (!appsApi) {
    return res.status(503).json({
      success: false,
      error: 'K8s client not available (not running in cluster or no kubeconfig)',
    });
  }

  // Both API calls address the same Deployment.
  const target = { name: K8S_DEPLOYMENT_NAME, namespace: K8S_NAMESPACE };

  try {
    // NOTE(review): the count is read and then written back without a
    // resourceVersion, so two concurrent requests can both compute the same
    // target and add only one replica in total. Confirm whether that matters.
    const scale = await appsApi.readNamespacedDeploymentScale({ ...target });
    const currentReplicas = scale.spec?.replicas || 0;
    const newReplicas = currentReplicas + 1;

    if (newReplicas > 20) {
      return res.status(400).json({
        success: false,
        error: 'Maximum replica count (20) reached',
      });
    }

    await appsApi.replaceNamespacedDeploymentScale({
      ...target,
      body: {
        apiVersion: 'autoscaling/v1',
        kind: 'Scale',
        metadata: { ...target },
        spec: { replicas: newReplicas },
      },
    });

    console.log(`[Workers] Scaled up ${K8S_DEPLOYMENT_NAME} from ${currentReplicas} to ${newReplicas} replicas`);

    res.json({
      success: true,
      message: `Added worker (${currentReplicas} → ${newReplicas} replicas)`,
      previous: currentReplicas,
      desired: newReplicas,
      deployment: K8S_DEPLOYMENT_NAME,
      namespace: K8S_NAMESPACE,
    });
  } catch (err: any) {
    // Prefer the API server's message when the client attached a response body.
    const reason = err.body?.message || err.message;
    console.error('[Workers] K8s scale-up error:', reason);
    res.status(500).json({
      success: false,
      error: reason,
    });
  }
});
|
||||
|
||||
// ============================================================
|
||||
// STATIC ROUTES (must come before parameterized routes)
|
||||
// ============================================================
|
||||
|
||||
@@ -16,11 +16,10 @@ import {
|
||||
executeGraphQL,
|
||||
startSession,
|
||||
endSession,
|
||||
setCrawlRotator,
|
||||
getFingerprint,
|
||||
GRAPHQL_HASHES,
|
||||
DUTCHIE_CONFIG,
|
||||
} from '../platforms/dutchie';
|
||||
import { CrawlRotator } from '../services/crawl-rotator';
|
||||
|
||||
dotenv.config();
|
||||
|
||||
@@ -109,27 +108,19 @@ async function main() {
|
||||
|
||||
// ============================================================
|
||||
// STEP 2: Start stealth session
|
||||
// Per workflow-12102025.md: Initialize CrawlRotator and start session with menuUrl
|
||||
// ============================================================
|
||||
console.log('┌─────────────────────────────────────────────────────────────┐');
|
||||
console.log('│ STEP 2: Start Stealth Session │');
|
||||
console.log('└─────────────────────────────────────────────────────────────┘');
|
||||
|
||||
// Per workflow-12102025.md: Initialize CrawlRotator (required for sessions)
|
||||
const rotator = new CrawlRotator();
|
||||
setCrawlRotator(rotator);
|
||||
// Use Arizona timezone for this store
|
||||
const session = startSession(disp.state || 'AZ', 'America/Phoenix');
|
||||
|
||||
// Per workflow-12102025.md: startSession takes menuUrl for dynamic Referer
|
||||
const session = startSession(disp.menu_url);
|
||||
|
||||
const fp = session.fingerprint;
|
||||
const fp = getFingerprint();
|
||||
console.log(` Session ID: ${session.sessionId}`);
|
||||
console.log(` Browser: ${fp.browserName} (${fp.deviceCategory})`);
|
||||
console.log(` User-Agent: ${fp.userAgent.slice(0, 60)}...`);
|
||||
console.log(` Accept-Language: ${fp.acceptLanguage}`);
|
||||
console.log(` Referer: ${session.referer}`);
|
||||
console.log(` DNT: ${fp.httpFingerprint.hasDNT ? 'enabled' : 'disabled'}`);
|
||||
console.log(` TLS: ${fp.httpFingerprint.curlImpersonateBinary}`);
|
||||
console.log(` Sec-CH-UA: ${fp.secChUa || '(not set)'}`);
|
||||
console.log('');
|
||||
|
||||
// ============================================================
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
/**
|
||||
* Test script for stealth session management
|
||||
*
|
||||
* Per workflow-12102025.md:
|
||||
* - Tests HTTP fingerprinting (browser-specific headers + ordering)
|
||||
* - Tests UA generation (device distribution, browser filtering)
|
||||
* - Tests dynamic Referer per dispensary
|
||||
* Tests:
|
||||
* 1. Per-session fingerprint rotation
|
||||
* 2. Geographic consistency (timezone → Accept-Language)
|
||||
* 3. Proxy location loading from database
|
||||
*
|
||||
* Usage:
|
||||
* npx tsx src/scripts/test-stealth-session.ts
|
||||
@@ -14,142 +14,104 @@ import {
|
||||
startSession,
|
||||
endSession,
|
||||
getCurrentSession,
|
||||
getFingerprint,
|
||||
getRandomFingerprint,
|
||||
getLocaleForTimezone,
|
||||
buildHeaders,
|
||||
setCrawlRotator,
|
||||
} from '../platforms/dutchie';
|
||||
|
||||
import { CrawlRotator } from '../services/crawl-rotator';
|
||||
import {
|
||||
generateHTTPFingerprint,
|
||||
buildRefererFromMenuUrl,
|
||||
BrowserType,
|
||||
} from '../services/http-fingerprint';
|
||||
|
||||
console.log('='.repeat(60));
|
||||
console.log('STEALTH SESSION TEST (per workflow-12102025.md)');
|
||||
console.log('STEALTH SESSION TEST');
|
||||
console.log('='.repeat(60));
|
||||
|
||||
// Initialize CrawlRotator (required for sessions)
|
||||
console.log('\n[Setup] Initializing CrawlRotator...');
|
||||
const rotator = new CrawlRotator();
|
||||
setCrawlRotator(rotator);
|
||||
console.log(' CrawlRotator initialized');
|
||||
|
||||
// Test 1: HTTP Fingerprint Generation
|
||||
console.log('\n[Test 1] HTTP Fingerprint Generation:');
|
||||
const browsers: BrowserType[] = ['Chrome', 'Firefox', 'Safari', 'Edge'];
|
||||
|
||||
for (const browser of browsers) {
|
||||
const httpFp = generateHTTPFingerprint(browser);
|
||||
console.log(` ${browser}:`);
|
||||
console.log(` TLS binary: ${httpFp.curlImpersonateBinary}`);
|
||||
console.log(` DNT: ${httpFp.hasDNT ? 'enabled' : 'disabled'}`);
|
||||
console.log(` Header order: ${httpFp.headerOrder.slice(0, 5).join(', ')}...`);
|
||||
}
|
||||
|
||||
// Test 2: Dynamic Referer from menu URLs
|
||||
console.log('\n[Test 2] Dynamic Referer from Menu URLs:');
|
||||
const testUrls = [
|
||||
'https://dutchie.com/embedded-menu/harvest-of-tempe',
|
||||
'https://dutchie.com/dispensary/zen-leaf-mesa',
|
||||
'/embedded-menu/deeply-rooted',
|
||||
'/dispensary/curaleaf-phoenix',
|
||||
null,
|
||||
// Test 1: Timezone to Locale mapping
|
||||
console.log('\n[Test 1] Timezone to Locale Mapping:');
|
||||
const testTimezones = [
|
||||
'America/Phoenix',
|
||||
'America/Los_Angeles',
|
||||
'America/New_York',
|
||||
'America/Chicago',
|
||||
undefined,
|
||||
'Invalid/Timezone',
|
||||
];
|
||||
|
||||
for (const url of testUrls) {
|
||||
const referer = buildRefererFromMenuUrl(url);
|
||||
console.log(` ${url || '(null/undefined)'}`);
|
||||
console.log(` → ${referer}`);
|
||||
for (const tz of testTimezones) {
|
||||
const locale = getLocaleForTimezone(tz);
|
||||
console.log(` ${tz || '(undefined)'} → ${locale}`);
|
||||
}
|
||||
|
||||
// Test 3: Session with Dynamic Referer
|
||||
console.log('\n[Test 3] Session with Dynamic Referer:');
|
||||
const testMenuUrl = 'https://dutchie.com/dispensary/harvest-of-tempe';
|
||||
console.log(` Starting session with menuUrl: ${testMenuUrl}`);
|
||||
|
||||
const session1 = startSession(testMenuUrl);
|
||||
console.log(` Session ID: ${session1.sessionId}`);
|
||||
console.log(` Browser: ${session1.fingerprint.browserName}`);
|
||||
console.log(` Device: ${session1.fingerprint.deviceCategory}`);
|
||||
console.log(` Referer: ${session1.referer}`);
|
||||
console.log(` DNT: ${session1.fingerprint.httpFingerprint.hasDNT ? 'enabled' : 'disabled'}`);
|
||||
console.log(` TLS: ${session1.fingerprint.httpFingerprint.curlImpersonateBinary}`);
|
||||
|
||||
// Test 4: Build Headers (browser-specific order)
|
||||
console.log('\n[Test 4] Build Headers (browser-specific order):');
|
||||
const { headers, orderedHeaders } = buildHeaders(true, 1000);
|
||||
console.log(` Headers built for ${session1.fingerprint.browserName}:`);
|
||||
console.log(` Order: ${orderedHeaders.join(' → ')}`);
|
||||
console.log(` Sample headers:`);
|
||||
console.log(` User-Agent: ${headers['User-Agent']?.slice(0, 50)}...`);
|
||||
console.log(` Accept: ${headers['Accept']}`);
|
||||
console.log(` Accept-Language: ${headers['Accept-Language']}`);
|
||||
console.log(` Referer: ${headers['Referer']}`);
|
||||
if (headers['sec-ch-ua']) {
|
||||
console.log(` sec-ch-ua: ${headers['sec-ch-ua']}`);
|
||||
}
|
||||
if (headers['DNT']) {
|
||||
console.log(` DNT: ${headers['DNT']}`);
|
||||
// Test 2: Random fingerprint selection
|
||||
console.log('\n[Test 2] Random Fingerprint Selection (5 samples):');
|
||||
for (let i = 0; i < 5; i++) {
|
||||
const fp = getRandomFingerprint();
|
||||
console.log(` ${i + 1}. ${fp.userAgent.slice(0, 60)}...`);
|
||||
}
|
||||
|
||||
// Test 3: Session Management
|
||||
console.log('\n[Test 3] Session Management:');
|
||||
|
||||
// Before session - should use default fingerprint
|
||||
console.log(' Before session:');
|
||||
const beforeFp = getFingerprint();
|
||||
console.log(` getFingerprint(): ${beforeFp.userAgent.slice(0, 50)}...`);
|
||||
console.log(` getCurrentSession(): ${getCurrentSession()}`);
|
||||
|
||||
// Start session with Arizona timezone
|
||||
console.log('\n Starting session (AZ, America/Phoenix):');
|
||||
const session1 = startSession('AZ', 'America/Phoenix');
|
||||
console.log(` Session ID: ${session1.sessionId}`);
|
||||
console.log(` Fingerprint UA: ${session1.fingerprint.userAgent.slice(0, 50)}...`);
|
||||
console.log(` Accept-Language: ${session1.fingerprint.acceptLanguage}`);
|
||||
console.log(` Timezone: ${session1.timezone}`);
|
||||
|
||||
// During session - should use session fingerprint
|
||||
console.log('\n During session:');
|
||||
const duringFp = getFingerprint();
|
||||
console.log(` getFingerprint(): ${duringFp.userAgent.slice(0, 50)}...`);
|
||||
console.log(` Same as session? ${duringFp.userAgent === session1.fingerprint.userAgent}`);
|
||||
|
||||
// Test buildHeaders with session
|
||||
console.log('\n buildHeaders() during session:');
|
||||
const headers = buildHeaders('/embedded-menu/test-store');
|
||||
console.log(` User-Agent: ${headers['user-agent'].slice(0, 50)}...`);
|
||||
console.log(` Accept-Language: ${headers['accept-language']}`);
|
||||
console.log(` Origin: ${headers['origin']}`);
|
||||
console.log(` Referer: ${headers['referer']}`);
|
||||
|
||||
// End session
|
||||
console.log('\n Ending session:');
|
||||
endSession();
|
||||
console.log(` getCurrentSession(): ${getCurrentSession()}`);
|
||||
|
||||
// Test 5: Multiple Sessions (UA variety)
|
||||
console.log('\n[Test 5] Multiple Sessions (UA & fingerprint variety):');
|
||||
const sessions: {
|
||||
browser: string;
|
||||
device: string;
|
||||
hasDNT: boolean;
|
||||
}[] = [];
|
||||
|
||||
// Test 4: Multiple sessions should have different fingerprints
|
||||
console.log('\n[Test 4] Multiple Sessions (fingerprint variety):');
|
||||
const fingerprints: string[] = [];
|
||||
for (let i = 0; i < 10; i++) {
|
||||
const session = startSession(`/dispensary/store-${i}`);
|
||||
sessions.push({
|
||||
browser: session.fingerprint.browserName,
|
||||
device: session.fingerprint.deviceCategory,
|
||||
hasDNT: session.fingerprint.httpFingerprint.hasDNT,
|
||||
});
|
||||
const session = startSession('CA', 'America/Los_Angeles');
|
||||
fingerprints.push(session.fingerprint.userAgent);
|
||||
endSession();
|
||||
}
|
||||
|
||||
// Count distribution
|
||||
const browserCounts: Record<string, number> = {};
|
||||
const deviceCounts: Record<string, number> = {};
|
||||
let dntCount = 0;
|
||||
const uniqueCount = new Set(fingerprints).size;
|
||||
console.log(` 10 sessions created, ${uniqueCount} unique fingerprints`);
|
||||
console.log(` Variety: ${uniqueCount >= 3 ? '✅ Good' : '⚠️ Low - may need more fingerprint options'}`);
|
||||
|
||||
for (const s of sessions) {
|
||||
browserCounts[s.browser] = (browserCounts[s.browser] || 0) + 1;
|
||||
deviceCounts[s.device] = (deviceCounts[s.device] || 0) + 1;
|
||||
if (s.hasDNT) dntCount++;
|
||||
}
|
||||
// Test 5: Geographic consistency check
|
||||
console.log('\n[Test 5] Geographic Consistency:');
|
||||
const geoTests = [
|
||||
{ state: 'AZ', tz: 'America/Phoenix' },
|
||||
{ state: 'CA', tz: 'America/Los_Angeles' },
|
||||
{ state: 'NY', tz: 'America/New_York' },
|
||||
{ state: 'IL', tz: 'America/Chicago' },
|
||||
];
|
||||
|
||||
console.log(` 10 sessions created:`);
|
||||
console.log(` Browsers: ${JSON.stringify(browserCounts)}`);
|
||||
console.log(` Devices: ${JSON.stringify(deviceCounts)}`);
|
||||
console.log(` DNT enabled: ${dntCount}/10 (expected ~30%)`);
|
||||
|
||||
// Test 6: Device distribution check (per workflow-12102025.md: 62/36/2)
|
||||
console.log('\n[Test 6] Device Distribution (larger sample):');
|
||||
const deviceSamples: string[] = [];
|
||||
|
||||
for (let i = 0; i < 100; i++) {
|
||||
const session = startSession();
|
||||
deviceSamples.push(session.fingerprint.deviceCategory);
|
||||
for (const { state, tz } of geoTests) {
|
||||
const session = startSession(state, tz);
|
||||
const consistent = session.fingerprint.acceptLanguage.includes('en-US');
|
||||
console.log(` ${state} (${tz}): Accept-Language=${session.fingerprint.acceptLanguage} ${consistent ? '✅' : '❌'}`);
|
||||
endSession();
|
||||
}
|
||||
|
||||
const mobileCount = deviceSamples.filter(d => d === 'mobile').length;
|
||||
const desktopCount = deviceSamples.filter(d => d === 'desktop').length;
|
||||
const tabletCount = deviceSamples.filter(d => d === 'tablet').length;
|
||||
|
||||
console.log(` 100 sessions (expected: 62% mobile, 36% desktop, 2% tablet):`);
|
||||
console.log(` Mobile: ${mobileCount}%`);
|
||||
console.log(` Desktop: ${desktopCount}%`);
|
||||
console.log(` Tablet: ${tabletCount}%`);
|
||||
console.log(` Distribution: ${Math.abs(mobileCount - 62) < 15 && Math.abs(desktopCount - 36) < 15 ? '✅ Reasonable' : '⚠️ Off target'}`);
|
||||
|
||||
console.log('\n' + '='.repeat(60));
|
||||
console.log('TEST COMPLETE');
|
||||
console.log('='.repeat(60));
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -26,8 +26,6 @@ import {
|
||||
PenetrationDataPoint,
|
||||
BrandMarketPosition,
|
||||
BrandRecVsMedFootprint,
|
||||
BrandPromotionalSummary,
|
||||
BrandPromotionalEvent,
|
||||
} from './types';
|
||||
|
||||
export class BrandPenetrationService {
|
||||
@@ -46,17 +44,16 @@ export class BrandPenetrationService {
|
||||
// Get current brand presence
|
||||
const currentResult = await this.pool.query(`
|
||||
SELECT
|
||||
sp.brand_name_raw AS brand_name,
|
||||
sp.brand_name,
|
||||
COUNT(DISTINCT sp.dispensary_id) AS total_dispensaries,
|
||||
COUNT(*) AS total_skus,
|
||||
ROUND(COUNT(*)::NUMERIC / NULLIF(COUNT(DISTINCT sp.dispensary_id), 0), 2) AS avg_skus_per_dispensary,
|
||||
ARRAY_AGG(DISTINCT s.code) FILTER (WHERE s.code IS NOT NULL) AS states_present
|
||||
FROM store_products sp
|
||||
JOIN dispensaries d ON d.id = sp.dispensary_id
|
||||
LEFT JOIN states s ON s.id = d.state_id
|
||||
WHERE sp.brand_name_raw = $1
|
||||
LEFT JOIN states s ON s.id = sp.state_id
|
||||
WHERE sp.brand_name = $1
|
||||
AND sp.is_in_stock = TRUE
|
||||
GROUP BY sp.brand_name_raw
|
||||
GROUP BY sp.brand_name
|
||||
`, [brandName]);
|
||||
|
||||
if (currentResult.rows.length === 0) {
|
||||
@@ -75,7 +72,7 @@ export class BrandPenetrationService {
|
||||
DATE(sps.captured_at) AS date,
|
||||
COUNT(DISTINCT sps.dispensary_id) AS dispensary_count
|
||||
FROM store_product_snapshots sps
|
||||
WHERE sps.brand_name_raw = $1
|
||||
WHERE sps.brand_name = $1
|
||||
AND sps.captured_at >= $2
|
||||
AND sps.captured_at <= $3
|
||||
AND sps.is_in_stock = TRUE
|
||||
@@ -126,9 +123,8 @@ export class BrandPenetrationService {
|
||||
COUNT(DISTINCT sp.dispensary_id) AS dispensary_count,
|
||||
COUNT(*) AS sku_count
|
||||
FROM store_products sp
|
||||
JOIN dispensaries d ON d.id = sp.dispensary_id
|
||||
JOIN states s ON s.id = d.state_id
|
||||
WHERE sp.brand_name_raw = $1
|
||||
JOIN states s ON s.id = sp.state_id
|
||||
WHERE sp.brand_name = $1
|
||||
AND sp.is_in_stock = TRUE
|
||||
GROUP BY s.code, s.name, s.recreational_legal, s.medical_legal
|
||||
),
|
||||
@@ -137,8 +133,7 @@ export class BrandPenetrationService {
|
||||
s.code AS state_code,
|
||||
COUNT(DISTINCT sp.dispensary_id) AS total_dispensaries
|
||||
FROM store_products sp
|
||||
JOIN dispensaries d ON d.id = sp.dispensary_id
|
||||
JOIN states s ON s.id = d.state_id
|
||||
JOIN states s ON s.id = sp.state_id
|
||||
WHERE sp.is_in_stock = TRUE
|
||||
GROUP BY s.code
|
||||
)
|
||||
@@ -174,7 +169,7 @@ export class BrandPenetrationService {
|
||||
let filters = '';
|
||||
|
||||
if (options.category) {
|
||||
filters += ` AND sp.category_raw = $${paramIdx}`;
|
||||
filters += ` AND sp.category = $${paramIdx}`;
|
||||
params.push(options.category);
|
||||
paramIdx++;
|
||||
}
|
||||
@@ -188,33 +183,31 @@ export class BrandPenetrationService {
|
||||
const result = await this.pool.query(`
|
||||
WITH brand_metrics AS (
|
||||
SELECT
|
||||
sp.brand_name_raw AS brand_name,
|
||||
sp.category_raw AS category,
|
||||
sp.brand_name,
|
||||
sp.category,
|
||||
s.code AS state_code,
|
||||
COUNT(*) AS sku_count,
|
||||
COUNT(DISTINCT sp.dispensary_id) AS dispensary_count,
|
||||
AVG(sp.price_rec) AS avg_price
|
||||
FROM store_products sp
|
||||
JOIN dispensaries d ON d.id = sp.dispensary_id
|
||||
JOIN states s ON s.id = d.state_id
|
||||
WHERE sp.brand_name_raw = $1
|
||||
JOIN states s ON s.id = sp.state_id
|
||||
WHERE sp.brand_name = $1
|
||||
AND sp.is_in_stock = TRUE
|
||||
AND sp.category_raw IS NOT NULL
|
||||
AND sp.category IS NOT NULL
|
||||
${filters}
|
||||
GROUP BY sp.brand_name_raw, sp.category_raw, s.code
|
||||
GROUP BY sp.brand_name, sp.category, s.code
|
||||
),
|
||||
category_totals AS (
|
||||
SELECT
|
||||
sp.category_raw AS category,
|
||||
sp.category,
|
||||
s.code AS state_code,
|
||||
COUNT(*) AS total_skus,
|
||||
AVG(sp.price_rec) AS category_avg_price
|
||||
FROM store_products sp
|
||||
JOIN dispensaries d ON d.id = sp.dispensary_id
|
||||
JOIN states s ON s.id = d.state_id
|
||||
JOIN states s ON s.id = sp.state_id
|
||||
WHERE sp.is_in_stock = TRUE
|
||||
AND sp.category_raw IS NOT NULL
|
||||
GROUP BY sp.category_raw, s.code
|
||||
AND sp.category IS NOT NULL
|
||||
GROUP BY sp.category, s.code
|
||||
)
|
||||
SELECT
|
||||
bm.*,
|
||||
@@ -250,9 +243,8 @@ export class BrandPenetrationService {
|
||||
COUNT(DISTINCT sp.dispensary_id) AS dispensary_count,
|
||||
ROUND(COUNT(*)::NUMERIC / NULLIF(COUNT(DISTINCT sp.dispensary_id), 0), 2) AS avg_skus
|
||||
FROM store_products sp
|
||||
JOIN dispensaries d ON d.id = sp.dispensary_id
|
||||
JOIN states s ON s.id = d.state_id
|
||||
WHERE sp.brand_name_raw = $1
|
||||
JOIN states s ON s.id = sp.state_id
|
||||
WHERE sp.brand_name = $1
|
||||
AND sp.is_in_stock = TRUE
|
||||
AND s.recreational_legal = TRUE
|
||||
),
|
||||
@@ -263,9 +255,8 @@ export class BrandPenetrationService {
|
||||
COUNT(DISTINCT sp.dispensary_id) AS dispensary_count,
|
||||
ROUND(COUNT(*)::NUMERIC / NULLIF(COUNT(DISTINCT sp.dispensary_id), 0), 2) AS avg_skus
|
||||
FROM store_products sp
|
||||
JOIN dispensaries d ON d.id = sp.dispensary_id
|
||||
JOIN states s ON s.id = d.state_id
|
||||
WHERE sp.brand_name_raw = $1
|
||||
JOIN states s ON s.id = sp.state_id
|
||||
WHERE sp.brand_name = $1
|
||||
AND sp.is_in_stock = TRUE
|
||||
AND s.medical_legal = TRUE
|
||||
AND (s.recreational_legal = FALSE OR s.recreational_legal IS NULL)
|
||||
@@ -320,24 +311,23 @@ export class BrandPenetrationService {
|
||||
}
|
||||
|
||||
if (category) {
|
||||
filters += ` AND sp.category_raw = $${paramIdx}`;
|
||||
filters += ` AND sp.category = $${paramIdx}`;
|
||||
params.push(category);
|
||||
paramIdx++;
|
||||
}
|
||||
|
||||
const result = await this.pool.query(`
|
||||
SELECT
|
||||
sp.brand_name_raw AS brand_name,
|
||||
sp.brand_name,
|
||||
COUNT(DISTINCT sp.dispensary_id) AS dispensary_count,
|
||||
COUNT(*) AS sku_count,
|
||||
COUNT(DISTINCT s.code) AS state_count
|
||||
FROM store_products sp
|
||||
JOIN dispensaries d ON d.id = sp.dispensary_id
|
||||
LEFT JOIN states s ON s.id = d.state_id
|
||||
WHERE sp.brand_name_raw IS NOT NULL
|
||||
LEFT JOIN states s ON s.id = sp.state_id
|
||||
WHERE sp.brand_name IS NOT NULL
|
||||
AND sp.is_in_stock = TRUE
|
||||
${filters}
|
||||
GROUP BY sp.brand_name_raw
|
||||
GROUP BY sp.brand_name
|
||||
ORDER BY dispensary_count DESC, sku_count DESC
|
||||
LIMIT $1
|
||||
`, params);
|
||||
@@ -368,23 +358,23 @@ export class BrandPenetrationService {
|
||||
const result = await this.pool.query(`
|
||||
WITH start_counts AS (
|
||||
SELECT
|
||||
brand_name_raw AS brand_name,
|
||||
brand_name,
|
||||
COUNT(DISTINCT dispensary_id) AS dispensary_count
|
||||
FROM store_product_snapshots
|
||||
WHERE captured_at >= $1 AND captured_at < $1 + INTERVAL '1 day'
|
||||
AND brand_name_raw IS NOT NULL
|
||||
AND brand_name IS NOT NULL
|
||||
AND is_in_stock = TRUE
|
||||
GROUP BY brand_name_raw
|
||||
GROUP BY brand_name
|
||||
),
|
||||
end_counts AS (
|
||||
SELECT
|
||||
brand_name_raw AS brand_name,
|
||||
brand_name,
|
||||
COUNT(DISTINCT dispensary_id) AS dispensary_count
|
||||
FROM store_product_snapshots
|
||||
WHERE captured_at >= $2 - INTERVAL '1 day' AND captured_at <= $2
|
||||
AND brand_name_raw IS NOT NULL
|
||||
AND brand_name IS NOT NULL
|
||||
AND is_in_stock = TRUE
|
||||
GROUP BY brand_name_raw
|
||||
GROUP BY brand_name
|
||||
)
|
||||
SELECT
|
||||
COALESCE(sc.brand_name, ec.brand_name) AS brand_name,
|
||||
@@ -411,225 +401,6 @@ export class BrandPenetrationService {
|
||||
change_percent: row.change_percent ? parseFloat(row.change_percent) : 0,
|
||||
}));
|
||||
}
|
||||
|
||||
/**
 * Get brand promotional history.
 *
 * Detects promotional events for a brand by finding snapshots where
 * `is_on_special` flips to TRUE, pairs each with the next snapshot where it
 * flips back to FALSE, and reports duration, discount and an estimated
 * quantity sold (stock at start minus stock at end, floored at 0).
 *
 * @param brandName raw brand name, matched against `brand_name_raw`
 * @param options.window      time window to analyse (default '90d')
 * @param options.customRange explicit date range, passed to getDateRangeFromWindow
 * @param options.stateCode   optional state code filter
 * @param options.category    optional raw category filter
 * @returns per-event details plus aggregate statistics for the window
 */
async getBrandPromotionalHistory(
  brandName: string,
  options: { window?: TimeWindow; customRange?: DateRange; stateCode?: string; category?: string } = {}
): Promise<BrandPromotionalSummary> {
  const { window = '90d', customRange, stateCode, category } = options;
  const { start, end } = getDateRangeFromWindow(window, customRange);

  // Parse a nullable integer column. Only null/undefined (or unparsable input)
  // becomes null; a genuine 0 is kept. A truthiness check would report a
  // same-day promotion or an empty stock level as "unknown".
  const toIntOrNull = (value: unknown): number | null => {
    if (value === null || value === undefined) return null;
    const parsed = parseInt(String(value), 10);
    return Number.isNaN(parsed) ? null : parsed;
  };

  // Build filters. $1..$3 are fixed; optional filters continue from $4.
  const params: any[] = [brandName, start, end];
  let paramIdx = 4;
  let filters = '';

  if (stateCode) {
    filters += ` AND s.code = $${paramIdx}`;
    params.push(stateCode);
    paramIdx++;
  }

  if (category) {
    filters += ` AND sp.category_raw = $${paramIdx}`;
    params.push(category);
    paramIdx++;
  }

  // Find promotional events by detecting when is_on_special transitions to TRUE
  // and tracking until it transitions back to FALSE.
  // The LAG() window only sees snapshots inside the date range, so a special
  // already running at the start of the window counts as starting at its
  // first in-window snapshot.
  const eventsResult = await this.pool.query(`
    WITH snapshot_with_lag AS (
      SELECT
        sps.id,
        sps.store_product_id,
        sps.dispensary_id,
        sps.brand_name_raw,
        sps.name_raw,
        sps.category_raw,
        sps.is_on_special,
        sps.price_rec,
        sps.price_rec_special,
        sps.stock_quantity,
        sps.captured_at,
        LAG(sps.is_on_special) OVER (
          PARTITION BY sps.store_product_id
          ORDER BY sps.captured_at
        ) AS prev_is_on_special,
        LAG(sps.stock_quantity) OVER (
          PARTITION BY sps.store_product_id
          ORDER BY sps.captured_at
        ) AS prev_stock_quantity
      FROM store_product_snapshots sps
      JOIN store_products sp ON sp.id = sps.store_product_id
      JOIN dispensaries dd ON dd.id = sp.dispensary_id
      LEFT JOIN states s ON s.id = dd.state_id
      WHERE sps.brand_name_raw = $1
        AND sps.captured_at >= $2
        AND sps.captured_at <= $3
        ${filters}
    ),
    special_starts AS (
      -- Find when specials START (transition from not-on-special to on-special)
      SELECT
        store_product_id,
        dispensary_id,
        name_raw,
        category_raw,
        captured_at AS special_start,
        price_rec AS regular_price,
        price_rec_special AS special_price,
        stock_quantity AS quantity_at_start
      FROM snapshot_with_lag
      WHERE is_on_special = TRUE
        AND (prev_is_on_special = FALSE OR prev_is_on_special IS NULL)
        AND price_rec_special IS NOT NULL
        AND price_rec IS NOT NULL
    ),
    special_ends AS (
      -- Find when specials END (transition from on-special to not-on-special)
      SELECT
        store_product_id,
        captured_at AS special_end,
        prev_stock_quantity AS quantity_at_end
      FROM snapshot_with_lag
      WHERE is_on_special = FALSE
        AND prev_is_on_special = TRUE
    ),
    matched_events AS (
      SELECT
        ss.store_product_id,
        ss.dispensary_id,
        ss.name_raw AS product_name,
        ss.category_raw AS category,
        ss.special_start,
        se.special_end,
        ss.regular_price,
        ss.special_price,
        ss.quantity_at_start,
        COALESCE(se.quantity_at_end, ss.quantity_at_start) AS quantity_at_end
      FROM special_starts ss
      LEFT JOIN special_ends se ON se.store_product_id = ss.store_product_id
        AND se.special_end > ss.special_start
        AND se.special_end = (
          SELECT MIN(se2.special_end)
          FROM special_ends se2
          WHERE se2.store_product_id = ss.store_product_id
            AND se2.special_end > ss.special_start
        )
    )
    SELECT
      me.store_product_id,
      me.dispensary_id,
      d.name AS dispensary_name,
      s.code AS state_code,
      me.product_name,
      me.category,
      me.special_start,
      me.special_end,
      EXTRACT(DAY FROM COALESCE(me.special_end, NOW()) - me.special_start)::INT AS duration_days,
      me.regular_price,
      me.special_price,
      ROUND(((me.regular_price - me.special_price) / NULLIF(me.regular_price, 0)) * 100, 1) AS discount_percent,
      me.quantity_at_start,
      me.quantity_at_end,
      GREATEST(0, COALESCE(me.quantity_at_start, 0) - COALESCE(me.quantity_at_end, 0)) AS quantity_sold_estimate
    FROM matched_events me
    JOIN dispensaries d ON d.id = me.dispensary_id
    LEFT JOIN states s ON s.id = d.state_id
    ORDER BY me.special_start DESC
  `, params);

  const events: BrandPromotionalEvent[] = eventsResult.rows.map((row: any) => {
    const quantityAtStart = toIntOrNull(row.quantity_at_start);
    const quantityAtEnd = toIntOrNull(row.quantity_at_end);

    return {
      product_name: row.product_name,
      store_product_id: parseInt(row.store_product_id, 10),
      dispensary_id: parseInt(row.dispensary_id, 10),
      dispensary_name: row.dispensary_name,
      state_code: row.state_code || 'Unknown',
      category: row.category,
      // Dates are reported as YYYY-MM-DD; special_end is null while ongoing.
      special_start: row.special_start.toISOString().split('T')[0],
      special_end: row.special_end ? row.special_end.toISOString().split('T')[0] : null,
      duration_days: toIntOrNull(row.duration_days),
      regular_price: parseFloat(row.regular_price) || 0,
      special_price: parseFloat(row.special_price) || 0,
      discount_percent: parseFloat(row.discount_percent) || 0,
      quantity_at_start: quantityAtStart,
      quantity_at_end: quantityAtEnd,
      // The SQL coalesces unknown stock to 0, so only trust the estimate when
      // both stock readings were actually recorded.
      quantity_sold_estimate:
        quantityAtStart === null || quantityAtEnd === null
          ? null
          : toIntOrNull(row.quantity_sold_estimate),
    };
  });

  // Calculate summary stats
  const totalEvents = events.length;
  const uniqueProducts = new Set(events.map(e => e.store_product_id)).size;
  const uniqueDispensaries = new Set(events.map(e => e.dispensary_id)).size;
  const uniqueStates = [...new Set(events.map(e => e.state_code))];

  const avgDiscount = totalEvents > 0
    ? events.reduce((sum, e) => sum + e.discount_percent, 0) / totalEvents
    : 0;

  const durations = events.filter(e => e.duration_days !== null).map(e => e.duration_days!);
  const avgDuration = durations.length > 0
    ? durations.reduce((sum, d) => sum + d, 0) / durations.length
    : null;

  const totalQuantitySold = events
    .filter(e => e.quantity_sold_estimate !== null)
    .reduce((sum, e) => sum + (e.quantity_sold_estimate || 0), 0);

  // Calculate frequency: events per day scaled to 7-day and 30-day periods.
  const windowDays = Math.ceil((end.getTime() - start.getTime()) / (1000 * 60 * 60 * 24));
  const weeklyAvg = windowDays > 0 ? (totalEvents / windowDays) * 7 : 0;
  const monthlyAvg = windowDays > 0 ? (totalEvents / windowDays) * 30 : 0;

  // Group by category
  const categoryMap = new Map<string, { count: number; discounts: number[]; quantity: number }>();
  for (const event of events) {
    const cat = event.category || 'Uncategorized';
    if (!categoryMap.has(cat)) {
      categoryMap.set(cat, { count: 0, discounts: [], quantity: 0 });
    }
    const entry = categoryMap.get(cat)!;
    entry.count++;
    entry.discounts.push(event.discount_percent);
    if (event.quantity_sold_estimate !== null) {
      entry.quantity += event.quantity_sold_estimate;
    }
  }

  const byCategory = Array.from(categoryMap.entries()).map(([category, data]) => ({
    category,
    event_count: data.count,
    avg_discount_percent: data.discounts.length > 0
      ? Math.round((data.discounts.reduce((a, b) => a + b, 0) / data.discounts.length) * 10) / 10
      : 0,
    quantity_sold_estimate: data.quantity > 0 ? data.quantity : null,
  })).sort((a, b) => b.event_count - a.event_count);

  // Averages are rounded to one decimal place.
  return {
    brand_name: brandName,
    window,
    total_promotional_events: totalEvents,
    total_products_on_special: uniqueProducts,
    total_dispensaries_with_specials: uniqueDispensaries,
    states_with_specials: uniqueStates,
    avg_discount_percent: Math.round(avgDiscount * 10) / 10,
    avg_duration_days: avgDuration !== null ? Math.round(avgDuration * 10) / 10 : null,
    total_quantity_sold_estimate: totalQuantitySold > 0 ? totalQuantitySold : null,
    promotional_frequency: {
      weekly_avg: Math.round(weeklyAvg * 10) / 10,
      monthly_avg: Math.round(monthlyAvg * 10) / 10,
    },
    by_category: byCategory,
    events,
  };
}
|
||||
}
|
||||
|
||||
export default BrandPenetrationService;
|
||||
|
||||
@@ -43,14 +43,14 @@ export class CategoryAnalyticsService {
|
||||
// Get current category metrics
|
||||
const currentResult = await this.pool.query(`
|
||||
SELECT
|
||||
sp.category_raw,
|
||||
sp.category,
|
||||
COUNT(*) AS sku_count,
|
||||
COUNT(DISTINCT sp.dispensary_id) AS dispensary_count,
|
||||
AVG(sp.price_rec) AS avg_price
|
||||
FROM store_products sp
|
||||
WHERE sp.category_raw = $1
|
||||
WHERE sp.category = $1
|
||||
AND sp.is_in_stock = TRUE
|
||||
GROUP BY sp.category_raw
|
||||
GROUP BY sp.category
|
||||
`, [category]);
|
||||
|
||||
if (currentResult.rows.length === 0) {
|
||||
@@ -70,7 +70,7 @@ export class CategoryAnalyticsService {
|
||||
COUNT(DISTINCT sps.dispensary_id) AS dispensary_count,
|
||||
AVG(sps.price_rec) AS avg_price
|
||||
FROM store_product_snapshots sps
|
||||
WHERE sps.category_raw = $1
|
||||
WHERE sps.category = $1
|
||||
AND sps.captured_at >= $2
|
||||
AND sps.captured_at <= $3
|
||||
AND sps.is_in_stock = TRUE
|
||||
@@ -111,9 +111,8 @@ export class CategoryAnalyticsService {
|
||||
COUNT(DISTINCT sp.dispensary_id) AS dispensary_count,
|
||||
AVG(sp.price_rec) AS avg_price
|
||||
FROM store_products sp
|
||||
JOIN dispensaries d ON d.id = sp.dispensary_id
|
||||
JOIN states s ON s.id = d.state_id
|
||||
WHERE sp.category_raw = $1
|
||||
JOIN states s ON s.id = sp.state_id
|
||||
WHERE sp.category = $1
|
||||
AND sp.is_in_stock = TRUE
|
||||
GROUP BY s.code, s.name, s.recreational_legal
|
||||
ORDER BY sku_count DESC
|
||||
@@ -155,25 +154,24 @@ export class CategoryAnalyticsService {
|
||||
|
||||
const result = await this.pool.query(`
|
||||
SELECT
|
||||
sp.category_raw,
|
||||
sp.category,
|
||||
COUNT(*) AS sku_count,
|
||||
COUNT(DISTINCT sp.dispensary_id) AS dispensary_count,
|
||||
COUNT(DISTINCT sp.brand_name_raw) AS brand_count,
|
||||
COUNT(DISTINCT sp.brand_name) AS brand_count,
|
||||
AVG(sp.price_rec) AS avg_price,
|
||||
COUNT(DISTINCT s.code) AS state_count
|
||||
FROM store_products sp
|
||||
LEFT JOIN dispensaries d ON d.id = sp.dispensary_id
|
||||
JOIN states s ON s.id = d.state_id
|
||||
WHERE sp.category_raw IS NOT NULL
|
||||
LEFT JOIN states s ON s.id = sp.state_id
|
||||
WHERE sp.category IS NOT NULL
|
||||
AND sp.is_in_stock = TRUE
|
||||
${stateFilter}
|
||||
GROUP BY sp.category_raw
|
||||
GROUP BY sp.category
|
||||
ORDER BY sku_count DESC
|
||||
LIMIT $1
|
||||
`, params);
|
||||
|
||||
return result.rows.map((row: any) => ({
|
||||
category: row.category_raw,
|
||||
category: row.category,
|
||||
sku_count: parseInt(row.sku_count),
|
||||
dispensary_count: parseInt(row.dispensary_count),
|
||||
brand_count: parseInt(row.brand_count),
|
||||
@@ -190,14 +188,14 @@ export class CategoryAnalyticsService {
|
||||
let categoryFilter = '';
|
||||
|
||||
if (category) {
|
||||
categoryFilter = 'WHERE sp.category_raw = $1';
|
||||
categoryFilter = 'WHERE sp.category = $1';
|
||||
params.push(category);
|
||||
}
|
||||
|
||||
const result = await this.pool.query(`
|
||||
WITH category_stats AS (
|
||||
SELECT
|
||||
sp.category_raw,
|
||||
sp.category,
|
||||
CASE WHEN s.recreational_legal = TRUE THEN 'recreational' ELSE 'medical_only' END AS legal_type,
|
||||
COUNT(DISTINCT s.code) AS state_count,
|
||||
COUNT(DISTINCT sp.dispensary_id) AS dispensary_count,
|
||||
@@ -205,14 +203,13 @@ export class CategoryAnalyticsService {
|
||||
AVG(sp.price_rec) AS avg_price,
|
||||
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY sp.price_rec) AS median_price
|
||||
FROM store_products sp
|
||||
JOIN dispensaries d ON d.id = sp.dispensary_id
|
||||
JOIN states s ON s.id = d.state_id
|
||||
JOIN states s ON s.id = sp.state_id
|
||||
${categoryFilter}
|
||||
${category ? 'AND' : 'WHERE'} sp.category_raw IS NOT NULL
|
||||
${category ? 'AND' : 'WHERE'} sp.category IS NOT NULL
|
||||
AND sp.is_in_stock = TRUE
|
||||
AND sp.price_rec IS NOT NULL
|
||||
AND (s.recreational_legal = TRUE OR s.medical_legal = TRUE)
|
||||
GROUP BY sp.category_raw, CASE WHEN s.recreational_legal = TRUE THEN 'recreational' ELSE 'medical_only' END
|
||||
GROUP BY sp.category, CASE WHEN s.recreational_legal = TRUE THEN 'recreational' ELSE 'medical_only' END
|
||||
),
|
||||
rec_stats AS (
|
||||
SELECT * FROM category_stats WHERE legal_type = 'recreational'
|
||||
@@ -221,7 +218,7 @@ export class CategoryAnalyticsService {
|
||||
SELECT * FROM category_stats WHERE legal_type = 'medical_only'
|
||||
)
|
||||
SELECT
|
||||
COALESCE(r.category_raw, m.category_raw) AS category,
|
||||
COALESCE(r.category, m.category) AS category,
|
||||
r.state_count AS rec_state_count,
|
||||
r.dispensary_count AS rec_dispensary_count,
|
||||
r.sku_count AS rec_sku_count,
|
||||
@@ -238,7 +235,7 @@ export class CategoryAnalyticsService {
|
||||
ELSE NULL
|
||||
END AS price_diff_percent
|
||||
FROM rec_stats r
|
||||
FULL OUTER JOIN med_stats m ON r.category_raw = m.category_raw
|
||||
FULL OUTER JOIN med_stats m ON r.category = m.category
|
||||
ORDER BY COALESCE(r.sku_count, 0) + COALESCE(m.sku_count, 0) DESC
|
||||
`, params);
|
||||
|
||||
@@ -285,7 +282,7 @@ export class CategoryAnalyticsService {
|
||||
COUNT(*) AS sku_count,
|
||||
COUNT(DISTINCT sps.dispensary_id) AS dispensary_count
|
||||
FROM store_product_snapshots sps
|
||||
WHERE sps.category_raw = $1
|
||||
WHERE sps.category = $1
|
||||
AND sps.captured_at >= $2
|
||||
AND sps.captured_at <= $3
|
||||
AND sps.is_in_stock = TRUE
|
||||
@@ -338,33 +335,31 @@ export class CategoryAnalyticsService {
|
||||
WITH category_total AS (
|
||||
SELECT COUNT(*) AS total
|
||||
FROM store_products sp
|
||||
LEFT JOIN dispensaries d ON d.id = sp.dispensary_id
|
||||
JOIN states s ON s.id = d.state_id
|
||||
WHERE sp.category_raw = $1
|
||||
LEFT JOIN states s ON s.id = sp.state_id
|
||||
WHERE sp.category = $1
|
||||
AND sp.is_in_stock = TRUE
|
||||
AND sp.brand_name_raw IS NOT NULL
|
||||
AND sp.brand_name IS NOT NULL
|
||||
${stateFilter}
|
||||
)
|
||||
SELECT
|
||||
sp.brand_name_raw,
|
||||
sp.brand_name,
|
||||
COUNT(*) AS sku_count,
|
||||
COUNT(DISTINCT sp.dispensary_id) AS dispensary_count,
|
||||
AVG(sp.price_rec) AS avg_price,
|
||||
ROUND(COUNT(*)::NUMERIC * 100 / NULLIF((SELECT total FROM category_total), 0), 2) AS category_share_percent
|
||||
FROM store_products sp
|
||||
LEFT JOIN dispensaries d ON d.id = sp.dispensary_id
|
||||
JOIN states s ON s.id = d.state_id
|
||||
WHERE sp.category_raw = $1
|
||||
LEFT JOIN states s ON s.id = sp.state_id
|
||||
WHERE sp.category = $1
|
||||
AND sp.is_in_stock = TRUE
|
||||
AND sp.brand_name_raw IS NOT NULL
|
||||
AND sp.brand_name IS NOT NULL
|
||||
${stateFilter}
|
||||
GROUP BY sp.brand_name_raw
|
||||
GROUP BY sp.brand_name
|
||||
ORDER BY sku_count DESC
|
||||
LIMIT $2
|
||||
`, params);
|
||||
|
||||
return result.rows.map((row: any) => ({
|
||||
brand_name: row.brand_name_raw,
|
||||
brand_name: row.brand_name,
|
||||
sku_count: parseInt(row.sku_count),
|
||||
dispensary_count: parseInt(row.dispensary_count),
|
||||
avg_price: row.avg_price ? parseFloat(row.avg_price) : null,
|
||||
@@ -426,7 +421,7 @@ export class CategoryAnalyticsService {
|
||||
`, [start, end, limit]);
|
||||
|
||||
return result.rows.map((row: any) => ({
|
||||
category: row.category_raw,
|
||||
category: row.category,
|
||||
start_sku_count: parseInt(row.start_sku_count),
|
||||
end_sku_count: parseInt(row.end_sku_count),
|
||||
growth: parseInt(row.growth),
|
||||
|
||||
@@ -43,9 +43,9 @@ export class PriceAnalyticsService {
|
||||
const productResult = await this.pool.query(`
|
||||
SELECT
|
||||
sp.id,
|
||||
sp.name_raw,
|
||||
sp.brand_name_raw,
|
||||
sp.category_raw,
|
||||
sp.name,
|
||||
sp.brand_name,
|
||||
sp.category,
|
||||
sp.dispensary_id,
|
||||
sp.price_rec,
|
||||
sp.price_med,
|
||||
@@ -53,7 +53,7 @@ export class PriceAnalyticsService {
|
||||
s.code AS state_code
|
||||
FROM store_products sp
|
||||
JOIN dispensaries d ON d.id = sp.dispensary_id
|
||||
JOIN states s ON s.id = d.state_id
|
||||
LEFT JOIN states s ON s.id = sp.state_id
|
||||
WHERE sp.id = $1
|
||||
`, [storeProductId]);
|
||||
|
||||
@@ -133,7 +133,7 @@ export class PriceAnalyticsService {
|
||||
|
||||
const result = await this.pool.query(`
|
||||
SELECT
|
||||
sp.category_raw,
|
||||
sp.category,
|
||||
s.code AS state_code,
|
||||
s.name AS state_name,
|
||||
CASE
|
||||
@@ -148,18 +148,18 @@ export class PriceAnalyticsService {
|
||||
COUNT(DISTINCT sp.dispensary_id) AS dispensary_count
|
||||
FROM store_products sp
|
||||
JOIN dispensaries d ON d.id = sp.dispensary_id
|
||||
JOIN states s ON s.id = d.state_id
|
||||
WHERE sp.category_raw = $1
|
||||
JOIN states s ON s.id = sp.state_id
|
||||
WHERE sp.category = $1
|
||||
AND sp.price_rec IS NOT NULL
|
||||
AND sp.is_in_stock = TRUE
|
||||
AND (s.recreational_legal = TRUE OR s.medical_legal = TRUE)
|
||||
${stateFilter}
|
||||
GROUP BY sp.category_raw, s.code, s.name, s.recreational_legal
|
||||
GROUP BY sp.category, s.code, s.name, s.recreational_legal
|
||||
ORDER BY state_code
|
||||
`, params);
|
||||
|
||||
return result.rows.map((row: any) => ({
|
||||
category: row.category_raw,
|
||||
category: row.category,
|
||||
state_code: row.state_code,
|
||||
state_name: row.state_name,
|
||||
legal_type: row.legal_type,
|
||||
@@ -189,7 +189,7 @@ export class PriceAnalyticsService {
|
||||
|
||||
const result = await this.pool.query(`
|
||||
SELECT
|
||||
sp.brand_name_raw AS category,
|
||||
sp.brand_name AS category,
|
||||
s.code AS state_code,
|
||||
s.name AS state_name,
|
||||
CASE
|
||||
@@ -204,18 +204,18 @@ export class PriceAnalyticsService {
|
||||
COUNT(DISTINCT sp.dispensary_id) AS dispensary_count
|
||||
FROM store_products sp
|
||||
JOIN dispensaries d ON d.id = sp.dispensary_id
|
||||
JOIN states s ON s.id = d.state_id
|
||||
WHERE sp.brand_name_raw = $1
|
||||
JOIN states s ON s.id = sp.state_id
|
||||
WHERE sp.brand_name = $1
|
||||
AND sp.price_rec IS NOT NULL
|
||||
AND sp.is_in_stock = TRUE
|
||||
AND (s.recreational_legal = TRUE OR s.medical_legal = TRUE)
|
||||
${stateFilter}
|
||||
GROUP BY sp.brand_name_raw, s.code, s.name, s.recreational_legal
|
||||
GROUP BY sp.brand_name, s.code, s.name, s.recreational_legal
|
||||
ORDER BY state_code
|
||||
`, params);
|
||||
|
||||
return result.rows.map((row: any) => ({
|
||||
category: row.category_raw,
|
||||
category: row.category,
|
||||
state_code: row.state_code,
|
||||
state_name: row.state_name,
|
||||
legal_type: row.legal_type,
|
||||
@@ -254,7 +254,7 @@ export class PriceAnalyticsService {
|
||||
}
|
||||
|
||||
if (category) {
|
||||
filters += ` AND sp.category_raw = $${paramIdx}`;
|
||||
filters += ` AND sp.category = $${paramIdx}`;
|
||||
params.push(category);
|
||||
paramIdx++;
|
||||
}
|
||||
@@ -288,16 +288,15 @@ export class PriceAnalyticsService {
|
||||
)
|
||||
SELECT
|
||||
v.store_product_id,
|
||||
sp.name_raw AS product_name,
|
||||
sp.brand_name_raw,
|
||||
sp.name AS product_name,
|
||||
sp.brand_name,
|
||||
v.change_count,
|
||||
v.avg_change_pct,
|
||||
v.max_change_pct,
|
||||
v.last_change_at
|
||||
FROM volatility v
|
||||
JOIN store_products sp ON sp.id = v.store_product_id
|
||||
LEFT JOIN dispensaries d ON d.id = sp.dispensary_id
|
||||
JOIN states s ON s.id = d.state_id
|
||||
LEFT JOIN states s ON s.id = sp.state_id
|
||||
WHERE 1=1 ${filters}
|
||||
ORDER BY v.change_count DESC, v.avg_change_pct DESC
|
||||
LIMIT $3
|
||||
@@ -306,7 +305,7 @@ export class PriceAnalyticsService {
|
||||
return result.rows.map((row: any) => ({
|
||||
store_product_id: row.store_product_id,
|
||||
product_name: row.product_name,
|
||||
brand_name: row.brand_name_raw,
|
||||
brand_name: row.brand_name,
|
||||
change_count: parseInt(row.change_count),
|
||||
avg_change_percent: row.avg_change_pct ? parseFloat(row.avg_change_pct) : 0,
|
||||
max_change_percent: row.max_change_pct ? parseFloat(row.max_change_pct) : 0,
|
||||
@@ -328,13 +327,13 @@ export class PriceAnalyticsService {
|
||||
let categoryFilter = '';
|
||||
|
||||
if (category) {
|
||||
categoryFilter = 'WHERE sp.category_raw = $1';
|
||||
categoryFilter = 'WHERE sp.category = $1';
|
||||
params.push(category);
|
||||
}
|
||||
|
||||
const result = await this.pool.query(`
|
||||
SELECT
|
||||
sp.category_raw,
|
||||
sp.category,
|
||||
AVG(sp.price_rec) FILTER (WHERE s.recreational_legal = TRUE) AS rec_avg,
|
||||
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY sp.price_rec)
|
||||
FILTER (WHERE s.recreational_legal = TRUE) AS rec_median,
|
||||
@@ -344,18 +343,17 @@ export class PriceAnalyticsService {
|
||||
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY sp.price_rec)
|
||||
FILTER (WHERE s.medical_legal = TRUE AND (s.recreational_legal = FALSE OR s.recreational_legal IS NULL)) AS med_median
|
||||
FROM store_products sp
|
||||
JOIN dispensaries d ON d.id = sp.dispensary_id
|
||||
JOIN states s ON s.id = d.state_id
|
||||
JOIN states s ON s.id = sp.state_id
|
||||
${categoryFilter}
|
||||
${category ? 'AND' : 'WHERE'} sp.price_rec IS NOT NULL
|
||||
AND sp.is_in_stock = TRUE
|
||||
AND sp.category_raw IS NOT NULL
|
||||
GROUP BY sp.category_raw
|
||||
ORDER BY sp.category_raw
|
||||
AND sp.category IS NOT NULL
|
||||
GROUP BY sp.category
|
||||
ORDER BY sp.category
|
||||
`, params);
|
||||
|
||||
return result.rows.map((row: any) => ({
|
||||
category: row.category_raw,
|
||||
category: row.category,
|
||||
rec_avg: row.rec_avg ? parseFloat(row.rec_avg) : null,
|
||||
rec_median: row.rec_median ? parseFloat(row.rec_median) : null,
|
||||
med_avg: row.med_avg ? parseFloat(row.med_avg) : null,
|
||||
|
||||
@@ -108,14 +108,14 @@ export class StateAnalyticsService {
|
||||
SELECT
|
||||
COUNT(DISTINCT d.id) AS dispensary_count,
|
||||
COUNT(DISTINCT sp.id) AS product_count,
|
||||
COUNT(DISTINCT sp.brand_name_raw) FILTER (WHERE sp.brand_name_raw IS NOT NULL) AS brand_count,
|
||||
COUNT(DISTINCT sp.category_raw) FILTER (WHERE sp.category_raw IS NOT NULL) AS category_count,
|
||||
COUNT(DISTINCT sp.brand_name) FILTER (WHERE sp.brand_name IS NOT NULL) AS brand_count,
|
||||
COUNT(DISTINCT sp.category) FILTER (WHERE sp.category IS NOT NULL) AS category_count,
|
||||
COUNT(sps.id) AS snapshot_count,
|
||||
MAX(sps.captured_at) AS last_crawl_at
|
||||
FROM states s
|
||||
LEFT JOIN dispensaries d ON d.state_id = s.id
|
||||
LEFT JOIN store_products sp ON sp.dispensary_id = d.id AND sp.is_in_stock = TRUE
|
||||
LEFT JOIN store_product_snapshots sps ON sps.dispensary_id = d.id
|
||||
LEFT JOIN store_products sp ON sp.state_id = s.id AND sp.is_in_stock = TRUE
|
||||
LEFT JOIN store_product_snapshots sps ON sps.state_id = s.id
|
||||
WHERE s.code = $1
|
||||
`, [stateCode]);
|
||||
|
||||
@@ -129,8 +129,7 @@ export class StateAnalyticsService {
|
||||
MIN(price_rec) AS min_price,
|
||||
MAX(price_rec) AS max_price
|
||||
FROM store_products sp
|
||||
JOIN dispensaries d ON d.id = sp.dispensary_id
|
||||
JOIN states s ON s.id = d.state_id
|
||||
JOIN states s ON s.id = sp.state_id
|
||||
WHERE s.code = $1
|
||||
AND sp.price_rec IS NOT NULL
|
||||
AND sp.is_in_stock = TRUE
|
||||
@@ -141,15 +140,14 @@ export class StateAnalyticsService {
|
||||
// Get top categories
|
||||
const topCategoriesResult = await this.pool.query(`
|
||||
SELECT
|
||||
sp.category_raw,
|
||||
sp.category,
|
||||
COUNT(*) AS count
|
||||
FROM store_products sp
|
||||
JOIN dispensaries d ON d.id = sp.dispensary_id
|
||||
JOIN states s ON s.id = d.state_id
|
||||
JOIN states s ON s.id = sp.state_id
|
||||
WHERE s.code = $1
|
||||
AND sp.category_raw IS NOT NULL
|
||||
AND sp.category IS NOT NULL
|
||||
AND sp.is_in_stock = TRUE
|
||||
GROUP BY sp.category_raw
|
||||
GROUP BY sp.category
|
||||
ORDER BY count DESC
|
||||
LIMIT 10
|
||||
`, [stateCode]);
|
||||
@@ -157,15 +155,14 @@ export class StateAnalyticsService {
|
||||
// Get top brands
|
||||
const topBrandsResult = await this.pool.query(`
|
||||
SELECT
|
||||
sp.brand_name_raw AS brand,
|
||||
sp.brand_name AS brand,
|
||||
COUNT(*) AS count
|
||||
FROM store_products sp
|
||||
JOIN dispensaries d ON d.id = sp.dispensary_id
|
||||
JOIN states s ON s.id = d.state_id
|
||||
JOIN states s ON s.id = sp.state_id
|
||||
WHERE s.code = $1
|
||||
AND sp.brand_name_raw IS NOT NULL
|
||||
AND sp.brand_name IS NOT NULL
|
||||
AND sp.is_in_stock = TRUE
|
||||
GROUP BY sp.brand_name_raw
|
||||
GROUP BY sp.brand_name
|
||||
ORDER BY count DESC
|
||||
LIMIT 10
|
||||
`, [stateCode]);
|
||||
@@ -194,7 +191,7 @@ export class StateAnalyticsService {
|
||||
max_price: pricing.max_price ? parseFloat(pricing.max_price) : null,
|
||||
},
|
||||
top_categories: topCategoriesResult.rows.map((row: any) => ({
|
||||
category: row.category_raw,
|
||||
category: row.category,
|
||||
count: parseInt(row.count),
|
||||
})),
|
||||
top_brands: topBrandsResult.rows.map((row: any) => ({
|
||||
@@ -218,8 +215,8 @@ export class StateAnalyticsService {
|
||||
COUNT(sps.id) AS snapshot_count
|
||||
FROM states s
|
||||
LEFT JOIN dispensaries d ON d.state_id = s.id
|
||||
LEFT JOIN store_products sp ON sp.dispensary_id = d.id AND sp.is_in_stock = TRUE
|
||||
LEFT JOIN store_product_snapshots sps ON sps.dispensary_id = d.id
|
||||
LEFT JOIN store_products sp ON sp.state_id = s.id AND sp.is_in_stock = TRUE
|
||||
LEFT JOIN store_product_snapshots sps ON sps.state_id = s.id
|
||||
WHERE s.recreational_legal = TRUE
|
||||
GROUP BY s.code, s.name
|
||||
ORDER BY dispensary_count DESC
|
||||
@@ -235,8 +232,8 @@ export class StateAnalyticsService {
|
||||
COUNT(sps.id) AS snapshot_count
|
||||
FROM states s
|
||||
LEFT JOIN dispensaries d ON d.state_id = s.id
|
||||
LEFT JOIN store_products sp ON sp.dispensary_id = d.id AND sp.is_in_stock = TRUE
|
||||
LEFT JOIN store_product_snapshots sps ON sps.dispensary_id = d.id
|
||||
LEFT JOIN store_products sp ON sp.state_id = s.id AND sp.is_in_stock = TRUE
|
||||
LEFT JOIN store_product_snapshots sps ON sps.state_id = s.id
|
||||
WHERE s.medical_legal = TRUE
|
||||
AND (s.recreational_legal = FALSE OR s.recreational_legal IS NULL)
|
||||
GROUP BY s.code, s.name
|
||||
@@ -298,48 +295,46 @@ export class StateAnalyticsService {
|
||||
let groupBy = 'NULL';
|
||||
|
||||
if (category) {
|
||||
categoryFilter = 'AND sp.category_raw = $1';
|
||||
categoryFilter = 'AND sp.category = $1';
|
||||
params.push(category);
|
||||
groupBy = 'sp.category_raw';
|
||||
groupBy = 'sp.category';
|
||||
} else {
|
||||
groupBy = 'sp.category_raw';
|
||||
groupBy = 'sp.category';
|
||||
}
|
||||
|
||||
const result = await this.pool.query(`
|
||||
WITH rec_prices AS (
|
||||
SELECT
|
||||
${category ? 'sp.category_raw' : 'sp.category_raw'},
|
||||
${category ? 'sp.category' : 'sp.category'},
|
||||
COUNT(DISTINCT s.code) AS state_count,
|
||||
COUNT(*) AS product_count,
|
||||
AVG(sp.price_rec) AS avg_price,
|
||||
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY sp.price_rec) AS median_price
|
||||
FROM store_products sp
|
||||
JOIN dispensaries d ON d.id = sp.dispensary_id
|
||||
JOIN states s ON s.id = d.state_id
|
||||
JOIN states s ON s.id = sp.state_id
|
||||
WHERE s.recreational_legal = TRUE
|
||||
AND sp.price_rec IS NOT NULL
|
||||
AND sp.is_in_stock = TRUE
|
||||
AND sp.category_raw IS NOT NULL
|
||||
AND sp.category IS NOT NULL
|
||||
${categoryFilter}
|
||||
GROUP BY sp.category_raw
|
||||
GROUP BY sp.category
|
||||
),
|
||||
med_prices AS (
|
||||
SELECT
|
||||
${category ? 'sp.category_raw' : 'sp.category_raw'},
|
||||
${category ? 'sp.category' : 'sp.category'},
|
||||
COUNT(DISTINCT s.code) AS state_count,
|
||||
COUNT(*) AS product_count,
|
||||
AVG(sp.price_rec) AS avg_price,
|
||||
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY sp.price_rec) AS median_price
|
||||
FROM store_products sp
|
||||
JOIN dispensaries d ON d.id = sp.dispensary_id
|
||||
JOIN states s ON s.id = d.state_id
|
||||
JOIN states s ON s.id = sp.state_id
|
||||
WHERE s.medical_legal = TRUE
|
||||
AND (s.recreational_legal = FALSE OR s.recreational_legal IS NULL)
|
||||
AND sp.price_rec IS NOT NULL
|
||||
AND sp.is_in_stock = TRUE
|
||||
AND sp.category_raw IS NOT NULL
|
||||
AND sp.category IS NOT NULL
|
||||
${categoryFilter}
|
||||
GROUP BY sp.category_raw
|
||||
GROUP BY sp.category
|
||||
)
|
||||
SELECT
|
||||
COALESCE(r.category, m.category) AS category,
|
||||
@@ -362,7 +357,7 @@ export class StateAnalyticsService {
|
||||
`, params);
|
||||
|
||||
return result.rows.map((row: any) => ({
|
||||
category: row.category_raw,
|
||||
category: row.category,
|
||||
recreational: {
|
||||
state_count: parseInt(row.rec_state_count) || 0,
|
||||
product_count: parseInt(row.rec_product_count) || 0,
|
||||
@@ -400,12 +395,12 @@ export class StateAnalyticsService {
|
||||
COALESCE(s.medical_legal, FALSE) AS medical_legal,
|
||||
COUNT(DISTINCT d.id) AS dispensary_count,
|
||||
COUNT(DISTINCT sp.id) AS product_count,
|
||||
COUNT(DISTINCT sp.brand_name_raw) FILTER (WHERE sp.brand_name_raw IS NOT NULL) AS brand_count,
|
||||
COUNT(DISTINCT sp.brand_name) FILTER (WHERE sp.brand_name IS NOT NULL) AS brand_count,
|
||||
MAX(sps.captured_at) AS last_crawl_at
|
||||
FROM states s
|
||||
LEFT JOIN dispensaries d ON d.state_id = s.id
|
||||
LEFT JOIN store_products sp ON sp.dispensary_id = d.id AND sp.is_in_stock = TRUE
|
||||
LEFT JOIN store_product_snapshots sps ON sps.dispensary_id = d.id
|
||||
LEFT JOIN store_products sp ON sp.state_id = s.id AND sp.is_in_stock = TRUE
|
||||
LEFT JOIN store_product_snapshots sps ON sps.state_id = s.id
|
||||
GROUP BY s.code, s.name, s.recreational_legal, s.medical_legal
|
||||
ORDER BY dispensary_count DESC, s.name
|
||||
`);
|
||||
@@ -456,8 +451,8 @@ export class StateAnalyticsService {
|
||||
END AS gap_reason
|
||||
FROM states s
|
||||
LEFT JOIN dispensaries d ON d.state_id = s.id
|
||||
LEFT JOIN store_products sp ON sp.dispensary_id = d.id AND sp.is_in_stock = TRUE
|
||||
LEFT JOIN store_product_snapshots sps ON sps.dispensary_id = d.id
|
||||
LEFT JOIN store_products sp ON sp.state_id = s.id AND sp.is_in_stock = TRUE
|
||||
LEFT JOIN store_product_snapshots sps ON sps.state_id = s.id
|
||||
WHERE s.recreational_legal = TRUE OR s.medical_legal = TRUE
|
||||
GROUP BY s.code, s.name, s.recreational_legal, s.medical_legal
|
||||
HAVING COUNT(DISTINCT d.id) = 0
|
||||
@@ -504,8 +499,7 @@ export class StateAnalyticsService {
|
||||
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY sp.price_rec) AS median_price,
|
||||
COUNT(*) AS product_count
|
||||
FROM states s
|
||||
JOIN dispensaries d ON d.state_id = s.id
|
||||
JOIN store_products sp ON sp.dispensary_id = d.id
|
||||
JOIN store_products sp ON sp.state_id = s.id
|
||||
WHERE sp.price_rec IS NOT NULL
|
||||
AND sp.is_in_stock = TRUE
|
||||
AND (s.recreational_legal = TRUE OR s.medical_legal = TRUE)
|
||||
|
||||
@@ -89,22 +89,22 @@ export class StoreAnalyticsService {
|
||||
// Get brands added/dropped
|
||||
const brandsResult = await this.pool.query(`
|
||||
WITH start_brands AS (
|
||||
SELECT DISTINCT brand_name_raw
|
||||
SELECT DISTINCT brand_name
|
||||
FROM store_product_snapshots
|
||||
WHERE dispensary_id = $1
|
||||
AND captured_at >= $2::timestamp AND captured_at < $2::timestamp + INTERVAL '1 day'
|
||||
AND brand_name_raw IS NOT NULL
|
||||
AND captured_at >= $2 AND captured_at < $2 + INTERVAL '1 day'
|
||||
AND brand_name IS NOT NULL
|
||||
),
|
||||
end_brands AS (
|
||||
SELECT DISTINCT brand_name_raw
|
||||
SELECT DISTINCT brand_name
|
||||
FROM store_product_snapshots
|
||||
WHERE dispensary_id = $1
|
||||
AND captured_at >= $3::timestamp - INTERVAL '1 day' AND captured_at <= $3::timestamp
|
||||
AND brand_name_raw IS NOT NULL
|
||||
AND captured_at >= $3 - INTERVAL '1 day' AND captured_at <= $3
|
||||
AND brand_name IS NOT NULL
|
||||
)
|
||||
SELECT
|
||||
ARRAY(SELECT brand_name_raw FROM end_brands EXCEPT SELECT brand_name_raw FROM start_brands) AS added,
|
||||
ARRAY(SELECT brand_name_raw FROM start_brands EXCEPT SELECT brand_name_raw FROM end_brands) AS dropped
|
||||
ARRAY(SELECT brand_name FROM end_brands EXCEPT SELECT brand_name FROM start_brands) AS added,
|
||||
ARRAY(SELECT brand_name FROM start_brands EXCEPT SELECT brand_name FROM end_brands) AS dropped
|
||||
`, [dispensaryId, start, end]);
|
||||
|
||||
const brands = brandsResult.rows[0] || { added: [], dropped: [] };
|
||||
@@ -184,9 +184,9 @@ export class StoreAnalyticsService {
|
||||
-- Products added
|
||||
SELECT
|
||||
sp.id AS store_product_id,
|
||||
sp.name_raw AS product_name,
|
||||
sp.brand_name_raw,
|
||||
sp.category_raw,
|
||||
sp.name AS product_name,
|
||||
sp.brand_name,
|
||||
sp.category,
|
||||
'added' AS event_type,
|
||||
sp.first_seen_at AS event_date,
|
||||
NULL::TEXT AS old_value,
|
||||
@@ -201,9 +201,9 @@ export class StoreAnalyticsService {
|
||||
-- Stock in/out from snapshots
|
||||
SELECT
|
||||
sps.store_product_id,
|
||||
sp.name_raw AS product_name,
|
||||
sp.brand_name_raw,
|
||||
sp.category_raw,
|
||||
sp.name AS product_name,
|
||||
sp.brand_name,
|
||||
sp.category,
|
||||
CASE
|
||||
WHEN sps.is_in_stock = TRUE AND LAG(sps.is_in_stock) OVER w = FALSE THEN 'stock_in'
|
||||
WHEN sps.is_in_stock = FALSE AND LAG(sps.is_in_stock) OVER w = TRUE THEN 'stock_out'
|
||||
@@ -224,9 +224,9 @@ export class StoreAnalyticsService {
|
||||
-- Price changes from snapshots
|
||||
SELECT
|
||||
sps.store_product_id,
|
||||
sp.name_raw AS product_name,
|
||||
sp.brand_name_raw,
|
||||
sp.category_raw,
|
||||
sp.name AS product_name,
|
||||
sp.brand_name,
|
||||
sp.category,
|
||||
'price_change' AS event_type,
|
||||
sps.captured_at AS event_date,
|
||||
LAG(sps.price_rec::TEXT) OVER w AS old_value,
|
||||
@@ -250,8 +250,8 @@ export class StoreAnalyticsService {
|
||||
return result.rows.map((row: any) => ({
|
||||
store_product_id: row.store_product_id,
|
||||
product_name: row.product_name,
|
||||
brand_name: row.brand_name_raw,
|
||||
category: row.category_raw,
|
||||
brand_name: row.brand_name,
|
||||
category: row.category,
|
||||
event_type: row.event_type,
|
||||
event_date: row.event_date ? row.event_date.toISOString() : null,
|
||||
old_value: row.old_value,
|
||||
@@ -259,122 +259,6 @@ export class StoreAnalyticsService {
|
||||
}));
|
||||
}
|
||||
|
||||
/**
 * Get quantity changes for a store (increases/decreases).
 * Useful for estimating sales (decreases) or restocks (increases).
 *
 * Each snapshot's `stock_quantity` is compared with the previous snapshot of
 * the same product (ordered by `captured_at`) inside the resolved date range;
 * only non-zero differences are reported, newest first.
 *
 * The `total_*` fields come from a second query over the whole date range:
 * they are not capped by `limit` and are not narrowed by `direction`.
 *
 * @param dispensaryId - Store whose snapshots are inspected.
 * @param options.window - Time window passed to `getDateRangeFromWindow` (default '7d').
 * @param options.customRange - Optional explicit range passed to `getDateRangeFromWindow`.
 * @param options.direction - 'decrease' for likely sales, 'increase' for restocks, 'all' for both.
 * @param options.limit - Maximum number of change rows returned (default 100).
 * @returns Summary totals plus the individual change rows.
 */
async getQuantityChanges(
  dispensaryId: number,
  options: {
    window?: TimeWindow;
    customRange?: DateRange;
    direction?: 'increase' | 'decrease' | 'all';
    limit?: number;
  } = {}
): Promise<{
  dispensary_id: number;
  window: TimeWindow;
  direction: string;
  total_changes: number;
  total_units_decreased: number;
  total_units_increased: number;
  changes: Array<{
    store_product_id: number;
    product_name: string;
    brand_name: string | null;
    category: string | null;
    old_quantity: number;
    new_quantity: number;
    quantity_delta: number;
    direction: 'increase' | 'decrease';
    captured_at: string;
  }>;
}> {
  const { window = '7d', customRange, direction = 'all', limit = 100 } = options;
  const { start, end } = getDateRangeFromWindow(window, customRange);

  // Build direction filter. Only fixed SQL fragments are ever interpolated;
  // any value other than 'decrease' / 'increase' leaves the filter empty.
  let directionFilter = '';
  if (direction === 'decrease') {
    directionFilter = 'AND qty_delta < 0';
  } else if (direction === 'increase') {
    directionFilter = 'AND qty_delta > 0';
  }

  const result = await this.pool.query(`
    WITH qty_changes AS (
      SELECT
        sps.store_product_id,
        sp.name_raw AS product_name,
        sp.brand_name_raw AS brand_name,
        sp.category_raw AS category,
        LAG(sps.stock_quantity) OVER w AS old_quantity,
        sps.stock_quantity AS new_quantity,
        sps.stock_quantity - LAG(sps.stock_quantity) OVER w AS qty_delta,
        sps.captured_at
      FROM store_product_snapshots sps
      JOIN store_products sp ON sp.id = sps.store_product_id
      WHERE sps.dispensary_id = $1
        AND sps.captured_at >= $2
        AND sps.captured_at <= $3
        AND sps.stock_quantity IS NOT NULL
      WINDOW w AS (PARTITION BY sps.store_product_id ORDER BY sps.captured_at)
    )
    SELECT *
    FROM qty_changes
    WHERE old_quantity IS NOT NULL
      AND qty_delta != 0
      ${directionFilter}
    ORDER BY captured_at DESC
    LIMIT $4
  `, [dispensaryId, start, end, limit]);

  // Calculate totals
  const totalsResult = await this.pool.query(`
    WITH qty_changes AS (
      SELECT
        sps.stock_quantity - LAG(sps.stock_quantity) OVER w AS qty_delta
      FROM store_product_snapshots sps
      WHERE sps.dispensary_id = $1
        AND sps.captured_at >= $2
        AND sps.captured_at <= $3
        AND sps.stock_quantity IS NOT NULL
        AND sps.store_product_id IS NOT NULL
      WINDOW w AS (PARTITION BY sps.store_product_id ORDER BY sps.captured_at)
    )
    SELECT
      COUNT(*) FILTER (WHERE qty_delta != 0) AS total_changes,
      COALESCE(SUM(ABS(qty_delta)) FILTER (WHERE qty_delta < 0), 0) AS units_decreased,
      COALESCE(SUM(qty_delta) FILTER (WHERE qty_delta > 0), 0) AS units_increased
    FROM qty_changes
    WHERE qty_delta IS NOT NULL
  `, [dispensaryId, start, end]);

  const totals = totalsResult.rows[0] || {};

  return {
    dispensary_id: dispensaryId,
    window,
    direction,
    // `|| 0` covers the NaN produced when the totals row is missing.
    total_changes: parseInt(totals.total_changes, 10) || 0,
    total_units_decreased: parseInt(totals.units_decreased, 10) || 0,
    total_units_increased: parseInt(totals.units_increased, 10) || 0,
    changes: result.rows.map((row: any) => ({
      store_product_id: row.store_product_id,
      product_name: row.product_name,
      // The CTE aliases the raw columns to `brand_name` / `category`, so those
      // are the keys present on the row; the `*_raw` keys would be undefined.
      brand_name: row.brand_name,
      category: row.category,
      old_quantity: row.old_quantity,
      new_quantity: row.new_quantity,
      quantity_delta: row.qty_delta,
      // Zero deltas are excluded by the query, so anything not > 0 is a decrease.
      direction: row.qty_delta > 0 ? 'increase' : 'decrease',
      captured_at: row.captured_at?.toISOString() || null,
    })),
  };
}
|
||||
|
||||
/**
|
||||
* Get store inventory composition (categories and brands breakdown)
|
||||
*/
|
||||
@@ -415,14 +299,14 @@ export class StoreAnalyticsService {
|
||||
// Get top brands
|
||||
const brandsResult = await this.pool.query(`
|
||||
SELECT
|
||||
brand_name_raw AS brand,
|
||||
brand_name AS brand,
|
||||
COUNT(*) AS count,
|
||||
ROUND(COUNT(*)::NUMERIC * 100 / NULLIF($2, 0), 2) AS percent
|
||||
FROM store_products
|
||||
WHERE dispensary_id = $1
|
||||
AND brand_name_raw IS NOT NULL
|
||||
AND brand_name IS NOT NULL
|
||||
AND is_in_stock = TRUE
|
||||
GROUP BY brand_name_raw
|
||||
GROUP BY brand_name
|
||||
ORDER BY count DESC
|
||||
LIMIT 20
|
||||
`, [dispensaryId, totalProducts]);
|
||||
@@ -432,7 +316,7 @@ export class StoreAnalyticsService {
|
||||
in_stock_count: parseInt(totals.in_stock) || 0,
|
||||
out_of_stock_count: parseInt(totals.out_of_stock) || 0,
|
||||
categories: categoriesResult.rows.map((row: any) => ({
|
||||
category: row.category_raw,
|
||||
category: row.category,
|
||||
count: parseInt(row.count),
|
||||
percent: parseFloat(row.percent) || 0,
|
||||
})),
|
||||
@@ -574,24 +458,23 @@ export class StoreAnalyticsService {
|
||||
),
|
||||
market_prices AS (
|
||||
SELECT
|
||||
sp.category_raw,
|
||||
sp.category,
|
||||
AVG(sp.price_rec) AS market_avg
|
||||
FROM store_products sp
|
||||
JOIN dispensaries d ON d.id = sp.dispensary_id
|
||||
WHERE d.state_id = $2
|
||||
WHERE sp.state_id = $2
|
||||
AND sp.price_rec IS NOT NULL
|
||||
AND sp.is_in_stock = TRUE
|
||||
AND sp.category_raw IS NOT NULL
|
||||
GROUP BY sp.category_raw
|
||||
AND sp.category IS NOT NULL
|
||||
GROUP BY sp.category
|
||||
)
|
||||
SELECT
|
||||
sp.category_raw,
|
||||
sp.category,
|
||||
sp.store_avg AS store_avg_price,
|
||||
mp.market_avg AS market_avg_price,
|
||||
ROUND(((sp.store_avg - mp.market_avg) / NULLIF(mp.market_avg, 0) * 100)::NUMERIC, 2) AS price_vs_market_percent,
|
||||
sp.product_count
|
||||
FROM store_prices sp
|
||||
LEFT JOIN market_prices mp ON mp.category = sp.category_raw
|
||||
LEFT JOIN market_prices mp ON mp.category = sp.category
|
||||
ORDER BY sp.product_count DESC
|
||||
`, [dispensaryId, dispensary.state_id]);
|
||||
|
||||
@@ -603,10 +486,9 @@ export class StoreAnalyticsService {
|
||||
WHERE dispensary_id = $1 AND price_rec IS NOT NULL AND is_in_stock = TRUE
|
||||
),
|
||||
market_avg AS (
|
||||
SELECT AVG(sp.price_rec) AS avg
|
||||
FROM store_products sp
|
||||
JOIN dispensaries d ON d.id = sp.dispensary_id
|
||||
WHERE d.state_id = $2 AND sp.price_rec IS NOT NULL AND sp.is_in_stock = TRUE
|
||||
SELECT AVG(price_rec) AS avg
|
||||
FROM store_products
|
||||
WHERE state_id = $2 AND price_rec IS NOT NULL AND is_in_stock = TRUE
|
||||
)
|
||||
SELECT
|
||||
ROUND(((sa.avg - ma.avg) / NULLIF(ma.avg, 0) * 100)::NUMERIC, 2) AS price_vs_market
|
||||
@@ -617,7 +499,7 @@ export class StoreAnalyticsService {
|
||||
dispensary_id: dispensaryId,
|
||||
dispensary_name: dispensary.name,
|
||||
categories: result.rows.map((row: any) => ({
|
||||
category: row.category_raw,
|
||||
category: row.category,
|
||||
store_avg_price: parseFloat(row.store_avg_price),
|
||||
market_avg_price: row.market_avg_price ? parseFloat(row.market_avg_price) : 0,
|
||||
price_vs_market_percent: row.price_vs_market_percent ? parseFloat(row.price_vs_market_percent) : 0,
|
||||
|
||||
@@ -11,4 +11,3 @@ export { BrandPenetrationService } from './BrandPenetrationService';
|
||||
export { CategoryAnalyticsService } from './CategoryAnalyticsService';
|
||||
export { StoreAnalyticsService } from './StoreAnalyticsService';
|
||||
export { StateAnalyticsService } from './StateAnalyticsService';
|
||||
export { BrandIntelligenceService } from './BrandIntelligenceService';
|
||||
|
||||
@@ -322,48 +322,3 @@ export interface RecVsMedPriceComparison {
|
||||
};
|
||||
price_diff_percent: number | null;
|
||||
}
|
||||
|
||||
// ============================================================
|
||||
// BRAND PROMOTIONAL ANALYTICS TYPES
|
||||
// ============================================================
|
||||
|
||||
export interface BrandPromotionalEvent {
|
||||
product_name: string;
|
||||
store_product_id: number;
|
||||
dispensary_id: number;
|
||||
dispensary_name: string;
|
||||
state_code: string;
|
||||
category: string | null;
|
||||
special_start: string; // ISO date when special started
|
||||
special_end: string | null; // ISO date when special ended (null if ongoing)
|
||||
duration_days: number | null;
|
||||
regular_price: number;
|
||||
special_price: number;
|
||||
discount_percent: number;
|
||||
quantity_at_start: number | null;
|
||||
quantity_at_end: number | null;
|
||||
quantity_sold_estimate: number | null; // quantity_at_start - quantity_at_end
|
||||
}
|
||||
|
||||
export interface BrandPromotionalSummary {
|
||||
brand_name: string;
|
||||
window: TimeWindow;
|
||||
total_promotional_events: number;
|
||||
total_products_on_special: number;
|
||||
total_dispensaries_with_specials: number;
|
||||
states_with_specials: string[];
|
||||
avg_discount_percent: number;
|
||||
avg_duration_days: number | null;
|
||||
total_quantity_sold_estimate: number | null;
|
||||
promotional_frequency: {
|
||||
weekly_avg: number;
|
||||
monthly_avg: number;
|
||||
};
|
||||
by_category: Array<{
|
||||
category: string;
|
||||
event_count: number;
|
||||
avg_discount_percent: number;
|
||||
quantity_sold_estimate: number | null;
|
||||
}>;
|
||||
events: BrandPromotionalEvent[];
|
||||
}
|
||||
|
||||
@@ -1,53 +1,49 @@
|
||||
/**
|
||||
* Crawl Rotator - Proxy & User Agent Rotation for Crawlers
|
||||
*
|
||||
* Updated: 2025-12-10 per workflow-12102025.md
|
||||
*
|
||||
* KEY BEHAVIORS (per workflow-12102025.md):
|
||||
* 1. Task determines WHAT work to do, proxy determines SESSION IDENTITY
|
||||
* 2. Proxy location (timezone) sets Accept-Language headers (always English)
|
||||
* 3. On 403: immediately get new IP, new fingerprint, retry
|
||||
* 4. After 3 consecutive 403s on same proxy with different fingerprints → disable proxy
|
||||
*
|
||||
* USER-AGENT GENERATION (per workflow-12102025.md):
|
||||
* - Device distribution: Mobile 62%, Desktop 36%, Tablet 2%
|
||||
* - Browser whitelist: Chrome, Safari, Edge, Firefox only
|
||||
* - UA sticks until IP rotates
|
||||
* - Failure = alert admin + stop crawl (no fallback)
|
||||
*
|
||||
* Uses intoli/user-agents for realistic UA generation with daily-updated data.
|
||||
* Manages rotation of proxies and user agents to avoid blocks.
|
||||
* Used by platform-specific crawlers (Dutchie, Jane, etc.)
|
||||
*
|
||||
* Canonical location: src/services/crawl-rotator.ts
|
||||
*/
|
||||
|
||||
import { Pool } from 'pg';
|
||||
import UserAgent from 'user-agents';
|
||||
import {
|
||||
HTTPFingerprint,
|
||||
generateHTTPFingerprint,
|
||||
BrowserType,
|
||||
} from './http-fingerprint';
|
||||
|
||||
// ============================================================
|
||||
// UA CONSTANTS (per workflow-12102025.md)
|
||||
// USER AGENT CONFIGURATION
|
||||
// ============================================================
|
||||
|
||||
/**
|
||||
* Per workflow-12102025.md: Device category distribution (hardcoded)
|
||||
* Mobile: 62%, Desktop: 36%, Tablet: 2%
|
||||
* Modern browser user agents (Chrome, Firefox, Safari, Edge on various platforms)
|
||||
* Updated: 2024
|
||||
*/
|
||||
const DEVICE_WEIGHTS = {
|
||||
mobile: 62,
|
||||
desktop: 36,
|
||||
tablet: 2,
|
||||
} as const;
|
||||
export const USER_AGENTS = [
|
||||
// Chrome on Windows
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36',
|
||||
|
||||
/**
|
||||
* Per workflow-12102025.md: Browser whitelist
|
||||
* Only Chrome (67%), Safari (20%), Edge (6%), Firefox (3%)
|
||||
* Samsung Internet, Opera, and other niche browsers are filtered out
|
||||
*/
|
||||
const ALLOWED_BROWSERS = ['Chrome', 'Safari', 'Edge', 'Firefox'] as const;
|
||||
// Chrome on macOS
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
|
||||
|
||||
// Firefox on Windows
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:120.0) Gecko/20100101 Firefox/120.0',
|
||||
|
||||
// Firefox on macOS
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:121.0) Gecko/20100101 Firefox/121.0',
|
||||
|
||||
// Safari on macOS
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Safari/605.1.15',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.1 Safari/605.1.15',
|
||||
|
||||
// Edge on Windows
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0',
|
||||
|
||||
// Chrome on Linux
|
||||
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
|
||||
];
|
||||
|
||||
// ============================================================
|
||||
// PROXY TYPES
|
||||
@@ -65,13 +61,8 @@ export interface Proxy {
|
||||
failureCount: number;
|
||||
successCount: number;
|
||||
avgResponseTimeMs: number | null;
|
||||
maxConnections: number;
|
||||
/**
|
||||
* Per workflow-12102025.md: Track consecutive 403s with different fingerprints.
|
||||
* After 3 consecutive 403s → disable proxy (it's burned).
|
||||
*/
|
||||
consecutive403Count: number;
|
||||
// Location info - determines session headers per workflow-12102025.md
|
||||
maxConnections: number; // Number of concurrent connections allowed (for rotating proxies)
|
||||
// Location info (if known)
|
||||
city?: string;
|
||||
state?: string;
|
||||
country?: string;
|
||||
@@ -86,40 +77,6 @@ export interface ProxyStats {
|
||||
avgSuccessRate: number;
|
||||
}
|
||||
|
||||
// ============================================================
|
||||
// FINGERPRINT TYPE
|
||||
// Per workflow-12102025.md: Full browser fingerprint from user-agents
|
||||
// ============================================================
|
||||
|
||||
export interface BrowserFingerprint {
|
||||
userAgent: string;
|
||||
platform: string;
|
||||
screenWidth: number;
|
||||
screenHeight: number;
|
||||
viewportWidth: number;
|
||||
viewportHeight: number;
|
||||
deviceCategory: string;
|
||||
browserName: string; // Per workflow-12102025.md: for session logging
|
||||
// Derived headers for anti-detect
|
||||
acceptLanguage: string;
|
||||
secChUa?: string;
|
||||
secChUaPlatform?: string;
|
||||
secChUaMobile?: string;
|
||||
// Per workflow-12102025.md: HTTP Fingerprinting section
|
||||
httpFingerprint: HTTPFingerprint;
|
||||
}
|
||||
|
||||
/**
|
||||
* Per workflow-12102025.md: Session log entry for debugging blocked sessions
|
||||
*/
|
||||
export interface UASessionLog {
|
||||
deviceCategory: string;
|
||||
browserName: string;
|
||||
userAgent: string;
|
||||
proxyIp: string | null;
|
||||
sessionStartedAt: Date;
|
||||
}
|
||||
|
||||
// ============================================================
|
||||
// PROXY ROTATOR CLASS
|
||||
// ============================================================
|
||||
@@ -134,6 +91,9 @@ export class ProxyRotator {
|
||||
this.pool = pool || null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialize with database pool
|
||||
*/
|
||||
setPool(pool: Pool): void {
|
||||
this.pool = pool;
|
||||
}
|
||||
@@ -162,7 +122,6 @@ export class ProxyRotator {
|
||||
0 as "successCount",
|
||||
response_time_ms as "avgResponseTimeMs",
|
||||
COALESCE(max_connections, 1) as "maxConnections",
|
||||
COALESCE(consecutive_403_count, 0) as "consecutive403Count",
|
||||
city,
|
||||
state,
|
||||
country,
|
||||
@@ -175,9 +134,11 @@ export class ProxyRotator {
|
||||
|
||||
this.proxies = result.rows;
|
||||
|
||||
// Calculate total concurrent capacity
|
||||
const totalCapacity = this.proxies.reduce((sum, p) => sum + p.maxConnections, 0);
|
||||
console.log(`[ProxyRotator] Loaded ${this.proxies.length} active proxies (${totalCapacity} max concurrent connections)`);
|
||||
} catch (error) {
|
||||
// Table might not exist - that's okay
|
||||
console.warn(`[ProxyRotator] Could not load proxies: ${error}`);
|
||||
this.proxies = [];
|
||||
}
|
||||
@@ -189,6 +150,7 @@ export class ProxyRotator {
|
||||
getNext(): Proxy | null {
|
||||
if (this.proxies.length === 0) return null;
|
||||
|
||||
// Round-robin rotation
|
||||
this.currentIndex = (this.currentIndex + 1) % this.proxies.length;
|
||||
this.lastRotation = new Date();
|
||||
|
||||
@@ -223,68 +185,23 @@ export class ProxyRotator {
|
||||
}
|
||||
|
||||
/**
|
||||
* Mark proxy as blocked (403 received)
|
||||
* Per workflow-12102025.md:
|
||||
* - Increment consecutive_403_count
|
||||
* - After 3 consecutive 403s with different fingerprints → disable proxy
|
||||
* - This is separate from general failures (timeouts, etc.)
|
||||
*/
|
||||
async markBlocked(proxyId: number): Promise<boolean> {
|
||||
const proxy = this.proxies.find(p => p.id === proxyId);
|
||||
let shouldDisable = false;
|
||||
|
||||
if (proxy) {
|
||||
proxy.consecutive403Count++;
|
||||
|
||||
// Per workflow-12102025.md: 3 consecutive 403s → proxy is burned
|
||||
if (proxy.consecutive403Count >= 3) {
|
||||
proxy.isActive = false;
|
||||
this.proxies = this.proxies.filter(p => p.id !== proxyId);
|
||||
console.log(`[ProxyRotator] Proxy ${proxyId} DISABLED after ${proxy.consecutive403Count} consecutive 403s (burned)`);
|
||||
shouldDisable = true;
|
||||
} else {
|
||||
console.log(`[ProxyRotator] Proxy ${proxyId} blocked (403 #${proxy.consecutive403Count}/3)`);
|
||||
}
|
||||
}
|
||||
|
||||
// Update database
|
||||
if (this.pool) {
|
||||
try {
|
||||
await this.pool.query(`
|
||||
UPDATE proxies
|
||||
SET
|
||||
consecutive_403_count = COALESCE(consecutive_403_count, 0) + 1,
|
||||
last_failure_at = NOW(),
|
||||
test_result = '403 Forbidden',
|
||||
active = CASE WHEN COALESCE(consecutive_403_count, 0) >= 2 THEN false ELSE active END,
|
||||
updated_at = NOW()
|
||||
WHERE id = $1
|
||||
`, [proxyId]);
|
||||
} catch (err) {
|
||||
console.error(`[ProxyRotator] Failed to update proxy ${proxyId}:`, err);
|
||||
}
|
||||
}
|
||||
|
||||
return shouldDisable;
|
||||
}
|
||||
|
||||
/**
|
||||
* Mark proxy as failed (general error - timeout, connection error, etc.)
|
||||
* Separate from 403 blocking per workflow-12102025.md
|
||||
* Mark proxy as failed (temporarily remove from rotation)
|
||||
*/
|
||||
async markFailed(proxyId: number, error?: string): Promise<void> {
|
||||
// Update in-memory
|
||||
const proxy = this.proxies.find(p => p.id === proxyId);
|
||||
if (proxy) {
|
||||
proxy.failureCount++;
|
||||
|
||||
// Deactivate if too many general failures
|
||||
// Deactivate if too many failures
|
||||
if (proxy.failureCount >= 5) {
|
||||
proxy.isActive = false;
|
||||
this.proxies = this.proxies.filter(p => p.id !== proxyId);
|
||||
console.log(`[ProxyRotator] Proxy ${proxyId} deactivated after ${proxy.failureCount} general failures`);
|
||||
console.log(`[ProxyRotator] Proxy ${proxyId} deactivated after ${proxy.failureCount} failures`);
|
||||
}
|
||||
}
|
||||
|
||||
// Update database
|
||||
if (this.pool) {
|
||||
try {
|
||||
await this.pool.query(`
|
||||
@@ -303,22 +220,23 @@ export class ProxyRotator {
|
||||
}
|
||||
|
||||
/**
|
||||
* Mark proxy as successful - resets consecutive 403 count
|
||||
* Per workflow-12102025.md: successful request clears the 403 counter
|
||||
* Mark proxy as successful
|
||||
*/
|
||||
async markSuccess(proxyId: number, responseTimeMs?: number): Promise<void> {
|
||||
// Update in-memory
|
||||
const proxy = this.proxies.find(p => p.id === proxyId);
|
||||
if (proxy) {
|
||||
proxy.successCount++;
|
||||
proxy.consecutive403Count = 0; // Reset on success per workflow-12102025.md
|
||||
proxy.lastUsedAt = new Date();
|
||||
if (responseTimeMs !== undefined) {
|
||||
// Rolling average
|
||||
proxy.avgResponseTimeMs = proxy.avgResponseTimeMs
|
||||
? (proxy.avgResponseTimeMs * 0.8) + (responseTimeMs * 0.2)
|
||||
: responseTimeMs;
|
||||
}
|
||||
}
|
||||
|
||||
// Update database
|
||||
if (this.pool) {
|
||||
try {
|
||||
await this.pool.query(`
|
||||
@@ -326,7 +244,6 @@ export class ProxyRotator {
|
||||
SET
|
||||
last_tested_at = NOW(),
|
||||
test_result = 'success',
|
||||
consecutive_403_count = 0,
|
||||
response_time_ms = CASE
|
||||
WHEN response_time_ms IS NULL THEN $2
|
||||
ELSE (response_time_ms * 0.8 + $2 * 0.2)::integer
|
||||
@@ -355,8 +272,8 @@ export class ProxyRotator {
|
||||
*/
|
||||
getStats(): ProxyStats {
|
||||
const totalProxies = this.proxies.length;
|
||||
const activeProxies = this.proxies.reduce((sum, p) => sum + p.maxConnections, 0);
|
||||
const blockedProxies = this.proxies.filter(p => p.failureCount >= 5 || p.consecutive403Count >= 3).length;
|
||||
const activeProxies = this.proxies.reduce((sum, p) => sum + p.maxConnections, 0); // Total concurrent capacity
|
||||
const blockedProxies = this.proxies.filter(p => p.failureCount >= 5).length;
|
||||
|
||||
const successRates = this.proxies
|
||||
.filter(p => p.successCount + p.failureCount > 0)
|
||||
@@ -368,12 +285,15 @@ export class ProxyRotator {
|
||||
|
||||
return {
|
||||
totalProxies,
|
||||
activeProxies,
|
||||
activeProxies, // Total concurrent capacity across all proxies
|
||||
blockedProxies,
|
||||
avgSuccessRate,
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if proxy pool has available proxies
|
||||
*/
|
||||
hasAvailableProxies(): boolean {
|
||||
return this.proxies.length > 0;
|
||||
}
|
||||
@@ -381,194 +301,53 @@ export class ProxyRotator {
|
||||
|
||||
// ============================================================
|
||||
// USER AGENT ROTATOR CLASS
|
||||
// Per workflow-12102025.md: Uses intoli/user-agents for realistic fingerprints
|
||||
// ============================================================
|
||||
|
||||
export class UserAgentRotator {
|
||||
private currentFingerprint: BrowserFingerprint | null = null;
|
||||
private sessionLog: UASessionLog | null = null;
|
||||
private userAgents: string[];
|
||||
private currentIndex: number = 0;
|
||||
private lastRotation: Date = new Date();
|
||||
|
||||
constructor() {
|
||||
// Per workflow-12102025.md: Initialize with first fingerprint
|
||||
this.rotate();
|
||||
constructor(userAgents: string[] = USER_AGENTS) {
|
||||
this.userAgents = userAgents;
|
||||
// Start at random index to avoid patterns
|
||||
this.currentIndex = Math.floor(Math.random() * userAgents.length);
|
||||
}
|
||||
|
||||
/**
|
||||
* Per workflow-12102025.md: Roll device category based on distribution
|
||||
* Mobile: 62%, Desktop: 36%, Tablet: 2%
|
||||
* Get next user agent in rotation
|
||||
*/
|
||||
private rollDeviceCategory(): 'mobile' | 'desktop' | 'tablet' {
|
||||
const roll = Math.random() * 100;
|
||||
if (roll < DEVICE_WEIGHTS.mobile) {
|
||||
return 'mobile';
|
||||
} else if (roll < DEVICE_WEIGHTS.mobile + DEVICE_WEIGHTS.desktop) {
|
||||
return 'desktop';
|
||||
} else {
|
||||
return 'tablet';
|
||||
}
|
||||
getNext(): string {
|
||||
this.currentIndex = (this.currentIndex + 1) % this.userAgents.length;
|
||||
this.lastRotation = new Date();
|
||||
return this.userAgents[this.currentIndex];
|
||||
}
|
||||
|
||||
/**
|
||||
* Per workflow-12102025.md: Extract browser name from UA string
|
||||
* Get current user agent without rotating
|
||||
*/
|
||||
private extractBrowserName(userAgent: string): string {
|
||||
if (userAgent.includes('Edg/')) return 'Edge';
|
||||
if (userAgent.includes('Firefox/')) return 'Firefox';
|
||||
if (userAgent.includes('Safari/') && !userAgent.includes('Chrome/')) return 'Safari';
|
||||
if (userAgent.includes('Chrome/')) return 'Chrome';
|
||||
return 'Unknown';
|
||||
getCurrent(): string {
|
||||
return this.userAgents[this.currentIndex];
|
||||
}
|
||||
|
||||
/**
|
||||
* Per workflow-12102025.md: Check if browser is in whitelist
|
||||
* Get a random user agent
|
||||
*/
|
||||
private isAllowedBrowser(userAgent: string): boolean {
|
||||
const browserName = this.extractBrowserName(userAgent);
|
||||
return ALLOWED_BROWSERS.includes(browserName as typeof ALLOWED_BROWSERS[number]);
|
||||
getRandom(): string {
|
||||
const index = Math.floor(Math.random() * this.userAgents.length);
|
||||
return this.userAgents[index];
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate a new random fingerprint
|
||||
* Per workflow-12102025.md:
|
||||
* - Roll device category (62/36/2)
|
||||
* - Filter to top 4 browsers only
|
||||
* - Failure = alert admin + stop (no fallback)
|
||||
* Get total available user agents
|
||||
*/
|
||||
rotate(proxyIp?: string): BrowserFingerprint {
|
||||
// Per workflow-12102025.md: Roll device category
|
||||
const deviceCategory = this.rollDeviceCategory();
|
||||
|
||||
// Per workflow-12102025.md: Generate UA filtered to device category
|
||||
const generator = new UserAgent({ deviceCategory });
|
||||
|
||||
// Per workflow-12102025.md: Try to get an allowed browser (max 50 attempts)
|
||||
let ua: ReturnType<typeof generator>;
|
||||
let attempts = 0;
|
||||
const maxAttempts = 50;
|
||||
|
||||
do {
|
||||
ua = generator();
|
||||
attempts++;
|
||||
} while (!this.isAllowedBrowser(ua.data.userAgent) && attempts < maxAttempts);
|
||||
|
||||
// Per workflow-12102025.md: If we can't get allowed browser, this is a failure
|
||||
if (!this.isAllowedBrowser(ua.data.userAgent)) {
|
||||
const errorMsg = `[UserAgentRotator] CRITICAL: Failed to generate allowed browser after ${maxAttempts} attempts. Device: ${deviceCategory}. Last UA: ${ua.data.userAgent}`;
|
||||
console.error(errorMsg);
|
||||
// Per workflow-12102025.md: Alert admin + stop crawl
|
||||
// TODO: Post alert to admin dashboard
|
||||
throw new Error(errorMsg);
|
||||
}
|
||||
|
||||
const data = ua.data;
|
||||
const browserName = this.extractBrowserName(data.userAgent);
|
||||
|
||||
// Build sec-ch-ua headers from user agent string
|
||||
const secChUa = this.buildSecChUa(data.userAgent, deviceCategory);
|
||||
|
||||
// Per workflow-12102025.md: HTTP Fingerprinting - generate full HTTP fingerprint
|
||||
const httpFingerprint = generateHTTPFingerprint(browserName as BrowserType);
|
||||
|
||||
this.currentFingerprint = {
|
||||
userAgent: data.userAgent,
|
||||
platform: data.platform,
|
||||
screenWidth: data.screenWidth,
|
||||
screenHeight: data.screenHeight,
|
||||
viewportWidth: data.viewportWidth,
|
||||
viewportHeight: data.viewportHeight,
|
||||
deviceCategory: data.deviceCategory,
|
||||
browserName, // Per workflow-12102025.md: for session logging
|
||||
// Per workflow-12102025.md: always English
|
||||
acceptLanguage: 'en-US,en;q=0.9',
|
||||
...secChUa,
|
||||
// Per workflow-12102025.md: HTTP Fingerprinting section
|
||||
httpFingerprint,
|
||||
};
|
||||
|
||||
// Per workflow-12102025.md: Log session data
|
||||
this.sessionLog = {
|
||||
deviceCategory,
|
||||
browserName,
|
||||
userAgent: data.userAgent,
|
||||
proxyIp: proxyIp || null,
|
||||
sessionStartedAt: new Date(),
|
||||
};
|
||||
|
||||
console.log(`[UserAgentRotator] New fingerprint: device=${deviceCategory}, browser=${browserName}, UA=${data.userAgent.slice(0, 50)}...`);
|
||||
return this.currentFingerprint;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get current fingerprint without rotating
|
||||
*/
|
||||
getCurrent(): BrowserFingerprint {
|
||||
if (!this.currentFingerprint) {
|
||||
return this.rotate();
|
||||
}
|
||||
return this.currentFingerprint;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a random fingerprint (rotates and returns)
|
||||
*/
|
||||
getRandom(proxyIp?: string): BrowserFingerprint {
|
||||
return this.rotate(proxyIp);
|
||||
}
|
||||
|
||||
/**
|
||||
* Per workflow-12102025.md: Get session log for debugging
|
||||
*/
|
||||
getSessionLog(): UASessionLog | null {
|
||||
return this.sessionLog;
|
||||
}
|
||||
|
||||
/**
|
||||
* Build sec-ch-ua headers from user agent string
|
||||
* Per workflow-12102025.md: Include mobile indicator based on device category
|
||||
*/
|
||||
private buildSecChUa(userAgent: string, deviceCategory: string): { secChUa?: string; secChUaPlatform?: string; secChUaMobile?: string } {
|
||||
const isMobile = deviceCategory === 'mobile' || deviceCategory === 'tablet';
|
||||
|
||||
// Extract Chrome version if present
|
||||
const chromeMatch = userAgent.match(/Chrome\/(\d+)/);
|
||||
const edgeMatch = userAgent.match(/Edg\/(\d+)/);
|
||||
|
||||
if (edgeMatch) {
|
||||
const version = edgeMatch[1];
|
||||
return {
|
||||
secChUa: `"Microsoft Edge";v="${version}", "Chromium";v="${version}", "Not_A Brand";v="24"`,
|
||||
secChUaPlatform: userAgent.includes('Windows') ? '"Windows"' : userAgent.includes('Android') ? '"Android"' : '"macOS"',
|
||||
secChUaMobile: isMobile ? '?1' : '?0',
|
||||
};
|
||||
}
|
||||
|
||||
if (chromeMatch) {
|
||||
const version = chromeMatch[1];
|
||||
let platform = '"Linux"';
|
||||
if (userAgent.includes('Windows')) platform = '"Windows"';
|
||||
else if (userAgent.includes('Mac')) platform = '"macOS"';
|
||||
else if (userAgent.includes('Android')) platform = '"Android"';
|
||||
else if (userAgent.includes('iPhone') || userAgent.includes('iPad')) platform = '"iOS"';
|
||||
|
||||
return {
|
||||
secChUa: `"Google Chrome";v="${version}", "Chromium";v="${version}", "Not_A Brand";v="24"`,
|
||||
secChUaPlatform: platform,
|
||||
secChUaMobile: isMobile ? '?1' : '?0',
|
||||
};
|
||||
}
|
||||
|
||||
// Firefox/Safari don't send sec-ch-ua
|
||||
return {};
|
||||
}
|
||||
|
||||
getCount(): number {
|
||||
return 1; // user-agents generates dynamically
|
||||
return this.userAgents.length;
|
||||
}
|
||||
}
|
||||
|
||||
// ============================================================
|
||||
// COMBINED ROTATOR
|
||||
// Per workflow-12102025.md: Coordinates proxy + fingerprint rotation
|
||||
// COMBINED ROTATOR (for convenience)
|
||||
// ============================================================
|
||||
|
||||
export class CrawlRotator {
|
||||
@@ -580,51 +359,49 @@ export class CrawlRotator {
|
||||
this.userAgent = new UserAgentRotator();
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialize rotator (load proxies from DB)
|
||||
*/
|
||||
async initialize(): Promise<void> {
|
||||
await this.proxy.loadProxies();
|
||||
}
|
||||
|
||||
/**
|
||||
* Rotate proxy only (get new IP)
|
||||
* Rotate proxy only
|
||||
*/
|
||||
rotateProxy(): Proxy | null {
|
||||
return this.proxy.getNext();
|
||||
}
|
||||
|
||||
/**
|
||||
* Rotate fingerprint only (new UA, screen size, etc.)
|
||||
* Rotate user agent only
|
||||
*/
|
||||
rotateFingerprint(): BrowserFingerprint {
|
||||
return this.userAgent.rotate();
|
||||
rotateUserAgent(): string {
|
||||
return this.userAgent.getNext();
|
||||
}
|
||||
|
||||
/**
|
||||
* Rotate both proxy and fingerprint
|
||||
* Per workflow-12102025.md: called on 403 for fresh identity
|
||||
* Passes proxy IP to UA rotation for session logging
|
||||
* Rotate both proxy and user agent
|
||||
*/
|
||||
rotateBoth(): { proxy: Proxy | null; fingerprint: BrowserFingerprint } {
|
||||
const proxy = this.proxy.getNext();
|
||||
const proxyIp = proxy ? proxy.host : undefined;
|
||||
rotateBoth(): { proxy: Proxy | null; userAgent: string } {
|
||||
return {
|
||||
proxy,
|
||||
fingerprint: this.userAgent.rotate(proxyIp),
|
||||
proxy: this.proxy.getNext(),
|
||||
userAgent: this.userAgent.getNext(),
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Get current proxy and fingerprint without rotating
|
||||
* Get current proxy and user agent without rotating
|
||||
*/
|
||||
getCurrent(): { proxy: Proxy | null; fingerprint: BrowserFingerprint } {
|
||||
getCurrent(): { proxy: Proxy | null; userAgent: string } {
|
||||
return {
|
||||
proxy: this.proxy.getCurrent(),
|
||||
fingerprint: this.userAgent.getCurrent(),
|
||||
userAgent: this.userAgent.getCurrent(),
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Record success for current proxy
|
||||
* Per workflow-12102025.md: resets consecutive 403 count
|
||||
*/
|
||||
async recordSuccess(responseTimeMs?: number): Promise<void> {
|
||||
const current = this.proxy.getCurrent();
|
||||
@@ -634,20 +411,7 @@ export class CrawlRotator {
|
||||
}
|
||||
|
||||
/**
|
||||
* Record 403 block for current proxy
|
||||
* Per workflow-12102025.md: increments consecutive_403_count, disables after 3
|
||||
* Returns true if proxy was disabled
|
||||
*/
|
||||
async recordBlock(): Promise<boolean> {
|
||||
const current = this.proxy.getCurrent();
|
||||
if (current) {
|
||||
return await this.proxy.markBlocked(current.id);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Record general failure (not 403)
|
||||
* Record failure for current proxy
|
||||
*/
|
||||
async recordFailure(error?: string): Promise<void> {
|
||||
const current = this.proxy.getCurrent();
|
||||
@@ -657,13 +421,14 @@ export class CrawlRotator {
|
||||
}
|
||||
|
||||
/**
|
||||
* Get current proxy location info
|
||||
* Per workflow-12102025.md: proxy location determines session headers
|
||||
* Get current proxy location info (for reporting)
|
||||
* Note: For rotating proxies (like IPRoyal), the actual exit location varies per request
|
||||
*/
|
||||
getProxyLocation(): { city?: string; state?: string; country?: string; timezone?: string; isRotating: boolean } | null {
|
||||
const current = this.proxy.getCurrent();
|
||||
if (!current) return null;
|
||||
|
||||
// Check if this is a rotating proxy (max_connections > 1 usually indicates rotating)
|
||||
const isRotating = current.maxConnections > 1;
|
||||
|
||||
return {
|
||||
@@ -674,15 +439,6 @@ export class CrawlRotator {
|
||||
isRotating
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Get timezone from current proxy
|
||||
* Per workflow-12102025.md: used for Accept-Language header
|
||||
*/
|
||||
getProxyTimezone(): string | undefined {
|
||||
const current = this.proxy.getCurrent();
|
||||
return current?.timezone;
|
||||
}
|
||||
}
|
||||
|
||||
// ============================================================
|
||||
|
||||
@@ -1,315 +0,0 @@
|
||||
/**
|
||||
* HTTP Fingerprinting Service
|
||||
*
|
||||
* Per workflow-12102025.md - HTTP Fingerprinting section:
|
||||
* - Full header set per browser type
|
||||
* - Browser-specific header ordering
|
||||
* - Natural randomization (DNT, Accept quality)
|
||||
* - Dynamic Referer per dispensary
|
||||
*
|
||||
* Canonical location: src/services/http-fingerprint.ts
|
||||
*/
|
||||
|
||||
// ============================================================
|
||||
// TYPES
|
||||
// ============================================================
|
||||
|
||||
export type BrowserType = 'Chrome' | 'Firefox' | 'Safari' | 'Edge';
|
||||
|
||||
/**
|
||||
* Per workflow-12102025.md: Full HTTP fingerprint for a session
|
||||
*/
|
||||
export interface HTTPFingerprint {
|
||||
browserType: BrowserType;
|
||||
headers: Record<string, string>;
|
||||
headerOrder: string[];
|
||||
curlImpersonateBinary: string;
|
||||
hasDNT: boolean;
|
||||
}
|
||||
|
||||
/**
|
||||
* Per workflow-12102025.md: Context for building headers
|
||||
*/
|
||||
export interface HeaderContext {
|
||||
userAgent: string;
|
||||
secChUa?: string;
|
||||
secChUaPlatform?: string;
|
||||
secChUaMobile?: string;
|
||||
referer: string;
|
||||
isPost: boolean;
|
||||
contentLength?: number;
|
||||
}
|
||||
|
||||
// ============================================================
|
||||
// CONSTANTS (per workflow-12102025.md)
|
||||
// ============================================================
|
||||
|
||||
/**
|
||||
* Per workflow-12102025.md: DNT header distribution (~30% of users)
|
||||
*/
|
||||
const DNT_PROBABILITY = 0.30;
|
||||
|
||||
/**
|
||||
* Per workflow-12102025.md: Accept header variations for natural traffic
|
||||
*/
|
||||
const ACCEPT_VARIATIONS = [
|
||||
'application/json, text/plain, */*',
|
||||
'application/json,text/plain,*/*',
|
||||
'*/*',
|
||||
];
|
||||
|
||||
/**
|
||||
* Per workflow-12102025.md: Accept-Language variations
|
||||
*/
|
||||
const ACCEPT_LANGUAGE_VARIATIONS = [
|
||||
'en-US,en;q=0.9',
|
||||
'en-US,en;q=0.8',
|
||||
'en-US;q=0.9,en;q=0.8',
|
||||
];
|
||||
|
||||
/**
|
||||
* Per workflow-12102025.md: curl-impersonate binaries per browser
|
||||
*/
|
||||
const CURL_IMPERSONATE_BINARIES: Record<BrowserType, string> = {
|
||||
Chrome: 'curl_chrome131',
|
||||
Edge: 'curl_chrome131', // Edge uses Chromium
|
||||
Firefox: 'curl_ff133',
|
||||
Safari: 'curl_safari17',
|
||||
};
|
||||
|
||||
// ============================================================
|
||||
// HEADER ORDERING (per workflow-12102025.md)
|
||||
// ============================================================
|
||||
|
||||
/**
|
||||
* Per workflow-12102025.md: Chrome header order for GraphQL requests
|
||||
*/
|
||||
const CHROME_HEADER_ORDER = [
|
||||
'Host',
|
||||
'Connection',
|
||||
'Content-Length',
|
||||
'sec-ch-ua',
|
||||
'DNT',
|
||||
'sec-ch-ua-mobile',
|
||||
'User-Agent',
|
||||
'sec-ch-ua-platform',
|
||||
'Content-Type',
|
||||
'Accept',
|
||||
'Origin',
|
||||
'sec-fetch-site',
|
||||
'sec-fetch-mode',
|
||||
'sec-fetch-dest',
|
||||
'Referer',
|
||||
'Accept-Encoding',
|
||||
'Accept-Language',
|
||||
];
|
||||
|
||||
/**
|
||||
* Per workflow-12102025.md: Firefox header order for GraphQL requests
|
||||
*/
|
||||
const FIREFOX_HEADER_ORDER = [
|
||||
'Host',
|
||||
'User-Agent',
|
||||
'Accept',
|
||||
'Accept-Language',
|
||||
'Accept-Encoding',
|
||||
'Content-Type',
|
||||
'Content-Length',
|
||||
'Origin',
|
||||
'DNT',
|
||||
'Connection',
|
||||
'Referer',
|
||||
'sec-fetch-dest',
|
||||
'sec-fetch-mode',
|
||||
'sec-fetch-site',
|
||||
];
|
||||
|
||||
/**
|
||||
* Per workflow-12102025.md: Safari header order for GraphQL requests
|
||||
*/
|
||||
const SAFARI_HEADER_ORDER = [
|
||||
'Host',
|
||||
'Connection',
|
||||
'Content-Length',
|
||||
'Accept',
|
||||
'User-Agent',
|
||||
'Content-Type',
|
||||
'Origin',
|
||||
'Referer',
|
||||
'Accept-Encoding',
|
||||
'Accept-Language',
|
||||
];
|
||||
|
||||
/**
|
||||
* Per workflow-12102025.md: Edge uses Chrome order (Chromium-based)
|
||||
*/
|
||||
const HEADER_ORDERS: Record<BrowserType, string[]> = {
|
||||
Chrome: CHROME_HEADER_ORDER,
|
||||
Edge: CHROME_HEADER_ORDER,
|
||||
Firefox: FIREFOX_HEADER_ORDER,
|
||||
Safari: SAFARI_HEADER_ORDER,
|
||||
};
|
||||
|
||||
// ============================================================
|
||||
// FINGERPRINT GENERATION
|
||||
// ============================================================
|
||||
|
||||
/**
 * Per workflow-12102025.md: Generate HTTP fingerprint for a session.
 *
 * Session-level randomization (the DNT flag) is decided once here so that it
 * stays the same for every request built from the returned fingerprint.
 *
 * @param browserType Browser family the session impersonates.
 * @returns Fingerprint carrying the browser-specific header order, the
 *   curl-impersonate binary name and the DNT decision. `headers` starts
 *   empty; request headers are computed by buildOrderedHeaders().
 */
export function generateHTTPFingerprint(browserType: BrowserType): HTTPFingerprint {
  // Per workflow-12102025.md: DNT randomized per session (~30%)
  const hasDNT = Math.random() < DNT_PROBABILITY;

  return {
    browserType,
    headers: {}, // Built dynamically per request
    // Hand out a copy: the order arrays are module-level and shared by every
    // session (Chrome and Edge even share one array), so a caller mutating
    // its fingerprint must not be able to change them for everyone else.
    headerOrder: [...HEADER_ORDERS[browserType]],
    curlImpersonateBinary: CURL_IMPERSONATE_BINARIES[browserType],
    hasDNT,
  };
}
|
||||
|
||||
/**
 * Per workflow-12102025.md: assemble the header set for one request.
 *
 * Accept and Accept-Language are drawn at random on every call; the DNT flag
 * and the header ordering come from the session fingerprint. Only headers
 * that end up with a non-empty value are emitted, in the fingerprint's order.
 *
 * @param fingerprint Session fingerprint (browser type, DNT flag, header order).
 * @param context Per-request values: user agent, client hints, referer, POST info.
 * @returns `headers` (name -> value) and `orderedHeaders` (names in send order).
 */
export function buildOrderedHeaders(
  fingerprint: HTTPFingerprint,
  context: HeaderContext
): { headers: Record<string, string>; orderedHeaders: string[] } {
  const pickOne = (choices: string[]): string =>
    choices[Math.floor(Math.random() * choices.length)];

  const isChromium = fingerprint.browserType === 'Chrome' || fingerprint.browserType === 'Edge';
  const isFirefox = fingerprint.browserType === 'Firefox';

  // Candidate values for every header that may be sent.
  // Accept is drawn before Accept-Language (object literals evaluate in order).
  const candidates: Record<string, string> = {
    'Connection': 'keep-alive',
    'User-Agent': context.userAgent,
    'Accept': pickOne(ACCEPT_VARIATIONS),
    'Accept-Language': pickOne(ACCEPT_LANGUAGE_VARIATIONS),
    'Accept-Encoding': 'gzip, deflate, br',
    // Per workflow-12102025.md: Dynamic Referer per dispensary
    'Referer': context.referer,
  };

  // Per workflow-12102025.md: POST-only headers
  if (context.isPost) {
    candidates['Content-Type'] = 'application/json';
    candidates['Origin'] = 'https://dutchie.com';
    if (context.contentLength !== undefined) {
      candidates['Content-Length'] = String(context.contentLength);
    }
  }

  // Per workflow-12102025.md: DNT randomized per session
  if (fingerprint.hasDNT) {
    candidates['DNT'] = '1';
  }

  // Per workflow-12102025.md: client hints are Chromium-only (Chrome, Edge)
  if (isChromium) {
    if (context.secChUa) candidates['sec-ch-ua'] = context.secChUa;
    if (context.secChUaMobile) candidates['sec-ch-ua-mobile'] = context.secChUaMobile;
    if (context.secChUaPlatform) candidates['sec-ch-ua-platform'] = context.secChUaPlatform;
  }

  // Per workflow-12102025.md: Chromium and Firefox send sec-fetch-*; Safari sends no sec-* headers
  if (isChromium || isFirefox) {
    candidates['sec-fetch-site'] = 'same-origin';
    candidates['sec-fetch-mode'] = 'cors';
    candidates['sec-fetch-dest'] = 'empty';
  }

  // Keep only the headers that have a value, in the browser-specific order.
  const orderedHeaders = fingerprint.headerOrder.filter((name) => Boolean(candidates[name]));
  const headers: Record<string, string> = {};
  for (const name of orderedHeaders) {
    headers[name] = candidates[name];
  }

  return { headers, orderedHeaders };
}
|
||||
|
||||
/**
 * Per workflow-12102025.md: turn the ordered request headers into curl
 * command-line arguments (`-H "Name: value"` pairs).
 *
 * @param fingerprint Session fingerprint that fixes the header order.
 * @param context Per-request header inputs.
 * @returns Flat argument list, one `-H` flag plus one value per header.
 */
export function buildCurlHeaderArgs(
  fingerprint: HTTPFingerprint,
  context: HeaderContext
): string[] {
  const built = buildOrderedHeaders(fingerprint, context);
  const args: string[] = [];

  built.orderedHeaders.forEach((name) => {
    // curl generates Host and Content-Length itself, so they are not passed.
    const handledByCurl = name === 'Host' || name === 'Content-Length';
    if (!handledByCurl) {
      args.push('-H', `${name}: ${built.headers[name]}`);
    }
  });

  return args;
}
|
||||
|
||||
/**
 * Per workflow-12102025.md: Derive the Referer header from a dispensary menu_url.
 *
 * Accepted shapes: `/embedded-menu/<slug>`, `/dispensary/<slug>` or a full URL
 * containing either path. An embedded-menu match takes precedence over a
 * dispensary match.
 *
 * @param menuUrl The dispensary's menu URL; may be null, undefined or empty.
 * @returns `https://dutchie.com/dispensary/<slug>` when a slug can be
 *   extracted, otherwise the site root `https://dutchie.com/`.
 */
export function buildRefererFromMenuUrl(menuUrl: string | null | undefined): string {
  const fallback = 'https://dutchie.com/';

  if (!menuUrl) {
    return fallback;
  }

  // The slug ends at the next path separator, query string or fragment.
  // '#' is excluded so that ".../dispensary/foo#menu" yields "foo" and not
  // "foo#menu" (a fragment is never part of a Referer).
  const embeddedMatch = menuUrl.match(/\/embedded-menu\/([^/?#]+)/);
  const dispensaryMatch = menuUrl.match(/\/dispensary\/([^/?#]+)/);

  const match = embeddedMatch || dispensaryMatch;
  if (!match) {
    return fallback;
  }

  return `https://dutchie.com/dispensary/${match[1]}`;
}
|
||||
|
||||
/**
 * Per workflow-12102025.md: look up the curl-impersonate binary name that is
 * configured for the given browser type.
 *
 * @param browserType Browser family to impersonate.
 * @returns The binary name from CURL_IMPERSONATE_BINARIES.
 */
export function getCurlBinary(browserType: BrowserType): string {
  const { [browserType]: binaryName } = CURL_IMPERSONATE_BINARIES;
  return binaryName;
}
|
||||
|
||||
/**
 * Per workflow-12102025.md: report whether the curl-impersonate binary for
 * the given browser type can be found with `which`.
 *
 * @param browserType Browser family whose binary should be looked up.
 * @returns true when `which <binary>` succeeds, false when it fails for any reason.
 */
export function isCurlImpersonateAvailable(browserType: BrowserType): boolean {
  const lookupCommand = `which ${CURL_IMPERSONATE_BINARIES[browserType]}`;
  let found = true;

  try {
    // execSync throws on a non-zero exit status, i.e. when the binary is missing.
    require('child_process').execSync(lookupCommand, { stdio: 'ignore' });
  } catch {
    found = false;
  }

  return found;
}
|
||||
@@ -39,12 +39,7 @@ export async function cleanupOrphanedJobs(): Promise<void> {
|
||||
|
||||
export type ProxyTestMode = 'all' | 'failed' | 'inactive';
|
||||
|
||||
export interface CreateJobResult {
|
||||
jobId: number;
|
||||
totalProxies: number;
|
||||
}
|
||||
|
||||
export async function createProxyTestJob(mode: ProxyTestMode = 'all', concurrency: number = DEFAULT_CONCURRENCY): Promise<CreateJobResult> {
|
||||
export async function createProxyTestJob(mode: ProxyTestMode = 'all', concurrency: number = DEFAULT_CONCURRENCY): Promise<number> {
|
||||
// Check for existing running jobs first
|
||||
const existingJob = await getActiveProxyTestJob();
|
||||
if (existingJob) {
|
||||
@@ -84,7 +79,7 @@ export async function createProxyTestJob(mode: ProxyTestMode = 'all', concurrenc
|
||||
console.error(`❌ Proxy test job ${jobId} failed:`, err);
|
||||
});
|
||||
|
||||
return { jobId, totalProxies };
|
||||
return jobId;
|
||||
}
|
||||
|
||||
export async function getProxyTestJob(jobId: number): Promise<ProxyTestJob | null> {
|
||||
|
||||
@@ -1,38 +1,116 @@
|
||||
/**
|
||||
* LEGACY SCHEDULER - DEPRECATED 2024-12-10
|
||||
*
|
||||
* DO NOT USE THIS FILE.
|
||||
*
|
||||
* Per TASK_WORKFLOW_2024-12-10.md:
|
||||
* This node-cron scheduler has been replaced by the database-driven
|
||||
* task scheduler in src/services/task-scheduler.ts
|
||||
*
|
||||
* The new scheduler:
|
||||
* - Stores schedules in PostgreSQL (survives restarts)
|
||||
* - Uses SELECT FOR UPDATE SKIP LOCKED (multi-replica safe)
|
||||
* - Creates tasks in worker_tasks table (processed by task-worker.ts)
|
||||
*
|
||||
* This file is kept for reference only. All exports are no-ops.
|
||||
* Legacy code has been removed - see git history for original implementation.
|
||||
*/
|
||||
import cron from 'node-cron';
|
||||
import { pool } from '../db/pool';
|
||||
import { scrapeStore, scrapeCategory } from '../scraper-v2';
|
||||
|
||||
let scheduledJobs: cron.ScheduledTask[] = [];
|
||||
|
||||
/**
 * Load the scheduler settings from the `settings` table.
 *
 * Reads the `scrape_interval_hours` and `scrape_specials_time` keys. Both
 * values end up inside cron expressions, so anything that would produce an
 * invalid expression (missing, non-numeric or non-positive interval; a time
 * that is not HH:MM) falls back to the defaults of 4 hours and '00:01'.
 *
 * @returns The interval in whole hours (always > 0) and the daily specials
 *   time as an `HH:MM` string.
 */
async function getSettings(): Promise<{
  scrapeIntervalHours: number;
  scrapeSpecialsTime: string;
}> {
  const DEFAULT_INTERVAL_HOURS = 4;
  const DEFAULT_SPECIALS_TIME = '00:01';

  const result = await pool.query(`
    SELECT key, value FROM settings
    WHERE key IN ('scrape_interval_hours', 'scrape_specials_time')
  `);

  const settings: Record<string, string> = {};
  result.rows.forEach((row: { key: string; value: string }) => {
    settings[row.key] = row.value;
  });

  // Explicit radix; NaN, zero and negative values all fail the "> 0" test.
  const parsedInterval = parseInt(settings.scrape_interval_hours || '', 10);
  const scrapeIntervalHours = parsedInterval > 0 ? parsedInterval : DEFAULT_INTERVAL_HOURS;

  // Accept only 24-hour HH:MM so the value can be split into cron fields.
  const rawTime = String(settings.scrape_specials_time || '').trim();
  const scrapeSpecialsTime = /^([01]?\d|2[0-3]):[0-5]\d$/.test(rawTime)
    ? rawTime
    : DEFAULT_SPECIALS_TIME;

  return { scrapeIntervalHours, scrapeSpecialsTime };
}
|
||||
|
||||
/**
 * Scrape every store that is both active and scrape-enabled, one at a time.
 * A failure for one store is logged and does not stop the remaining stores.
 */
async function scrapeAllStores(): Promise<void> {
  console.log('🔄 Starting scheduled scrape for all stores...');

  const { rows: stores } = await pool.query(`
    SELECT id, name FROM stores WHERE active = true AND scrape_enabled = true
  `);

  for (const { id: storeId, name: storeName } of stores) {
    try {
      console.log(`Scraping store: ${storeName}`);
      await scrapeStore(storeId);
    } catch (scrapeError) {
      console.error(`Failed to scrape store ${storeName}:`, scrapeError);
    }
  }

  console.log('✅ Scheduled scrape completed');
}
|
||||
|
||||
/**
 * Scrape the 'specials' category of every active, scrape-enabled store whose
 * specials category is itself scrape-enabled. Stores are processed one at a
 * time; a failure is logged and the loop moves on to the next store.
 */
async function scrapeSpecials(): Promise<void> {
  console.log('🌟 Starting scheduled specials scrape...');

  const { rows: specials } = await pool.query(`
    SELECT s.id, s.name, c.id as category_id
    FROM stores s
    JOIN categories c ON c.store_id = s.id
    WHERE s.active = true AND s.scrape_enabled = true
      AND c.slug = 'specials' AND c.scrape_enabled = true
  `);

  for (const { id: storeId, name: storeName, category_id: categoryId } of specials) {
    try {
      console.log(`Scraping specials for: ${storeName}`);
      await scrapeCategory(storeId, categoryId);
    } catch (scrapeError) {
      console.error(`Failed to scrape specials for ${storeName}:`, scrapeError);
    }
  }

  console.log('✅ Specials scrape completed');
}
|
||||
|
||||
// 2024-12-10: All functions are now no-ops
|
||||
export async function startScheduler(): Promise<void> {
|
||||
console.warn('[DEPRECATED] startScheduler() called - use taskScheduler from task-scheduler.ts instead');
|
||||
// Stop any existing jobs
|
||||
stopScheduler();
|
||||
|
||||
const settings = await getSettings();
|
||||
|
||||
// Schedule regular store scrapes (every N hours)
|
||||
const scrapeIntervalCron = `0 */${settings.scrapeIntervalHours} * * *`;
|
||||
const storeJob = cron.schedule(scrapeIntervalCron, scrapeAllStores);
|
||||
scheduledJobs.push(storeJob);
|
||||
console.log(`📅 Scheduled store scraping: every ${settings.scrapeIntervalHours} hours`);
|
||||
|
||||
// Schedule specials scraping (daily at specified time)
|
||||
const [hours, minutes] = settings.scrapeSpecialsTime.split(':');
|
||||
const specialsCron = `${minutes} ${hours} * * *`;
|
||||
const specialsJob = cron.schedule(specialsCron, scrapeSpecials);
|
||||
scheduledJobs.push(specialsJob);
|
||||
console.log(`📅 Scheduled specials scraping: daily at ${settings.scrapeSpecialsTime}`);
|
||||
|
||||
// Initial scrape on startup (after 10 seconds)
|
||||
setTimeout(() => {
|
||||
console.log('🚀 Running initial scrape...');
|
||||
scrapeAllStores().catch(console.error);
|
||||
}, 10000);
|
||||
}
|
||||
|
||||
export function stopScheduler(): void {
|
||||
console.warn('[DEPRECATED] stopScheduler() called - use taskScheduler from task-scheduler.ts instead');
|
||||
scheduledJobs.forEach(job => job.stop());
|
||||
scheduledJobs = [];
|
||||
console.log('🛑 Scheduler stopped');
|
||||
}
|
||||
|
||||
export async function restartScheduler(): Promise<void> {
|
||||
console.warn('[DEPRECATED] restartScheduler() called - use taskScheduler from task-scheduler.ts instead');
|
||||
console.log('🔄 Restarting scheduler...');
|
||||
stopScheduler();
|
||||
await startScheduler();
|
||||
}
|
||||
|
||||
export async function triggerStoreScrape(_storeId: number): Promise<void> {
|
||||
console.warn('[DEPRECATED] triggerStoreScrape() called - use taskService.createTask() instead');
|
||||
// Manual trigger functions for admin
|
||||
export async function triggerStoreScrape(storeId: number): Promise<void> {
|
||||
console.log(`🔧 Manual scrape triggered for store ID: ${storeId}`);
|
||||
await scrapeStore(storeId);
|
||||
}
|
||||
|
||||
export async function triggerAllStoresScrape(): Promise<void> {
|
||||
console.warn('[DEPRECATED] triggerAllStoresScrape() called - use taskScheduler.triggerSchedule() instead');
|
||||
console.log('🔧 Manual scrape triggered for all stores');
|
||||
await scrapeAllStores();
|
||||
}
|
||||
|
||||
@@ -1,375 +0,0 @@
|
||||
/**
|
||||
* Database-Driven Task Scheduler
|
||||
*
|
||||
* Per TASK_WORKFLOW_2024-12-10.md:
|
||||
* - Schedules stored in DB (survives restarts)
|
||||
* - Uses SELECT FOR UPDATE to prevent duplicate execution across replicas
|
||||
* - Polls every 60s to check if schedules are due
|
||||
* - Generates tasks into worker_tasks table for task-worker.ts to process
|
||||
*
|
||||
* 2024-12-10: Created to replace legacy node-cron scheduler
|
||||
*/
|
||||
|
||||
import { pool } from '../db/pool';
|
||||
import { taskService, TaskRole } from '../tasks/task-service';
|
||||
|
||||
// Per TASK_WORKFLOW_2024-12-10.md: Poll interval for checking schedules
|
||||
const POLL_INTERVAL_MS = 60_000; // 60 seconds
|
||||
|
||||
interface TaskSchedule {
|
||||
id: number;
|
||||
name: string;
|
||||
role: TaskRole;
|
||||
enabled: boolean;
|
||||
interval_hours: number;
|
||||
last_run_at: Date | null;
|
||||
next_run_at: Date | null;
|
||||
state_code: string | null;
|
||||
priority: number;
|
||||
}
|
||||
|
||||
class TaskScheduler {
|
||||
private pollTimer: NodeJS.Timeout | null = null;
|
||||
private isRunning = false;
|
||||
|
||||
/**
|
||||
* Start the scheduler
|
||||
* Per TASK_WORKFLOW_2024-12-10.md: Called on API server startup
|
||||
*/
|
||||
async start(): Promise<void> {
  // Idempotent: a second call while running only logs and returns.
  if (this.isRunning) {
    console.log('[TaskScheduler] Already running');
    return;
  }

  console.log('[TaskScheduler] Starting database-driven scheduler...');
  this.isRunning = true;

  // Per TASK_WORKFLOW_2024-12-10.md: On startup, recover stale tasks.
  // Best effort: a failure here is logged and startup continues.
  try {
    const recovered = await taskService.recoverStaleTasks(10);
    if (recovered > 0) {
      console.log(`[TaskScheduler] Recovered ${recovered} stale tasks from dead workers`);
    }
  } catch (err: any) {
    console.error('[TaskScheduler] Failed to recover stale tasks:', err.message);
  }

  // Per TASK_WORKFLOW_2024-12-10.md: Ensure default schedules exist
  await this.ensureDefaultSchedules();

  // Per TASK_WORKFLOW_2024-12-10.md: Check immediately on startup.
  // A failing first check must not abort startup: otherwise isRunning would
  // stay true with no poll timer, and every later start() would be a no-op.
  try {
    await this.checkAndRunDueSchedules();
  } catch (err: any) {
    console.error(
      '[TaskScheduler] Initial schedule check failed:',
      err instanceof Error ? err.message : err
    );
  }

  // stop() may have been called while the awaits above were pending.
  if (!this.isRunning) {
    return;
  }

  // Per TASK_WORKFLOW_2024-12-10.md: Then poll every 60 seconds.
  // The rejection is handled here because nothing awaits a timer callback;
  // an unhandled rejection could otherwise terminate the process.
  this.pollTimer = setInterval(() => {
    this.checkAndRunDueSchedules().catch((err: any) => {
      console.error(
        '[TaskScheduler] Scheduled check failed:',
        err instanceof Error ? err.message : err
      );
    });
  }, POLL_INTERVAL_MS);

  console.log('[TaskScheduler] Started - polling every 60s');
}
|
||||
|
||||
/**
|
||||
* Stop the scheduler
|
||||
*/
|
||||
stop(): void {
  const activeTimer = this.pollTimer;

  // Cancel polling if a timer was ever installed.
  if (activeTimer) {
    clearInterval(activeTimer);
    this.pollTimer = null;
  }

  this.isRunning = false;
  console.log('[TaskScheduler] Stopped');
}
|
||||
|
||||
/**
|
||||
* Ensure default schedules exist in the database
|
||||
* Per TASK_WORKFLOW_2024-12-10.md: Creates schedules if they don't exist
|
||||
*/
|
||||
private async ensureDefaultSchedules(): Promise<void> {
  // Existing rows are left untouched (ON CONFLICT DO NOTHING); new rows are
  // enabled and due immediately (next_run_at = NOW()).
  const insertSql = `
    INSERT INTO task_schedules (name, role, interval_hours, priority, description, enabled, next_run_at)
    VALUES ($1, $2, $3, $4, $5, true, NOW())
    ON CONFLICT (name) DO NOTHING
  `;

  // Per TASK_WORKFLOW_2024-12-10.md: Default schedules for task generation
  // NOTE: payload_fetch replaces direct product_refresh - it chains to product_refresh
  // Tuple layout: [name, role, interval_hours, priority, description]
  const defaultRows: Array<[string, TaskRole, number, number, string]> = [
    [
      'payload_fetch_all',
      'payload_fetch' as TaskRole,
      4,
      0,
      'Fetch payloads from Dutchie API for all crawl-enabled stores every 4 hours. Chains to product_refresh.',
    ],
    [
      'store_discovery_dutchie',
      'store_discovery' as TaskRole,
      24,
      5,
      'Discover new Dutchie stores daily',
    ],
    [
      'analytics_refresh',
      'analytics_refresh' as TaskRole,
      6,
      0,
      'Refresh analytics materialized views every 6 hours',
    ],
  ];

  for (const row of defaultRows) {
    try {
      await pool.query(insertSql, row);
    } catch (err: any) {
      // Table may not exist yet - will be created by migration
      const missingTable = err.message.includes('does not exist');
      if (!missingTable) {
        console.error(`[TaskScheduler] Failed to create default schedule ${row[0]}:`, err.message);
      }
    }
  }
}
|
||||
|
||||
/**
|
||||
* Check for and run any due schedules
|
||||
* Per TASK_WORKFLOW_2024-12-10.md: Uses SELECT FOR UPDATE SKIP LOCKED to prevent duplicates
|
||||
*/
|
||||
private async checkAndRunDueSchedules(): Promise<void> {
  // This method is driven by a timer, so it must never reject: failing to
  // obtain a connection is logged and the check is retried on the next poll.
  const client = await pool.connect().catch((err: any) => {
    console.error(
      '[TaskScheduler] Failed to check schedules:',
      err instanceof Error ? err.message : err
    );
    return null;
  });
  if (!client) {
    return;
  }

  // Set when ROLLBACK itself fails; passed to release() so the pool discards
  // the connection instead of reusing one stuck in a broken transaction.
  let releaseError: Error | undefined;

  try {
    await client.query('BEGIN');

    // Per TASK_WORKFLOW_2024-12-10.md: Atomic claim of due schedules.
    // Rows locked by another replica are skipped rather than waited on.
    const result = await client.query<TaskSchedule>(`
      SELECT *
      FROM task_schedules
      WHERE enabled = true
        AND (next_run_at IS NULL OR next_run_at <= NOW())
      FOR UPDATE SKIP LOCKED
    `);

    for (const schedule of result.rows) {
      console.log(`[TaskScheduler] Running schedule: ${schedule.name} (${schedule.role})`);

      try {
        const tasksCreated = await this.executeSchedule(schedule);
        console.log(`[TaskScheduler] Schedule ${schedule.name} created ${tasksCreated} tasks`);

        // Per TASK_WORKFLOW_2024-12-10.md: Update last_run_at and calculate next_run_at
        await client.query(`
          UPDATE task_schedules
          SET
            last_run_at = NOW(),
            next_run_at = NOW() + ($1 || ' hours')::interval,
            last_task_count = $2,
            updated_at = NOW()
          WHERE id = $3
        `, [schedule.interval_hours, tasksCreated, schedule.id]);

      } catch (err: any) {
        console.error(`[TaskScheduler] Schedule ${schedule.name} failed:`, err.message);

        // Still update next_run_at to prevent infinite retry loop
        await client.query(`
          UPDATE task_schedules
          SET
            next_run_at = NOW() + ($1 || ' hours')::interval,
            last_error = $2,
            updated_at = NOW()
          WHERE id = $3
        `, [schedule.interval_hours, err.message, schedule.id]);
      }
    }

    await client.query('COMMIT');
  } catch (err: any) {
    console.error(
      '[TaskScheduler] Failed to check schedules:',
      err instanceof Error ? err.message : err
    );

    // ROLLBACK can fail too (e.g. the connection dropped); contain it here.
    try {
      await client.query('ROLLBACK');
    } catch (rollbackErr: any) {
      releaseError = rollbackErr instanceof Error ? rollbackErr : new Error(String(rollbackErr));
      console.error('[TaskScheduler] Rollback failed:', releaseError.message);
    }
  } finally {
    client.release(releaseError);
  }
}
|
||||
|
||||
/**
|
||||
* Execute a schedule and create tasks
|
||||
* Per TASK_WORKFLOW_2024-12-10.md: Different logic per role
|
||||
*/
|
||||
private async executeSchedule(schedule: TaskSchedule): Promise<number> {
  const { role } = schedule;

  // Per TASK_WORKFLOW_2024-12-10.md: payload_fetch replaces direct product_refresh.
  // product_refresh schedules are legacy and generate payload_fetch tasks too.
  if (role === 'payload_fetch' || role === 'product_refresh') {
    return this.generatePayloadFetchTasks(schedule);
  }

  if (role === 'store_discovery') {
    return this.generateStoreDiscoveryTasks(schedule);
  }

  if (role === 'analytics_refresh') {
    return this.generateAnalyticsRefreshTasks(schedule);
  }

  // Any other role creates no tasks.
  console.warn(`[TaskScheduler] Unknown role: ${schedule.role}`);
  return 0;
}
|
||||
|
||||
/**
|
||||
* Generate payload_fetch tasks for stores that need crawling
|
||||
* Per TASK_WORKFLOW_2024-12-10.md: payload_fetch hits API, saves to disk, chains to product_refresh
|
||||
*/
|
||||
private async generatePayloadFetchTasks(schedule: TaskSchedule): Promise<number> {
  // The state filter and its $2 parameter are added only for state-scoped schedules.
  const hasStateFilter = Boolean(schedule.state_code);
  const queryParams: any[] = hasStateFilter
    ? [schedule.interval_hours, schedule.state_code]
    : [schedule.interval_hours];

  // Per TASK_WORKFLOW_2024-12-10.md: Find stores needing refresh
  const dueStores = await pool.query(`
    SELECT d.id
    FROM dispensaries d
    WHERE d.crawl_enabled = true
      AND d.platform_dispensary_id IS NOT NULL
      -- No pending/running payload_fetch or product_refresh task already
      AND NOT EXISTS (
        SELECT 1 FROM worker_tasks t
        WHERE t.dispensary_id = d.id
          AND t.role IN ('payload_fetch', 'product_refresh')
          AND t.status IN ('pending', 'claimed', 'running')
      )
      -- Never fetched OR last fetch > interval ago
      AND (
        d.last_fetch_at IS NULL
        OR d.last_fetch_at < NOW() - ($1 || ' hours')::interval
      )
      ${hasStateFilter ? 'AND d.state_id = (SELECT id FROM states WHERE code = $2)' : ''}
  `, queryParams);

  if (dueStores.rows.length === 0) {
    return 0;
  }

  // Per TASK_WORKFLOW_2024-12-10.md: Create payload_fetch tasks (they chain to product_refresh)
  const newTasks = dueStores.rows.map((row: { id: number }) => ({
    role: 'payload_fetch' as TaskRole,
    dispensary_id: row.id,
    priority: schedule.priority,
  }));

  return taskService.createTasks(newTasks);
}
|
||||
|
||||
/**
|
||||
* Generate store_discovery tasks
|
||||
* Per TASK_WORKFLOW_2024-12-10.md: One task per platform
|
||||
*/
|
||||
private async generateStoreDiscoveryTasks(schedule: TaskSchedule): Promise<number> {
  // Look for a discovery task that is still pending, claimed or running.
  const inFlight = await taskService.listTasks({
    role: 'store_discovery',
    status: ['pending', 'claimed', 'running'],
    limit: 1,
  });
  const alreadyQueued = inFlight.length > 0;

  if (alreadyQueued) {
    console.log('[TaskScheduler] Store discovery task already pending, skipping');
    return 0;
  }

  // Exactly one task is created, for the dutchie platform.
  await taskService.createTask({
    role: 'store_discovery',
    platform: 'dutchie',
    priority: schedule.priority,
  });
  return 1;
}
|
||||
|
||||
/**
|
||||
* Generate analytics_refresh tasks
|
||||
* Per TASK_WORKFLOW_2024-12-10.md: Single task to refresh all MVs
|
||||
*/
|
||||
private async generateAnalyticsRefreshTasks(schedule: TaskSchedule): Promise<number> {
  // Look for an analytics task that is still pending, claimed or running.
  const inFlight = await taskService.listTasks({
    role: 'analytics_refresh',
    status: ['pending', 'claimed', 'running'],
    limit: 1,
  });
  const alreadyQueued = inFlight.length > 0;

  if (alreadyQueued) {
    console.log('[TaskScheduler] Analytics refresh task already pending, skipping');
    return 0;
  }

  // Exactly one refresh task is created.
  await taskService.createTask({
    role: 'analytics_refresh',
    priority: schedule.priority,
  });
  return 1;
}
|
||||
|
||||
/**
|
||||
* Get all schedules for dashboard display
|
||||
*/
|
||||
async getSchedules(): Promise<TaskSchedule[]> {
  let schedules: TaskSchedule[] = [];

  try {
    const { rows } = await pool.query(`
      SELECT * FROM task_schedules ORDER BY name
    `);
    schedules = rows as TaskSchedule[];
  } catch {
    // Any query failure is swallowed and reported as an empty list.
    schedules = [];
  }

  return schedules;
}
|
||||
|
||||
/**
|
||||
* Update a schedule
|
||||
*/
|
||||
async updateSchedule(id: number, updates: Partial<TaskSchedule>): Promise<void> {
  // Only these columns may be changed, and they are always emitted in this order.
  const editableColumns: Array<'enabled' | 'interval_hours' | 'priority'> = [
    'enabled',
    'interval_hours',
    'priority',
  ];

  const assignments: string[] = [];
  const values: any[] = [];

  for (const column of editableColumns) {
    const newValue = updates[column];
    if (newValue !== undefined) {
      values.push(newValue);
      // The placeholder number is the position of the value just pushed.
      assignments.push(`${column} = $${values.length}`);
    }
  }

  // Nothing to change: skip the query entirely.
  if (assignments.length === 0) return;

  assignments.push('updated_at = NOW()');
  values.push(id);

  await pool.query(`
    UPDATE task_schedules
    SET ${assignments.join(', ')}
    WHERE id = $${values.length}
  `, values);
}
|
||||
|
||||
/**
|
||||
* Trigger a schedule to run immediately
|
||||
*/
|
||||
async triggerSchedule(id: number): Promise<number> {
  const { rows } = await pool.query(`
    SELECT * FROM task_schedules WHERE id = $1
  `, [id]);

  // An unknown id is an error rather than a silent no-op.
  if (rows.length === 0) {
    throw new Error(`Schedule ${id} not found`);
  }

  const [schedule] = rows;
  return this.executeSchedule(schedule as TaskSchedule);
}
|
||||
}
|
||||
|
||||
// Per TASK_WORKFLOW_2024-12-10.md: Singleton instance
|
||||
export const taskScheduler = new TaskScheduler();
|
||||
@@ -94,8 +94,7 @@ export async function handleEntryPointDiscovery(ctx: TaskContext): Promise<TaskR
|
||||
// ============================================================
|
||||
// STEP 3: Start stealth session
|
||||
// ============================================================
|
||||
// Per workflow-12102025.md: session identity comes from proxy location, not task params
|
||||
const session = startSession();
|
||||
const session = startSession(dispensary.state || 'AZ', 'America/Phoenix');
|
||||
console.log(`[EntryPointDiscovery] Session started: ${session.sessionId}`);
|
||||
|
||||
try {
|
||||
|
||||
@@ -9,4 +9,3 @@ export { handleProductDiscovery } from './product-discovery';
|
||||
export { handleStoreDiscovery } from './store-discovery';
|
||||
export { handleEntryPointDiscovery } from './entry-point-discovery';
|
||||
export { handleAnalyticsRefresh } from './analytics-refresh';
|
||||
export { handleProxyTest } from './proxy-test';
|
||||
|
||||
@@ -1,221 +0,0 @@
|
||||
/**
|
||||
* Payload Fetch Handler
|
||||
*
|
||||
* Per TASK_WORKFLOW_2024-12-10.md: Separates API fetch from data processing.
|
||||
*
|
||||
* This handler ONLY:
|
||||
* 1. Hits Dutchie GraphQL API
|
||||
* 2. Saves raw payload to filesystem (gzipped)
|
||||
* 3. Records metadata in raw_crawl_payloads table
|
||||
* 4. Queues a product_refresh task to process the payload
|
||||
*
|
||||
* Benefits of separation:
|
||||
* - Retry-friendly: If normalize fails, re-run refresh without re-crawling
|
||||
* - Faster refreshes: Local file read vs network call
|
||||
* - Replay-able: Run refresh against any historical payload
|
||||
* - Less API pressure: Only this role hits Dutchie
|
||||
*/
|
||||
|
||||
import { TaskContext, TaskResult } from '../task-worker';
|
||||
import {
|
||||
executeGraphQL,
|
||||
startSession,
|
||||
endSession,
|
||||
GRAPHQL_HASHES,
|
||||
DUTCHIE_CONFIG,
|
||||
} from '../../platforms/dutchie';
|
||||
import { saveRawPayload } from '../../utils/payload-storage';
|
||||
import { taskService } from '../task-service';
|
||||
|
||||
/**
 * payload_fetch task handler.
 *
 * Loads the crawl-enabled dispensary referenced by the task, pages through the
 * Dutchie `FilteredProducts` GraphQL query (Status: 'All'), stores the combined
 * result via `saveRawPayload`, stamps `dispensaries.last_fetch_at`, and queues a
 * follow-up `product_refresh` task that carries the saved payload id.
 *
 * @param ctx Task context; uses `ctx.pool`, `ctx.task` and `ctx.heartbeat()`.
 * @returns `{ success: true, payloadId, productCount, sizeBytes }` on success,
 *          otherwise `{ success: false, error }`. Errors thrown inside the
 *          handler are caught and reported through the result, never re-thrown.
 */
export async function handlePayloadFetch(ctx: TaskContext): Promise<TaskResult> {
  const { pool, task } = ctx;
  const dispensaryId = task.dispensary_id;

  if (!dispensaryId) {
    return { success: false, error: 'No dispensary_id specified for payload_fetch task' };
  }

  try {
    // ============================================================
    // STEP 1: Load dispensary info
    // ============================================================
    const dispResult = await pool.query(`
      SELECT
        id, name, platform_dispensary_id, menu_url, menu_type, city, state
      FROM dispensaries
      WHERE id = $1 AND crawl_enabled = true
    `, [dispensaryId]);

    if (dispResult.rows.length === 0) {
      return { success: false, error: `Dispensary ${dispensaryId} not found or not crawl_enabled` };
    }

    const dispensary = dispResult.rows[0];
    const platformId = dispensary.platform_dispensary_id;

    if (!platformId) {
      return { success: false, error: `Dispensary ${dispensaryId} has no platform_dispensary_id` };
    }

    // Extract cName from menu_url; falls back to the literal 'dispensary'
    // when the URL is missing or does not match either known path shape.
    const cNameMatch = dispensary.menu_url?.match(/\/(?:embedded-menu|dispensary)\/([^/?]+)/);
    const cName = cNameMatch ? cNameMatch[1] : 'dispensary';

    console.log(`[PayloadFetch] Starting fetch for ${dispensary.name} (ID: ${dispensaryId})`);
    console.log(`[PayloadFetch] Platform ID: ${platformId}, cName: ${cName}`);

    // ============================================================
    // STEP 2: Start stealth session
    // ============================================================
    const session = startSession();

    // ============================================================
    // STEP 3: Fetch products via GraphQL (Status: 'All')
    // ============================================================
    const allProducts: any[] = [];
    let page = 0;
    let pagesFetched = 0; // pages that actually returned product data
    let totalCount = 0;
    const perPage = DUTCHIE_CONFIG.perPage;
    const maxPages = DUTCHIE_CONFIG.maxPages;

    // Everything that happens while the session is open lives inside this
    // try/finally, so endSession() runs even if the first heartbeat rejects.
    try {
      console.log(`[PayloadFetch] Session started: ${session.sessionId}`);

      await ctx.heartbeat();

      while (page < maxPages) {
        const variables = {
          includeEnterpriseSpecials: false,
          productsFilter: {
            dispensaryId: platformId,
            pricingType: 'rec',
            Status: 'All',
            types: [],
            useCache: false,
            isDefaultSort: true,
            sortBy: 'popularSortIdx',
            sortDirection: 1,
            bypassOnlineThresholds: true,
            isKioskMenu: false,
            removeProductsBelowOptionThresholds: false,
          },
          page,
          perPage,
        };

        console.log(`[PayloadFetch] Fetching page ${page + 1}...`);

        const result = await executeGraphQL(
          'FilteredProducts',
          variables,
          GRAPHQL_HASHES.FilteredProducts,
          { cName, maxRetries: 3 }
        );

        const data = result?.data?.filteredProducts;
        if (!data || !data.products) {
          // An empty first page is a hard failure; an empty later page just
          // ends pagination with whatever was collected so far.
          if (page === 0) {
            throw new Error('No product data returned from GraphQL');
          }
          break;
        }

        const products = data.products;
        allProducts.push(...products);
        pagesFetched++;

        if (page === 0) {
          totalCount = data.queryInfo?.totalCount || products.length;
          console.log(`[PayloadFetch] Total products reported: ${totalCount}`);
        }

        // Stop once the reported total is reached or a short page is returned.
        if (allProducts.length >= totalCount || products.length < perPage) {
          break;
        }

        page++;

        // Pause between pages, but not after the last permitted one.
        if (page < maxPages) {
          await new Promise(r => setTimeout(r, DUTCHIE_CONFIG.pageDelayMs));
        }

        // Heartbeat every fifth page during long crawls.
        if (page % 5 === 0) {
          await ctx.heartbeat();
        }
      }

      console.log(`[PayloadFetch] Fetched ${allProducts.length} products in ${pagesFetched} pages`);

    } finally {
      endSession();
    }

    if (allProducts.length === 0) {
      return {
        success: false,
        error: 'No products returned from GraphQL',
        productsProcessed: 0,
      };
    }

    await ctx.heartbeat();

    // ============================================================
    // STEP 4: Save raw payload to filesystem
    // Per TASK_WORKFLOW_2024-12-10.md: Metadata/Payload separation
    // ============================================================
    const rawPayload = {
      dispensaryId,
      platformId,
      cName,
      fetchedAt: new Date().toISOString(),
      productCount: allProducts.length,
      products: allProducts,
    };

    const payloadResult = await saveRawPayload(
      pool,
      dispensaryId,
      rawPayload,
      null, // crawl_run_id - not using crawl_runs in new system
      allProducts.length
    );

    console.log(`[PayloadFetch] Saved payload #${payloadResult.id} (${(payloadResult.sizeBytes / 1024).toFixed(1)}KB)`);

    // ============================================================
    // STEP 5: Update dispensary last_fetch_at
    // ============================================================
    await pool.query(`
      UPDATE dispensaries
      SET last_fetch_at = NOW()
      WHERE id = $1
    `, [dispensaryId]);

    // ============================================================
    // STEP 6: Queue product_refresh task to process the payload
    // Per TASK_WORKFLOW_2024-12-10.md: Task chaining
    // ============================================================
    await taskService.createTask({
      role: 'product_refresh',
      dispensary_id: dispensaryId,
      priority: task.priority || 0,
      payload: { payload_id: payloadResult.id },
    });

    console.log(`[PayloadFetch] Queued product_refresh task for payload #${payloadResult.id}`);

    return {
      success: true,
      payloadId: payloadResult.id,
      productCount: allProducts.length,
      sizeBytes: payloadResult.sizeBytes,
    };

  } catch (error: unknown) {
    const errorMessage = error instanceof Error ? error.message : 'Unknown error';
    console.error(`[PayloadFetch] Error for dispensary ${dispensaryId}:`, errorMessage);
    return {
      success: false,
      error: errorMessage,
    };
  }
}
|
||||
@@ -1,31 +1,16 @@
|
||||
/**
|
||||
* Product Discovery Handler
|
||||
*
|
||||
* Per TASK_WORKFLOW_2024-12-10.md: Initial product fetch for newly discovered stores.
|
||||
*
|
||||
* Flow:
|
||||
* 1. Triggered after store_discovery promotes a new dispensary
|
||||
* 2. Chains to payload_fetch to get initial product data
|
||||
* 3. payload_fetch chains to product_refresh for DB upsert
|
||||
*
|
||||
* Chaining:
|
||||
* store_discovery → (newStoreIds) → product_discovery → payload_fetch → product_refresh
|
||||
* Initial product fetch for stores that have 0 products.
|
||||
* Same logic as product_resync, but for initial discovery.
|
||||
*/
|
||||
|
||||
import { TaskContext, TaskResult } from '../task-worker';
|
||||
import { handlePayloadFetch } from './payload-fetch';
|
||||
import { handleProductRefresh } from './product-refresh';
|
||||
|
||||
export async function handleProductDiscovery(ctx: TaskContext): Promise<TaskResult> {
|
||||
const { task } = ctx;
|
||||
const dispensaryId = task.dispensary_id;
|
||||
|
||||
if (!dispensaryId) {
|
||||
return { success: false, error: 'No dispensary_id provided' };
|
||||
}
|
||||
|
||||
console.log(`[ProductDiscovery] Starting initial product discovery for dispensary ${dispensaryId}`);
|
||||
|
||||
// Per TASK_WORKFLOW_2024-12-10.md: Chain to payload_fetch for API → disk
|
||||
// payload_fetch will then chain to product_refresh for disk → DB
|
||||
return handlePayloadFetch(ctx);
|
||||
// Product discovery is essentially the same as refresh for the first time
|
||||
// The main difference is in when this task is triggered (new store vs scheduled)
|
||||
console.log(`[ProductDiscovery] Starting initial product fetch for dispensary ${ctx.task.dispensary_id}`);
|
||||
return handleProductRefresh(ctx);
|
||||
}
|
||||
|
||||
@@ -1,32 +1,33 @@
|
||||
/**
|
||||
* Product Refresh Handler
|
||||
*
|
||||
* Per TASK_WORKFLOW_2024-12-10.md: Processes a locally-stored payload.
|
||||
*
|
||||
* This handler reads from the filesystem (NOT the Dutchie API).
|
||||
* The payload_fetch handler is responsible for API calls.
|
||||
* Re-crawls a store to capture price/stock changes using the GraphQL pipeline.
|
||||
*
|
||||
* Flow:
|
||||
* 1. Load payload from filesystem (by payload_id or latest for dispensary)
|
||||
* 2. Normalize data via DutchieNormalizer
|
||||
* 3. Upsert to store_products and store_product_snapshots
|
||||
* 4. Track missing products (increment consecutive_misses, mark OOS at 3)
|
||||
* 5. Download new product images
|
||||
*
|
||||
* Benefits of separation:
|
||||
* - Retry-friendly: If this fails, re-run without re-crawling
|
||||
* - Replay-able: Run against any historical payload
|
||||
* - Faster: Local file read vs network call
|
||||
* 1. Load dispensary info from database
|
||||
* 2. Start stealth session (fingerprint + optional proxy)
|
||||
* 3. Fetch products via GraphQL (Status: 'All')
|
||||
* 4. Normalize data via DutchieNormalizer
|
||||
* 5. Upsert to store_products and store_product_snapshots
|
||||
* 6. Track missing products (increment consecutive_misses, mark OOS at 3)
|
||||
* 7. Download new product images
|
||||
* 8. End session
|
||||
*/
|
||||
|
||||
import { TaskContext, TaskResult } from '../task-worker';
|
||||
import {
|
||||
executeGraphQL,
|
||||
startSession,
|
||||
endSession,
|
||||
GRAPHQL_HASHES,
|
||||
DUTCHIE_CONFIG,
|
||||
} from '../../platforms/dutchie';
|
||||
import { DutchieNormalizer } from '../../hydration/normalizers/dutchie';
|
||||
import {
|
||||
upsertStoreProducts,
|
||||
createStoreProductSnapshots,
|
||||
downloadProductImages,
|
||||
} from '../../hydration/canonical-upsert';
|
||||
import { loadRawPayloadById, getLatestPayload } from '../../utils/payload-storage';
|
||||
|
||||
const normalizer = new DutchieNormalizer();
|
||||
|
||||
@@ -46,76 +47,129 @@ export async function handleProductRefresh(ctx: TaskContext): Promise<TaskResult
|
||||
SELECT
|
||||
id, name, platform_dispensary_id, menu_url, menu_type, city, state
|
||||
FROM dispensaries
|
||||
WHERE id = $1
|
||||
WHERE id = $1 AND crawl_enabled = true
|
||||
`, [dispensaryId]);
|
||||
|
||||
if (dispResult.rows.length === 0) {
|
||||
return { success: false, error: `Dispensary ${dispensaryId} not found` };
|
||||
return { success: false, error: `Dispensary ${dispensaryId} not found or not crawl_enabled` };
|
||||
}
|
||||
|
||||
const dispensary = dispResult.rows[0];
|
||||
const platformId = dispensary.platform_dispensary_id;
|
||||
|
||||
// Extract cName from menu_url for image storage context
|
||||
if (!platformId) {
|
||||
return { success: false, error: `Dispensary ${dispensaryId} has no platform_dispensary_id` };
|
||||
}
|
||||
|
||||
// Extract cName from menu_url
|
||||
const cNameMatch = dispensary.menu_url?.match(/\/(?:embedded-menu|dispensary)\/([^/?]+)/);
|
||||
const cName = cNameMatch ? cNameMatch[1] : 'dispensary';
|
||||
|
||||
console.log(`[ProductRefresh] Starting refresh for ${dispensary.name} (ID: ${dispensaryId})`);
|
||||
console.log(`[ProductResync] Starting crawl for ${dispensary.name} (ID: ${dispensaryId})`);
|
||||
console.log(`[ProductResync] Platform ID: ${platformId}, cName: ${cName}`);
|
||||
|
||||
// ============================================================
|
||||
// STEP 2: Start stealth session
|
||||
// ============================================================
|
||||
const session = startSession(dispensary.state || 'AZ', 'America/Phoenix');
|
||||
console.log(`[ProductResync] Session started: ${session.sessionId}`);
|
||||
|
||||
await ctx.heartbeat();
|
||||
|
||||
// ============================================================
|
||||
// STEP 2: Load payload from filesystem
|
||||
// Per TASK_WORKFLOW_2024-12-10.md: Read local payload, not API
|
||||
// STEP 3: Fetch products via GraphQL (Status: 'All')
|
||||
// ============================================================
|
||||
let payloadData: any;
|
||||
let payloadId: number;
|
||||
const allProducts: any[] = [];
|
||||
let page = 0;
|
||||
let totalCount = 0;
|
||||
const perPage = DUTCHIE_CONFIG.perPage;
|
||||
const maxPages = DUTCHIE_CONFIG.maxPages;
|
||||
|
||||
// Check if specific payload_id was provided (from task chaining)
|
||||
const taskPayload = task.payload as { payload_id?: number } | null;
|
||||
try {
|
||||
while (page < maxPages) {
|
||||
const variables = {
|
||||
includeEnterpriseSpecials: false,
|
||||
productsFilter: {
|
||||
dispensaryId: platformId,
|
||||
pricingType: 'rec',
|
||||
Status: 'All',
|
||||
types: [],
|
||||
useCache: false,
|
||||
isDefaultSort: true,
|
||||
sortBy: 'popularSortIdx',
|
||||
sortDirection: 1,
|
||||
bypassOnlineThresholds: true,
|
||||
isKioskMenu: false,
|
||||
removeProductsBelowOptionThresholds: false,
|
||||
},
|
||||
page,
|
||||
perPage,
|
||||
};
|
||||
|
||||
if (taskPayload?.payload_id) {
|
||||
// Load specific payload (from payload_fetch chaining)
|
||||
const result = await loadRawPayloadById(pool, taskPayload.payload_id);
|
||||
if (!result) {
|
||||
return { success: false, error: `Payload ${taskPayload.payload_id} not found` };
|
||||
console.log(`[ProductResync] Fetching page ${page + 1}...`);
|
||||
|
||||
const result = await executeGraphQL(
|
||||
'FilteredProducts',
|
||||
variables,
|
||||
GRAPHQL_HASHES.FilteredProducts,
|
||||
{ cName, maxRetries: 3 }
|
||||
);
|
||||
|
||||
const data = result?.data?.filteredProducts;
|
||||
if (!data || !data.products) {
|
||||
if (page === 0) {
|
||||
throw new Error('No product data returned from GraphQL');
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
const products = data.products;
|
||||
allProducts.push(...products);
|
||||
|
||||
if (page === 0) {
|
||||
totalCount = data.queryInfo?.totalCount || products.length;
|
||||
console.log(`[ProductResync] Total products reported: ${totalCount}`);
|
||||
}
|
||||
|
||||
if (allProducts.length >= totalCount || products.length < perPage) {
|
||||
break;
|
||||
}
|
||||
|
||||
page++;
|
||||
|
||||
if (page < maxPages) {
|
||||
await new Promise(r => setTimeout(r, DUTCHIE_CONFIG.pageDelayMs));
|
||||
}
|
||||
|
||||
if (page % 5 === 0) {
|
||||
await ctx.heartbeat();
|
||||
}
|
||||
}
|
||||
payloadData = result.payload;
|
||||
payloadId = result.metadata.id;
|
||||
console.log(`[ProductRefresh] Loaded specific payload #${payloadId}`);
|
||||
} else {
|
||||
// Load latest payload for this dispensary
|
||||
const result = await getLatestPayload(pool, dispensaryId);
|
||||
if (!result) {
|
||||
return { success: false, error: `No payload found for dispensary ${dispensaryId}` };
|
||||
}
|
||||
payloadData = result.payload;
|
||||
payloadId = result.metadata.id;
|
||||
console.log(`[ProductRefresh] Loaded latest payload #${payloadId} (${result.metadata.fetchedAt})`);
|
||||
|
||||
console.log(`[ProductResync] Fetched ${allProducts.length} products in ${page + 1} pages`);
|
||||
|
||||
} finally {
|
||||
endSession();
|
||||
}
|
||||
|
||||
const allProducts = payloadData.products || [];
|
||||
|
||||
if (allProducts.length === 0) {
|
||||
return {
|
||||
success: false,
|
||||
error: 'Payload contains no products',
|
||||
payloadId,
|
||||
error: 'No products returned from GraphQL',
|
||||
productsProcessed: 0,
|
||||
};
|
||||
}
|
||||
|
||||
console.log(`[ProductRefresh] Processing ${allProducts.length} products from payload #${payloadId}`);
|
||||
|
||||
await ctx.heartbeat();
|
||||
|
||||
// ============================================================
|
||||
// STEP 3: Normalize data
|
||||
// STEP 4: Normalize data
|
||||
// ============================================================
|
||||
console.log(`[ProductRefresh] Normalizing ${allProducts.length} products...`);
|
||||
console.log(`[ProductResync] Normalizing ${allProducts.length} products...`);
|
||||
|
||||
// Build RawPayload for the normalizer
|
||||
const rawPayload = {
|
||||
id: `refresh-${dispensaryId}-${Date.now()}`,
|
||||
id: `resync-${dispensaryId}-${Date.now()}`,
|
||||
dispensary_id: dispensaryId,
|
||||
crawl_run_id: null,
|
||||
platform: 'dutchie',
|
||||
@@ -135,26 +189,25 @@ export async function handleProductRefresh(ctx: TaskContext): Promise<TaskResult
|
||||
const normalizationResult = normalizer.normalize(rawPayload);
|
||||
|
||||
if (normalizationResult.errors.length > 0) {
|
||||
console.warn(`[ProductRefresh] Normalization warnings: ${normalizationResult.errors.map(e => e.message).join(', ')}`);
|
||||
console.warn(`[ProductResync] Normalization warnings: ${normalizationResult.errors.map(e => e.message).join(', ')}`);
|
||||
}
|
||||
|
||||
if (normalizationResult.products.length === 0) {
|
||||
return {
|
||||
success: false,
|
||||
error: 'Normalization produced no products',
|
||||
payloadId,
|
||||
productsProcessed: 0,
|
||||
};
|
||||
}
|
||||
|
||||
console.log(`[ProductRefresh] Normalized ${normalizationResult.products.length} products`);
|
||||
console.log(`[ProductResync] Normalized ${normalizationResult.products.length} products`);
|
||||
|
||||
await ctx.heartbeat();
|
||||
|
||||
// ============================================================
|
||||
// STEP 4: Upsert to canonical tables
|
||||
// STEP 5: Upsert to canonical tables
|
||||
// ============================================================
|
||||
console.log(`[ProductRefresh] Upserting to store_products...`);
|
||||
console.log(`[ProductResync] Upserting to store_products...`);
|
||||
|
||||
const upsertResult = await upsertStoreProducts(
|
||||
pool,
|
||||
@@ -163,12 +216,12 @@ export async function handleProductRefresh(ctx: TaskContext): Promise<TaskResult
|
||||
normalizationResult.availability
|
||||
);
|
||||
|
||||
console.log(`[ProductRefresh] Upserted: ${upsertResult.upserted} (${upsertResult.new} new, ${upsertResult.updated} updated)`);
|
||||
console.log(`[ProductResync] Upserted: ${upsertResult.upserted} (${upsertResult.new} new, ${upsertResult.updated} updated)`);
|
||||
|
||||
await ctx.heartbeat();
|
||||
|
||||
// Create snapshots
|
||||
console.log(`[ProductRefresh] Creating snapshots...`);
|
||||
console.log(`[ProductResync] Creating snapshots...`);
|
||||
|
||||
const snapshotsResult = await createStoreProductSnapshots(
|
||||
pool,
|
||||
@@ -179,12 +232,12 @@ export async function handleProductRefresh(ctx: TaskContext): Promise<TaskResult
|
||||
null // No crawl_run_id in new system
|
||||
);
|
||||
|
||||
console.log(`[ProductRefresh] Created ${snapshotsResult.created} snapshots`);
|
||||
console.log(`[ProductResync] Created ${snapshotsResult.created} snapshots`);
|
||||
|
||||
await ctx.heartbeat();
|
||||
|
||||
// ============================================================
|
||||
// STEP 5: Track missing products (consecutive_misses logic)
|
||||
// STEP 6: Track missing products (consecutive_misses logic)
|
||||
// - Products in feed: reset consecutive_misses to 0
|
||||
// - Products not in feed: increment consecutive_misses
|
||||
// - At 3 consecutive misses: mark as OOS
|
||||
@@ -217,7 +270,7 @@ export async function handleProductRefresh(ctx: TaskContext): Promise<TaskResult
|
||||
|
||||
const incrementedCount = incrementResult.rowCount || 0;
|
||||
if (incrementedCount > 0) {
|
||||
console.log(`[ProductRefresh] Incremented consecutive_misses for ${incrementedCount} products`);
|
||||
console.log(`[ProductResync] Incremented consecutive_misses for ${incrementedCount} products`);
|
||||
}
|
||||
|
||||
// Mark as OOS any products that hit 3 consecutive misses
|
||||
@@ -233,16 +286,16 @@ export async function handleProductRefresh(ctx: TaskContext): Promise<TaskResult
|
||||
|
||||
const markedOosCount = oosResult.rowCount || 0;
|
||||
if (markedOosCount > 0) {
|
||||
console.log(`[ProductRefresh] Marked ${markedOosCount} products as OOS (3+ consecutive misses)`);
|
||||
console.log(`[ProductResync] Marked ${markedOosCount} products as OOS (3+ consecutive misses)`);
|
||||
}
|
||||
|
||||
await ctx.heartbeat();
|
||||
|
||||
// ============================================================
|
||||
// STEP 6: Download images for new products
|
||||
// STEP 7: Download images for new products
|
||||
// ============================================================
|
||||
if (upsertResult.productsNeedingImages.length > 0) {
|
||||
console.log(`[ProductRefresh] Downloading images for ${upsertResult.productsNeedingImages.length} products...`);
|
||||
console.log(`[ProductResync] Downloading images for ${upsertResult.productsNeedingImages.length} products...`);
|
||||
|
||||
try {
|
||||
const dispensaryContext = {
|
||||
@@ -256,12 +309,12 @@ export async function handleProductRefresh(ctx: TaskContext): Promise<TaskResult
|
||||
);
|
||||
} catch (imgError: any) {
|
||||
// Image download errors shouldn't fail the whole task
|
||||
console.warn(`[ProductRefresh] Image download error (non-fatal): ${imgError.message}`);
|
||||
console.warn(`[ProductResync] Image download error (non-fatal): ${imgError.message}`);
|
||||
}
|
||||
}
|
||||
|
||||
// ============================================================
|
||||
// STEP 7: Update dispensary last_crawl_at
|
||||
// STEP 8: Update dispensary last_crawl_at
|
||||
// ============================================================
|
||||
await pool.query(`
|
||||
UPDATE dispensaries
|
||||
@@ -269,20 +322,10 @@ export async function handleProductRefresh(ctx: TaskContext): Promise<TaskResult
|
||||
WHERE id = $1
|
||||
`, [dispensaryId]);
|
||||
|
||||
// ============================================================
|
||||
// STEP 8: Mark payload as processed
|
||||
// ============================================================
|
||||
await pool.query(`
|
||||
UPDATE raw_crawl_payloads
|
||||
SET processed_at = NOW()
|
||||
WHERE id = $1
|
||||
`, [payloadId]);
|
||||
|
||||
console.log(`[ProductRefresh] Completed ${dispensary.name}`);
|
||||
console.log(`[ProductResync] Completed ${dispensary.name}`);
|
||||
|
||||
return {
|
||||
success: true,
|
||||
payloadId,
|
||||
productsProcessed: normalizationResult.products.length,
|
||||
snapshotsCreated: snapshotsResult.created,
|
||||
newProducts: upsertResult.new,
|
||||
@@ -292,7 +335,7 @@ export async function handleProductRefresh(ctx: TaskContext): Promise<TaskResult
|
||||
|
||||
} catch (error: unknown) {
|
||||
const errorMessage = error instanceof Error ? error.message : 'Unknown error';
|
||||
console.error(`[ProductRefresh] Error for dispensary ${dispensaryId}:`, errorMessage);
|
||||
console.error(`[ProductResync] Error for dispensary ${dispensaryId}:`, errorMessage);
|
||||
return {
|
||||
success: false,
|
||||
error: errorMessage,
|
||||
|
||||
@@ -1,51 +0,0 @@
|
||||
/**
|
||||
* Proxy Test Handler
|
||||
* Tests proxy connectivity by fetching public IP via ipify
|
||||
*/
|
||||
|
||||
import { TaskContext, TaskResult } from '../task-worker';
import { execFileSync, execSync } from 'child_process';
|
||||
|
||||
/**
 * proxy_test task handler.
 *
 * Picks one active row from the `proxies` table, asks api.ipify.org for the
 * public IP through that proxy using curl, and reports the IP it sees.
 *
 * @param ctx Task context; only `ctx.pool` is used.
 * @returns `{ success: true, proxyIp, proxyHost, proxyPort }` when the lookup
 *          works, otherwise `{ success: false, error }`. Never throws.
 */
export async function handleProxyTest(ctx: TaskContext): Promise<TaskResult> {
  const { pool } = ctx;

  console.log('[ProxyTest] Testing proxy connection...');

  // "user:password@" part of the proxy URL; kept so it can be scrubbed from
  // error text in the catch block below.
  let credentials = '';

  try {
    // Get active proxy from DB
    const proxyResult = await pool.query(`
      SELECT host, port, username, password
      FROM proxies
      WHERE is_active = true
      LIMIT 1
    `);

    if (proxyResult.rows.length === 0) {
      return { success: false, error: 'No active proxy configured' };
    }

    const p = proxyResult.rows[0];
    credentials = p.username ? `${p.username}:${p.password}@` : '';
    const proxyUrl = `http://${credentials}${p.host}:${p.port}`;

    console.log(`[ProxyTest] Using proxy: ${p.host}:${p.port}`);

    // Fetch IP via proxy. Arguments are passed as an array with no shell in
    // between, so quotes or metacharacters in the stored credentials cannot
    // break or alter the command.
    const output = execFileSync(
      'curl',
      ['-s', '--proxy', proxyUrl, 'https://api.ipify.org?format=json'],
      { timeout: 30000 }
    ).toString().trim();
    const data = JSON.parse(output);

    console.log(`[ProxyTest] Proxy IP: ${data.ip}`);

    return {
      success: true,
      proxyIp: data.ip,
      proxyHost: p.host,
      proxyPort: p.port,
    };
  } catch (error: any) {
    // A failed child process reports the command line it ran, which here
    // includes the proxy URL; strip the credentials before logging/returning.
    const rawMessage = error.message;
    const message = credentials && typeof rawMessage === 'string'
      ? rawMessage.split(credentials).join('***@')
      : rawMessage;
    console.error('[ProxyTest] Error:', message);
    return { success: false, error: message };
  }
}
|
||||
@@ -1,16 +1,8 @@
|
||||
/**
|
||||
* Store Discovery Handler
|
||||
*
|
||||
* Per TASK_WORKFLOW_2024-12-10.md: Discovers new stores and returns their IDs for task chaining.
|
||||
*
|
||||
* Flow:
|
||||
* 1. For each active state, run Dutchie discovery
|
||||
* 2. Discover locations via GraphQL
|
||||
* 3. Auto-promote valid locations to dispensaries table
|
||||
* 4. Return newStoreIds[] for chaining to payload_fetch
|
||||
*
|
||||
* Chaining:
|
||||
* store_discovery → (returns newStoreIds) → payload_fetch → product_refresh
|
||||
* Discovers new stores by crawling location APIs and adding them
|
||||
* to discovery_locations table.
|
||||
*/
|
||||
|
||||
import { TaskContext, TaskResult } from '../task-worker';
|
||||
@@ -18,25 +10,23 @@ import { discoverState } from '../../discovery';
|
||||
|
||||
export async function handleStoreDiscovery(ctx: TaskContext): Promise<TaskResult> {
|
||||
const { pool, task } = ctx;
|
||||
const platform = task.platform || 'dutchie';
|
||||
const platform = task.platform || 'default';
|
||||
|
||||
console.log(`[StoreDiscovery] Starting discovery for platform: ${platform}`);
|
||||
|
||||
try {
|
||||
// Get states to discover
|
||||
const statesResult = await pool.query(`
|
||||
SELECT code FROM states WHERE is_active = true ORDER BY code
|
||||
SELECT code FROM states WHERE active = true ORDER BY code
|
||||
`);
|
||||
const stateCodes = statesResult.rows.map(r => r.code);
|
||||
|
||||
if (stateCodes.length === 0) {
|
||||
return { success: true, storesDiscovered: 0, newStoreIds: [], message: 'No active states to discover' };
|
||||
return { success: true, storesDiscovered: 0, message: 'No active states to discover' };
|
||||
}
|
||||
|
||||
let totalDiscovered = 0;
|
||||
let totalPromoted = 0;
|
||||
// Per TASK_WORKFLOW_2024-12-10.md: Collect all new store IDs for task chaining
|
||||
const allNewStoreIds: number[] = [];
|
||||
|
||||
// Run discovery for each state
|
||||
for (const stateCode of stateCodes) {
|
||||
@@ -49,13 +39,6 @@ export async function handleStoreDiscovery(ctx: TaskContext): Promise<TaskResult
|
||||
const result = await discoverState(pool, stateCode);
|
||||
totalDiscovered += result.totalLocationsFound || 0;
|
||||
totalPromoted += result.totalLocationsUpserted || 0;
|
||||
|
||||
// Per TASK_WORKFLOW_2024-12-10.md: Collect new IDs for chaining
|
||||
if (result.newDispensaryIds && result.newDispensaryIds.length > 0) {
|
||||
allNewStoreIds.push(...result.newDispensaryIds);
|
||||
console.log(`[StoreDiscovery] ${stateCode}: ${result.newDispensaryIds.length} new stores`);
|
||||
}
|
||||
|
||||
console.log(`[StoreDiscovery] ${stateCode}: found ${result.totalLocationsFound}, upserted ${result.totalLocationsUpserted}`);
|
||||
} catch (error: unknown) {
|
||||
const errorMessage = error instanceof Error ? error.message : 'Unknown error';
|
||||
@@ -64,15 +47,13 @@ export async function handleStoreDiscovery(ctx: TaskContext): Promise<TaskResult
|
||||
}
|
||||
}
|
||||
|
||||
console.log(`[StoreDiscovery] Complete: ${totalDiscovered} discovered, ${totalPromoted} promoted, ${allNewStoreIds.length} new stores`);
|
||||
console.log(`[StoreDiscovery] Complete: ${totalDiscovered} discovered, ${totalPromoted} promoted`);
|
||||
|
||||
return {
|
||||
success: true,
|
||||
storesDiscovered: totalDiscovered,
|
||||
storesPromoted: totalPromoted,
|
||||
statesProcessed: stateCodes.length,
|
||||
// Per TASK_WORKFLOW_2024-12-10.md: Return new IDs for task chaining
|
||||
newStoreIds: allNewStoreIds,
|
||||
};
|
||||
} catch (error: unknown) {
|
||||
const errorMessage = error instanceof Error ? error.message : 'Unknown error';
|
||||
@@ -80,7 +61,6 @@ export async function handleStoreDiscovery(ctx: TaskContext): Promise<TaskResult
|
||||
return {
|
||||
success: false,
|
||||
error: errorMessage,
|
||||
newStoreIds: [],
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,37 +0,0 @@
|
||||
/**
|
||||
* Task Pool State
|
||||
*
|
||||
* Shared state for task pool pause/resume functionality.
|
||||
* This is kept separate to avoid circular dependencies between
|
||||
* task-service.ts and routes/tasks.ts.
|
||||
*
|
||||
* State is in-memory and resets on server restart.
|
||||
* By default, the pool is PAUSED (closed) - admin must explicitly start it.
|
||||
* This prevents workers from immediately grabbing tasks on deploy before
|
||||
* the system is ready.
|
||||
*/
|
||||
|
||||
// In-memory pause flag for the task pool. Starts out paused.
let poolIsPaused = true;

/** Returns true while the task pool is paused. */
export function isTaskPoolPaused(): boolean {
  return poolIsPaused;
}

/** Sets the pause flag and logs the transition. */
export function pauseTaskPool(): void {
  poolIsPaused = true;
  console.log('[TaskPool] Task pool PAUSED - workers will not pick up new tasks');
}

/** Clears the pause flag and logs the transition. */
export function resumeTaskPool(): void {
  poolIsPaused = false;
  console.log('[TaskPool] Task pool RESUMED - workers can pick up tasks');
}

/** Snapshot of the pause flag together with a human-readable description. */
export function getTaskPoolStatus(): { paused: boolean; message: string } {
  const paused = poolIsPaused;
  let message: string;
  if (paused) {
    message = 'Task pool is paused - workers will not pick up new tasks';
  } else {
    message = 'Task pool is open - workers are picking up tasks';
  }
  return { paused, message };
}
|
||||
@@ -9,30 +9,13 @@
|
||||
*/
|
||||
|
||||
import { pool } from '../db/pool';
|
||||
import { isTaskPoolPaused } from './task-pool-state';
|
||||
|
||||
/**
 * Reports whether information_schema.tables lists a table with this name.
 * The lookup filters on table_name only.
 */
async function tableExists(tableName: string): Promise<boolean> {
  const existsQuery = `
    SELECT EXISTS (
      SELECT FROM information_schema.tables
      WHERE table_name = $1
    ) as exists
  `;
  const { rows } = await pool.query(existsQuery, [tableName]);
  const firstRow = rows[0];
  return firstRow.exists;
}
|
||||
|
||||
// Per TASK_WORKFLOW_2024-12-10.md: Task roles
|
||||
// payload_fetch: Hits Dutchie API, saves raw payload to filesystem
|
||||
// product_refresh: Reads local payload, normalizes, upserts to DB
|
||||
export type TaskRole =
|
||||
| 'store_discovery'
|
||||
| 'entry_point_discovery'
|
||||
| 'product_discovery'
|
||||
| 'payload_fetch' // NEW: Fetches from API, saves to disk
|
||||
| 'product_refresh' // CHANGED: Now reads from local payload
|
||||
| 'analytics_refresh'
|
||||
| 'proxy_test'; // Tests proxy connectivity via ipify
|
||||
| 'product_refresh'
|
||||
| 'analytics_refresh';
|
||||
|
||||
export type TaskStatus =
|
||||
| 'pending'
|
||||
@@ -61,7 +44,6 @@ export interface WorkerTask {
|
||||
error_message: string | null;
|
||||
retry_count: number;
|
||||
max_retries: number;
|
||||
payload: Record<string, unknown> | null; // Per TASK_WORKFLOW_2024-12-10.md: Task chaining data
|
||||
created_at: Date;
|
||||
updated_at: Date;
|
||||
}
|
||||
@@ -72,7 +54,6 @@ export interface CreateTaskParams {
|
||||
platform?: string;
|
||||
priority?: number;
|
||||
scheduled_for?: Date;
|
||||
payload?: Record<string, unknown>; // Per TASK_WORKFLOW_2024-12-10.md: For task chaining data
|
||||
}
|
||||
|
||||
export interface CapacityMetrics {
|
||||
@@ -104,8 +85,8 @@ class TaskService {
|
||||
*/
|
||||
async createTask(params: CreateTaskParams): Promise<WorkerTask> {
|
||||
const result = await pool.query(
|
||||
`INSERT INTO worker_tasks (role, dispensary_id, platform, priority, scheduled_for, payload)
|
||||
VALUES ($1, $2, $3, $4, $5, $6)
|
||||
`INSERT INTO worker_tasks (role, dispensary_id, platform, priority, scheduled_for)
|
||||
VALUES ($1, $2, $3, $4, $5)
|
||||
RETURNING *`,
|
||||
[
|
||||
params.role,
|
||||
@@ -113,7 +94,6 @@ class TaskService {
|
||||
params.platform ?? null,
|
||||
params.priority ?? 0,
|
||||
params.scheduled_for ?? null,
|
||||
params.payload ? JSON.stringify(params.payload) : null,
|
||||
]
|
||||
);
|
||||
return result.rows[0] as WorkerTask;
|
||||
@@ -151,14 +131,8 @@ class TaskService {
|
||||
/**
|
||||
* Claim a task atomically for a worker
|
||||
* If role is null, claims ANY available task (role-agnostic worker)
|
||||
* Returns null if task pool is paused.
|
||||
*/
|
||||
async claimTask(role: TaskRole | null, workerId: string): Promise<WorkerTask | null> {
|
||||
// Check if task pool is paused - don't claim any tasks
|
||||
if (isTaskPoolPaused()) {
|
||||
return null;
|
||||
}
|
||||
|
||||
if (role) {
|
||||
// Role-specific claiming - use the SQL function
|
||||
const result = await pool.query(
|
||||
@@ -232,53 +206,15 @@ class TaskService {
|
||||
}
|
||||
|
||||
/**
|
||||
* Mark a task as failed, with auto-retry if under max_retries
|
||||
* Returns true if task was re-queued for retry, false if permanently failed
|
||||
* Mark a task as failed
|
||||
*/
|
||||
async failTask(taskId: number, errorMessage: string): Promise<boolean> {
|
||||
// Get current retry state
|
||||
const result = await pool.query(
|
||||
`SELECT retry_count, max_retries FROM worker_tasks WHERE id = $1`,
|
||||
[taskId]
|
||||
);
|
||||
|
||||
if (result.rows.length === 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const { retry_count, max_retries } = result.rows[0];
|
||||
const newRetryCount = (retry_count || 0) + 1;
|
||||
|
||||
if (newRetryCount < (max_retries || 3)) {
|
||||
// Re-queue for retry - reset to pending with incremented retry_count
|
||||
await pool.query(
|
||||
`UPDATE worker_tasks
|
||||
SET status = 'pending',
|
||||
worker_id = NULL,
|
||||
claimed_at = NULL,
|
||||
started_at = NULL,
|
||||
retry_count = $2,
|
||||
error_message = $3,
|
||||
updated_at = NOW()
|
||||
WHERE id = $1`,
|
||||
[taskId, newRetryCount, `Retry ${newRetryCount}: ${errorMessage}`]
|
||||
);
|
||||
console.log(`[TaskService] Task ${taskId} queued for retry ${newRetryCount}/${max_retries || 3}`);
|
||||
return true;
|
||||
}
|
||||
|
||||
// Max retries exceeded - mark as permanently failed
|
||||
async failTask(taskId: number, errorMessage: string): Promise<void> {
|
||||
await pool.query(
|
||||
`UPDATE worker_tasks
|
||||
SET status = 'failed',
|
||||
completed_at = NOW(),
|
||||
retry_count = $2,
|
||||
error_message = $3
|
||||
SET status = 'failed', completed_at = NOW(), error_message = $2
|
||||
WHERE id = $1`,
|
||||
[taskId, newRetryCount, `Failed after ${newRetryCount} attempts: ${errorMessage}`]
|
||||
[taskId, errorMessage]
|
||||
);
|
||||
console.log(`[TaskService] Task ${taskId} permanently failed after ${newRetryCount} attempts`);
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -296,11 +232,6 @@ class TaskService {
|
||||
* List tasks with filters
|
||||
*/
|
||||
async listTasks(filter: TaskFilter = {}): Promise<WorkerTask[]> {
|
||||
// Return empty list if table doesn't exist
|
||||
if (!await tableExists('worker_tasks')) {
|
||||
return [];
|
||||
}
|
||||
|
||||
const conditions: string[] = [];
|
||||
const params: (string | number | string[])[] = [];
|
||||
let paramIndex = 1;
|
||||
@@ -354,41 +285,21 @@ class TaskService {
|
||||
* Get capacity metrics for all roles
|
||||
*/
|
||||
async getCapacityMetrics(): Promise<CapacityMetrics[]> {
|
||||
// Return empty metrics if worker_tasks table doesn't exist
|
||||
if (!await tableExists('worker_tasks')) {
|
||||
return [];
|
||||
}
|
||||
|
||||
try {
|
||||
const result = await pool.query(
|
||||
`SELECT * FROM v_worker_capacity`
|
||||
);
|
||||
return result.rows as CapacityMetrics[];
|
||||
} catch {
|
||||
// View may not exist
|
||||
return [];
|
||||
}
|
||||
const result = await pool.query(
|
||||
`SELECT * FROM v_worker_capacity`
|
||||
);
|
||||
return result.rows as CapacityMetrics[];
|
||||
}
|
||||
|
||||
/**
|
||||
* Get capacity metrics for a specific role
|
||||
*/
|
||||
async getRoleCapacity(role: TaskRole): Promise<CapacityMetrics | null> {
|
||||
// Return null if worker_tasks table doesn't exist
|
||||
if (!await tableExists('worker_tasks')) {
|
||||
return null;
|
||||
}
|
||||
|
||||
try {
|
||||
const result = await pool.query(
|
||||
`SELECT * FROM v_worker_capacity WHERE role = $1`,
|
||||
[role]
|
||||
);
|
||||
return (result.rows[0] as CapacityMetrics) || null;
|
||||
} catch {
|
||||
// View may not exist
|
||||
return null;
|
||||
}
|
||||
const result = await pool.query(
|
||||
`SELECT * FROM v_worker_capacity WHERE role = $1`,
|
||||
[role]
|
||||
);
|
||||
return (result.rows[0] as CapacityMetrics) || null;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -416,17 +327,6 @@ class TaskService {
|
||||
/**
|
||||
* Chain next task after completion
|
||||
* Called automatically when a task completes successfully
|
||||
*
|
||||
* Per TASK_WORKFLOW_2024-12-10.md: Task chaining flow:
|
||||
*
|
||||
* Discovery flow (new stores):
|
||||
* store_discovery → product_discovery → payload_fetch → product_refresh
|
||||
*
|
||||
* Scheduled flow (existing stores):
|
||||
* payload_fetch → product_refresh
|
||||
*
|
||||
* Note: entry_point_discovery is deprecated since platform_dispensary_id
|
||||
* is now resolved during store promotion.
|
||||
*/
|
||||
async chainNextTask(completedTask: WorkerTask): Promise<WorkerTask | null> {
|
||||
if (completedTask.status !== 'completed') {
|
||||
@@ -435,14 +335,12 @@ class TaskService {
|
||||
|
||||
switch (completedTask.role) {
|
||||
case 'store_discovery': {
|
||||
// Per TASK_WORKFLOW_2024-12-10.md: New stores discovered -> create product_discovery tasks
|
||||
// Skip entry_point_discovery since platform_dispensary_id is set during promotion
|
||||
// New stores discovered -> create entry_point_discovery tasks
|
||||
const newStoreIds = (completedTask.result as { newStoreIds?: number[] })?.newStoreIds;
|
||||
if (newStoreIds && newStoreIds.length > 0) {
|
||||
console.log(`[TaskService] Chaining ${newStoreIds.length} product_discovery tasks for new stores`);
|
||||
for (const storeId of newStoreIds) {
|
||||
await this.createTask({
|
||||
role: 'product_discovery',
|
||||
role: 'entry_point_discovery',
|
||||
dispensary_id: storeId,
|
||||
platform: completedTask.platform ?? undefined,
|
||||
priority: 10, // High priority for new stores
|
||||
@@ -453,8 +351,7 @@ class TaskService {
|
||||
}
|
||||
|
||||
case 'entry_point_discovery': {
|
||||
// DEPRECATED: Entry point resolution now happens during store promotion
|
||||
// Kept for backward compatibility with any in-flight tasks
|
||||
// Entry point resolved -> create product_discovery task
|
||||
const success = (completedTask.result as { success?: boolean })?.success;
|
||||
if (success && completedTask.dispensary_id) {
|
||||
return this.createTask({
|
||||
@@ -468,15 +365,8 @@ class TaskService {
|
||||
}
|
||||
|
||||
case 'product_discovery': {
|
||||
// Per TASK_WORKFLOW_2024-12-10.md: Product discovery chains internally to payload_fetch
|
||||
// No external chaining needed - handleProductDiscovery calls handlePayloadFetch directly
|
||||
break;
|
||||
}
|
||||
|
||||
case 'payload_fetch': {
|
||||
// Per TASK_WORKFLOW_2024-12-10.md: payload_fetch chains to product_refresh
|
||||
// This is handled internally by the payload_fetch handler via taskService.createTask
|
||||
// No external chaining needed here
|
||||
// Product discovery done -> store is now ready for regular resync
|
||||
// No immediate chaining needed; will be picked up by daily batch generation
|
||||
break;
|
||||
}
|
||||
}
|
||||
@@ -535,6 +425,12 @@ class TaskService {
|
||||
* Get task counts by status for dashboard
|
||||
*/
|
||||
async getTaskCounts(): Promise<Record<TaskStatus, number>> {
|
||||
const result = await pool.query(
|
||||
`SELECT status, COUNT(*) as count
|
||||
FROM worker_tasks
|
||||
GROUP BY status`
|
||||
);
|
||||
|
||||
const counts: Record<TaskStatus, number> = {
|
||||
pending: 0,
|
||||
claimed: 0,
|
||||
@@ -544,17 +440,6 @@ class TaskService {
|
||||
stale: 0,
|
||||
};
|
||||
|
||||
// Return empty counts if table doesn't exist
|
||||
if (!await tableExists('worker_tasks')) {
|
||||
return counts;
|
||||
}
|
||||
|
||||
const result = await pool.query(
|
||||
`SELECT status, COUNT(*) as count
|
||||
FROM worker_tasks
|
||||
GROUP BY status`
|
||||
);
|
||||
|
||||
for (const row of result.rows) {
|
||||
const typedRow = row as { status: TaskStatus; count: string };
|
||||
counts[typedRow.status] = parseInt(typedRow.count, 10);
|
||||
|
||||
@@ -52,60 +52,16 @@ import { CrawlRotator } from '../services/crawl-rotator';
|
||||
import { setCrawlRotator } from '../platforms/dutchie';
|
||||
|
||||
// Task handlers by role
|
||||
// Per TASK_WORKFLOW_2024-12-10.md: payload_fetch and product_refresh are now separate
|
||||
import { handlePayloadFetch } from './handlers/payload-fetch';
|
||||
import { handleProductRefresh } from './handlers/product-refresh';
|
||||
import { handleProductDiscovery } from './handlers/product-discovery';
|
||||
import { handleStoreDiscovery } from './handlers/store-discovery';
|
||||
import { handleEntryPointDiscovery } from './handlers/entry-point-discovery';
|
||||
import { handleAnalyticsRefresh } from './handlers/analytics-refresh';
|
||||
import { handleProxyTest } from './handlers/proxy-test';
|
||||
|
||||
const POLL_INTERVAL_MS = parseInt(process.env.POLL_INTERVAL_MS || '5000');
|
||||
const HEARTBEAT_INTERVAL_MS = parseInt(process.env.HEARTBEAT_INTERVAL_MS || '30000');
|
||||
const API_BASE_URL = process.env.API_BASE_URL || 'http://localhost:3010';
|
||||
|
||||
// =============================================================================
|
||||
// CONCURRENT TASK PROCESSING SETTINGS
|
||||
// =============================================================================
|
||||
// Workers can process multiple tasks simultaneously using async I/O.
|
||||
// This improves throughput for I/O-bound tasks (network calls, DB queries).
|
||||
//
|
||||
// Resource thresholds trigger "backoff" - the worker stops claiming new tasks
|
||||
// but continues processing existing ones until resources return to normal.
|
||||
//
|
||||
// See: docs/WORKER_TASK_ARCHITECTURE.md#concurrent-task-processing
|
||||
// =============================================================================
|
||||
|
||||
// Maximum number of tasks this worker will run concurrently
|
||||
// Tune based on workload: I/O-bound tasks benefit from higher concurrency
|
||||
const MAX_CONCURRENT_TASKS = parseInt(process.env.MAX_CONCURRENT_TASKS || '3');
|
||||
|
||||
// When heap memory usage exceeds this threshold (as decimal 0.0-1.0), stop claiming new tasks
|
||||
// Default 85% - gives headroom before OOM
|
||||
const MEMORY_BACKOFF_THRESHOLD = parseFloat(process.env.MEMORY_BACKOFF_THRESHOLD || '0.85');
|
||||
|
||||
// Parse max heap size from NODE_OPTIONS (--max-old-space-size=1500)
|
||||
// This is used as the denominator for memory percentage calculation
|
||||
// V8's heapTotal is dynamic and stays small when idle, causing false high percentages
|
||||
function getMaxHeapSizeMb(): number {
|
||||
const nodeOptions = process.env.NODE_OPTIONS || '';
|
||||
const match = nodeOptions.match(/--max-old-space-size=(\d+)/);
|
||||
if (match) {
|
||||
return parseInt(match[1], 10);
|
||||
}
|
||||
// Fallback: use 512MB if not specified
|
||||
return 512;
|
||||
}
|
||||
const MAX_HEAP_SIZE_MB = getMaxHeapSizeMb();
|
||||
|
||||
// When CPU usage exceeds this threshold (as decimal 0.0-1.0), stop claiming new tasks
|
||||
// Default 90% - allows some burst capacity
|
||||
const CPU_BACKOFF_THRESHOLD = parseFloat(process.env.CPU_BACKOFF_THRESHOLD || '0.90');
|
||||
|
||||
// How long to wait (ms) when in backoff state before rechecking resources
|
||||
const BACKOFF_DURATION_MS = parseInt(process.env.BACKOFF_DURATION_MS || '10000');
|
||||
|
||||
export interface TaskContext {
|
||||
pool: Pool;
|
||||
workerId: string;
|
||||
@@ -124,38 +80,14 @@ export interface TaskResult {
|
||||
|
||||
type TaskHandler = (ctx: TaskContext) => Promise<TaskResult>;
|
||||
|
||||
// Per TASK_WORKFLOW_2024-12-10.md: Handler registry
|
||||
// payload_fetch: Fetches from Dutchie API, saves to disk, chains to product_refresh
|
||||
// product_refresh: Reads local payload, normalizes, upserts to DB
|
||||
const TASK_HANDLERS: Record<TaskRole, TaskHandler> = {
|
||||
payload_fetch: handlePayloadFetch, // NEW: API fetch -> disk
|
||||
product_refresh: handleProductRefresh, // CHANGED: disk -> DB
|
||||
product_refresh: handleProductRefresh,
|
||||
product_discovery: handleProductDiscovery,
|
||||
store_discovery: handleStoreDiscovery,
|
||||
entry_point_discovery: handleEntryPointDiscovery,
|
||||
analytics_refresh: handleAnalyticsRefresh,
|
||||
proxy_test: handleProxyTest, // Tests proxy via ipify
|
||||
};
|
||||
|
||||
/**
|
||||
* Resource usage stats reported to the registry and used for backoff decisions.
|
||||
* These values are included in worker heartbeats and displayed in the UI.
|
||||
*/
|
||||
interface ResourceStats {
|
||||
/** Current heap memory usage as decimal (0.0 to 1.0) */
|
||||
memoryPercent: number;
|
||||
/** Current heap used in MB */
|
||||
memoryMb: number;
|
||||
/** Total heap available in MB */
|
||||
memoryTotalMb: number;
|
||||
/** CPU usage percentage since last check (0 to 100) */
|
||||
cpuPercent: number;
|
||||
/** True if worker is currently in backoff state */
|
||||
isBackingOff: boolean;
|
||||
/** Reason for backoff (e.g., "Memory at 87.3% (threshold: 85%)") */
|
||||
backoffReason: string | null;
|
||||
}
|
||||
|
||||
export class TaskWorker {
|
||||
private pool: Pool;
|
||||
private workerId: string;
|
||||
@@ -164,190 +96,37 @@ export class TaskWorker {
|
||||
private isRunning: boolean = false;
|
||||
private heartbeatInterval: NodeJS.Timeout | null = null;
|
||||
private registryHeartbeatInterval: NodeJS.Timeout | null = null;
|
||||
private currentTask: WorkerTask | null = null;
|
||||
private crawlRotator: CrawlRotator;
|
||||
|
||||
// ==========================================================================
|
||||
// CONCURRENT TASK TRACKING
|
||||
// ==========================================================================
|
||||
// activeTasks: Map of task ID -> task object for all currently running tasks
|
||||
// taskPromises: Map of task ID -> Promise for cleanup when task completes
|
||||
// maxConcurrentTasks: How many tasks this worker will run in parallel
|
||||
// ==========================================================================
|
||||
private activeTasks: Map<number, WorkerTask> = new Map();
|
||||
private taskPromises: Map<number, Promise<void>> = new Map();
|
||||
private maxConcurrentTasks: number = MAX_CONCURRENT_TASKS;
|
||||
|
||||
// ==========================================================================
|
||||
// RESOURCE MONITORING FOR BACKOFF
|
||||
// ==========================================================================
|
||||
// CPU tracking uses differential measurement - we track last values and
|
||||
// calculate percentage based on elapsed time since last check.
|
||||
// ==========================================================================
|
||||
private lastCpuUsage: { user: number; system: number } = { user: 0, system: 0 };
|
||||
private lastCpuCheck: number = Date.now();
|
||||
private isBackingOff: boolean = false;
|
||||
private backoffReason: string | null = null;
|
||||
|
||||
constructor(role: TaskRole | null = null, workerId?: string) {
|
||||
this.pool = getPool();
|
||||
this.role = role;
|
||||
this.workerId = workerId || `worker-${uuidv4().slice(0, 8)}`;
|
||||
this.crawlRotator = new CrawlRotator(this.pool);
|
||||
|
||||
// Initialize CPU tracking
|
||||
const cpuUsage = process.cpuUsage();
|
||||
this.lastCpuUsage = { user: cpuUsage.user, system: cpuUsage.system };
|
||||
this.lastCpuCheck = Date.now();
|
||||
}
|
||||
|
||||
/**
|
||||
* Get current resource usage
|
||||
* Memory percentage is calculated against MAX_HEAP_SIZE_MB (from --max-old-space-size)
|
||||
* NOT against V8's dynamic heapTotal which stays small when idle
|
||||
*/
|
||||
private getResourceStats(): ResourceStats {
|
||||
const memUsage = process.memoryUsage();
|
||||
const heapUsedMb = memUsage.heapUsed / 1024 / 1024;
|
||||
// Use MAX_HEAP_SIZE_MB as ceiling, not dynamic heapTotal
|
||||
// V8's heapTotal stays small when idle (e.g., 36MB) causing false 95%+ readings
|
||||
// With --max-old-space-size=1500, we should calculate against 1500MB
|
||||
const memoryPercent = heapUsedMb / MAX_HEAP_SIZE_MB;
|
||||
|
||||
// Calculate CPU usage since last check
|
||||
const cpuUsage = process.cpuUsage();
|
||||
const now = Date.now();
|
||||
const elapsed = now - this.lastCpuCheck;
|
||||
|
||||
let cpuPercent = 0;
|
||||
if (elapsed > 0) {
|
||||
const userDiff = (cpuUsage.user - this.lastCpuUsage.user) / 1000; // microseconds to ms
|
||||
const systemDiff = (cpuUsage.system - this.lastCpuUsage.system) / 1000;
|
||||
cpuPercent = ((userDiff + systemDiff) / elapsed) * 100;
|
||||
}
|
||||
|
||||
// Update last values
|
||||
this.lastCpuUsage = { user: cpuUsage.user, system: cpuUsage.system };
|
||||
this.lastCpuCheck = now;
|
||||
|
||||
return {
|
||||
memoryPercent,
|
||||
memoryMb: Math.round(heapUsedMb),
|
||||
memoryTotalMb: MAX_HEAP_SIZE_MB, // Use max-old-space-size, not dynamic heapTotal
|
||||
cpuPercent: Math.min(100, cpuPercent), // Cap at 100%
|
||||
isBackingOff: this.isBackingOff,
|
||||
backoffReason: this.backoffReason,
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if we should back off from taking new tasks
|
||||
*/
|
||||
private shouldBackOff(): { backoff: boolean; reason: string | null } {
|
||||
const stats = this.getResourceStats();
|
||||
|
||||
if (stats.memoryPercent > MEMORY_BACKOFF_THRESHOLD) {
|
||||
return { backoff: true, reason: `Memory at ${(stats.memoryPercent * 100).toFixed(1)}% (threshold: ${MEMORY_BACKOFF_THRESHOLD * 100}%)` };
|
||||
}
|
||||
|
||||
if (stats.cpuPercent > CPU_BACKOFF_THRESHOLD * 100) {
|
||||
return { backoff: true, reason: `CPU at ${stats.cpuPercent.toFixed(1)}% (threshold: ${CPU_BACKOFF_THRESHOLD * 100}%)` };
|
||||
}
|
||||
|
||||
return { backoff: false, reason: null };
|
||||
}
|
||||
|
||||
/**
|
||||
* Get count of currently running tasks
|
||||
*/
|
||||
get activeTaskCount(): number {
|
||||
return this.activeTasks.size;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if we can accept more tasks
|
||||
*/
|
||||
private canAcceptMoreTasks(): boolean {
|
||||
return this.activeTasks.size < this.maxConcurrentTasks;
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialize stealth systems (proxy rotation, fingerprints)
|
||||
* Called once on worker startup before processing any tasks.
|
||||
*
|
||||
* IMPORTANT: Proxies are REQUIRED. Workers will wait until proxies are available.
|
||||
* Workers listen for PostgreSQL NOTIFY 'proxy_added' to wake up immediately when proxies are added.
|
||||
* IMPORTANT: Proxies are REQUIRED. Workers will fail to start if no proxies available.
|
||||
*/
|
||||
private async initializeStealth(): Promise<void> {
|
||||
const MAX_WAIT_MINUTES = 60;
|
||||
const POLL_INTERVAL_MS = 30000; // 30 seconds fallback polling
|
||||
const maxAttempts = (MAX_WAIT_MINUTES * 60 * 1000) / POLL_INTERVAL_MS;
|
||||
let attempts = 0;
|
||||
let notifyClient: any = null;
|
||||
// Load proxies from database
|
||||
await this.crawlRotator.initialize();
|
||||
|
||||
// Set up PostgreSQL LISTEN for proxy notifications
|
||||
try {
|
||||
notifyClient = await this.pool.connect();
|
||||
await notifyClient.query('LISTEN proxy_added');
|
||||
console.log(`[TaskWorker] Listening for proxy_added notifications...`);
|
||||
} catch (err: any) {
|
||||
console.log(`[TaskWorker] Could not set up LISTEN (will poll): ${err.message}`);
|
||||
const stats = this.crawlRotator.proxy.getStats();
|
||||
if (stats.activeProxies === 0) {
|
||||
throw new Error('No active proxies available. Workers MUST use proxies for all requests. Add proxies to the database before starting workers.');
|
||||
}
|
||||
|
||||
// Create a promise that resolves when notified
|
||||
let notifyResolve: (() => void) | null = null;
|
||||
if (notifyClient) {
|
||||
notifyClient.on('notification', (msg: any) => {
|
||||
if (msg.channel === 'proxy_added') {
|
||||
console.log(`[TaskWorker] Received proxy_added notification!`);
|
||||
if (notifyResolve) notifyResolve();
|
||||
}
|
||||
});
|
||||
}
|
||||
console.log(`[TaskWorker] Loaded ${stats.activeProxies} proxies (${stats.avgSuccessRate.toFixed(1)}% avg success rate)`);
|
||||
|
||||
try {
|
||||
while (attempts < maxAttempts) {
|
||||
try {
|
||||
// Load proxies from database
|
||||
await this.crawlRotator.initialize();
|
||||
// Wire rotator to Dutchie client - proxies will be used for ALL requests
|
||||
setCrawlRotator(this.crawlRotator);
|
||||
|
||||
const stats = this.crawlRotator.proxy.getStats();
|
||||
if (stats.activeProxies > 0) {
|
||||
console.log(`[TaskWorker] Loaded ${stats.activeProxies} proxies (${stats.avgSuccessRate.toFixed(1)}% avg success rate)`);
|
||||
|
||||
// Wire rotator to Dutchie client - proxies will be used for ALL requests
|
||||
setCrawlRotator(this.crawlRotator);
|
||||
|
||||
console.log(`[TaskWorker] Stealth initialized: ${this.crawlRotator.userAgent.getCount()} fingerprints, proxy REQUIRED for all requests`);
|
||||
return;
|
||||
}
|
||||
|
||||
attempts++;
|
||||
console.log(`[TaskWorker] No active proxies available (attempt ${attempts}). Waiting for proxies...`);
|
||||
|
||||
// Wait for either notification or timeout
|
||||
await new Promise<void>((resolve) => {
|
||||
notifyResolve = resolve;
|
||||
setTimeout(resolve, POLL_INTERVAL_MS);
|
||||
});
|
||||
} catch (error: any) {
|
||||
attempts++;
|
||||
console.log(`[TaskWorker] Error loading proxies (attempt ${attempts}): ${error.message}. Retrying...`);
|
||||
await this.sleep(POLL_INTERVAL_MS);
|
||||
}
|
||||
}
|
||||
|
||||
throw new Error(`No active proxies available after waiting ${MAX_WAIT_MINUTES} minutes. Add proxies to the database.`);
|
||||
} finally {
|
||||
// Clean up LISTEN connection
|
||||
if (notifyClient) {
|
||||
try {
|
||||
await notifyClient.query('UNLISTEN proxy_added');
|
||||
notifyClient.release();
|
||||
} catch {
|
||||
// Ignore cleanup errors
|
||||
}
|
||||
}
|
||||
}
|
||||
console.log(`[TaskWorker] Stealth initialized: ${this.crawlRotator.userAgent.getCount()} fingerprints, proxy REQUIRED for all requests`);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -410,32 +189,21 @@ export class TaskWorker {
|
||||
const memUsage = process.memoryUsage();
|
||||
const cpuUsage = process.cpuUsage();
|
||||
const proxyLocation = this.crawlRotator.getProxyLocation();
|
||||
const resourceStats = this.getResourceStats();
|
||||
|
||||
// Get array of active task IDs
|
||||
const activeTaskIds = Array.from(this.activeTasks.keys());
|
||||
|
||||
await fetch(`${API_BASE_URL}/api/worker-registry/heartbeat`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({
|
||||
worker_id: this.workerId,
|
||||
current_task_id: activeTaskIds[0] || null, // Primary task for backwards compat
|
||||
current_task_ids: activeTaskIds, // All active tasks
|
||||
active_task_count: this.activeTasks.size,
|
||||
max_concurrent_tasks: this.maxConcurrentTasks,
|
||||
status: this.activeTasks.size > 0 ? 'active' : 'idle',
|
||||
current_task_id: this.currentTask?.id || null,
|
||||
status: this.currentTask ? 'active' : 'idle',
|
||||
resources: {
|
||||
memory_mb: Math.round(memUsage.heapUsed / 1024 / 1024),
|
||||
memory_total_mb: Math.round(memUsage.heapTotal / 1024 / 1024),
|
||||
memory_rss_mb: Math.round(memUsage.rss / 1024 / 1024),
|
||||
memory_percent: Math.round(resourceStats.memoryPercent * 100),
|
||||
cpu_user_ms: Math.round(cpuUsage.user / 1000),
|
||||
cpu_system_ms: Math.round(cpuUsage.system / 1000),
|
||||
cpu_percent: Math.round(resourceStats.cpuPercent),
|
||||
proxy_location: proxyLocation,
|
||||
is_backing_off: this.isBackingOff,
|
||||
backoff_reason: this.backoffReason,
|
||||
}
|
||||
})
|
||||
});
|
||||
@@ -497,85 +265,20 @@ export class TaskWorker {
|
||||
this.startRegistryHeartbeat();
|
||||
|
||||
const roleMsg = this.role ? `for role: ${this.role}` : '(role-agnostic - any task)';
|
||||
console.log(`[TaskWorker] ${this.friendlyName} starting ${roleMsg} (max ${this.maxConcurrentTasks} concurrent tasks)`);
|
||||
console.log(`[TaskWorker] ${this.friendlyName} starting ${roleMsg}`);
|
||||
|
||||
while (this.isRunning) {
|
||||
try {
|
||||
await this.mainLoop();
|
||||
await this.processNextTask();
|
||||
} catch (error: any) {
|
||||
console.error(`[TaskWorker] Loop error:`, error.message);
|
||||
await this.sleep(POLL_INTERVAL_MS);
|
||||
}
|
||||
}
|
||||
|
||||
// Wait for any remaining tasks to complete
|
||||
if (this.taskPromises.size > 0) {
|
||||
console.log(`[TaskWorker] Waiting for ${this.taskPromises.size} active tasks to complete...`);
|
||||
await Promise.allSettled(this.taskPromises.values());
|
||||
}
|
||||
|
||||
console.log(`[TaskWorker] Worker ${this.workerId} stopped`);
|
||||
}
|
||||
|
||||
/**
|
||||
* Main loop - tries to fill up to maxConcurrentTasks
|
||||
*/
|
||||
private async mainLoop(): Promise<void> {
|
||||
// Check resource usage and backoff if needed
|
||||
const { backoff, reason } = this.shouldBackOff();
|
||||
if (backoff) {
|
||||
if (!this.isBackingOff) {
|
||||
console.log(`[TaskWorker] ${this.friendlyName} backing off: ${reason}`);
|
||||
}
|
||||
this.isBackingOff = true;
|
||||
this.backoffReason = reason;
|
||||
await this.sleep(BACKOFF_DURATION_MS);
|
||||
return;
|
||||
}
|
||||
|
||||
// Clear backoff state
|
||||
if (this.isBackingOff) {
|
||||
console.log(`[TaskWorker] ${this.friendlyName} resuming normal operation`);
|
||||
this.isBackingOff = false;
|
||||
this.backoffReason = null;
|
||||
}
|
||||
|
||||
// Check for decommission signal
|
||||
const shouldDecommission = await this.checkDecommission();
|
||||
if (shouldDecommission) {
|
||||
console.log(`[TaskWorker] ${this.friendlyName} received decommission signal - waiting for ${this.activeTasks.size} tasks to complete`);
|
||||
// Stop accepting new tasks, wait for current to finish
|
||||
this.isRunning = false;
|
||||
return;
|
||||
}
|
||||
|
||||
// Try to claim more tasks if we have capacity
|
||||
if (this.canAcceptMoreTasks()) {
|
||||
const task = await taskService.claimTask(this.role, this.workerId);
|
||||
|
||||
if (task) {
|
||||
console.log(`[TaskWorker] ${this.friendlyName} claimed task ${task.id} (${task.role}) [${this.activeTasks.size + 1}/${this.maxConcurrentTasks}]`);
|
||||
this.activeTasks.set(task.id, task);
|
||||
|
||||
// Start task in background (don't await)
|
||||
const taskPromise = this.executeTask(task);
|
||||
this.taskPromises.set(task.id, taskPromise);
|
||||
|
||||
// Clean up when done
|
||||
taskPromise.finally(() => {
|
||||
this.activeTasks.delete(task.id);
|
||||
this.taskPromises.delete(task.id);
|
||||
});
|
||||
|
||||
// Immediately try to claim more tasks (don't wait for poll interval)
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// No task claimed or at capacity - wait before next poll
|
||||
await this.sleep(POLL_INTERVAL_MS);
|
||||
}
|
||||
|
||||
/**
|
||||
* Stop the worker
|
||||
*/
|
||||
@@ -588,10 +291,23 @@ export class TaskWorker {
|
||||
}
|
||||
|
||||
/**
|
||||
* Execute a single task (runs concurrently with other tasks)
|
||||
* Process the next available task
|
||||
*/
|
||||
private async executeTask(task: WorkerTask): Promise<void> {
|
||||
console.log(`[TaskWorker] ${this.friendlyName} starting task ${task.id} (${task.role}) for dispensary ${task.dispensary_id || 'N/A'}`);
|
||||
private async processNextTask(): Promise<void> {
|
||||
// Try to claim a task
|
||||
const task = await taskService.claimTask(this.role, this.workerId);
|
||||
|
||||
if (!task) {
|
||||
// No tasks available, wait and retry
|
||||
await this.sleep(POLL_INTERVAL_MS);
|
||||
return;
|
||||
}
|
||||
|
||||
this.currentTask = task;
|
||||
console.log(`[TaskWorker] Claimed task ${task.id} (${task.role}) for dispensary ${task.dispensary_id || 'N/A'}`);
|
||||
|
||||
// Start heartbeat
|
||||
this.startHeartbeat(task.id);
|
||||
|
||||
try {
|
||||
// Mark as running
|
||||
@@ -620,7 +336,7 @@ export class TaskWorker {
|
||||
// Mark as completed
|
||||
await taskService.completeTask(task.id, result);
|
||||
await this.reportTaskCompletion(true);
|
||||
console.log(`[TaskWorker] ${this.friendlyName} completed task ${task.id} [${this.activeTasks.size}/${this.maxConcurrentTasks} active]`);
|
||||
console.log(`[TaskWorker] ${this.friendlyName} completed task ${task.id}`);
|
||||
|
||||
// Chain next task if applicable
|
||||
const chainedTask = await taskService.chainNextTask({
|
||||
@@ -642,35 +358,9 @@ export class TaskWorker {
|
||||
await taskService.failTask(task.id, error.message);
|
||||
await this.reportTaskCompletion(false);
|
||||
console.error(`[TaskWorker] ${this.friendlyName} task ${task.id} error:`, error.message);
|
||||
}
|
||||
// Note: cleanup (removing from activeTasks) is handled in mainLoop's finally block
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if this worker has been flagged for decommission
|
||||
* Returns true if worker should stop after current task
|
||||
*/
|
||||
private async checkDecommission(): Promise<boolean> {
|
||||
try {
|
||||
// Check worker_registry for decommission flag
|
||||
const result = await this.pool.query(
|
||||
`SELECT decommission_requested, decommission_reason
|
||||
FROM worker_registry
|
||||
WHERE worker_id = $1`,
|
||||
[this.workerId]
|
||||
);
|
||||
|
||||
if (result.rows.length > 0 && result.rows[0].decommission_requested) {
|
||||
const reason = result.rows[0].decommission_reason || 'No reason provided';
|
||||
console.log(`[TaskWorker] Decommission requested: ${reason}`);
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
} catch (error: any) {
|
||||
// If we can't check, continue running
|
||||
console.warn(`[TaskWorker] Could not check decommission status: ${error.message}`);
|
||||
return false;
|
||||
} finally {
|
||||
this.stopHeartbeat();
|
||||
this.currentTask = null;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -707,25 +397,12 @@ export class TaskWorker {
|
||||
/**
|
||||
* Get worker info
|
||||
*/
|
||||
getInfo(): {
|
||||
workerId: string;
|
||||
role: TaskRole | null;
|
||||
isRunning: boolean;
|
||||
activeTaskIds: number[];
|
||||
activeTaskCount: number;
|
||||
maxConcurrentTasks: number;
|
||||
isBackingOff: boolean;
|
||||
backoffReason: string | null;
|
||||
} {
|
||||
getInfo(): { workerId: string; role: TaskRole | null; isRunning: boolean; currentTaskId: number | null } {
|
||||
return {
|
||||
workerId: this.workerId,
|
||||
role: this.role,
|
||||
isRunning: this.isRunning,
|
||||
activeTaskIds: Array.from(this.activeTasks.keys()),
|
||||
activeTaskCount: this.activeTasks.size,
|
||||
maxConcurrentTasks: this.maxConcurrentTasks,
|
||||
isBackingOff: this.isBackingOff,
|
||||
backoffReason: this.backoffReason,
|
||||
currentTaskId: this.currentTask?.id || null,
|
||||
};
|
||||
}
|
||||
}
|
||||
@@ -737,13 +414,11 @@ export class TaskWorker {
|
||||
async function main(): Promise<void> {
|
||||
const role = process.env.WORKER_ROLE as TaskRole | undefined;
|
||||
|
||||
// Per TASK_WORKFLOW_2024-12-10.md: Valid task roles
|
||||
const validRoles: TaskRole[] = [
|
||||
'store_discovery',
|
||||
'entry_point_discovery',
|
||||
'product_discovery',
|
||||
'payload_fetch', // NEW: Fetches from API, saves to disk
|
||||
'product_refresh', // CHANGED: Reads from disk, processes to DB
|
||||
'product_refresh',
|
||||
'analytics_refresh',
|
||||
];
|
||||
|
||||
|
||||
49
backend/src/types/user-agents.d.ts
vendored
49
backend/src/types/user-agents.d.ts
vendored
@@ -1,49 +0,0 @@
|
||||
/**
|
||||
* Type declarations for user-agents npm package
|
||||
* Per workflow-12102025.md: Used for realistic UA generation with market-share weighting
|
||||
*/
|
||||
|
||||
declare module 'user-agents' {
|
||||
interface UserAgentData {
|
||||
userAgent: string;
|
||||
platform: string;
|
||||
screenWidth: number;
|
||||
screenHeight: number;
|
||||
viewportWidth: number;
|
||||
viewportHeight: number;
|
||||
deviceCategory: 'desktop' | 'mobile' | 'tablet';
|
||||
appName: string;
|
||||
connection?: {
|
||||
downlink: number;
|
||||
effectiveType: string;
|
||||
rtt: number;
|
||||
};
|
||||
}
|
||||
|
||||
interface UserAgentOptions {
|
||||
deviceCategory?: 'desktop' | 'mobile' | 'tablet';
|
||||
platform?: RegExp | string;
|
||||
screenWidth?: RegExp | { min?: number; max?: number };
|
||||
screenHeight?: RegExp | { min?: number; max?: number };
|
||||
}
|
||||
|
||||
interface UserAgentInstance {
|
||||
data: UserAgentData;
|
||||
toString(): string;
|
||||
random(): UserAgentInstance;
|
||||
}
|
||||
|
||||
class UserAgent {
|
||||
constructor(options?: UserAgentOptions | UserAgentOptions[]);
|
||||
data: UserAgentData;
|
||||
toString(): string;
|
||||
random(): UserAgentInstance;
|
||||
}
|
||||
|
||||
// Make it callable
|
||||
interface UserAgent {
|
||||
(): UserAgentInstance;
|
||||
}
|
||||
|
||||
export default UserAgent;
|
||||
}
|
||||
@@ -1,406 +0,0 @@
|
||||
/**
|
||||
* Payload Storage Utility
|
||||
*
|
||||
* Per TASK_WORKFLOW_2024-12-10.md: Store raw GraphQL payloads for historical analysis.
|
||||
*
|
||||
* Design Pattern: Metadata/Payload Separation
|
||||
* - Metadata in PostgreSQL (raw_crawl_payloads table): Small, indexed, queryable
|
||||
* - Payload on filesystem: Gzipped JSON at storage_path
|
||||
*
|
||||
* Storage structure:
|
||||
* /storage/payloads/{year}/{month}/{day}/store_{dispensary_id}_{timestamp}.json.gz
|
||||
*
|
||||
* Benefits:
|
||||
* - Compare any two crawls to see what changed
|
||||
* - Replay/re-normalize historical data if logic changes
|
||||
* - Debug issues by seeing exactly what the API returned
|
||||
* - DB stays small, backups stay fast
|
||||
* - ~90% compression (1.5MB -> 150KB per crawl)
|
||||
*/
|
||||
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import * as zlib from 'zlib';
|
||||
import { promisify } from 'util';
|
||||
import { Pool } from 'pg';
|
||||
import * as crypto from 'crypto';
|
||||
|
||||
// Promise-returning wrappers around zlib's callback-style gzip/gunzip.
const gzip = promisify(zlib.gzip);
const gunzip = promisify(zlib.gunzip);

// Root directory for stored payloads. PAYLOAD_STORAGE_PATH overrides the
// default; an unset or empty variable falls back to the relative default.
const DEFAULT_PAYLOAD_BASE_PATH = './storage/payloads';
const PAYLOAD_BASE_PATH = process.env.PAYLOAD_STORAGE_PATH || DEFAULT_PAYLOAD_BASE_PATH;
|
||||
|
||||
/**
 * Outcome of a successful saveRawPayload() call.
 */
export interface SavePayloadResult {
  /** `id` of the row inserted into raw_crawl_payloads. */
  id: number;
  /** Filesystem path the gzipped payload was written to. */
  storagePath: string;
  /** Length in bytes of the gzipped payload. */
  sizeBytes: number;
  /** Length in bytes of the UTF-8 JSON before compression. */
  sizeBytesRaw: number;
  /** Hex-encoded SHA-256 of the gzipped bytes. */
  checksum: string;
}
|
||||
|
||||
/**
 * A payload read back from disk together with its raw_crawl_payloads metadata.
 */
export interface LoadPayloadResult {
  /** Parsed JSON content of the stored file. */
  payload: any;
  /** Values copied from the matching raw_crawl_payloads row. */
  metadata: {
    id: number;
    dispensaryId: number;
    /** Null when the payload was saved without a crawl run. */
    crawlRunId: number | null;
    productCount: number;
    fetchedAt: Date;
    storagePath: string;
  };
}
|
||||
|
||||
/**
 * Build the on-disk location for a payload captured at `timestamp`.
 *
 * Layout: {PAYLOAD_BASE_PATH}/{year}/{month}/{day}/store_{dispensary_id}_{epoch_ms}.json.gz
 *
 * Month and day are zero-padded to two digits. The date parts come from the
 * Date's local-time getters, so the folder depends on the process time zone.
 */
function generateStoragePath(dispensaryId: number, timestamp: Date): string {
  const twoDigits = (value: number): string => String(value).padStart(2, '0');

  const dateFolders = [
    String(timestamp.getFullYear()),
    twoDigits(timestamp.getMonth() + 1), // getMonth() is zero-based
    twoDigits(timestamp.getDate()),
  ];
  const fileName = `store_${dispensaryId}_${timestamp.getTime()}.json.gz`;

  return path.join(PAYLOAD_BASE_PATH, ...dateFolders, fileName);
}
|
||||
|
||||
/**
 * Create the parent directory of `filePath`, including missing ancestors.
 * Uses a recursive mkdir, so an already-existing directory is not an error.
 */
async function ensureDir(filePath: string): Promise<void> {
  await fs.promises.mkdir(path.dirname(filePath), { recursive: true });
}
|
||||
|
||||
/**
 * Return the SHA-256 digest of `data` as a lowercase hex string.
 */
function calculateChecksum(data: Buffer): string {
  const hasher = crypto.createHash('sha256');
  hasher.update(data);
  return hasher.digest('hex');
}
|
||||
|
||||
/**
 * Save a raw crawl payload to the filesystem and record its metadata in the DB.
 *
 * The payload is serialized to JSON, gzipped, written to a path derived from
 * the dispensary ID and the current time, and then described by a new
 * raw_crawl_payloads row. If the insert fails, the file just written is
 * removed again (best effort) so no untracked file is left behind.
 *
 * @param pool - Database connection pool
 * @param dispensaryId - ID of the dispensary
 * @param payload - Raw JSON payload from GraphQL
 * @param crawlRunId - Optional crawl_run ID for linking
 * @param productCount - Number of products in payload
 * @returns SavePayloadResult with file info and DB record ID
 * @throws TypeError when `payload` cannot be serialized to JSON text
 *         (e.g. `undefined` or a function); filesystem and DB errors propagate.
 */
export async function saveRawPayload(
  pool: Pool,
  dispensaryId: number,
  payload: any,
  crawlRunId: number | null = null,
  productCount: number = 0
): Promise<SavePayloadResult> {
  const timestamp = new Date();
  const storagePath = generateStoragePath(dispensaryId, timestamp);

  // Serialize and compress
  const jsonStr = JSON.stringify(payload);
  if (typeof jsonStr !== 'string') {
    // JSON.stringify returns undefined for undefined/function/symbol input;
    // fail with a clear message before anything touches the disk.
    throw new TypeError(
      `[PayloadStorage] Payload for store ${dispensaryId} is not JSON-serializable`
    );
  }
  const rawSize = Buffer.byteLength(jsonStr, 'utf8');
  const compressed = await gzip(Buffer.from(jsonStr, 'utf8'));
  const compressedSize = compressed.length;
  // The checksum covers the compressed bytes, i.e. exactly what is on disk.
  const checksum = calculateChecksum(compressed);

  // Write to filesystem
  await ensureDir(storagePath);
  await fs.promises.writeFile(storagePath, compressed);

  // Record metadata in DB
  let recordId: number;
  try {
    const result = await pool.query(`
      INSERT INTO raw_crawl_payloads (
        crawl_run_id,
        dispensary_id,
        storage_path,
        product_count,
        size_bytes,
        size_bytes_raw,
        fetched_at,
        checksum_sha256
      ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
      RETURNING id
    `, [
      crawlRunId,
      dispensaryId,
      storagePath,
      productCount,
      compressedSize,
      rawSize,
      timestamp,
      checksum
    ]);
    recordId = result.rows[0].id;
  } catch (err) {
    // Without a metadata row nothing references the file, so remove it.
    // Cleanup is deliberately best-effort: the insert error is what the
    // caller needs to see, so a cleanup failure is only logged.
    try {
      await fs.promises.unlink(storagePath);
    } catch (cleanupErr: any) {
      console.warn(`[PayloadStorage] Failed to remove orphaned payload ${storagePath}: ${cleanupErr.message}`);
    }
    throw err;
  }

  console.log(`[PayloadStorage] Saved payload for store ${dispensaryId}: ${storagePath} (${(compressedSize / 1024).toFixed(1)}KB compressed, ${(rawSize / 1024).toFixed(1)}KB raw)`);

  return {
    id: recordId,
    storagePath,
    sizeBytes: compressedSize,
    sizeBytesRaw: rawSize,
    checksum
  };
}
|
||||
|
||||
/**
 * Look up one raw_crawl_payloads row by ID and load the file it points to.
 *
 * @param pool - Database connection pool
 * @param payloadId - ID from raw_crawl_payloads table
 * @returns The parsed payload plus row metadata, or null when no row matches
 */
export async function loadRawPayloadById(
  pool: Pool,
  payloadId: number
): Promise<LoadPayloadResult | null> {
  const { rows } = await pool.query(`
    SELECT id, dispensary_id, crawl_run_id, storage_path, product_count, fetched_at
    FROM raw_crawl_payloads
    WHERE id = $1
  `, [payloadId]);

  if (rows.length === 0) {
    return null;
  }

  const record = rows[0];
  const payload = await loadPayloadFromPath(record.storage_path);

  // Translate snake_case columns into the camelCase metadata shape.
  const metadata = {
    id: record.id,
    dispensaryId: record.dispensary_id,
    crawlRunId: record.crawl_run_id,
    productCount: record.product_count,
    fetchedAt: record.fetched_at,
    storagePath: record.storage_path
  };

  return { payload, metadata };
}
|
||||
|
||||
/**
 * Read a gzipped JSON file and return its parsed content.
 *
 * @param storagePath - Path to gzipped JSON file
 * @returns Parsed JSON payload
 */
export async function loadPayloadFromPath(storagePath: string): Promise<any> {
  const gzippedBytes = await fs.promises.readFile(storagePath);
  const jsonText = (await gunzip(gzippedBytes)).toString('utf8');
  return JSON.parse(jsonText);
}
|
||||
|
||||
/**
 * Load the most recently fetched payload for a dispensary.
 *
 * Rows are ordered by fetched_at descending and only the first is used.
 *
 * @param pool - Database connection pool
 * @param dispensaryId - ID of the dispensary
 * @returns LoadPayloadResult or null if none exists
 */
export async function getLatestPayload(
  pool: Pool,
  dispensaryId: number
): Promise<LoadPayloadResult | null> {
  const { rows } = await pool.query(`
    SELECT id, dispensary_id, crawl_run_id, storage_path, product_count, fetched_at
    FROM raw_crawl_payloads
    WHERE dispensary_id = $1
    ORDER BY fetched_at DESC
    LIMIT 1
  `, [dispensaryId]);

  if (rows.length === 0) {
    return null;
  }

  const newest = rows[0];
  const payload = await loadPayloadFromPath(newest.storage_path);

  // Translate snake_case columns into the camelCase metadata shape.
  const metadata = {
    id: newest.id,
    dispensaryId: newest.dispensary_id,
    crawlRunId: newest.crawl_run_id,
    productCount: newest.product_count,
    fetchedAt: newest.fetched_at,
    storagePath: newest.storage_path
  };

  return { payload, metadata };
}
|
||||
|
||||
/**
 * Load the newest payloads for a dispensary, e.g. the latest and the previous
 * one for comparison.
 *
 * Files are read one at a time, in the order the rows come back.
 *
 * @param pool - Database connection pool
 * @param dispensaryId - ID of the dispensary
 * @param limit - Number of recent payloads to retrieve (default 2)
 * @returns Array of LoadPayloadResult, most recent first
 */
export async function getRecentPayloads(
  pool: Pool,
  dispensaryId: number,
  limit: number = 2
): Promise<LoadPayloadResult[]> {
  const { rows } = await pool.query(`
    SELECT id, dispensary_id, crawl_run_id, storage_path, product_count, fetched_at
    FROM raw_crawl_payloads
    WHERE dispensary_id = $1
    ORDER BY fetched_at DESC
    LIMIT $2
  `, [dispensaryId, limit]);

  const loaded: LoadPayloadResult[] = [];

  for (const record of rows) {
    const payload = await loadPayloadFromPath(record.storage_path);
    const metadata = {
      id: record.id,
      dispensaryId: record.dispensary_id,
      crawlRunId: record.crawl_run_id,
      productCount: record.product_count,
      fetchedAt: record.fetched_at,
      storagePath: record.storage_path
    };
    loaded.push({ payload, metadata });
  }

  return loaded;
}
|
||||
|
||||
/**
 * List payload metadata without loading files (for browsing/pagination).
 *
 * Each supplied filter becomes one parameterized WHERE condition; conditions
 * are combined with AND. Results are ordered by fetched_at, newest first.
 *
 * @param pool - Database connection pool
 * @param options - Query options
 * @param options.dispensaryId - Only rows for this dispensary (0 is a valid ID)
 * @param options.startDate - Only rows with fetched_at >= this date
 * @param options.endDate - Only rows with fetched_at <= this date
 * @param options.limit - Page size; missing or 0 falls back to 50
 * @param options.offset - Rows to skip; missing falls back to 0
 * @returns Array of metadata rows
 */
export async function listPayloadMetadata(
  pool: Pool,
  options: {
    dispensaryId?: number;
    startDate?: Date;
    endDate?: Date;
    limit?: number;
    offset?: number;
  } = {}
): Promise<Array<{
  id: number;
  dispensaryId: number;
  crawlRunId: number | null;
  storagePath: string;
  productCount: number;
  sizeBytes: number;
  sizeBytesRaw: number;
  fetchedAt: Date;
}>> {
  const conditions: string[] = [];
  const params: any[] = [];
  let paramIndex = 1;

  // Presence check rather than truthiness: a dispensaryId of 0 must still
  // filter, otherwise the query would return rows for every dispensary.
  if (options.dispensaryId !== undefined && options.dispensaryId !== null) {
    conditions.push(`dispensary_id = $${paramIndex++}`);
    params.push(options.dispensaryId);
  }

  if (options.startDate) {
    conditions.push(`fetched_at >= $${paramIndex++}`);
    params.push(options.startDate);
  }

  if (options.endDate) {
    conditions.push(`fetched_at <= $${paramIndex++}`);
    params.push(options.endDate);
  }

  const whereClause = conditions.length > 0 ? `WHERE ${conditions.join(' AND ')}` : '';
  const limit = options.limit || 50;
  const offset = options.offset || 0;

  // LIMIT and OFFSET take the two placeholders after the filter values.
  params.push(limit, offset);

  const result = await pool.query(`
    SELECT
      id,
      dispensary_id,
      crawl_run_id,
      storage_path,
      product_count,
      size_bytes,
      size_bytes_raw,
      fetched_at
    FROM raw_crawl_payloads
    ${whereClause}
    ORDER BY fetched_at DESC
    LIMIT $${paramIndex++} OFFSET $${paramIndex}
  `, params);

  return result.rows.map(row => ({
    id: row.id,
    dispensaryId: row.dispensary_id,
    crawlRunId: row.crawl_run_id,
    storagePath: row.storage_path,
    productCount: row.product_count,
    sizeBytes: row.size_bytes,
    sizeBytesRaw: row.size_bytes_raw,
    fetchedAt: row.fetched_at
  }));
}
|
||||
|
||||
/**
 * Delete old payloads (for retention policy).
 *
 * For every row with fetched_at earlier than `olderThan`, the payload file is
 * removed first. The metadata row is deleted only when its file is gone
 * (removed now, or already missing). Rows whose file could not be removed are
 * kept, so the file stays tracked and a later run can retry it.
 *
 * @param pool - Database connection pool
 * @param olderThan - Delete payloads older than this date
 * @returns Number of payloads deleted
 */
export async function deleteOldPayloads(
  pool: Pool,
  olderThan: Date
): Promise<number> {
  // Get paths first
  const result = await pool.query(`
    SELECT id, storage_path FROM raw_crawl_payloads
    WHERE fetched_at < $1
  `, [olderThan]);

  // Delete files, remembering which rows no longer have a file on disk
  const removableIds: any[] = [];
  for (const row of result.rows) {
    try {
      await fs.promises.unlink(row.storage_path);
      removableIds.push(row.id);
    } catch (err: any) {
      if (err.code === 'ENOENT') {
        // File is already gone; the row is stale and safe to drop.
        removableIds.push(row.id);
      } else {
        console.warn(`[PayloadStorage] Failed to delete ${row.storage_path}: ${err.message}`);
      }
    }
  }

  // Delete DB records by ID, not by date, so that only the rows handled
  // above are removed and the returned count matches what was deleted.
  if (removableIds.length > 0) {
    await pool.query(`
      DELETE FROM raw_crawl_payloads
      WHERE id = ANY($1)
    `, [removableIds]);
  }

  console.log(`[PayloadStorage] Deleted ${removableIds.length} payloads older than ${olderThan.toISOString()}`);

  return removableIds.length;
}
|
||||
@@ -6,8 +6,8 @@ WORKDIR /app
|
||||
# Copy package files
|
||||
COPY package*.json ./
|
||||
|
||||
# Install dependencies (npm install is more forgiving than npm ci)
|
||||
RUN npm install
|
||||
# Install dependencies
|
||||
RUN npm ci
|
||||
|
||||
# Copy source files
|
||||
COPY . .
|
||||
|
||||
4
cannaiq/dist/index.html
vendored
4
cannaiq/dist/index.html
vendored
@@ -7,8 +7,8 @@
|
||||
<title>CannaIQ - Cannabis Menu Intelligence Platform</title>
|
||||
<meta name="description" content="CannaIQ provides real-time cannabis dispensary menu data, product tracking, and analytics for dispensaries across Arizona." />
|
||||
<meta name="keywords" content="cannabis, dispensary, menu, products, analytics, Arizona" />
|
||||
<script type="module" crossorigin src="/assets/index-Dq9S0rVi.js"></script>
|
||||
<link rel="stylesheet" crossorigin href="/assets/index-DhM09B-d.css">
|
||||
<script type="module" crossorigin src="/assets/index-BML8-px1.js"></script>
|
||||
<link rel="stylesheet" crossorigin href="/assets/index-B2gR-58G.css">
|
||||
</head>
|
||||
<body>
|
||||
<div id="root"></div>
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8" />
|
||||
<link rel="icon" type="image/svg+xml" href="/favicon.svg" />
|
||||
<link rel="icon" type="image/svg+xml" href="/vite.svg" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>CannaIQ - Cannabis Menu Intelligence Platform</title>
|
||||
<meta name="description" content="CannaIQ provides real-time cannabis dispensary menu data, product tracking, and analytics for dispensaries across Arizona." />
|
||||
|
||||
@@ -1,5 +0,0 @@
|
||||
<svg viewBox="0 0 32 32" xmlns="http://www.w3.org/2000/svg">
|
||||
<rect width="32" height="32" rx="6" fill="#059669"/>
|
||||
<path d="M16 6C12.5 6 9.5 7.5 7.5 10L16 16L24.5 10C22.5 7.5 19.5 6 16 6Z" fill="white"/>
|
||||
<path d="M7.5 10C6 12 5 14.5 5 17C5 22.5 10 26 16 26C22 26 27 22.5 27 17C27 14.5 26 12 24.5 10L16 16L7.5 10Z" fill="white" fill-opacity="0.7"/>
|
||||
</svg>
|
||||
|
Before Width: | Height: | Size: 360 B |
@@ -8,7 +8,6 @@ import { ProductDetail } from './pages/ProductDetail';
|
||||
import { Stores } from './pages/Stores';
|
||||
import { Dispensaries } from './pages/Dispensaries';
|
||||
import { DispensaryDetail } from './pages/DispensaryDetail';
|
||||
import { DispensarySchedule } from './pages/DispensarySchedule';
|
||||
import { StoreDetail } from './pages/StoreDetail';
|
||||
import { StoreBrands } from './pages/StoreBrands';
|
||||
import { StoreSpecials } from './pages/StoreSpecials';
|
||||
@@ -47,6 +46,7 @@ import CrossStateCompare from './pages/CrossStateCompare';
|
||||
import StateDetail from './pages/StateDetail';
|
||||
import { Discovery } from './pages/Discovery';
|
||||
import { WorkersDashboard } from './pages/WorkersDashboard';
|
||||
import { JobQueue } from './pages/JobQueue';
|
||||
import TasksDashboard from './pages/TasksDashboard';
|
||||
import { ScraperOverviewDashboard } from './pages/ScraperOverviewDashboard';
|
||||
import { SeoOrchestrator } from './pages/admin/seo/SeoOrchestrator';
|
||||
@@ -66,7 +66,6 @@ export default function App() {
|
||||
<Route path="/stores" element={<PrivateRoute><Stores /></PrivateRoute>} />
|
||||
<Route path="/dispensaries" element={<PrivateRoute><Dispensaries /></PrivateRoute>} />
|
||||
<Route path="/dispensaries/:state/:city/:slug" element={<PrivateRoute><DispensaryDetail /></PrivateRoute>} />
|
||||
<Route path="/dispensaries/:state/:city/:slug/schedule" element={<PrivateRoute><DispensarySchedule /></PrivateRoute>} />
|
||||
<Route path="/stores/:state/:storeName/:slug/brands" element={<PrivateRoute><StoreBrands /></PrivateRoute>} />
|
||||
<Route path="/stores/:state/:storeName/:slug/specials" element={<PrivateRoute><StoreSpecials /></PrivateRoute>} />
|
||||
<Route path="/stores/:state/:storeName/:slug" element={<PrivateRoute><StoreDetail /></PrivateRoute>} />
|
||||
@@ -124,6 +123,8 @@ export default function App() {
|
||||
<Route path="/discovery" element={<PrivateRoute><Discovery /></PrivateRoute>} />
|
||||
{/* Workers Dashboard */}
|
||||
<Route path="/workers" element={<PrivateRoute><WorkersDashboard /></PrivateRoute>} />
|
||||
{/* Job Queue Management */}
|
||||
<Route path="/job-queue" element={<PrivateRoute><JobQueue /></PrivateRoute>} />
|
||||
{/* Task Queue Dashboard */}
|
||||
<Route path="/tasks" element={<PrivateRoute><TasksDashboard /></PrivateRoute>} />
|
||||
{/* Scraper Overview Dashboard (new primary) */}
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
import { ReactNode, useEffect, useState, useRef } from 'react';
|
||||
import { useNavigate, useLocation, Link } from 'react-router-dom';
|
||||
import { ReactNode, useEffect, useState } from 'react';
|
||||
import { useNavigate, useLocation } from 'react-router-dom';
|
||||
import { useAuthStore } from '../store/authStore';
|
||||
import { api } from '../lib/api';
|
||||
import { StateSelector } from './StateSelector';
|
||||
import {
|
||||
LayoutDashboard,
|
||||
Building2,
|
||||
@@ -47,8 +48,8 @@ interface NavLinkProps {
|
||||
|
||||
function NavLink({ to, icon, label, isActive }: NavLinkProps) {
|
||||
return (
|
||||
<Link
|
||||
to={to}
|
||||
<a
|
||||
href={to}
|
||||
className={`flex items-center gap-3 px-3 py-2 rounded-lg text-sm font-medium transition-colors ${
|
||||
isActive
|
||||
? 'bg-emerald-50 text-emerald-700'
|
||||
@@ -57,7 +58,7 @@ function NavLink({ to, icon, label, isActive }: NavLinkProps) {
|
||||
>
|
||||
<span className={`flex-shrink-0 ${isActive ? 'text-emerald-600' : 'text-gray-400'}`}>{icon}</span>
|
||||
<span>{label}</span>
|
||||
</Link>
|
||||
</a>
|
||||
);
|
||||
}
|
||||
|
||||
@@ -85,8 +86,6 @@ export function Layout({ children }: LayoutProps) {
|
||||
const { user, logout } = useAuthStore();
|
||||
const [versionInfo, setVersionInfo] = useState<VersionInfo | null>(null);
|
||||
const [sidebarOpen, setSidebarOpen] = useState(false);
|
||||
const navRef = useRef<HTMLElement>(null);
|
||||
const scrollPositionRef = useRef<number>(0);
|
||||
|
||||
useEffect(() => {
|
||||
const fetchVersion = async () => {
|
||||
@@ -112,34 +111,16 @@ export function Layout({ children }: LayoutProps) {
|
||||
return location.pathname.startsWith(path);
|
||||
};
|
||||
|
||||
// Save scroll position before route change
|
||||
useEffect(() => {
|
||||
const nav = navRef.current;
|
||||
if (nav) {
|
||||
const handleScroll = () => {
|
||||
scrollPositionRef.current = nav.scrollTop;
|
||||
};
|
||||
nav.addEventListener('scroll', handleScroll);
|
||||
return () => nav.removeEventListener('scroll', handleScroll);
|
||||
}
|
||||
}, []);
|
||||
|
||||
// Restore scroll position after route change and close mobile sidebar
|
||||
// Close sidebar on route change (mobile)
|
||||
useEffect(() => {
|
||||
setSidebarOpen(false);
|
||||
// Restore scroll position after render
|
||||
requestAnimationFrame(() => {
|
||||
if (navRef.current) {
|
||||
navRef.current.scrollTop = scrollPositionRef.current;
|
||||
}
|
||||
});
|
||||
}, [location.pathname]);
|
||||
|
||||
const sidebarContent = (
|
||||
<>
|
||||
{/* Logo/Brand */}
|
||||
<div className="px-6 py-5 border-b border-gray-200">
|
||||
<Link to="/dashboard" className="flex items-center gap-3 hover:opacity-80 transition-opacity">
|
||||
<div className="flex items-center gap-3">
|
||||
<div className="w-8 h-8 bg-emerald-600 rounded-lg flex items-center justify-center">
|
||||
<svg viewBox="0 0 24 24" className="w-5 h-5 text-white" fill="currentColor">
|
||||
<path d="M12 2C8.5 2 5.5 3.5 3.5 6L12 12L20.5 6C18.5 3.5 15.5 2 12 2Z" />
|
||||
@@ -150,17 +131,21 @@ export function Layout({ children }: LayoutProps) {
|
||||
<span className="text-lg font-bold text-gray-900">CannaIQ</span>
|
||||
{versionInfo && (
|
||||
<p className="text-xs text-gray-400">
|
||||
{versionInfo.git_sha || 'dev'}
|
||||
v{versionInfo.version} ({versionInfo.git_sha}) {versionInfo.build_time !== 'unknown' && `- ${new Date(versionInfo.build_time).toLocaleDateString()}`}
|
||||
</p>
|
||||
)}
|
||||
</div>
|
||||
</Link>
|
||||
</div>
|
||||
<p className="text-xs text-gray-500 mt-2 truncate">{user?.email}</p>
|
||||
</div>
|
||||
|
||||
{/* State Selector */}
|
||||
<div className="px-4 py-3 border-b border-gray-200 bg-gray-50">
|
||||
<StateSelector showLabel={false} />
|
||||
</div>
|
||||
|
||||
{/* Navigation */}
|
||||
<nav ref={navRef} className="flex-1 px-3 py-4 space-y-6 overflow-y-auto">
|
||||
<nav className="flex-1 px-3 py-4 space-y-6 overflow-y-auto">
|
||||
<NavSection title="Main">
|
||||
<NavLink to="/dashboard" icon={<LayoutDashboard className="w-4 h-4" />} label="Dashboard" isActive={isActive('/dashboard', true)} />
|
||||
<NavLink to="/dispensaries" icon={<Building2 className="w-4 h-4" />} label="Dispensaries" isActive={isActive('/dispensaries')} />
|
||||
@@ -179,7 +164,8 @@ export function Layout({ children }: LayoutProps) {
|
||||
<NavLink to="/admin/orchestrator" icon={<Activity className="w-4 h-4" />} label="Orchestrator" isActive={isActive('/admin/orchestrator')} />
|
||||
<NavLink to="/users" icon={<UserCog className="w-4 h-4" />} label="Users" isActive={isActive('/users')} />
|
||||
<NavLink to="/workers" icon={<Users className="w-4 h-4" />} label="Workers" isActive={isActive('/workers')} />
|
||||
<NavLink to="/tasks" icon={<ListChecks className="w-4 h-4" />} label="Tasks" isActive={isActive('/tasks')} />
|
||||
<NavLink to="/job-queue" icon={<ListOrdered className="w-4 h-4" />} label="Job Queue" isActive={isActive('/job-queue')} />
|
||||
<NavLink to="/tasks" icon={<ListChecks className="w-4 h-4" />} label="Task Queue" isActive={isActive('/tasks')} />
|
||||
<NavLink to="/admin/seo" icon={<FileText className="w-4 h-4" />} label="SEO Pages" isActive={isActive('/admin/seo')} />
|
||||
<NavLink to="/proxies" icon={<Shield className="w-4 h-4" />} label="Proxies" isActive={isActive('/proxies')} />
|
||||
<NavLink to="/api-permissions" icon={<Key className="w-4 h-4" />} label="API Keys" isActive={isActive('/api-permissions')} />
|
||||
@@ -228,7 +214,7 @@ export function Layout({ children }: LayoutProps) {
|
||||
<button onClick={() => setSidebarOpen(true)} className="p-2 -ml-2 rounded-lg hover:bg-gray-100">
|
||||
<Menu className="w-5 h-5 text-gray-600" />
|
||||
</button>
|
||||
<Link to="/dashboard" className="flex items-center gap-2 hover:opacity-80 transition-opacity">
|
||||
<div className="flex items-center gap-2">
|
||||
<div className="w-6 h-6 bg-emerald-600 rounded flex items-center justify-center">
|
||||
<svg viewBox="0 0 24 24" className="w-4 h-4 text-white" fill="currentColor">
|
||||
<path d="M12 2C8.5 2 5.5 3.5 3.5 6L12 12L20.5 6C18.5 3.5 15.5 2 12 2Z" />
|
||||
@@ -236,7 +222,7 @@ export function Layout({ children }: LayoutProps) {
|
||||
</svg>
|
||||
</div>
|
||||
<span className="font-semibold text-gray-900">CannaIQ</span>
|
||||
</Link>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Page content */}
|
||||
|
||||
@@ -1,138 +0,0 @@
|
||||
import { useState, useEffect, useRef } from 'react';
|
||||
import { api } from '../lib/api';
|
||||
import { Shield, X, Loader2 } from 'lucide-react';
|
||||
|
||||
/** Props accepted by PasswordConfirmModal. */
interface PasswordConfirmModalProps {
  /** When false the modal renders nothing. */
  isOpen: boolean;
  /** Called to dismiss the modal (backdrop, close button, Cancel, or after confirm). */
  onClose: () => void;
  /** Called once the entered password has been verified. */
  onConfirm: () => void;
  /** Heading shown in the modal header. */
  title: string;
  /** Explanatory text shown above the password field. */
  description: string;
}
|
||||
|
||||
/**
 * Modal that asks the user to re-enter their password before an action runs.
 *
 * The password is sent to `api.verifyPassword`. When the response reports
 * `verified`, `onConfirm` and then `onClose` are called; otherwise an inline
 * error is shown and the modal stays open. Renders nothing while `isOpen` is
 * false.
 *
 * While a verification request is in flight the modal cannot be dismissed, so
 * the user can never "cancel" and still have `onConfirm` fire afterwards.
 */
export function PasswordConfirmModal({
  isOpen,
  onClose,
  onConfirm,
  title,
  description,
}: PasswordConfirmModalProps) {
  const [password, setPassword] = useState('');
  const [error, setError] = useState('');
  const [loading, setLoading] = useState(false);
  const inputRef = useRef<HTMLInputElement>(null);

  useEffect(() => {
    if (!isOpen) {
      return undefined;
    }
    // Start every opening with a clean form.
    setPassword('');
    setError('');
    // Focus the input when modal opens; the delay lets the input mount first.
    const focusTimer = setTimeout(() => inputRef.current?.focus(), 100);
    // Cancel the pending focus if the modal closes or unmounts before it fires.
    return () => clearTimeout(focusTimer);
  }, [isOpen]);

  // Dismiss requests from the backdrop / close button are ignored while the
  // password is being verified (the Cancel button is disabled in that state).
  const handleDismiss = () => {
    if (loading) {
      return;
    }
    onClose();
  };

  const handleSubmit = async (e: React.FormEvent) => {
    e.preventDefault();
    if (!password.trim()) {
      setError('Password is required');
      return;
    }

    setLoading(true);
    setError('');

    try {
      const result = await api.verifyPassword(password);
      if (result.verified) {
        onConfirm();
        onClose();
      } else {
        setError('Invalid password');
      }
    } catch (err: any) {
      setError(err.message || 'Verification failed');
    } finally {
      setLoading(false);
    }
  };

  if (!isOpen) return null;

  return (
    <div className="fixed inset-0 z-50 flex items-center justify-center">
      {/* Backdrop */}
      <div
        className="absolute inset-0 bg-black bg-opacity-50"
        onClick={handleDismiss}
      />

      {/* Modal */}
      <div
        role="dialog"
        aria-modal="true"
        aria-labelledby="password-confirm-title"
        className="relative bg-white rounded-lg shadow-xl max-w-md w-full mx-4"
      >
        {/* Header */}
        <div className="flex items-center justify-between px-6 py-4 border-b border-gray-200">
          <div className="flex items-center gap-3">
            <div className="p-2 bg-amber-100 rounded-lg">
              <Shield className="w-5 h-5 text-amber-600" />
            </div>
            <h3 id="password-confirm-title" className="text-lg font-semibold text-gray-900">{title}</h3>
          </div>
          <button
            type="button"
            aria-label="Close"
            onClick={handleDismiss}
            disabled={loading}
            className="p-1 hover:bg-gray-100 rounded-lg transition-colors"
          >
            <X className="w-5 h-5 text-gray-500" />
          </button>
        </div>

        {/* Body */}
        <form onSubmit={handleSubmit}>
          <div className="px-6 py-4">
            <p className="text-gray-600 mb-4">{description}</p>

            <div className="space-y-2">
              <label
                htmlFor="password"
                className="block text-sm font-medium text-gray-700"
              >
                Enter your password to continue
              </label>
              <input
                ref={inputRef}
                type="password"
                id="password"
                value={password}
                onChange={(e) => setPassword(e.target.value)}
                className="w-full px-4 py-2 border border-gray-300 rounded-lg focus:ring-2 focus:ring-emerald-500 focus:border-emerald-500"
                placeholder="Password"
                disabled={loading}
              />
              {error && (
                <p className="text-sm text-red-600">{error}</p>
              )}
            </div>
          </div>

          {/* Footer */}
          <div className="flex justify-end gap-3 px-6 py-4 border-t border-gray-200 bg-gray-50 rounded-b-lg">
            <button
              type="button"
              onClick={handleDismiss}
              disabled={loading}
              className="px-4 py-2 text-gray-700 hover:bg-gray-100 rounded-lg transition-colors"
            >
              Cancel
            </button>
            <button
              type="submit"
              disabled={loading}
              className="px-4 py-2 bg-emerald-600 text-white rounded-lg hover:bg-emerald-700 transition-colors disabled:opacity-50 flex items-center gap-2"
            >
              {loading && <Loader2 className="w-4 h-4 animate-spin" />}
              Confirm
            </button>
          </div>
        </form>
      </div>
    </div>
  );
}
|
||||
@@ -69,13 +69,6 @@ class ApiClient {
|
||||
return { data };
|
||||
}
|
||||
|
||||
// Sends a DELETE request to `endpoint` through this.request and wraps the
// resolved value as `{ data }`.
async delete<T = any>(endpoint: string): Promise<{ data: T }> {
  const options = { method: 'DELETE' };
  const data = await this.request<T>(endpoint, options);
  return { data };
}
|
||||
|
||||
// Auth
|
||||
async login(email: string, password: string) {
|
||||
return this.request<{ token: string; user: any }>('/api/auth/login', {
|
||||
@@ -84,13 +77,6 @@ class ApiClient {
|
||||
});
|
||||
}
|
||||
|
||||
// POSTs the password as a JSON body to /api/auth/verify-password; the typed
// response carries a `verified` flag and an optional `error` string.
async verifyPassword(password: string) {
  const body = JSON.stringify({ password });
  return this.request<{ verified: boolean; error?: string }>('/api/auth/verify-password', {
    method: 'POST',
    body,
  });
}
|
||||
|
||||
// Requests /api/auth/me; the typed response carries a `user` object.
async getMe() {
  const endpoint = '/api/auth/me';
  return this.request<{ user: any }>(endpoint);
}
|
||||
@@ -327,7 +313,7 @@ class ApiClient {
|
||||
}
|
||||
|
||||
async testAllProxies() {
|
||||
return this.request<{ jobId: number; total: number; message: string }>('/api/proxies/test-all', {
|
||||
return this.request<{ jobId: number; message: string }>('/api/proxies/test-all', {
|
||||
method: 'POST',
|
||||
});
|
||||
}
|
||||
@@ -990,47 +976,6 @@ class ApiClient {
|
||||
}>(`/api/markets/stores/${id}/categories`);
|
||||
}
|
||||
|
||||
// Requests the crawl history of store `id`, passing `limit` (default 50) as a
// query parameter. The typed response has three parts: the dispensary summary
// (or null), the list of crawl runs, and the next schedule (or null).
async getStoreCrawlHistory(id: number, limit = 50) {
  const endpoint = `/api/markets/stores/${id}/crawl-history?limit=${limit}`;
  return this.request<{
    dispensary: {
      id: number;
      name: string;
      dba_name: string | null;
      slug: string;
      state: string;
      city: string;
      menu_type: string | null;
      platform_dispensary_id: string | null;
      last_menu_scrape: string | null;
    } | null;
    history: {
      id: number;
      runId: string | null;
      profileKey: string | null;
      crawlerModule: string | null;
      stateAtStart: string | null;
      stateAtEnd: string | null;
      totalSteps: number;
      durationMs: number | null;
      success: boolean;
      errorMessage: string | null;
      productsFound: number | null;
      startedAt: string | null;
      completedAt: string | null;
    }[];
    nextSchedule: {
      scheduleId: number;
      jobName: string;
      enabled: boolean;
      baseIntervalMinutes: number;
      jitterMinutes: number;
      nextRunAt: string | null;
      lastRunAt: string | null;
      lastStatus: string | null;
    } | null;
  }>(endpoint);
}
|
||||
|
||||
// Global Brands/Categories (from v_brands/v_categories views)
|
||||
async getMarketBrands(params?: { limit?: number; offset?: number }) {
|
||||
const searchParams = new URLSearchParams();
|
||||
@@ -1566,11 +1511,10 @@ class ApiClient {
|
||||
}
|
||||
|
||||
// Intelligence API
|
||||
async getIntelligenceBrands(params?: { limit?: number; offset?: number; state?: string }) {
|
||||
async getIntelligenceBrands(params?: { limit?: number; offset?: number }) {
|
||||
const searchParams = new URLSearchParams();
|
||||
if (params?.limit) searchParams.append('limit', params.limit.toString());
|
||||
if (params?.offset) searchParams.append('offset', params.offset.toString());
|
||||
if (params?.state) searchParams.append('state', params.state);
|
||||
const queryString = searchParams.toString() ? `?${searchParams.toString()}` : '';
|
||||
return this.request<{
|
||||
brands: Array<{
|
||||
@@ -1585,10 +1529,7 @@ class ApiClient {
|
||||
}>(`/api/admin/intelligence/brands${queryString}`);
|
||||
}
|
||||
|
||||
async getIntelligencePricing(params?: { state?: string }) {
|
||||
const searchParams = new URLSearchParams();
|
||||
if (params?.state) searchParams.append('state', params.state);
|
||||
const queryString = searchParams.toString() ? `?${searchParams.toString()}` : '';
|
||||
async getIntelligencePricing() {
|
||||
return this.request<{
|
||||
byCategory: Array<{
|
||||
category: string;
|
||||
@@ -1604,7 +1545,7 @@ class ApiClient {
|
||||
maxPrice: number;
|
||||
totalProducts: number;
|
||||
};
|
||||
}>(`/api/admin/intelligence/pricing${queryString}`);
|
||||
}>('/api/admin/intelligence/pricing');
|
||||
}
|
||||
|
||||
async getIntelligenceStoreActivity(params?: { state?: string; chainId?: number; limit?: number }) {
|
||||
@@ -2936,46 +2877,6 @@ class ApiClient {
|
||||
`/api/tasks/store/${dispensaryId}/active`
|
||||
);
|
||||
}
|
||||
|
||||
// Task Pool Control
|
||||
async getTaskPoolStatus() {
|
||||
return this.request<{ success: boolean; paused: boolean; message: string }>(
|
||||
'/api/tasks/pool/status'
|
||||
);
|
||||
}
|
||||
|
||||
async pauseTaskPool() {
|
||||
return this.request<{ success: boolean; paused: boolean; message: string }>(
|
||||
'/api/tasks/pool/pause',
|
||||
{ method: 'POST' }
|
||||
);
|
||||
}
|
||||
|
||||
async resumeTaskPool() {
|
||||
return this.request<{ success: boolean; paused: boolean; message: string }>(
|
||||
'/api/tasks/pool/resume',
|
||||
{ method: 'POST' }
|
||||
);
|
||||
}
|
||||
|
||||
// K8s Worker Control
|
||||
async getK8sWorkers() {
|
||||
return this.request<{
|
||||
success: boolean;
|
||||
available: boolean;
|
||||
replicas: number;
|
||||
readyReplicas: number;
|
||||
availableReplicas?: number;
|
||||
error?: string;
|
||||
}>('/api/k8s/workers');
|
||||
}
|
||||
|
||||
async scaleK8sWorkers(replicas: number) {
|
||||
return this.request<{ success: boolean; replicas: number; message?: string; error?: string }>(
|
||||
'/api/k8s/workers/scale',
|
||||
{ method: 'POST', body: JSON.stringify({ replicas }) }
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
export const api = new ApiClient(API_URL);
|
||||
|
||||
@@ -2,7 +2,7 @@ import { useEffect, useState, useRef } from 'react';
|
||||
import { Layout } from '../components/Layout';
|
||||
import { api } from '../lib/api';
|
||||
import { Toast } from '../components/Toast';
|
||||
import { Key, Plus, Copy, Check, X, Trash2, Power, PowerOff, Store, Globe, Shield, Clock, Eye, EyeOff, Search, ChevronDown, Pencil } from 'lucide-react';
|
||||
import { Key, Plus, Copy, Check, X, Trash2, Power, PowerOff, Store, Globe, Shield, Clock, Eye, EyeOff, Search, ChevronDown } from 'lucide-react';
|
||||
|
||||
interface ApiPermission {
|
||||
id: number;
|
||||
@@ -161,12 +161,6 @@ export function ApiPermissions() {
|
||||
allowed_ips: '',
|
||||
allowed_domains: '',
|
||||
});
|
||||
const [editingPermission, setEditingPermission] = useState<ApiPermission | null>(null);
|
||||
const [editForm, setEditForm] = useState({
|
||||
user_name: '',
|
||||
allowed_ips: '',
|
||||
allowed_domains: '',
|
||||
});
|
||||
const [notification, setNotification] = useState<{ message: string; type: 'success' | 'error' | 'info' } | null>(null);
|
||||
|
||||
useEffect(() => {
|
||||
@@ -246,33 +240,6 @@ export function ApiPermissions() {
|
||||
}
|
||||
};
|
||||
|
||||
const handleEdit = (perm: ApiPermission) => {
|
||||
setEditingPermission(perm);
|
||||
setEditForm({
|
||||
user_name: perm.user_name,
|
||||
allowed_ips: perm.allowed_ips || '',
|
||||
allowed_domains: perm.allowed_domains || '',
|
||||
});
|
||||
};
|
||||
|
||||
const handleSaveEdit = async (e: React.FormEvent) => {
|
||||
e.preventDefault();
|
||||
if (!editingPermission) return;
|
||||
|
||||
try {
|
||||
await api.updateApiPermission(editingPermission.id, {
|
||||
user_name: editForm.user_name,
|
||||
allowed_ips: editForm.allowed_ips || undefined,
|
||||
allowed_domains: editForm.allowed_domains || undefined,
|
||||
});
|
||||
setNotification({ message: 'API key updated successfully', type: 'success' });
|
||||
setEditingPermission(null);
|
||||
loadPermissions();
|
||||
} catch (error: any) {
|
||||
setNotification({ message: 'Failed to update permission: ' + error.message, type: 'error' });
|
||||
}
|
||||
};
|
||||
|
||||
const copyToClipboard = async (text: string, id: number) => {
|
||||
await navigator.clipboard.writeText(text);
|
||||
setCopiedId(id);
|
||||
@@ -527,36 +494,21 @@ export function ApiPermissions() {
|
||||
</button>
|
||||
</div>
|
||||
|
||||
{/* Allowed Domains - Always show */}
|
||||
<div className="mt-3 text-xs">
|
||||
<span className="text-gray-500 flex items-center gap-1">
|
||||
<Globe className="w-3 h-3" />
|
||||
Domains:{' '}
|
||||
{perm.allowed_domains ? (
|
||||
<span className="text-gray-700 font-mono">
|
||||
{perm.allowed_domains.split('\n').filter(d => d.trim()).join(', ')}
|
||||
</span>
|
||||
) : (
|
||||
<span className="text-amber-600">Any domain (no restriction)</span>
|
||||
{/* Restrictions */}
|
||||
{(perm.allowed_ips || perm.allowed_domains) && (
|
||||
<div className="flex gap-4 mt-3 text-xs text-gray-500">
|
||||
{perm.allowed_ips && (
|
||||
<span>IPs: {perm.allowed_ips.split('\n').length} allowed</span>
|
||||
)}
|
||||
</span>
|
||||
{perm.allowed_ips && (
|
||||
<span className="text-gray-500 ml-4">
|
||||
IPs: {perm.allowed_ips.split('\n').filter(ip => ip.trim()).length} allowed
|
||||
</span>
|
||||
)}
|
||||
</div>
|
||||
{perm.allowed_domains && (
|
||||
<span>Domains: {perm.allowed_domains.split('\n').length} allowed</span>
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
|
||||
{/* Actions */}
|
||||
<div className="flex items-center gap-2 ml-4">
|
||||
<button
|
||||
onClick={() => handleEdit(perm)}
|
||||
className="p-2 text-blue-600 hover:bg-blue-50 rounded-lg transition-colors"
|
||||
title="Edit"
|
||||
>
|
||||
<Pencil className="w-5 h-5" />
|
||||
</button>
|
||||
<button
|
||||
onClick={() => handleToggle(perm.id)}
|
||||
className={`p-2 rounded-lg transition-colors ${
|
||||
@@ -582,86 +534,6 @@ export function ApiPermissions() {
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
|
||||
{/* Edit Modal */}
|
||||
{editingPermission && (
|
||||
<div className="fixed inset-0 bg-black/50 flex items-center justify-center z-50">
|
||||
<div className="bg-white rounded-xl shadow-xl max-w-lg w-full mx-4 max-h-[90vh] overflow-y-auto">
|
||||
<div className="px-6 py-4 border-b border-gray-200">
|
||||
<h2 className="text-lg font-semibold text-gray-900 flex items-center gap-2">
|
||||
<Pencil className="w-5 h-5 text-blue-600" />
|
||||
Edit API Key
|
||||
</h2>
|
||||
<p className="text-sm text-gray-500 mt-1">
|
||||
{editingPermission.store_name}
|
||||
</p>
|
||||
</div>
|
||||
|
||||
<form onSubmit={handleSaveEdit} className="p-6 space-y-5">
|
||||
<div>
|
||||
<label className="block text-sm font-medium text-gray-700 mb-2">
|
||||
Label / Website Name
|
||||
</label>
|
||||
<input
|
||||
type="text"
|
||||
value={editForm.user_name}
|
||||
onChange={(e) => setEditForm({ ...editForm, user_name: e.target.value })}
|
||||
className="w-full px-4 py-2.5 border border-gray-300 rounded-lg focus:outline-none focus:ring-2 focus:ring-blue-500 focus:border-transparent"
|
||||
required
|
||||
/>
|
||||
</div>
|
||||
|
||||
<div>
|
||||
<label className="block text-sm font-medium text-gray-700 mb-2">
|
||||
<Globe className="w-4 h-4 inline mr-1" />
|
||||
Allowed Domains
|
||||
</label>
|
||||
<textarea
|
||||
value={editForm.allowed_domains}
|
||||
onChange={(e) => setEditForm({ ...editForm, allowed_domains: e.target.value })}
|
||||
rows={4}
|
||||
className="w-full px-4 py-2.5 border border-gray-300 rounded-lg focus:outline-none focus:ring-2 focus:ring-blue-500 focus:border-transparent font-mono text-sm"
|
||||
placeholder="example.com *.example.com subdomain.example.com"
|
||||
/>
|
||||
<p className="text-xs text-gray-500 mt-1">
|
||||
One domain per line. Use * for wildcards (e.g., *.example.com). Leave empty to allow any domain.
|
||||
</p>
|
||||
</div>
|
||||
|
||||
<div>
|
||||
<label className="block text-sm font-medium text-gray-700 mb-2">
|
||||
<Shield className="w-4 h-4 inline mr-1" />
|
||||
Allowed IP Addresses
|
||||
</label>
|
||||
<textarea
|
||||
value={editForm.allowed_ips}
|
||||
onChange={(e) => setEditForm({ ...editForm, allowed_ips: e.target.value })}
|
||||
rows={3}
|
||||
className="w-full px-4 py-2.5 border border-gray-300 rounded-lg focus:outline-none focus:ring-2 focus:ring-blue-500 focus:border-transparent font-mono text-sm"
|
||||
placeholder="192.168.1.1 10.0.0.0/8"
|
||||
/>
|
||||
<p className="text-xs text-gray-500 mt-1">One per line. CIDR notation supported. Leave empty to allow any IP.</p>
|
||||
</div>
|
||||
|
||||
<div className="flex gap-3 pt-2">
|
||||
<button
|
||||
type="submit"
|
||||
className="flex-1 px-5 py-2.5 bg-blue-600 text-white rounded-lg hover:bg-blue-700 transition-colors"
|
||||
>
|
||||
Save Changes
|
||||
</button>
|
||||
<button
|
||||
type="button"
|
||||
onClick={() => setEditingPermission(null)}
|
||||
className="px-5 py-2.5 bg-gray-100 text-gray-700 rounded-lg hover:bg-gray-200 transition-colors"
|
||||
>
|
||||
Cancel
|
||||
</button>
|
||||
</div>
|
||||
</form>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</Layout>
|
||||
);
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import { useEffect, useState } from 'react';
|
||||
import { Layout } from '../components/Layout';
|
||||
import { HealthPanel } from '../components/HealthPanel';
|
||||
import { api } from '../lib/api';
|
||||
import { useNavigate } from 'react-router-dom';
|
||||
import {
|
||||
@@ -41,6 +42,7 @@ export function Dashboard() {
|
||||
const [activity, setActivity] = useState<any>(null);
|
||||
const [nationalStats, setNationalStats] = useState<any>(null);
|
||||
const [loading, setLoading] = useState(true);
|
||||
const [refreshing, setRefreshing] = useState(false);
|
||||
const [pendingChangesCount, setPendingChangesCount] = useState(0);
|
||||
const [showNotification, setShowNotification] = useState(false);
|
||||
const [taskCounts, setTaskCounts] = useState<Record<string, number> | null>(null);
|
||||
@@ -91,7 +93,10 @@ export function Dashboard() {
|
||||
}
|
||||
};
|
||||
|
||||
const loadData = async () => {
|
||||
const loadData = async (isRefresh = false) => {
|
||||
if (isRefresh) {
|
||||
setRefreshing(true);
|
||||
}
|
||||
try {
|
||||
// Fetch dashboard data (primary data source)
|
||||
const dashboard = await api.getMarketDashboard();
|
||||
@@ -153,6 +158,7 @@ export function Dashboard() {
|
||||
console.error('Failed to load dashboard:', error);
|
||||
} finally {
|
||||
setLoading(false);
|
||||
setRefreshing(false);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -265,11 +271,24 @@ export function Dashboard() {
|
||||
|
||||
<div className="space-y-8">
|
||||
{/* Header */}
|
||||
<div>
|
||||
<h1 className="text-xl sm:text-2xl font-semibold text-gray-900">Dashboard</h1>
|
||||
<p className="text-sm text-gray-500 mt-1">Monitor your dispensary data aggregation</p>
|
||||
<div className="flex flex-col sm:flex-row sm:justify-between sm:items-center gap-4">
|
||||
<div>
|
||||
<h1 className="text-xl sm:text-2xl font-semibold text-gray-900">Dashboard</h1>
|
||||
<p className="text-sm text-gray-500 mt-1">Monitor your dispensary data aggregation</p>
|
||||
</div>
|
||||
<button
|
||||
onClick={() => loadData(true)}
|
||||
disabled={refreshing}
|
||||
className="inline-flex items-center justify-center gap-2 px-4 py-2 bg-white border border-gray-200 rounded-lg hover:bg-gray-50 transition-colors text-sm font-medium text-gray-700 self-start sm:self-auto disabled:opacity-50 disabled:cursor-not-allowed"
|
||||
>
|
||||
<RefreshCw className={`w-4 h-4 ${refreshing ? 'animate-spin' : ''}`} />
|
||||
{refreshing ? 'Refreshing...' : 'Refresh'}
|
||||
</button>
|
||||
</div>
|
||||
|
||||
{/* System Health */}
|
||||
<HealthPanel showQueues={false} refreshInterval={60000} />
|
||||
|
||||
{/* Stats Grid */}
|
||||
<div className="grid grid-cols-2 lg:grid-cols-3 gap-3 sm:gap-6">
|
||||
{/* Products */}
|
||||
|
||||
@@ -161,6 +161,23 @@ export function Dispensaries() {
|
||||
))}
|
||||
</select>
|
||||
</div>
|
||||
<div>
|
||||
<label className="block text-sm font-medium text-gray-700 mb-2">
|
||||
Filter by Status
|
||||
</label>
|
||||
<select
|
||||
value={filterStatus}
|
||||
onChange={(e) => handleStatusFilter(e.target.value)}
|
||||
className={`w-full px-3 py-2 border rounded-lg focus:ring-2 focus:ring-blue-500 focus:border-blue-500 ${
|
||||
filterStatus === 'dropped' ? 'border-red-300 bg-red-50' : 'border-gray-300'
|
||||
}`}
|
||||
>
|
||||
<option value="">All Statuses</option>
|
||||
<option value="open">Open</option>
|
||||
<option value="dropped">Dropped (Needs Review)</option>
|
||||
<option value="closed">Closed</option>
|
||||
</select>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
@@ -204,6 +204,47 @@ export function DispensaryDetail() {
|
||||
Back to Dispensaries
|
||||
</button>
|
||||
|
||||
{/* Update Dropdown */}
|
||||
<div className="relative">
|
||||
<button
|
||||
onClick={() => setShowUpdateDropdown(!showUpdateDropdown)}
|
||||
disabled={isUpdating}
|
||||
className="flex items-center gap-2 px-4 py-2 text-sm font-medium text-white bg-blue-600 hover:bg-blue-700 rounded-lg disabled:opacity-50 disabled:cursor-not-allowed"
|
||||
>
|
||||
<RefreshCw className={`w-4 h-4 ${isUpdating ? 'animate-spin' : ''}`} />
|
||||
{isUpdating ? 'Updating...' : 'Update'}
|
||||
{!isUpdating && <ChevronDown className="w-4 h-4" />}
|
||||
</button>
|
||||
|
||||
{showUpdateDropdown && !isUpdating && (
|
||||
<div className="absolute right-0 mt-2 w-48 bg-white rounded-lg shadow-lg border border-gray-200 z-10">
|
||||
<button
|
||||
onClick={() => handleUpdate('products')}
|
||||
className="w-full text-left px-4 py-2 text-sm text-gray-700 hover:bg-gray-100 rounded-t-lg"
|
||||
>
|
||||
Products
|
||||
</button>
|
||||
<button
|
||||
onClick={() => handleUpdate('brands')}
|
||||
className="w-full text-left px-4 py-2 text-sm text-gray-700 hover:bg-gray-100"
|
||||
>
|
||||
Brands
|
||||
</button>
|
||||
<button
|
||||
onClick={() => handleUpdate('specials')}
|
||||
className="w-full text-left px-4 py-2 text-sm text-gray-700 hover:bg-gray-100"
|
||||
>
|
||||
Specials
|
||||
</button>
|
||||
<button
|
||||
onClick={() => handleUpdate('all')}
|
||||
className="w-full text-left px-4 py-2 text-sm text-gray-700 hover:bg-gray-100 rounded-b-lg border-t border-gray-200"
|
||||
>
|
||||
All
|
||||
</button>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Dispensary Header */}
|
||||
@@ -225,7 +266,7 @@ export function DispensaryDetail() {
|
||||
<div className="flex items-center gap-2 text-sm text-gray-600 bg-gray-50 px-4 py-2 rounded-lg">
|
||||
<Calendar className="w-4 h-4" />
|
||||
<div>
|
||||
<span className="font-medium">Last Updated:</span>
|
||||
<span className="font-medium">Last Crawl Date:</span>
|
||||
<span className="ml-2">
|
||||
{dispensary.last_menu_scrape
|
||||
? new Date(dispensary.last_menu_scrape).toLocaleDateString('en-US', {
|
||||
@@ -290,7 +331,7 @@ export function DispensaryDetail() {
|
||||
</a>
|
||||
)}
|
||||
<Link
|
||||
to={`/dispensaries/${state}/${city}/${slug}/schedule`}
|
||||
to="/schedule"
|
||||
className="flex items-center gap-2 text-sm text-blue-600 hover:text-blue-800"
|
||||
>
|
||||
<Clock className="w-4 h-4" />
|
||||
@@ -492,31 +533,57 @@ export function DispensaryDetail() {
|
||||
`$${product.regular_price}`
|
||||
) : '-'}
|
||||
</td>
|
||||
<td className="text-center whitespace-nowrap text-sm text-gray-700">
|
||||
{product.quantity != null ? product.quantity : '-'}
|
||||
<td className="text-center whitespace-nowrap">
|
||||
{product.quantity != null ? (
|
||||
<span className={`badge badge-sm ${product.quantity > 0 ? 'badge-info' : 'badge-error'}`}>
|
||||
{product.quantity}
|
||||
</span>
|
||||
) : '-'}
|
||||
</td>
|
||||
<td className="text-center whitespace-nowrap text-sm text-gray-700">
|
||||
{product.thc_percentage ? `${product.thc_percentage}%` : '-'}
|
||||
<td className="text-center whitespace-nowrap">
|
||||
{product.thc_percentage ? (
|
||||
<span className="badge badge-success badge-sm">{product.thc_percentage}%</span>
|
||||
) : '-'}
|
||||
</td>
|
||||
<td className="text-center whitespace-nowrap text-sm text-gray-700">
|
||||
{product.cbd_percentage ? `${product.cbd_percentage}%` : '-'}
|
||||
<td className="text-center whitespace-nowrap">
|
||||
{product.cbd_percentage ? (
|
||||
<span className="badge badge-info badge-sm">{product.cbd_percentage}%</span>
|
||||
) : '-'}
|
||||
</td>
|
||||
<td className="text-center whitespace-nowrap text-sm text-gray-700">
|
||||
{product.strain_type || '-'}
|
||||
<td className="text-center whitespace-nowrap">
|
||||
{product.strain_type ? (
|
||||
<span className="badge badge-ghost badge-sm">{product.strain_type}</span>
|
||||
) : '-'}
|
||||
</td>
|
||||
<td className="text-center whitespace-nowrap text-sm text-gray-700">
|
||||
{product.in_stock ? 'Yes' : product.in_stock === false ? 'No' : '-'}
|
||||
<td className="text-center whitespace-nowrap">
|
||||
{product.in_stock ? (
|
||||
<span className="badge badge-success badge-sm">Yes</span>
|
||||
) : product.in_stock === false ? (
|
||||
<span className="badge badge-error badge-sm">No</span>
|
||||
) : '-'}
|
||||
</td>
|
||||
<td className="whitespace-nowrap text-xs text-gray-500">
|
||||
{product.updated_at ? formatDate(product.updated_at) : '-'}
|
||||
</td>
|
||||
<td>
|
||||
<button
|
||||
onClick={() => navigate(`/products/${product.id}`)}
|
||||
className="btn btn-xs btn-ghost text-gray-500 hover:text-gray-700"
|
||||
>
|
||||
Details
|
||||
</button>
|
||||
<div className="flex gap-1">
|
||||
{product.dutchie_url && (
|
||||
<a
|
||||
href={product.dutchie_url}
|
||||
target="_blank"
|
||||
rel="noopener noreferrer"
|
||||
className="btn btn-xs btn-outline"
|
||||
>
|
||||
Dutchie
|
||||
</a>
|
||||
)}
|
||||
<button
|
||||
onClick={() => navigate(`/products/${product.id}`)}
|
||||
className="btn btn-xs btn-primary"
|
||||
>
|
||||
Details
|
||||
</button>
|
||||
</div>
|
||||
</td>
|
||||
</tr>
|
||||
))}
|
||||
|
||||
@@ -1,378 +0,0 @@
|
||||
import { useEffect, useState } from 'react';
|
||||
import { useParams, useNavigate, Link } from 'react-router-dom';
|
||||
import { Layout } from '../components/Layout';
|
||||
import { api } from '../lib/api';
|
||||
import {
|
||||
ArrowLeft,
|
||||
Clock,
|
||||
Calendar,
|
||||
CheckCircle,
|
||||
XCircle,
|
||||
AlertCircle,
|
||||
Package,
|
||||
Timer,
|
||||
Building2,
|
||||
} from 'lucide-react';
|
||||
|
||||
interface CrawlHistoryItem {
|
||||
id: number;
|
||||
runId: string | null;
|
||||
profileKey: string | null;
|
||||
crawlerModule: string | null;
|
||||
stateAtStart: string | null;
|
||||
stateAtEnd: string | null;
|
||||
totalSteps: number;
|
||||
durationMs: number | null;
|
||||
success: boolean;
|
||||
errorMessage: string | null;
|
||||
productsFound: number | null;
|
||||
startedAt: string | null;
|
||||
completedAt: string | null;
|
||||
}
|
||||
|
||||
interface NextSchedule {
|
||||
scheduleId: number;
|
||||
jobName: string;
|
||||
enabled: boolean;
|
||||
baseIntervalMinutes: number;
|
||||
jitterMinutes: number;
|
||||
nextRunAt: string | null;
|
||||
lastRunAt: string | null;
|
||||
lastStatus: string | null;
|
||||
}
|
||||
|
||||
interface Dispensary {
|
||||
id: number;
|
||||
name: string;
|
||||
dba_name: string | null;
|
||||
slug: string;
|
||||
state: string;
|
||||
city: string;
|
||||
menu_type: string | null;
|
||||
platform_dispensary_id: string | null;
|
||||
last_menu_scrape: string | null;
|
||||
}
|
||||
|
||||
export function DispensarySchedule() {
|
||||
const { state, city, slug } = useParams();
|
||||
const navigate = useNavigate();
|
||||
const [dispensary, setDispensary] = useState<Dispensary | null>(null);
|
||||
const [history, setHistory] = useState<CrawlHistoryItem[]>([]);
|
||||
const [nextSchedule, setNextSchedule] = useState<NextSchedule | null>(null);
|
||||
const [loading, setLoading] = useState(true);
|
||||
|
||||
useEffect(() => {
|
||||
loadScheduleData();
|
||||
}, [slug]);
|
||||
|
||||
const loadScheduleData = async () => {
|
||||
setLoading(true);
|
||||
try {
|
||||
// First get the dispensary to get the ID
|
||||
const dispData = await api.getDispensary(slug!);
|
||||
if (dispData?.id) {
|
||||
const data = await api.getStoreCrawlHistory(dispData.id);
|
||||
setDispensary(data.dispensary);
|
||||
setHistory(data.history || []);
|
||||
setNextSchedule(data.nextSchedule);
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('Failed to load schedule data:', error);
|
||||
} finally {
|
||||
setLoading(false);
|
||||
}
|
||||
};
|
||||
|
||||
const formatDate = (dateStr: string | null) => {
|
||||
if (!dateStr) return 'Never';
|
||||
const date = new Date(dateStr);
|
||||
return date.toLocaleDateString('en-US', {
|
||||
year: 'numeric',
|
||||
month: 'short',
|
||||
day: 'numeric',
|
||||
hour: '2-digit',
|
||||
minute: '2-digit',
|
||||
});
|
||||
};
|
||||
|
||||
const formatTimeAgo = (dateStr: string | null) => {
|
||||
if (!dateStr) return 'Never';
|
||||
const date = new Date(dateStr);
|
||||
const now = new Date();
|
||||
const diffMs = now.getTime() - date.getTime();
|
||||
const diffMinutes = Math.floor(diffMs / (1000 * 60));
|
||||
const diffHours = Math.floor(diffMs / (1000 * 60 * 60));
|
||||
const diffDays = Math.floor(diffMs / (1000 * 60 * 60 * 24));
|
||||
|
||||
if (diffMinutes < 1) return 'Just now';
|
||||
if (diffMinutes < 60) return `${diffMinutes}m ago`;
|
||||
if (diffHours < 24) return `${diffHours}h ago`;
|
||||
if (diffDays === 1) return 'Yesterday';
|
||||
if (diffDays < 7) return `${diffDays} days ago`;
|
||||
return date.toLocaleDateString();
|
||||
};
|
||||
|
||||
const formatTimeUntil = (dateStr: string | null) => {
|
||||
if (!dateStr) return 'Not scheduled';
|
||||
const date = new Date(dateStr);
|
||||
const now = new Date();
|
||||
const diffMs = date.getTime() - now.getTime();
|
||||
|
||||
if (diffMs < 0) return 'Overdue';
|
||||
|
||||
const diffMinutes = Math.floor(diffMs / (1000 * 60));
|
||||
const diffHours = Math.floor(diffMinutes / 60);
|
||||
|
||||
if (diffMinutes < 60) return `in ${diffMinutes}m`;
|
||||
return `in ${diffHours}h ${diffMinutes % 60}m`;
|
||||
};
|
||||
|
||||
const formatDuration = (ms: number | null) => {
|
||||
if (!ms) return '-';
|
||||
if (ms < 1000) return `${ms}ms`;
|
||||
const seconds = Math.floor(ms / 1000);
|
||||
const minutes = Math.floor(seconds / 60);
|
||||
if (minutes < 1) return `${seconds}s`;
|
||||
return `${minutes}m ${seconds % 60}s`;
|
||||
};
|
||||
|
||||
const formatInterval = (baseMinutes: number, jitterMinutes: number) => {
|
||||
const hours = Math.floor(baseMinutes / 60);
|
||||
const mins = baseMinutes % 60;
|
||||
let base = hours > 0 ? `${hours}h` : '';
|
||||
if (mins > 0) base += `${mins}m`;
|
||||
return `Every ${base} (+/- ${jitterMinutes}m jitter)`;
|
||||
};
|
||||
|
||||
if (loading) {
|
||||
return (
|
||||
<Layout>
|
||||
<div className="text-center py-12">
|
||||
<div className="inline-block animate-spin rounded-full h-8 w-8 border-4 border-gray-400 border-t-transparent"></div>
|
||||
<p className="mt-2 text-sm text-gray-600">Loading schedule...</p>
|
||||
</div>
|
||||
</Layout>
|
||||
);
|
||||
}
|
||||
|
||||
if (!dispensary) {
|
||||
return (
|
||||
<Layout>
|
||||
<div className="text-center py-12">
|
||||
<p className="text-gray-600">Dispensary not found</p>
|
||||
</div>
|
||||
</Layout>
|
||||
);
|
||||
}
|
||||
|
||||
// Stats from history
|
||||
const successCount = history.filter(h => h.success).length;
|
||||
const failureCount = history.filter(h => !h.success).length;
|
||||
const lastSuccess = history.find(h => h.success);
|
||||
const avgDuration = history.length > 0
|
||||
? Math.round(history.reduce((sum, h) => sum + (h.durationMs || 0), 0) / history.length)
|
||||
: 0;
|
||||
|
||||
return (
|
||||
<Layout>
|
||||
<div className="space-y-6">
|
||||
{/* Header */}
|
||||
<div className="flex items-center justify-between gap-4">
|
||||
<button
|
||||
onClick={() => navigate(`/dispensaries/${state}/${city}/${slug}`)}
|
||||
className="flex items-center gap-2 text-sm text-gray-600 hover:text-gray-900"
|
||||
>
|
||||
<ArrowLeft className="w-4 h-4" />
|
||||
Back to {dispensary.dba_name || dispensary.name}
|
||||
</button>
|
||||
</div>
|
||||
|
||||
{/* Dispensary Info */}
|
||||
<div className="bg-white rounded-lg border border-gray-200 p-6">
|
||||
<div className="flex items-start gap-4">
|
||||
<div className="p-3 bg-blue-50 rounded-lg">
|
||||
<Building2 className="w-8 h-8 text-blue-600" />
|
||||
</div>
|
||||
<div>
|
||||
<h1 className="text-2xl font-bold text-gray-900">
|
||||
{dispensary.dba_name || dispensary.name}
|
||||
</h1>
|
||||
<p className="text-sm text-gray-600 mt-1">
|
||||
{dispensary.city}, {dispensary.state} - Crawl Schedule & History
|
||||
</p>
|
||||
<div className="flex items-center gap-4 mt-2 text-sm text-gray-500">
|
||||
<span>Slug: {dispensary.slug}</span>
|
||||
{dispensary.menu_type && (
|
||||
<span className="px-2 py-0.5 bg-gray-100 rounded text-xs">
|
||||
{dispensary.menu_type}
|
||||
</span>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Next Scheduled Crawl */}
|
||||
{nextSchedule && (
|
||||
<div className="bg-white rounded-lg border border-gray-200 p-6">
|
||||
<h2 className="text-lg font-semibold text-gray-900 mb-4 flex items-center gap-2">
|
||||
<Clock className="w-5 h-5 text-blue-500" />
|
||||
Upcoming Schedule
|
||||
</h2>
|
||||
<div className="grid grid-cols-4 gap-6">
|
||||
<div>
|
||||
<p className="text-sm text-gray-500">Next Run</p>
|
||||
<p className="text-xl font-semibold text-blue-600">
|
||||
{formatTimeUntil(nextSchedule.nextRunAt)}
|
||||
</p>
|
||||
<p className="text-xs text-gray-400">
|
||||
{formatDate(nextSchedule.nextRunAt)}
|
||||
</p>
|
||||
</div>
|
||||
<div>
|
||||
<p className="text-sm text-gray-500">Interval</p>
|
||||
<p className="text-lg font-medium">
|
||||
{formatInterval(nextSchedule.baseIntervalMinutes, nextSchedule.jitterMinutes)}
|
||||
</p>
|
||||
</div>
|
||||
<div>
|
||||
<p className="text-sm text-gray-500">Last Run</p>
|
||||
<p className="text-lg font-medium">
|
||||
{formatTimeAgo(nextSchedule.lastRunAt)}
|
||||
</p>
|
||||
</div>
|
||||
<div>
|
||||
<p className="text-sm text-gray-500">Last Status</p>
|
||||
<p className={`text-lg font-medium ${
|
||||
nextSchedule.lastStatus === 'success' ? 'text-green-600' :
|
||||
nextSchedule.lastStatus === 'error' ? 'text-red-600' : 'text-gray-600'
|
||||
}`}>
|
||||
{nextSchedule.lastStatus || '-'}
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Stats Summary */}
|
||||
<div className="grid grid-cols-4 gap-4">
|
||||
<div className="bg-white rounded-lg border border-gray-200 p-4">
|
||||
<div className="flex items-center gap-3">
|
||||
<CheckCircle className="w-8 h-8 text-green-500" />
|
||||
<div>
|
||||
<p className="text-sm text-gray-500">Successful Runs</p>
|
||||
<p className="text-2xl font-bold text-green-600">{successCount}</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div className="bg-white rounded-lg border border-gray-200 p-4">
|
||||
<div className="flex items-center gap-3">
|
||||
<XCircle className="w-8 h-8 text-red-500" />
|
||||
<div>
|
||||
<p className="text-sm text-gray-500">Failed Runs</p>
|
||||
<p className="text-2xl font-bold text-red-600">{failureCount}</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div className="bg-white rounded-lg border border-gray-200 p-4">
|
||||
<div className="flex items-center gap-3">
|
||||
<Timer className="w-8 h-8 text-blue-500" />
|
||||
<div>
|
||||
<p className="text-sm text-gray-500">Avg Duration</p>
|
||||
<p className="text-2xl font-bold">{formatDuration(avgDuration)}</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div className="bg-white rounded-lg border border-gray-200 p-4">
|
||||
<div className="flex items-center gap-3">
|
||||
<Package className="w-8 h-8 text-purple-500" />
|
||||
<div>
|
||||
<p className="text-sm text-gray-500">Last Products Found</p>
|
||||
<p className="text-2xl font-bold">
|
||||
{lastSuccess?.productsFound?.toLocaleString() || '-'}
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Crawl History Table */}
|
||||
<div className="bg-white rounded-lg border border-gray-200">
|
||||
<div className="p-4 border-b border-gray-200">
|
||||
<h2 className="text-lg font-semibold text-gray-900 flex items-center gap-2">
|
||||
<Calendar className="w-5 h-5 text-gray-500" />
|
||||
Crawl History
|
||||
</h2>
|
||||
</div>
|
||||
<div className="overflow-x-auto">
|
||||
<table className="table table-sm w-full">
|
||||
<thead className="bg-gray-50">
|
||||
<tr>
|
||||
<th>Status</th>
|
||||
<th>Started</th>
|
||||
<th>Duration</th>
|
||||
<th className="text-right">Products</th>
|
||||
<th>State</th>
|
||||
<th>Error</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{history.length === 0 ? (
|
||||
<tr>
|
||||
<td colSpan={6} className="text-center py-8 text-gray-500">
|
||||
No crawl history available
|
||||
</td>
|
||||
</tr>
|
||||
) : (
|
||||
history.map((item) => (
|
||||
<tr key={item.id} className="hover:bg-gray-50">
|
||||
<td>
|
||||
<span className={`inline-flex items-center gap-1 px-2 py-1 rounded text-xs font-medium ${
|
||||
item.success
|
||||
? 'bg-green-100 text-green-700'
|
||||
: 'bg-red-100 text-red-700'
|
||||
}`}>
|
||||
{item.success ? (
|
||||
<CheckCircle className="w-3 h-3" />
|
||||
) : (
|
||||
<XCircle className="w-3 h-3" />
|
||||
)}
|
||||
{item.success ? 'Success' : 'Failed'}
|
||||
</span>
|
||||
</td>
|
||||
<td>
|
||||
<div className="text-sm">{formatDate(item.startedAt)}</div>
|
||||
<div className="text-xs text-gray-400">{formatTimeAgo(item.startedAt)}</div>
|
||||
</td>
|
||||
<td className="font-mono text-sm">
|
||||
{formatDuration(item.durationMs)}
|
||||
</td>
|
||||
<td className="text-right font-mono text-sm">
|
||||
{item.productsFound?.toLocaleString() || '-'}
|
||||
</td>
|
||||
<td className="text-sm text-gray-600">
|
||||
{item.stateAtEnd || item.stateAtStart || '-'}
|
||||
</td>
|
||||
<td className="max-w-[200px]">
|
||||
{item.errorMessage ? (
|
||||
<span
|
||||
className="text-xs text-red-600 truncate block cursor-help"
|
||||
title={item.errorMessage}
|
||||
>
|
||||
{item.errorMessage.substring(0, 50)}...
|
||||
</span>
|
||||
) : '-'}
|
||||
</td>
|
||||
</tr>
|
||||
))
|
||||
)}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</Layout>
|
||||
);
|
||||
}
|
||||
|
||||
export default DispensarySchedule;
|
||||
@@ -3,16 +3,15 @@ import { useNavigate } from 'react-router-dom';
|
||||
import { Layout } from '../components/Layout';
|
||||
import { api } from '../lib/api';
|
||||
import { trackProductClick } from '../lib/analytics';
|
||||
import { useStateFilter } from '../hooks/useStateFilter';
|
||||
import {
|
||||
Building2,
|
||||
MapPin,
|
||||
Package,
|
||||
DollarSign,
|
||||
RefreshCw,
|
||||
Search,
|
||||
TrendingUp,
|
||||
BarChart3,
|
||||
ChevronDown,
|
||||
} from 'lucide-react';
|
||||
|
||||
interface BrandData {
|
||||
@@ -26,28 +25,19 @@ interface BrandData {
|
||||
|
||||
export function IntelligenceBrands() {
|
||||
const navigate = useNavigate();
|
||||
const { selectedState, setSelectedState, stateParam, stateLabel, isAllStates } = useStateFilter();
|
||||
const [availableStates, setAvailableStates] = useState<string[]>([]);
|
||||
const [brands, setBrands] = useState<BrandData[]>([]);
|
||||
const [loading, setLoading] = useState(true);
|
||||
const [searchTerm, setSearchTerm] = useState('');
|
||||
const [sortBy, setSortBy] = useState<'stores' | 'skus' | 'name' | 'states'>('stores');
|
||||
const [sortBy, setSortBy] = useState<'stores' | 'skus' | 'name'>('stores');
|
||||
|
||||
useEffect(() => {
|
||||
loadBrands();
|
||||
}, [stateParam]);
|
||||
|
||||
useEffect(() => {
|
||||
// Load available states
|
||||
api.getOrchestratorStates().then(data => {
|
||||
setAvailableStates(data.states?.map((s: any) => s.state) || []);
|
||||
}).catch(console.error);
|
||||
}, []);
|
||||
|
||||
const loadBrands = async () => {
|
||||
try {
|
||||
setLoading(true);
|
||||
const data = await api.getIntelligenceBrands({ limit: 500, state: stateParam });
|
||||
const data = await api.getIntelligenceBrands({ limit: 500 });
|
||||
setBrands(data.brands || []);
|
||||
} catch (error) {
|
||||
console.error('Failed to load brands:', error);
|
||||
@@ -68,8 +58,6 @@ export function IntelligenceBrands() {
|
||||
return b.skuCount - a.skuCount;
|
||||
case 'name':
|
||||
return a.brandName.localeCompare(b.brandName);
|
||||
case 'states':
|
||||
return b.states.length - a.states.length;
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
@@ -101,60 +89,35 @@ export function IntelligenceBrands() {
|
||||
<Layout>
|
||||
<div className="space-y-6">
|
||||
{/* Header */}
|
||||
<div className="flex flex-col gap-4 sm:flex-row sm:items-center sm:justify-between">
|
||||
<div className="flex items-center justify-between">
|
||||
<div>
|
||||
<h1 className="text-2xl font-bold text-gray-900">Brands Intelligence</h1>
|
||||
<p className="text-sm text-gray-600 mt-1">
|
||||
Brand penetration and pricing analytics across markets
|
||||
</p>
|
||||
</div>
|
||||
<div className="flex flex-wrap gap-2 items-center">
|
||||
{/* State Selector */}
|
||||
<div className="dropdown dropdown-end">
|
||||
<button tabIndex={0} className="btn btn-sm gap-2 bg-emerald-50 border-emerald-200 hover:bg-emerald-100">
|
||||
{stateLabel}
|
||||
<ChevronDown className="w-4 h-4" />
|
||||
</button>
|
||||
<ul tabIndex={0} className="dropdown-content z-50 menu p-2 shadow-lg bg-white rounded-box w-44 max-h-60 overflow-y-auto border border-gray-200">
|
||||
<li>
|
||||
<a onClick={() => setSelectedState(null)} className={isAllStates ? 'active bg-emerald-100' : ''}>
|
||||
All States
|
||||
</a>
|
||||
</li>
|
||||
<div className="divider my-1"></div>
|
||||
{availableStates.map((state) => (
|
||||
<li key={state}>
|
||||
<a onClick={() => setSelectedState(state)} className={selectedState === state ? 'active bg-emerald-100' : ''}>
|
||||
{state}
|
||||
</a>
|
||||
</li>
|
||||
))}
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
{/* Page Navigation */}
|
||||
<div className="flex gap-1">
|
||||
<button
|
||||
className="btn btn-sm gap-1 bg-emerald-600 text-white hover:bg-emerald-700 border-emerald-600"
|
||||
>
|
||||
<Building2 className="w-4 h-4" />
|
||||
<span>Brands</span>
|
||||
</button>
|
||||
<button
|
||||
onClick={() => navigate('/admin/intelligence/stores')}
|
||||
className="btn btn-sm gap-1 bg-white border-gray-300 text-gray-700 hover:bg-gray-100"
|
||||
>
|
||||
<MapPin className="w-4 h-4" />
|
||||
<span>Stores</span>
|
||||
</button>
|
||||
<button
|
||||
onClick={() => navigate('/admin/intelligence/pricing')}
|
||||
className="btn btn-sm gap-1 bg-white border-gray-300 text-gray-700 hover:bg-gray-100"
|
||||
>
|
||||
<DollarSign className="w-4 h-4" />
|
||||
<span>Pricing</span>
|
||||
</button>
|
||||
</div>
|
||||
<div className="flex gap-2">
|
||||
<button
|
||||
onClick={() => navigate('/admin/intelligence/pricing')}
|
||||
className="btn btn-sm btn-outline gap-1"
|
||||
>
|
||||
<DollarSign className="w-4 h-4" />
|
||||
Pricing
|
||||
</button>
|
||||
<button
|
||||
onClick={() => navigate('/admin/intelligence/stores')}
|
||||
className="btn btn-sm btn-outline gap-1"
|
||||
>
|
||||
<MapPin className="w-4 h-4" />
|
||||
Stores
|
||||
</button>
|
||||
<button
|
||||
onClick={loadBrands}
|
||||
className="btn btn-sm btn-outline gap-2"
|
||||
>
|
||||
<RefreshCw className="w-4 h-4" />
|
||||
Refresh
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -206,32 +169,28 @@ export function IntelligenceBrands() {
|
||||
|
||||
{/* Top Brands Chart */}
|
||||
<div className="bg-white rounded-lg border border-gray-200 p-4">
|
||||
<h3 className="text-lg font-semibold text-gray-900 flex items-center gap-2 mb-4">
|
||||
<BarChart3 className="w-5 h-5 text-emerald-500" />
|
||||
<h3 className="text-lg font-semibold text-gray-900 mb-4 flex items-center gap-2">
|
||||
<BarChart3 className="w-5 h-5 text-blue-500" />
|
||||
Top 10 Brands by Store Count
|
||||
</h3>
|
||||
<div className="space-y-2">
|
||||
{topBrands.map((brand) => {
|
||||
const barWidth = Math.min((brand.storeCount / maxStoreCount) * 100, 100);
|
||||
return (
|
||||
<div key={brand.brandName} className="flex items-center gap-3">
|
||||
<span className="text-sm font-medium w-28 truncate shrink-0" title={brand.brandName}>
|
||||
{brand.brandName}
|
||||
</span>
|
||||
<div className="flex-1 min-w-0">
|
||||
<div className="bg-gray-100 rounded h-5 overflow-hidden">
|
||||
<div
|
||||
className="bg-gradient-to-r from-emerald-400 to-emerald-500 h-5 rounded transition-all"
|
||||
style={{ width: `${barWidth}%` }}
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
<span className="text-sm font-mono font-semibold text-emerald-600 w-16 text-right shrink-0">
|
||||
{brand.storeCount}
|
||||
</span>
|
||||
{topBrands.map((brand, idx) => (
|
||||
<div key={brand.brandName} className="flex items-center gap-3">
|
||||
<span className="text-sm text-gray-500 w-6">{idx + 1}.</span>
|
||||
<span className="text-sm font-medium w-40 truncate" title={brand.brandName}>
|
||||
{brand.brandName}
|
||||
</span>
|
||||
<div className="flex-1 bg-gray-100 rounded-full h-4 relative">
|
||||
<div
|
||||
className="bg-blue-500 rounded-full h-4"
|
||||
style={{ width: `${(brand.storeCount / maxStoreCount) * 100}%` }}
|
||||
/>
|
||||
</div>
|
||||
);
|
||||
})}
|
||||
<span className="text-sm text-gray-600 w-16 text-right">
|
||||
{brand.storeCount} stores
|
||||
</span>
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -254,7 +213,6 @@ export function IntelligenceBrands() {
|
||||
>
|
||||
<option value="stores">Sort by Stores</option>
|
||||
<option value="skus">Sort by SKUs</option>
|
||||
<option value="states">Sort by States</option>
|
||||
<option value="name">Sort by Name</option>
|
||||
</select>
|
||||
<span className="text-sm text-gray-500">
|
||||
|
||||
@@ -2,16 +2,15 @@ import { useEffect, useState } from 'react';
|
||||
import { useNavigate } from 'react-router-dom';
|
||||
import { Layout } from '../components/Layout';
|
||||
import { api } from '../lib/api';
|
||||
import { useStateFilter } from '../hooks/useStateFilter';
|
||||
import {
|
||||
DollarSign,
|
||||
Building2,
|
||||
MapPin,
|
||||
Package,
|
||||
RefreshCw,
|
||||
TrendingUp,
|
||||
TrendingDown,
|
||||
BarChart3,
|
||||
ChevronDown,
|
||||
} from 'lucide-react';
|
||||
|
||||
interface CategoryPricing {
|
||||
@@ -32,27 +31,18 @@ interface OverallPricing {
|
||||
|
||||
export function IntelligencePricing() {
|
||||
const navigate = useNavigate();
|
||||
const { selectedState, setSelectedState, stateParam, stateLabel, isAllStates } = useStateFilter();
|
||||
const [availableStates, setAvailableStates] = useState<string[]>([]);
|
||||
const [categories, setCategories] = useState<CategoryPricing[]>([]);
|
||||
const [overall, setOverall] = useState<OverallPricing | null>(null);
|
||||
const [loading, setLoading] = useState(true);
|
||||
|
||||
useEffect(() => {
|
||||
loadPricing();
|
||||
}, [stateParam]);
|
||||
|
||||
useEffect(() => {
|
||||
// Load available states
|
||||
api.getOrchestratorStates().then(data => {
|
||||
setAvailableStates(data.states?.map((s: any) => s.state) || []);
|
||||
}).catch(console.error);
|
||||
}, []);
|
||||
|
||||
const loadPricing = async () => {
|
||||
try {
|
||||
setLoading(true);
|
||||
const data = await api.getIntelligencePricing({ state: stateParam });
|
||||
const data = await api.getIntelligencePricing();
|
||||
setCategories(data.byCategory || []);
|
||||
setOverall(data.overall || null);
|
||||
} catch (error) {
|
||||
@@ -86,60 +76,35 @@ export function IntelligencePricing() {
|
||||
<Layout>
|
||||
<div className="space-y-6">
|
||||
{/* Header */}
|
||||
<div className="flex flex-col gap-4 sm:flex-row sm:items-center sm:justify-between">
|
||||
<div className="flex items-center justify-between">
|
||||
<div>
|
||||
<h1 className="text-2xl font-bold text-gray-900">Pricing Intelligence</h1>
|
||||
<p className="text-sm text-gray-600 mt-1">
|
||||
Price distribution and trends by category
|
||||
</p>
|
||||
</div>
|
||||
<div className="flex flex-wrap gap-2 items-center">
|
||||
{/* State Selector */}
|
||||
<div className="dropdown dropdown-end">
|
||||
<button tabIndex={0} className="btn btn-sm gap-2 bg-emerald-50 border-emerald-200 hover:bg-emerald-100">
|
||||
{stateLabel}
|
||||
<ChevronDown className="w-4 h-4" />
|
||||
</button>
|
||||
<ul tabIndex={0} className="dropdown-content z-50 menu p-2 shadow-lg bg-white rounded-box w-44 max-h-60 overflow-y-auto border border-gray-200">
|
||||
<li>
|
||||
<a onClick={() => setSelectedState(null)} className={isAllStates ? 'active bg-emerald-100' : ''}>
|
||||
All States
|
||||
</a>
|
||||
</li>
|
||||
<div className="divider my-1"></div>
|
||||
{availableStates.map((state) => (
|
||||
<li key={state}>
|
||||
<a onClick={() => setSelectedState(state)} className={selectedState === state ? 'active bg-emerald-100' : ''}>
|
||||
{state}
|
||||
</a>
|
||||
</li>
|
||||
))}
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
{/* Page Navigation */}
|
||||
<div className="flex gap-1">
|
||||
<button
|
||||
onClick={() => navigate('/admin/intelligence/brands')}
|
||||
className="btn btn-sm gap-1 bg-white border-gray-300 text-gray-700 hover:bg-gray-100"
|
||||
>
|
||||
<Building2 className="w-4 h-4" />
|
||||
<span>Brands</span>
|
||||
</button>
|
||||
<button
|
||||
onClick={() => navigate('/admin/intelligence/stores')}
|
||||
className="btn btn-sm gap-1 bg-white border-gray-300 text-gray-700 hover:bg-gray-100"
|
||||
>
|
||||
<MapPin className="w-4 h-4" />
|
||||
<span>Stores</span>
|
||||
</button>
|
||||
<button
|
||||
className="btn btn-sm gap-1 bg-emerald-600 text-white hover:bg-emerald-700 border-emerald-600"
|
||||
>
|
||||
<DollarSign className="w-4 h-4" />
|
||||
<span>Pricing</span>
|
||||
</button>
|
||||
</div>
|
||||
<div className="flex gap-2">
|
||||
<button
|
||||
onClick={() => navigate('/admin/intelligence/brands')}
|
||||
className="btn btn-sm btn-outline gap-1"
|
||||
>
|
||||
<Building2 className="w-4 h-4" />
|
||||
Brands
|
||||
</button>
|
||||
<button
|
||||
onClick={() => navigate('/admin/intelligence/stores')}
|
||||
className="btn btn-sm btn-outline gap-1"
|
||||
>
|
||||
<MapPin className="w-4 h-4" />
|
||||
Stores
|
||||
</button>
|
||||
<button
|
||||
onClick={loadPricing}
|
||||
className="btn btn-sm btn-outline gap-2"
|
||||
>
|
||||
<RefreshCw className="w-4 h-4" />
|
||||
Refresh
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -185,7 +150,7 @@ export function IntelligencePricing() {
|
||||
<div>
|
||||
<p className="text-sm text-gray-500">Products Priced</p>
|
||||
<p className="text-2xl font-bold">
|
||||
{(overall.totalProducts || 0).toLocaleString()}
|
||||
{overall.totalProducts.toLocaleString()}
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
@@ -199,29 +164,43 @@ export function IntelligencePricing() {
|
||||
<BarChart3 className="w-5 h-5 text-green-500" />
|
||||
Average Price by Category
|
||||
</h3>
|
||||
<div className="space-y-2">
|
||||
{sortedCategories.slice(0, 12).map((cat) => {
|
||||
const maxPrice = Math.max(...sortedCategories.map(c => c.avgPrice || 0), 1);
|
||||
const barWidth = Math.min(((cat.avgPrice || 0) / maxPrice) * 100, 100);
|
||||
return (
|
||||
<div key={cat.category} className="flex items-center gap-3">
|
||||
<span className="text-sm font-medium w-28 truncate shrink-0" title={cat.category}>
|
||||
{cat.category || 'Unknown'}
|
||||
</span>
|
||||
<div className="flex-1 min-w-0">
|
||||
<div className="bg-gray-100 rounded h-5 overflow-hidden">
|
||||
<div
|
||||
className="bg-gradient-to-r from-emerald-400 to-emerald-500 h-5 rounded transition-all"
|
||||
style={{ width: `${barWidth}%` }}
|
||||
/>
|
||||
</div>
|
||||
<div className="space-y-3">
|
||||
{sortedCategories.map((cat) => (
|
||||
<div key={cat.category} className="flex items-center gap-3">
|
||||
<span className="text-sm font-medium w-32 truncate" title={cat.category}>
|
||||
{cat.category || 'Unknown'}
|
||||
</span>
|
||||
<div className="flex-1 relative">
|
||||
{/* Price range bar */}
|
||||
<div className="bg-gray-100 rounded-full h-6 relative">
|
||||
{/* Min-Max range */}
|
||||
<div
|
||||
className="absolute top-0 h-6 bg-blue-100 rounded-full"
|
||||
style={{
|
||||
left: `${(cat.minPrice / (overall?.maxPrice || 100)) * 100}%`,
|
||||
width: `${((cat.maxPrice - cat.minPrice) / (overall?.maxPrice || 100)) * 100}%`,
|
||||
}}
|
||||
/>
|
||||
{/* Average marker */}
|
||||
<div
|
||||
className="absolute top-0 h-6 w-1 bg-green-500 rounded"
|
||||
style={{ left: `${(cat.avgPrice / (overall?.maxPrice || 100)) * 100}%` }}
|
||||
/>
|
||||
</div>
|
||||
<span className="text-sm font-mono font-semibold text-emerald-600 w-16 text-right shrink-0">
|
||||
{formatPrice(cat.avgPrice)}
|
||||
</div>
|
||||
<div className="flex gap-4 text-xs w-48">
|
||||
<span className="text-gray-500">
|
||||
Min: <span className="text-blue-600 font-mono">{formatPrice(cat.minPrice)}</span>
|
||||
</span>
|
||||
<span className="text-gray-500">
|
||||
Avg: <span className="text-green-600 font-mono font-bold">{formatPrice(cat.avgPrice)}</span>
|
||||
</span>
|
||||
<span className="text-gray-500">
|
||||
Max: <span className="text-orange-600 font-mono">{formatPrice(cat.maxPrice)}</span>
|
||||
</span>
|
||||
</div>
|
||||
);
|
||||
})}
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -257,7 +236,7 @@ export function IntelligencePricing() {
|
||||
<span className="font-medium">{cat.category || 'Unknown'}</span>
|
||||
</td>
|
||||
<td className="text-center">
|
||||
<span className="font-mono">{(cat.productCount || 0).toLocaleString()}</span>
|
||||
<span className="font-mono">{cat.productCount.toLocaleString()}</span>
|
||||
</td>
|
||||
<td className="text-right">
|
||||
<span className="font-mono text-blue-600">{formatPrice(cat.minPrice)}</span>
|
||||
|
||||
@@ -8,6 +8,7 @@ import {
|
||||
Building2,
|
||||
DollarSign,
|
||||
Package,
|
||||
RefreshCw,
|
||||
Search,
|
||||
Clock,
|
||||
Activity,
|
||||
@@ -33,19 +34,12 @@ export function IntelligenceStores() {
|
||||
const [stores, setStores] = useState<StoreActivity[]>([]);
|
||||
const [loading, setLoading] = useState(true);
|
||||
const [searchTerm, setSearchTerm] = useState('');
|
||||
const [availableStates, setAvailableStates] = useState<string[]>([]);
|
||||
const [localStates, setLocalStates] = useState<string[]>([]);
|
||||
|
||||
useEffect(() => {
|
||||
loadStores();
|
||||
}, [selectedState]);
|
||||
|
||||
useEffect(() => {
|
||||
// Load available states from orchestrator API
|
||||
api.getOrchestratorStates().then(data => {
|
||||
setAvailableStates(data.states?.map((s: any) => s.state) || []);
|
||||
}).catch(console.error);
|
||||
}, []);
|
||||
|
||||
const loadStores = async () => {
|
||||
try {
|
||||
setLoading(true);
|
||||
@@ -54,6 +48,10 @@ export function IntelligenceStores() {
|
||||
limit: 500,
|
||||
});
|
||||
setStores(data.stores || []);
|
||||
|
||||
// Extract unique states from response for dropdown counts
|
||||
const uniqueStates = [...new Set(data.stores.map((s: StoreActivity) => s.state))].sort();
|
||||
setLocalStates(uniqueStates);
|
||||
} catch (error) {
|
||||
console.error('Failed to load stores:', error);
|
||||
} finally {
|
||||
@@ -99,72 +97,47 @@ export function IntelligenceStores() {
|
||||
);
|
||||
}
|
||||
|
||||
// Calculate stats with null safety
|
||||
const totalSKUs = stores.reduce((sum, s) => sum + (s.skuCount || 0), 0);
|
||||
const totalSnapshots = stores.reduce((sum, s) => sum + (s.snapshotCount || 0), 0);
|
||||
const storesWithFrequency = stores.filter(s => s.crawlFrequencyHours != null);
|
||||
const avgFrequency = storesWithFrequency.length > 0
|
||||
? storesWithFrequency.reduce((sum, s) => sum + (s.crawlFrequencyHours || 0), 0) / storesWithFrequency.length
|
||||
// Calculate stats
|
||||
const totalSKUs = stores.reduce((sum, s) => sum + s.skuCount, 0);
|
||||
const totalSnapshots = stores.reduce((sum, s) => sum + s.snapshotCount, 0);
|
||||
const avgFrequency = stores.filter(s => s.crawlFrequencyHours).length > 0
|
||||
? stores.filter(s => s.crawlFrequencyHours).reduce((sum, s) => sum + (s.crawlFrequencyHours || 0), 0) /
|
||||
stores.filter(s => s.crawlFrequencyHours).length
|
||||
: 0;
|
||||
|
||||
return (
|
||||
<Layout>
|
||||
<div className="space-y-6">
|
||||
{/* Header */}
|
||||
<div className="flex flex-col gap-4 sm:flex-row sm:items-center sm:justify-between">
|
||||
<div className="flex items-center justify-between">
|
||||
<div>
|
||||
<h1 className="text-2xl font-bold text-gray-900">Store Activity</h1>
|
||||
<p className="text-sm text-gray-600 mt-1">
|
||||
Per-store SKU counts, snapshots, and crawl frequency
|
||||
</p>
|
||||
</div>
|
||||
<div className="flex flex-wrap gap-2 items-center">
|
||||
{/* State Selector */}
|
||||
<div className="dropdown dropdown-end">
|
||||
<button tabIndex={0} className="btn btn-sm gap-2 bg-emerald-50 border-emerald-200 hover:bg-emerald-100">
|
||||
{stateLabel}
|
||||
<ChevronDown className="w-4 h-4" />
|
||||
</button>
|
||||
<ul tabIndex={0} className="dropdown-content z-50 menu p-2 shadow-lg bg-white rounded-box w-44 max-h-60 overflow-y-auto border border-gray-200">
|
||||
<li>
|
||||
<a onClick={() => setSelectedState(null)} className={isAllStates ? 'active bg-emerald-100' : ''}>
|
||||
All States
|
||||
</a>
|
||||
</li>
|
||||
<div className="divider my-1"></div>
|
||||
{availableStates.map((state) => (
|
||||
<li key={state}>
|
||||
<a onClick={() => setSelectedState(state)} className={selectedState === state ? 'active bg-emerald-100' : ''}>
|
||||
{state}
|
||||
</a>
|
||||
</li>
|
||||
))}
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
{/* Page Navigation */}
|
||||
<div className="flex gap-1">
|
||||
<button
|
||||
onClick={() => navigate('/admin/intelligence/brands')}
|
||||
className="btn btn-sm gap-1 bg-white border-gray-300 text-gray-700 hover:bg-gray-100"
|
||||
>
|
||||
<Building2 className="w-4 h-4" />
|
||||
<span>Brands</span>
|
||||
</button>
|
||||
<button
|
||||
className="btn btn-sm gap-1 bg-emerald-600 text-white hover:bg-emerald-700 border-emerald-600"
|
||||
>
|
||||
<MapPin className="w-4 h-4" />
|
||||
<span>Stores</span>
|
||||
</button>
|
||||
<button
|
||||
onClick={() => navigate('/admin/intelligence/pricing')}
|
||||
className="btn btn-sm gap-1 bg-white border-gray-300 text-gray-700 hover:bg-gray-100"
|
||||
>
|
||||
<DollarSign className="w-4 h-4" />
|
||||
<span>Pricing</span>
|
||||
</button>
|
||||
</div>
|
||||
<div className="flex gap-2">
|
||||
<button
|
||||
onClick={() => navigate('/admin/intelligence/brands')}
|
||||
className="btn btn-sm btn-outline gap-1"
|
||||
>
|
||||
<Building2 className="w-4 h-4" />
|
||||
Brands
|
||||
</button>
|
||||
<button
|
||||
onClick={() => navigate('/admin/intelligence/pricing')}
|
||||
className="btn btn-sm btn-outline gap-1"
|
||||
>
|
||||
<DollarSign className="w-4 h-4" />
|
||||
Pricing
|
||||
</button>
|
||||
<button
|
||||
onClick={loadStores}
|
||||
className="btn btn-sm btn-outline gap-2"
|
||||
>
|
||||
<RefreshCw className="w-4 h-4" />
|
||||
Refresh
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -220,6 +193,26 @@ export function IntelligenceStores() {
|
||||
className="input input-bordered input-sm w-full pl-10"
|
||||
/>
|
||||
</div>
|
||||
<div className="dropdown">
|
||||
<button tabIndex={0} className="btn btn-sm btn-outline gap-2">
|
||||
{stateLabel}
|
||||
<ChevronDown className="w-4 h-4" />
|
||||
</button>
|
||||
<ul tabIndex={0} className="dropdown-content z-[1] menu p-2 shadow bg-base-100 rounded-box w-40 max-h-60 overflow-y-auto">
|
||||
<li>
|
||||
<a onClick={() => setSelectedState(null)} className={isAllStates ? 'active' : ''}>
|
||||
All States
|
||||
</a>
|
||||
</li>
|
||||
{localStates.map(state => (
|
||||
<li key={state}>
|
||||
<a onClick={() => setSelectedState(state)} className={selectedState === state ? 'active' : ''}>
|
||||
{state}
|
||||
</a>
|
||||
</li>
|
||||
))}
|
||||
</ul>
|
||||
</div>
|
||||
<span className="text-sm text-gray-500">
|
||||
Showing {filteredStores.length} of {stores.length} stores
|
||||
</span>
|
||||
@@ -253,7 +246,7 @@ export function IntelligenceStores() {
|
||||
<tr
|
||||
key={store.id}
|
||||
className="hover:bg-gray-50 cursor-pointer"
|
||||
onClick={() => navigate(`/stores/list/${store.id}`)}
|
||||
onClick={() => navigate(`/admin/orchestrator/stores?storeId=${store.id}`)}
|
||||
>
|
||||
<td>
|
||||
<span className="font-medium">{store.name}</span>
|
||||
@@ -269,10 +262,10 @@ export function IntelligenceStores() {
|
||||
)}
|
||||
</td>
|
||||
<td className="text-center">
|
||||
<span className="font-mono">{(store.skuCount || 0).toLocaleString()}</span>
|
||||
<span className="font-mono">{store.skuCount.toLocaleString()}</span>
|
||||
</td>
|
||||
<td className="text-center">
|
||||
<span className="font-mono">{(store.snapshotCount || 0).toLocaleString()}</span>
|
||||
<span className="font-mono">{store.snapshotCount.toLocaleString()}</span>
|
||||
</td>
|
||||
<td>
|
||||
<span className={store.lastCrawl ? 'text-green-600' : 'text-gray-400'}>
|
||||
|
||||
1025
cannaiq/src/pages/JobQueue.tsx
Normal file
1025
cannaiq/src/pages/JobQueue.tsx
Normal file
File diff suppressed because it is too large
Load Diff
@@ -8,6 +8,7 @@
|
||||
import { useState, useEffect } from 'react';
|
||||
import { useNavigate } from 'react-router-dom';
|
||||
import { Layout } from '../components/Layout';
|
||||
import { StateBadge } from '../components/StateSelector';
|
||||
import { useStateStore } from '../store/stateStore';
|
||||
import { api } from '../lib/api';
|
||||
import {
|
||||
@@ -20,6 +21,7 @@ import {
|
||||
DollarSign,
|
||||
MapPin,
|
||||
ArrowRight,
|
||||
RefreshCw,
|
||||
AlertCircle
|
||||
} from 'lucide-react';
|
||||
|
||||
@@ -203,6 +205,7 @@ export default function NationalDashboard() {
|
||||
const [loading, setLoading] = useState(true);
|
||||
const [error, setError] = useState<string | null>(null);
|
||||
const [summary, setSummary] = useState<NationalSummary | null>(null);
|
||||
const [refreshing, setRefreshing] = useState(false);
|
||||
|
||||
const fetchData = async () => {
|
||||
setLoading(true);
|
||||
@@ -227,6 +230,18 @@ export default function NationalDashboard() {
|
||||
fetchData();
|
||||
}, []);
|
||||
|
||||
const handleRefreshMetrics = async () => {
|
||||
setRefreshing(true);
|
||||
try {
|
||||
await api.post('/api/admin/states/refresh-metrics');
|
||||
await fetchData();
|
||||
} catch (err) {
|
||||
console.error('Failed to refresh metrics:', err);
|
||||
} finally {
|
||||
setRefreshing(false);
|
||||
}
|
||||
};
|
||||
|
||||
const handleStateClick = (stateCode: string) => {
|
||||
setSelectedState(stateCode);
|
||||
navigate(`/national/state/${stateCode}`);
|
||||
@@ -263,11 +278,24 @@ export default function NationalDashboard() {
|
||||
<Layout>
|
||||
<div className="space-y-6">
|
||||
{/* Header */}
|
||||
<div>
|
||||
<h1 className="text-2xl font-bold text-gray-900">National Dashboard</h1>
|
||||
<p className="text-gray-500 mt-1">
|
||||
Multi-state cannabis market intelligence
|
||||
</p>
|
||||
<div className="flex items-center justify-between">
|
||||
<div>
|
||||
<h1 className="text-2xl font-bold text-gray-900">National Dashboard</h1>
|
||||
<p className="text-gray-500 mt-1">
|
||||
Multi-state cannabis market intelligence
|
||||
</p>
|
||||
</div>
|
||||
<div className="flex items-center gap-3">
|
||||
<StateBadge />
|
||||
<button
|
||||
onClick={handleRefreshMetrics}
|
||||
disabled={refreshing}
|
||||
className="flex items-center gap-2 px-3 py-2 text-sm text-gray-600 hover:text-gray-900 border border-gray-200 rounded-lg hover:bg-gray-50 disabled:opacity-50"
|
||||
>
|
||||
<RefreshCw className={`w-4 h-4 ${refreshing ? 'animate-spin' : ''}`} />
|
||||
Refresh Metrics
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Summary Cards */}
|
||||
@@ -275,7 +303,7 @@ export default function NationalDashboard() {
|
||||
<>
|
||||
<div className="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-4 gap-4">
|
||||
<MetricCard
|
||||
title="Regions (US + CA)"
|
||||
title="Active States"
|
||||
value={summary.activeStates}
|
||||
icon={Globe}
|
||||
/>
|
||||
|
||||
@@ -96,8 +96,7 @@ export function Proxies() {
|
||||
try {
|
||||
const response = await api.testAllProxies();
|
||||
setNotification({ message: 'Proxy testing job started', type: 'success' });
|
||||
// Use response.total if available, otherwise proxies.length, but immediately poll for accurate count
|
||||
setActiveJob({ id: response.jobId, status: 'pending', tested_proxies: 0, total_proxies: response.total || proxies.length || 0, passed_proxies: 0, failed_proxies: 0 });
|
||||
setActiveJob({ id: response.jobId, status: 'pending', tested_proxies: 0, total_proxies: proxies.length, passed_proxies: 0, failed_proxies: 0 });
|
||||
} catch (error: any) {
|
||||
setNotification({ message: 'Failed to start testing: ' + error.message, type: 'error' });
|
||||
}
|
||||
|
||||
@@ -153,6 +153,29 @@ export function StoreDetailPage() {
|
||||
Back to Stores
|
||||
</button>
|
||||
|
||||
{/* Update Button */}
|
||||
<div className="relative">
|
||||
<button
|
||||
onClick={() => setShowUpdateDropdown(!showUpdateDropdown)}
|
||||
disabled={isUpdating}
|
||||
className="flex items-center gap-2 px-4 py-2 text-sm font-medium text-white bg-blue-600 hover:bg-blue-700 rounded-lg disabled:opacity-50 disabled:cursor-not-allowed"
|
||||
>
|
||||
<RefreshCw className={`w-4 h-4 ${isUpdating ? 'animate-spin' : ''}`} />
|
||||
{isUpdating ? 'Crawling...' : 'Crawl Now'}
|
||||
{!isUpdating && <ChevronDown className="w-4 h-4" />}
|
||||
</button>
|
||||
|
||||
{showUpdateDropdown && !isUpdating && (
|
||||
<div className="absolute right-0 mt-2 w-48 bg-white rounded-lg shadow-lg border border-gray-200 z-10">
|
||||
<button
|
||||
onClick={handleCrawl}
|
||||
className="w-full text-left px-4 py-2 text-sm text-gray-700 hover:bg-gray-100 rounded-lg"
|
||||
>
|
||||
Start Full Crawl
|
||||
</button>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Store Header */}
|
||||
@@ -177,7 +200,7 @@ export function StoreDetailPage() {
|
||||
<div className="flex items-center gap-2 text-sm text-gray-600 bg-gray-50 px-4 py-2 rounded-lg">
|
||||
<Clock className="w-4 h-4" />
|
||||
<div>
|
||||
<span className="font-medium">Last Updated:</span>
|
||||
<span className="font-medium">Last Crawl:</span>
|
||||
<span className="ml-2">
|
||||
{lastCrawl?.completed_at
|
||||
? new Date(lastCrawl.completed_at).toLocaleDateString('en-US', {
|
||||
@@ -189,6 +212,15 @@ export function StoreDetailPage() {
|
||||
})
|
||||
: 'Never'}
|
||||
</span>
|
||||
{lastCrawl?.status && (
|
||||
<span className={`ml-2 px-2 py-0.5 rounded text-xs ${
|
||||
lastCrawl.status === 'completed' ? 'bg-green-100 text-green-800' :
|
||||
lastCrawl.status === 'failed' ? 'bg-red-100 text-red-800' :
|
||||
'bg-yellow-100 text-yellow-800'
|
||||
}`}>
|
||||
{lastCrawl.status}
|
||||
</span>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
@@ -250,8 +282,8 @@ export function StoreDetailPage() {
|
||||
setStockFilter('in_stock');
|
||||
setSearchQuery('');
|
||||
}}
|
||||
className={`bg-white rounded-lg border p-4 hover:border-gray-300 hover:shadow-md transition-all cursor-pointer text-left ${
|
||||
stockFilter === 'in_stock' ? 'border-gray-400' : 'border-gray-200'
|
||||
className={`bg-white rounded-lg border p-4 hover:border-blue-300 hover:shadow-md transition-all cursor-pointer text-left ${
|
||||
stockFilter === 'in_stock' ? 'border-blue-500' : 'border-gray-200'
|
||||
}`}
|
||||
>
|
||||
<div className="flex items-center gap-3">
|
||||
@@ -271,8 +303,8 @@ export function StoreDetailPage() {
|
||||
setStockFilter('out_of_stock');
|
||||
setSearchQuery('');
|
||||
}}
|
||||
className={`bg-white rounded-lg border p-4 hover:border-gray-300 hover:shadow-md transition-all cursor-pointer text-left ${
|
||||
stockFilter === 'out_of_stock' ? 'border-gray-400' : 'border-gray-200'
|
||||
className={`bg-white rounded-lg border p-4 hover:border-blue-300 hover:shadow-md transition-all cursor-pointer text-left ${
|
||||
stockFilter === 'out_of_stock' ? 'border-blue-500' : 'border-gray-200'
|
||||
}`}
|
||||
>
|
||||
<div className="flex items-center gap-3">
|
||||
@@ -288,8 +320,8 @@ export function StoreDetailPage() {
|
||||
|
||||
<button
|
||||
onClick={() => setActiveTab('brands')}
|
||||
className={`bg-white rounded-lg border p-4 hover:border-gray-300 hover:shadow-md transition-all cursor-pointer text-left ${
|
||||
activeTab === 'brands' ? 'border-gray-400' : 'border-gray-200'
|
||||
className={`bg-white rounded-lg border p-4 hover:border-blue-300 hover:shadow-md transition-all cursor-pointer text-left ${
|
||||
activeTab === 'brands' ? 'border-blue-500' : 'border-gray-200'
|
||||
}`}
|
||||
>
|
||||
<div className="flex items-center gap-3">
|
||||
@@ -305,8 +337,8 @@ export function StoreDetailPage() {
|
||||
|
||||
<button
|
||||
onClick={() => setActiveTab('categories')}
|
||||
className={`bg-white rounded-lg border p-4 hover:border-gray-300 hover:shadow-md transition-all cursor-pointer text-left ${
|
||||
activeTab === 'categories' ? 'border-gray-400' : 'border-gray-200'
|
||||
className={`bg-white rounded-lg border p-4 hover:border-blue-300 hover:shadow-md transition-all cursor-pointer text-left ${
|
||||
activeTab === 'categories' ? 'border-blue-500' : 'border-gray-200'
|
||||
}`}
|
||||
>
|
||||
<div className="flex items-center gap-3">
|
||||
@@ -332,7 +364,7 @@ export function StoreDetailPage() {
|
||||
}}
|
||||
className={`py-4 px-2 text-sm font-medium border-b-2 ${
|
||||
activeTab === 'products'
|
||||
? 'border-gray-800 text-gray-900'
|
||||
? 'border-blue-600 text-blue-600'
|
||||
: 'border-transparent text-gray-600 hover:text-gray-900'
|
||||
}`}
|
||||
>
|
||||
@@ -342,7 +374,7 @@ export function StoreDetailPage() {
|
||||
onClick={() => setActiveTab('brands')}
|
||||
className={`py-4 px-2 text-sm font-medium border-b-2 ${
|
||||
activeTab === 'brands'
|
||||
? 'border-gray-800 text-gray-900'
|
||||
? 'border-blue-600 text-blue-600'
|
||||
: 'border-transparent text-gray-600 hover:text-gray-900'
|
||||
}`}
|
||||
>
|
||||
@@ -352,7 +384,7 @@ export function StoreDetailPage() {
|
||||
onClick={() => setActiveTab('categories')}
|
||||
className={`py-4 px-2 text-sm font-medium border-b-2 ${
|
||||
activeTab === 'categories'
|
||||
? 'border-gray-800 text-gray-900'
|
||||
? 'border-blue-600 text-blue-600'
|
||||
: 'border-transparent text-gray-600 hover:text-gray-900'
|
||||
}`}
|
||||
>
|
||||
@@ -401,7 +433,7 @@ export function StoreDetailPage() {
|
||||
|
||||
{productsLoading ? (
|
||||
<div className="text-center py-8">
|
||||
<div className="inline-block animate-spin rounded-full h-6 w-6 border-4 border-gray-400 border-t-transparent"></div>
|
||||
<div className="inline-block animate-spin rounded-full h-6 w-6 border-4 border-blue-500 border-t-transparent"></div>
|
||||
<p className="mt-2 text-sm text-gray-600">Loading products...</p>
|
||||
</div>
|
||||
) : products.length === 0 ? (
|
||||
@@ -453,9 +485,9 @@ export function StoreDetailPage() {
|
||||
<div className="line-clamp-2" title={product.brand || '-'}>{product.brand || '-'}</div>
|
||||
</td>
|
||||
<td className="whitespace-nowrap">
|
||||
<span className="text-xs text-gray-500 bg-gray-100 px-1.5 py-0.5 rounded">{product.type || '-'}</span>
|
||||
<span className="badge badge-ghost badge-sm">{product.type || '-'}</span>
|
||||
{product.subcategory && (
|
||||
<span className="text-xs text-gray-500 bg-gray-100 px-1.5 py-0.5 rounded ml-1">{product.subcategory}</span>
|
||||
<span className="badge badge-ghost badge-sm ml-1">{product.subcategory}</span>
|
||||
)}
|
||||
</td>
|
||||
<td className="text-right font-semibold whitespace-nowrap">
|
||||
@@ -468,14 +500,21 @@ export function StoreDetailPage() {
|
||||
`$${product.regular_price}`
|
||||
) : '-'}
|
||||
</td>
|
||||
<td className="text-center whitespace-nowrap text-sm text-gray-700">
|
||||
{product.thc_percentage ? `${product.thc_percentage}%` : '-'}
|
||||
<td className="text-center whitespace-nowrap">
|
||||
{product.thc_percentage ? (
|
||||
<span className="badge badge-success badge-sm">{product.thc_percentage}%</span>
|
||||
) : '-'}
|
||||
</td>
|
||||
<td className="text-center whitespace-nowrap text-sm text-gray-700">
|
||||
{product.stock_status === 'in_stock' ? 'In Stock' :
|
||||
product.stock_status === 'out_of_stock' ? 'Out' : '-'}
|
||||
<td className="text-center whitespace-nowrap">
|
||||
{product.stock_status === 'in_stock' ? (
|
||||
<span className="badge badge-success badge-sm">In Stock</span>
|
||||
) : product.stock_status === 'out_of_stock' ? (
|
||||
<span className="badge badge-error badge-sm">Out</span>
|
||||
) : (
|
||||
<span className="badge badge-warning badge-sm">Unknown</span>
|
||||
)}
|
||||
</td>
|
||||
<td className="text-center whitespace-nowrap text-sm text-gray-700">
|
||||
<td className="text-center whitespace-nowrap">
|
||||
{product.total_quantity != null ? product.total_quantity : '-'}
|
||||
</td>
|
||||
<td className="whitespace-nowrap text-xs text-gray-500">
|
||||
|
||||
@@ -12,15 +12,10 @@ import {
|
||||
Search,
|
||||
ChevronDown,
|
||||
ChevronUp,
|
||||
ChevronLeft,
|
||||
ChevronRight,
|
||||
Gauge,
|
||||
Users,
|
||||
Square,
|
||||
Plus,
|
||||
X,
|
||||
Calendar,
|
||||
Trash2,
|
||||
Zap,
|
||||
} from 'lucide-react';
|
||||
|
||||
interface Task {
|
||||
@@ -70,313 +65,6 @@ interface TaskCounts {
|
||||
stale: number;
|
||||
}
|
||||
|
||||
interface Store {
|
||||
id: number;
|
||||
name: string;
|
||||
state_code: string;
|
||||
crawl_enabled: boolean;
|
||||
}
|
||||
|
||||
interface CreateTaskModalProps {
|
||||
isOpen: boolean;
|
||||
onClose: () => void;
|
||||
onTaskCreated: () => void;
|
||||
}
|
||||
|
||||
const TASK_ROLES = [
|
||||
{ id: 'product_refresh', name: 'Product Resync', description: 'Re-crawl products for price/stock changes' },
|
||||
{ id: 'product_discovery', name: 'Product Discovery', description: 'Initial crawl for new dispensaries' },
|
||||
{ id: 'store_discovery', name: 'Store Discovery', description: 'Discover new dispensary locations' },
|
||||
{ id: 'entry_point_discovery', name: 'Entry Point Discovery', description: 'Resolve platform IDs from menu URLs' },
|
||||
{ id: 'analytics_refresh', name: 'Analytics Refresh', description: 'Refresh materialized views' },
|
||||
];
|
||||
|
||||
function CreateTaskModal({ isOpen, onClose, onTaskCreated }: CreateTaskModalProps) {
|
||||
const [role, setRole] = useState('product_refresh');
|
||||
const [priority, setPriority] = useState(10);
|
||||
const [scheduleType, setScheduleType] = useState<'now' | 'scheduled'>('now');
|
||||
const [scheduledFor, setScheduledFor] = useState('');
|
||||
const [stores, setStores] = useState<Store[]>([]);
|
||||
const [storeSearch, setStoreSearch] = useState('');
|
||||
const [selectedStores, setSelectedStores] = useState<Store[]>([]);
|
||||
const [loading, setLoading] = useState(false);
|
||||
const [storesLoading, setStoresLoading] = useState(false);
|
||||
const [error, setError] = useState<string | null>(null);
|
||||
|
||||
useEffect(() => {
|
||||
if (isOpen) {
|
||||
fetchStores();
|
||||
}
|
||||
}, [isOpen]);
|
||||
|
||||
const fetchStores = async () => {
|
||||
setStoresLoading(true);
|
||||
try {
|
||||
const res = await api.get('/api/stores?limit=500');
|
||||
setStores(res.data.stores || res.data || []);
|
||||
} catch (err) {
|
||||
console.error('Failed to fetch stores:', err);
|
||||
} finally {
|
||||
setStoresLoading(false);
|
||||
}
|
||||
};
|
||||
|
||||
const filteredStores = stores.filter(s =>
|
||||
s.name.toLowerCase().includes(storeSearch.toLowerCase()) ||
|
||||
s.state_code?.toLowerCase().includes(storeSearch.toLowerCase())
|
||||
);
|
||||
|
||||
const toggleStore = (store: Store) => {
|
||||
if (selectedStores.find(s => s.id === store.id)) {
|
||||
setSelectedStores(selectedStores.filter(s => s.id !== store.id));
|
||||
} else {
|
||||
setSelectedStores([...selectedStores, store]);
|
||||
}
|
||||
};
|
||||
|
||||
const selectAll = () => setSelectedStores(filteredStores);
|
||||
const clearAll = () => setSelectedStores([]);
|
||||
|
||||
const handleSubmit = async () => {
|
||||
setLoading(true);
|
||||
setError(null);
|
||||
|
||||
try {
|
||||
const scheduledDate = scheduleType === 'scheduled' && scheduledFor
|
||||
? new Date(scheduledFor).toISOString()
|
||||
: undefined;
|
||||
|
||||
if (role === 'store_discovery' || role === 'analytics_refresh') {
|
||||
await api.post('/api/tasks', {
|
||||
role,
|
||||
priority,
|
||||
scheduled_for: scheduledDate,
|
||||
platform: 'dutchie',
|
||||
});
|
||||
} else if (selectedStores.length === 0) {
|
||||
setError('Please select at least one store');
|
||||
setLoading(false);
|
||||
return;
|
||||
} else {
|
||||
for (const store of selectedStores) {
|
||||
await api.post('/api/tasks', {
|
||||
role,
|
||||
dispensary_id: store.id,
|
||||
priority,
|
||||
scheduled_for: scheduledDate,
|
||||
platform: 'dutchie',
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
onTaskCreated();
|
||||
onClose();
|
||||
setSelectedStores([]);
|
||||
setPriority(10);
|
||||
setScheduleType('now');
|
||||
setScheduledFor('');
|
||||
} catch (err: any) {
|
||||
setError(err.response?.data?.error || err.message || 'Failed to create task');
|
||||
} finally {
|
||||
setLoading(false);
|
||||
}
|
||||
};
|
||||
|
||||
if (!isOpen) return null;
|
||||
|
||||
const needsStore = role !== 'store_discovery' && role !== 'analytics_refresh';
|
||||
|
||||
return (
|
||||
<div className="fixed inset-0 z-50 overflow-y-auto">
|
||||
<div className="flex min-h-full items-center justify-center p-4">
|
||||
<div className="fixed inset-0 bg-black/50" onClick={onClose} />
|
||||
<div className="relative bg-white rounded-xl shadow-xl max-w-2xl w-full max-h-[90vh] overflow-hidden">
|
||||
<div className="px-6 py-4 border-b border-gray-200 flex items-center justify-between">
|
||||
<h2 className="text-lg font-semibold text-gray-900">Create New Task</h2>
|
||||
<button onClick={onClose} className="p-1 hover:bg-gray-100 rounded">
|
||||
<X className="w-5 h-5 text-gray-500" />
|
||||
</button>
|
||||
</div>
|
||||
|
||||
<div className="px-6 py-4 space-y-6 overflow-y-auto max-h-[calc(90vh-140px)]">
|
||||
{error && (
|
||||
<div className="bg-red-50 border border-red-200 rounded-lg p-3 text-red-700 text-sm">
|
||||
{error}
|
||||
</div>
|
||||
)}
|
||||
|
||||
<div>
|
||||
<label className="block text-sm font-medium text-gray-700 mb-2">Task Role</label>
|
||||
<div className="grid grid-cols-1 gap-2">
|
||||
{TASK_ROLES.map(r => (
|
||||
<button
|
||||
key={r.id}
|
||||
onClick={() => setRole(r.id)}
|
||||
className={`flex items-start gap-3 p-3 rounded-lg border text-left transition-colors ${
|
||||
role === r.id
|
||||
? 'border-emerald-500 bg-emerald-50'
|
||||
: 'border-gray-200 hover:border-gray-300'
|
||||
}`}
|
||||
>
|
||||
<div className={`w-4 h-4 rounded-full border-2 mt-0.5 flex-shrink-0 ${
|
||||
role === r.id ? 'border-emerald-500 bg-emerald-500' : 'border-gray-300'
|
||||
}`}>
|
||||
{role === r.id && (
|
||||
<div className="w-full h-full flex items-center justify-center">
|
||||
<div className="w-1.5 h-1.5 bg-white rounded-full" />
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
<div>
|
||||
<p className="font-medium text-gray-900">{r.name}</p>
|
||||
<p className="text-xs text-gray-500">{r.description}</p>
|
||||
</div>
|
||||
</button>
|
||||
))}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{needsStore && (
|
||||
<div>
|
||||
<label className="block text-sm font-medium text-gray-700 mb-2">
|
||||
Select Stores ({selectedStores.length} selected)
|
||||
</label>
|
||||
<div className="border border-gray-200 rounded-lg overflow-hidden">
|
||||
<div className="p-2 border-b border-gray-200 bg-gray-50">
|
||||
<div className="relative">
|
||||
<Search className="absolute left-3 top-1/2 -translate-y-1/2 w-4 h-4 text-gray-400" />
|
||||
<input
|
||||
type="text"
|
||||
value={storeSearch}
|
||||
onChange={(e) => setStoreSearch(e.target.value)}
|
||||
placeholder="Search stores..."
|
||||
className="w-full pl-9 pr-3 py-2 text-sm border border-gray-200 rounded"
|
||||
/>
|
||||
</div>
|
||||
<div className="flex gap-2 mt-2">
|
||||
<button onClick={selectAll} className="text-xs text-emerald-600 hover:underline">
|
||||
Select all ({filteredStores.length})
|
||||
</button>
|
||||
<span className="text-gray-300">|</span>
|
||||
<button onClick={clearAll} className="text-xs text-gray-500 hover:underline">
|
||||
Clear
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
<div className="max-h-48 overflow-y-auto">
|
||||
{storesLoading ? (
|
||||
<div className="p-4 text-center text-gray-500">
|
||||
<RefreshCw className="w-5 h-5 animate-spin mx-auto mb-1" />
|
||||
Loading stores...
|
||||
</div>
|
||||
) : filteredStores.length === 0 ? (
|
||||
<div className="p-4 text-center text-gray-500">No stores found</div>
|
||||
) : (
|
||||
filteredStores.map(store => (
|
||||
<label key={store.id} className="flex items-center gap-3 px-3 py-2 hover:bg-gray-50 cursor-pointer">
|
||||
<input
|
||||
type="checkbox"
|
||||
checked={!!selectedStores.find(s => s.id === store.id)}
|
||||
onChange={() => toggleStore(store)}
|
||||
className="w-4 h-4 text-emerald-600 rounded"
|
||||
/>
|
||||
<div className="flex-1 min-w-0">
|
||||
<p className="text-sm text-gray-900 truncate">{store.name}</p>
|
||||
<p className="text-xs text-gray-500">{store.state_code}</p>
|
||||
</div>
|
||||
{!store.crawl_enabled && (
|
||||
<span className="text-xs text-orange-600 bg-orange-50 px-1.5 py-0.5 rounded">disabled</span>
|
||||
)}
|
||||
</label>
|
||||
))
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
<div>
|
||||
<label className="block text-sm font-medium text-gray-700 mb-2">Priority: {priority}</label>
|
||||
<input
|
||||
type="range"
|
||||
min="0"
|
||||
max="100"
|
||||
value={priority}
|
||||
onChange={(e) => setPriority(parseInt(e.target.value))}
|
||||
className="w-full h-2 bg-gray-200 rounded-lg appearance-none cursor-pointer"
|
||||
/>
|
||||
<div className="flex justify-between text-xs text-gray-500 mt-1">
|
||||
<span>0 (Low)</span>
|
||||
<span>10 (Normal)</span>
|
||||
<span>50 (High)</span>
|
||||
<span>100 (Urgent)</span>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div>
|
||||
<label className="block text-sm font-medium text-gray-700 mb-2">Schedule</label>
|
||||
<div className="flex gap-4">
|
||||
<label className="flex items-center gap-2 cursor-pointer">
|
||||
<input
|
||||
type="radio"
|
||||
name="schedule"
|
||||
checked={scheduleType === 'now'}
|
||||
onChange={() => setScheduleType('now')}
|
||||
className="w-4 h-4 text-emerald-600"
|
||||
/>
|
||||
<span className="text-sm text-gray-700">Run immediately</span>
|
||||
</label>
|
||||
<label className="flex items-center gap-2 cursor-pointer">
|
||||
<input
|
||||
type="radio"
|
||||
name="schedule"
|
||||
checked={scheduleType === 'scheduled'}
|
||||
onChange={() => setScheduleType('scheduled')}
|
||||
className="w-4 h-4 text-emerald-600"
|
||||
/>
|
||||
<span className="text-sm text-gray-700">Schedule for later</span>
|
||||
</label>
|
||||
</div>
|
||||
{scheduleType === 'scheduled' && (
|
||||
<div className="mt-3 relative">
|
||||
<Calendar className="absolute left-3 top-1/2 -translate-y-1/2 w-4 h-4 text-gray-400" />
|
||||
<input
|
||||
type="datetime-local"
|
||||
value={scheduledFor}
|
||||
onChange={(e) => setScheduledFor(e.target.value)}
|
||||
className="w-full pl-9 pr-3 py-2 text-sm border border-gray-200 rounded"
|
||||
/>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div className="px-6 py-4 border-t border-gray-200 bg-gray-50 flex items-center justify-between">
|
||||
<div className="text-sm text-gray-500">
|
||||
{needsStore ? (
|
||||
selectedStores.length > 0 ? `Will create ${selectedStores.length} task${selectedStores.length > 1 ? 's' : ''}` : 'Select stores to create tasks'
|
||||
) : 'Will create 1 task'}
|
||||
</div>
|
||||
<div className="flex gap-3">
|
||||
<button onClick={onClose} className="px-4 py-2 text-sm text-gray-700 hover:bg-gray-100 rounded-lg">
|
||||
Cancel
|
||||
</button>
|
||||
<button
|
||||
onClick={handleSubmit}
|
||||
disabled={loading || (needsStore && selectedStores.length === 0)}
|
||||
className="px-4 py-2 text-sm bg-emerald-600 text-white rounded-lg hover:bg-emerald-700 disabled:opacity-50 disabled:cursor-not-allowed flex items-center gap-2"
|
||||
>
|
||||
{loading && <RefreshCw className="w-4 h-4 animate-spin" />}
|
||||
Create Task{selectedStores.length > 1 ? 's' : ''}
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
const ROLES = [
|
||||
'store_discovery',
|
||||
'entry_point_discovery',
|
||||
@@ -394,27 +82,6 @@ const STATUS_COLORS: Record<string, string> = {
|
||||
stale: 'bg-gray-100 text-gray-800',
|
||||
};
|
||||
|
||||
const getStatusIcon = (status: string, poolPaused: boolean): React.ReactNode => {
|
||||
switch (status) {
|
||||
case 'pending':
|
||||
return <Clock className="w-4 h-4" />;
|
||||
case 'claimed':
|
||||
return <PlayCircle className="w-4 h-4" />;
|
||||
case 'running':
|
||||
// Don't spin when pool is paused
|
||||
return <RefreshCw className={`w-4 h-4 ${!poolPaused ? 'animate-spin' : ''}`} />;
|
||||
case 'completed':
|
||||
return <CheckCircle2 className="w-4 h-4" />;
|
||||
case 'failed':
|
||||
return <XCircle className="w-4 h-4" />;
|
||||
case 'stale':
|
||||
return <AlertTriangle className="w-4 h-4" />;
|
||||
default:
|
||||
return null;
|
||||
}
|
||||
};
|
||||
|
||||
// Static version for summary cards (always shows animation)
|
||||
const STATUS_ICONS: Record<string, React.ReactNode> = {
|
||||
pending: <Clock className="w-4 h-4" />,
|
||||
claimed: <PlayCircle className="w-4 h-4" />,
|
||||
@@ -449,12 +116,6 @@ export default function TasksDashboard() {
|
||||
const [capacity, setCapacity] = useState<CapacityMetric[]>([]);
|
||||
const [loading, setLoading] = useState(true);
|
||||
const [error, setError] = useState<string | null>(null);
|
||||
const [poolPaused, setPoolPaused] = useState(false);
|
||||
const [showCreateModal, setShowCreateModal] = useState(false);
|
||||
|
||||
// Pagination
|
||||
const [page, setPage] = useState(0);
|
||||
const tasksPerPage = 25;
|
||||
|
||||
// Filters
|
||||
const [roleFilter, setRoleFilter] = useState<string>('');
|
||||
@@ -462,10 +123,13 @@ export default function TasksDashboard() {
|
||||
const [searchQuery, setSearchQuery] = useState('');
|
||||
const [showCapacity, setShowCapacity] = useState(true);
|
||||
|
||||
// Actions
|
||||
const [actionLoading, setActionLoading] = useState(false);
|
||||
const [actionMessage, setActionMessage] = useState<string | null>(null);
|
||||
|
||||
const fetchData = async () => {
|
||||
try {
|
||||
const [tasksRes, countsRes, capacityRes, poolStatus] = await Promise.all([
|
||||
const [tasksRes, countsRes, capacityRes] = await Promise.all([
|
||||
api.getTasks({
|
||||
role: roleFilter || undefined,
|
||||
status: statusFilter || undefined,
|
||||
@@ -473,13 +137,11 @@ export default function TasksDashboard() {
|
||||
}),
|
||||
api.getTaskCounts(),
|
||||
api.getTaskCapacity(),
|
||||
api.getTaskPoolStatus(),
|
||||
]);
|
||||
|
||||
setTasks(tasksRes.tasks || []);
|
||||
setCounts(countsRes);
|
||||
setCapacity(capacityRes.metrics || []);
|
||||
setPoolPaused(poolStatus.paused);
|
||||
setError(null);
|
||||
} catch (err: any) {
|
||||
setError(err.message || 'Failed to load tasks');
|
||||
@@ -488,22 +150,39 @@ export default function TasksDashboard() {
|
||||
}
|
||||
};
|
||||
|
||||
const handleDeleteTask = async (taskId: number) => {
|
||||
if (!confirm('Delete this task?')) return;
|
||||
useEffect(() => {
|
||||
fetchData();
|
||||
const interval = setInterval(fetchData, 10000); // Refresh every 10 seconds
|
||||
return () => clearInterval(interval);
|
||||
}, [roleFilter, statusFilter]);
|
||||
|
||||
const handleGenerateResync = async () => {
|
||||
setActionLoading(true);
|
||||
try {
|
||||
await api.delete(`/api/tasks/${taskId}`);
|
||||
const result = await api.generateResyncTasks();
|
||||
setActionMessage(`Generated ${result.tasks_created} resync tasks`);
|
||||
fetchData();
|
||||
} catch (err: any) {
|
||||
console.error('Delete error:', err);
|
||||
alert(err.response?.data?.error || 'Failed to delete task');
|
||||
setActionMessage(`Error: ${err.message}`);
|
||||
} finally {
|
||||
setActionLoading(false);
|
||||
setTimeout(() => setActionMessage(null), 5000);
|
||||
}
|
||||
};
|
||||
|
||||
useEffect(() => {
|
||||
fetchData();
|
||||
const interval = setInterval(fetchData, 15000); // Auto-refresh every 15 seconds
|
||||
return () => clearInterval(interval);
|
||||
}, [roleFilter, statusFilter]);
|
||||
const handleRecoverStale = async () => {
|
||||
setActionLoading(true);
|
||||
try {
|
||||
const result = await api.recoverStaleTasks();
|
||||
setActionMessage(`Recovered ${result.tasks_recovered} stale tasks`);
|
||||
fetchData();
|
||||
} catch (err: any) {
|
||||
setActionMessage(`Error: ${err.message}`);
|
||||
} finally {
|
||||
setActionLoading(false);
|
||||
setTimeout(() => setActionMessage(null), 5000);
|
||||
}
|
||||
};
|
||||
|
||||
const filteredTasks = tasks.filter((task) => {
|
||||
if (searchQuery) {
|
||||
@@ -518,10 +197,6 @@ export default function TasksDashboard() {
|
||||
return true;
|
||||
});
|
||||
|
||||
// Pagination
|
||||
const paginatedTasks = filteredTasks.slice(page * tasksPerPage, (page + 1) * tasksPerPage);
|
||||
const totalPages = Math.ceil(filteredTasks.length / tasksPerPage);
|
||||
|
||||
const totalActive = (counts?.claimed || 0) + (counts?.running || 0);
|
||||
const totalPending = counts?.pending || 0;
|
||||
|
||||
@@ -538,51 +213,62 @@ export default function TasksDashboard() {
|
||||
return (
|
||||
<Layout>
|
||||
<div className="space-y-6">
|
||||
{/* Sticky Header */}
|
||||
<div className="sticky top-0 z-10 bg-white pb-4 -mx-6 px-6 pt-2 border-b border-gray-200 shadow-sm">
|
||||
<div className="flex flex-col sm:flex-row sm:items-center sm:justify-between gap-4">
|
||||
<div>
|
||||
<h1 className="text-2xl font-bold text-gray-900 flex items-center gap-2">
|
||||
<ListChecks className="w-7 h-7 text-emerald-600" />
|
||||
Task Queue
|
||||
</h1>
|
||||
<p className="text-gray-500 mt-1">
|
||||
{totalActive} active, {totalPending} pending tasks
|
||||
</p>
|
||||
</div>
|
||||
{/* Header */}
|
||||
<div className="flex flex-col sm:flex-row sm:items-center sm:justify-between gap-4">
|
||||
<div>
|
||||
<h1 className="text-2xl font-bold text-gray-900 flex items-center gap-2">
|
||||
<ListChecks className="w-7 h-7 text-emerald-600" />
|
||||
Task Queue
|
||||
</h1>
|
||||
<p className="text-gray-500 mt-1">
|
||||
{totalActive} active, {totalPending} pending tasks
|
||||
</p>
|
||||
</div>
|
||||
|
||||
<div className="flex items-center gap-4">
|
||||
{/* Create Task Button */}
|
||||
<button
|
||||
onClick={() => setShowCreateModal(true)}
|
||||
className="flex items-center gap-2 px-4 py-2 bg-emerald-600 text-white rounded-lg hover:bg-emerald-700 transition-colors"
|
||||
>
|
||||
<Plus className="w-4 h-4" />
|
||||
Create Task
|
||||
</button>
|
||||
{/* Pool status indicator */}
|
||||
{poolPaused && (
|
||||
<span className="inline-flex items-center gap-1.5 px-3 py-1.5 rounded-full text-sm font-medium bg-yellow-100 text-yellow-800">
|
||||
<Square className="w-4 h-4" />
|
||||
Pool Paused
|
||||
</span>
|
||||
)}
|
||||
<span className="text-sm text-gray-400">Auto-refreshes every 15s</span>
|
||||
</div>
|
||||
<div className="flex gap-2">
|
||||
<button
|
||||
onClick={handleGenerateResync}
|
||||
disabled={actionLoading}
|
||||
className="flex items-center gap-2 px-4 py-2 bg-emerald-600 text-white rounded-lg hover:bg-emerald-700 disabled:opacity-50"
|
||||
>
|
||||
<Calendar className="w-4 h-4" />
|
||||
Generate Resync
|
||||
</button>
|
||||
<button
|
||||
onClick={handleRecoverStale}
|
||||
disabled={actionLoading}
|
||||
className="flex items-center gap-2 px-4 py-2 bg-gray-600 text-white rounded-lg hover:bg-gray-700 disabled:opacity-50"
|
||||
>
|
||||
<Zap className="w-4 h-4" />
|
||||
Recover Stale
|
||||
</button>
|
||||
<button
|
||||
onClick={fetchData}
|
||||
className="flex items-center gap-2 px-4 py-2 bg-gray-100 text-gray-700 rounded-lg hover:bg-gray-200"
|
||||
>
|
||||
<RefreshCw className="w-4 h-4" />
|
||||
Refresh
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Action Message */}
|
||||
{actionMessage && (
|
||||
<div
|
||||
className={`p-4 rounded-lg ${
|
||||
actionMessage.startsWith('Error')
|
||||
? 'bg-red-50 text-red-700'
|
||||
: 'bg-green-50 text-green-700'
|
||||
}`}
|
||||
>
|
||||
{actionMessage}
|
||||
</div>
|
||||
)}
|
||||
|
||||
{error && (
|
||||
<div className="p-4 bg-red-50 text-red-700 rounded-lg">{error}</div>
|
||||
)}
|
||||
|
||||
{/* Create Task Modal */}
|
||||
<CreateTaskModal
|
||||
isOpen={showCreateModal}
|
||||
onClose={() => setShowCreateModal(false)}
|
||||
onTaskCreated={fetchData}
|
||||
/>
|
||||
|
||||
{/* Status Summary Cards */}
|
||||
<div className="grid grid-cols-2 sm:grid-cols-3 lg:grid-cols-6 gap-4">
|
||||
{Object.entries(counts || {}).map(([status, count]) => (
|
||||
@@ -595,7 +281,7 @@ export default function TasksDashboard() {
|
||||
>
|
||||
<div className="flex items-center gap-2 mb-2">
|
||||
<span className={`p-1.5 rounded ${STATUS_COLORS[status]}`}>
|
||||
{getStatusIcon(status, poolPaused)}
|
||||
{STATUS_ICONS[status]}
|
||||
</span>
|
||||
<span className="text-sm font-medium text-gray-600 capitalize">{status}</span>
|
||||
</div>
|
||||
@@ -785,19 +471,17 @@ export default function TasksDashboard() {
|
||||
<th className="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase">
|
||||
Error
|
||||
</th>
|
||||
<th className="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase w-16">
|
||||
</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody className="divide-y divide-gray-200">
|
||||
{paginatedTasks.length === 0 ? (
|
||||
{filteredTasks.length === 0 ? (
|
||||
<tr>
|
||||
<td colSpan={9} className="px-4 py-8 text-center text-gray-500">
|
||||
<td colSpan={8} className="px-4 py-8 text-center text-gray-500">
|
||||
No tasks found
|
||||
</td>
|
||||
</tr>
|
||||
) : (
|
||||
paginatedTasks.map((task) => (
|
||||
filteredTasks.map((task) => (
|
||||
<tr key={task.id} className="hover:bg-gray-50">
|
||||
<td className="px-4 py-3 text-sm font-mono text-gray-600">#{task.id}</td>
|
||||
<td className="px-4 py-3 text-sm text-gray-900">
|
||||
@@ -812,7 +496,7 @@ export default function TasksDashboard() {
|
||||
STATUS_COLORS[task.status]
|
||||
}`}
|
||||
>
|
||||
{getStatusIcon(task.status, poolPaused)}
|
||||
{STATUS_ICONS[task.status]}
|
||||
{task.status}
|
||||
</span>
|
||||
</td>
|
||||
@@ -828,47 +512,12 @@ export default function TasksDashboard() {
|
||||
<td className="px-4 py-3 text-sm text-red-600 max-w-xs truncate">
|
||||
{task.error_message || '-'}
|
||||
</td>
|
||||
<td className="px-4 py-3">
|
||||
{(task.status === 'failed' || task.status === 'completed' || task.status === 'pending') && (
|
||||
<button
|
||||
onClick={() => handleDeleteTask(task.id)}
|
||||
className="p-1 text-gray-400 hover:text-red-500 hover:bg-red-50 rounded transition-colors"
|
||||
title="Delete task"
|
||||
>
|
||||
<Trash2 className="w-4 h-4" />
|
||||
</button>
|
||||
)}
|
||||
</td>
|
||||
</tr>
|
||||
))
|
||||
)}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
|
||||
{/* Pagination */}
|
||||
<div className="px-4 py-3 border-t border-gray-200 bg-gray-50 flex items-center justify-between">
|
||||
<div className="text-sm text-gray-500">
|
||||
Showing {page * tasksPerPage + 1} - {Math.min((page + 1) * tasksPerPage, filteredTasks.length)} of {filteredTasks.length} tasks
|
||||
</div>
|
||||
<div className="flex items-center gap-2">
|
||||
<button
|
||||
onClick={() => setPage(p => Math.max(0, p - 1))}
|
||||
disabled={page === 0}
|
||||
className="px-3 py-1 text-sm border border-gray-200 rounded hover:bg-gray-100 disabled:opacity-50 disabled:cursor-not-allowed"
|
||||
>
|
||||
<ChevronLeft className="w-4 h-4" />
|
||||
</button>
|
||||
<span className="text-sm text-gray-600">Page {page + 1} of {totalPages || 1}</span>
|
||||
<button
|
||||
onClick={() => setPage(p => p + 1)}
|
||||
disabled={page >= totalPages - 1}
|
||||
className="px-3 py-1 text-sm border border-gray-200 rounded hover:bg-gray-100 disabled:opacity-50 disabled:cursor-not-allowed"
|
||||
>
|
||||
<ChevronRight className="w-4 h-4" />
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</Layout>
|
||||
|
||||
@@ -18,11 +18,6 @@ import {
|
||||
Server,
|
||||
MapPin,
|
||||
Trash2,
|
||||
PowerOff,
|
||||
Undo2,
|
||||
Plus,
|
||||
MemoryStick,
|
||||
AlertTriangle,
|
||||
} from 'lucide-react';
|
||||
|
||||
// Worker from registry
|
||||
@@ -41,25 +36,16 @@ interface Worker {
|
||||
tasks_completed: number;
|
||||
tasks_failed: number;
|
||||
current_task_id: number | null;
|
||||
current_task_ids?: number[]; // Multiple concurrent tasks
|
||||
active_task_count?: number;
|
||||
max_concurrent_tasks?: number;
|
||||
health_status: string;
|
||||
seconds_since_heartbeat: number;
|
||||
decommission_requested?: boolean;
|
||||
decommission_reason?: string;
|
||||
metadata: {
|
||||
cpu?: number;
|
||||
memory?: number;
|
||||
memoryTotal?: number;
|
||||
memory_mb?: number;
|
||||
memory_total_mb?: number;
|
||||
memory_percent?: number; // NEW: memory as percentage
|
||||
cpu_user_ms?: number;
|
||||
cpu_system_ms?: number;
|
||||
cpu_percent?: number; // NEW: CPU percentage
|
||||
is_backing_off?: boolean; // NEW: resource backoff state
|
||||
backoff_reason?: string; // NEW: why backing off
|
||||
proxy_location?: {
|
||||
city?: string;
|
||||
state?: string;
|
||||
@@ -223,259 +209,26 @@ function HealthBadge({ status, healthStatus }: { status: string; healthStatus: s
|
||||
);
|
||||
}
|
||||
|
||||
// Format CPU time for display
|
||||
function formatCpuTime(ms: number): string {
|
||||
if (ms < 1000) return `${ms}ms`;
|
||||
if (ms < 60000) return `${(ms / 1000).toFixed(1)}s`;
|
||||
return `${(ms / 60000).toFixed(1)}m`;
|
||||
}
|
||||
|
||||
// Resource usage badge showing memory%, CPU%, and backoff status
|
||||
function ResourceBadge({ worker }: { worker: Worker }) {
|
||||
const memPercent = worker.metadata?.memory_percent;
|
||||
const cpuPercent = worker.metadata?.cpu_percent;
|
||||
const isBackingOff = worker.metadata?.is_backing_off;
|
||||
const backoffReason = worker.metadata?.backoff_reason;
|
||||
|
||||
if (isBackingOff) {
|
||||
return (
|
||||
<div className="flex items-center gap-1.5" title={backoffReason || 'Backing off due to resource pressure'}>
|
||||
<AlertTriangle className="w-4 h-4 text-amber-500 animate-pulse" />
|
||||
<span className="text-xs text-amber-600 font-medium">Backing off</span>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
// No data yet
|
||||
if (memPercent === undefined && cpuPercent === undefined) {
|
||||
return <span className="text-gray-400 text-xs">-</span>;
|
||||
}
|
||||
|
||||
// Color based on usage level
|
||||
const getColor = (pct: number) => {
|
||||
if (pct >= 90) return 'text-red-600';
|
||||
if (pct >= 75) return 'text-amber-600';
|
||||
if (pct >= 50) return 'text-yellow-600';
|
||||
return 'text-emerald-600';
|
||||
};
|
||||
|
||||
return (
|
||||
<div className="flex flex-col gap-0.5 text-xs">
|
||||
{memPercent !== undefined && (
|
||||
<div className="flex items-center gap-1" title={`Memory: ${worker.metadata?.memory_mb || 0}MB / ${worker.metadata?.memory_total_mb || 0}MB`}>
|
||||
<MemoryStick className={`w-3 h-3 ${getColor(memPercent)}`} />
|
||||
<span className={getColor(memPercent)}>{memPercent}%</span>
|
||||
</div>
|
||||
)}
|
||||
{cpuPercent !== undefined && (
|
||||
<div className="flex items-center gap-1">
|
||||
<Cpu className={`w-3 h-3 ${getColor(cpuPercent)}`} />
|
||||
<span className={getColor(cpuPercent)}>{cpuPercent}%</span>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
// Task count badge showing active/max concurrent tasks
|
||||
function TaskCountBadge({ worker, tasks }: { worker: Worker; tasks: Task[] }) {
|
||||
const activeCount = worker.active_task_count ?? (worker.current_task_id ? 1 : 0);
|
||||
const maxCount = worker.max_concurrent_tasks ?? 1;
|
||||
const taskIds = worker.current_task_ids ?? (worker.current_task_id ? [worker.current_task_id] : []);
|
||||
|
||||
if (activeCount === 0) {
|
||||
return <span className="text-gray-400 text-sm">Idle</span>;
|
||||
}
|
||||
|
||||
// Get task names for tooltip
|
||||
const taskNames = taskIds.map(id => {
|
||||
const task = tasks.find(t => t.id === id);
|
||||
return task ? `#${id}: ${task.role}${task.dispensary_name ? ` (${task.dispensary_name})` : ''}` : `#${id}`;
|
||||
}).join('\n');
|
||||
|
||||
return (
|
||||
<div className="flex items-center gap-2" title={taskNames}>
|
||||
<span className="text-sm font-medium text-blue-600">
|
||||
{activeCount}/{maxCount} tasks
|
||||
</span>
|
||||
{taskIds.length === 1 && (
|
||||
<span className="text-xs text-gray-500">#{taskIds[0]}</span>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
// Pod visualization - shows pod as hub with worker nodes radiating out
|
||||
function PodVisualization({
|
||||
podName,
|
||||
workers,
|
||||
isSelected = false,
|
||||
onSelect
|
||||
}: {
|
||||
podName: string;
|
||||
workers: Worker[];
|
||||
isSelected?: boolean;
|
||||
onSelect?: () => void;
|
||||
}) {
|
||||
const busyCount = workers.filter(w => w.current_task_id !== null).length;
|
||||
const allBusy = busyCount === workers.length;
|
||||
const allIdle = busyCount === 0;
|
||||
|
||||
// Aggregate resource stats for the pod
|
||||
const totalMemoryMb = workers.reduce((sum, w) => sum + (w.metadata?.memory_mb || 0), 0);
|
||||
const totalCpuUserMs = workers.reduce((sum, w) => sum + (w.metadata?.cpu_user_ms || 0), 0);
|
||||
const totalCpuSystemMs = workers.reduce((sum, w) => sum + (w.metadata?.cpu_system_ms || 0), 0);
|
||||
const totalCompleted = workers.reduce((sum, w) => sum + w.tasks_completed, 0);
|
||||
const totalFailed = workers.reduce((sum, w) => sum + w.tasks_failed, 0);
|
||||
|
||||
// Pod color based on worker status
|
||||
const podColor = allBusy ? 'bg-blue-500' : allIdle ? 'bg-emerald-500' : 'bg-yellow-500';
|
||||
const podBorder = allBusy ? 'border-blue-400' : allIdle ? 'border-emerald-400' : 'border-yellow-400';
|
||||
const podGlow = allBusy ? 'shadow-blue-200' : allIdle ? 'shadow-emerald-200' : 'shadow-yellow-200';
|
||||
|
||||
// Selection ring
|
||||
const selectionRing = isSelected ? 'ring-4 ring-purple-400 ring-offset-2' : '';
|
||||
|
||||
// Build pod tooltip
|
||||
const podTooltip = [
|
||||
`Pod: ${podName}`,
|
||||
`Workers: ${busyCount}/${workers.length} busy`,
|
||||
`Memory: ${totalMemoryMb} MB (RSS)`,
|
||||
`CPU: ${formatCpuTime(totalCpuUserMs)} user, ${formatCpuTime(totalCpuSystemMs)} system`,
|
||||
`Tasks: ${totalCompleted} completed, ${totalFailed} failed`,
|
||||
'Click to select',
|
||||
].join('\n');
|
||||
|
||||
return (
|
||||
<div className="flex flex-col items-center p-4">
|
||||
{/* Pod hub */}
|
||||
<div className="relative">
|
||||
{/* Center pod circle */}
|
||||
<div
|
||||
className={`w-20 h-20 rounded-full ${podColor} border-4 ${podBorder} shadow-lg ${podGlow} ${selectionRing} flex items-center justify-center text-white font-bold text-xs text-center leading-tight z-10 relative cursor-pointer hover:scale-105 transition-all`}
|
||||
title={podTooltip}
|
||||
onClick={onSelect}
|
||||
>
|
||||
<span className="px-1">{podName}</span>
|
||||
</div>
|
||||
|
||||
{/* Worker nodes radiating out */}
|
||||
{workers.map((worker, index) => {
|
||||
const angle = (index * 360) / workers.length - 90; // Start from top
|
||||
const radians = (angle * Math.PI) / 180;
|
||||
const radius = 55; // Distance from center
|
||||
const x = Math.cos(radians) * radius;
|
||||
const y = Math.sin(radians) * radius;
|
||||
|
||||
const isBusy = worker.current_task_id !== null;
|
||||
const isDecommissioning = worker.decommission_requested;
|
||||
const isBackingOff = worker.metadata?.is_backing_off;
|
||||
// Color priority: decommissioning > backing off > busy > idle
|
||||
const workerColor = isDecommissioning ? 'bg-orange-500' : isBackingOff ? 'bg-yellow-500' : isBusy ? 'bg-blue-500' : 'bg-emerald-500';
|
||||
const workerBorder = isDecommissioning ? 'border-orange-300' : isBackingOff ? 'border-yellow-300' : isBusy ? 'border-blue-300' : 'border-emerald-300';
|
||||
|
||||
// Line from center to worker
|
||||
const lineLength = radius - 10;
|
||||
const lineX = Math.cos(radians) * (lineLength / 2 + 10);
|
||||
const lineY = Math.sin(radians) * (lineLength / 2 + 10);
|
||||
|
||||
return (
|
||||
<div key={worker.id}>
|
||||
{/* Connection line */}
|
||||
<div
|
||||
className={`absolute w-0.5 ${isDecommissioning ? 'bg-orange-300' : isBackingOff ? 'bg-yellow-300' : isBusy ? 'bg-blue-300' : 'bg-emerald-300'}`}
|
||||
style={{
|
||||
height: `${lineLength}px`,
|
||||
left: '50%',
|
||||
top: '50%',
|
||||
transform: `translate(-50%, -50%) translate(${lineX}px, ${lineY}px) rotate(${angle + 90}deg)`,
|
||||
transformOrigin: 'center',
|
||||
}}
|
||||
/>
|
||||
{/* Worker node */}
|
||||
<div
|
||||
className={`absolute w-6 h-6 rounded-full ${workerColor} border-2 ${workerBorder} flex items-center justify-center text-white text-xs font-bold cursor-pointer hover:scale-110 transition-transform`}
|
||||
style={{
|
||||
left: '50%',
|
||||
top: '50%',
|
||||
transform: `translate(-50%, -50%) translate(${x}px, ${y}px)`,
|
||||
}}
|
||||
title={`${worker.friendly_name}\nStatus: ${isDecommissioning ? 'Stopping after current task' : isBackingOff ? `Backing off: ${worker.metadata?.backoff_reason || 'resource pressure'}` : isBusy ? `Working on task #${worker.current_task_id}` : 'Ready - waiting for tasks'}\nMemory: ${worker.metadata?.memory_mb || 0} MB (${worker.metadata?.memory_percent || 0}%)\nCPU: ${formatCpuTime(worker.metadata?.cpu_user_ms || 0)} user, ${formatCpuTime(worker.metadata?.cpu_system_ms || 0)} sys\nCompleted: ${worker.tasks_completed} | Failed: ${worker.tasks_failed}\nLast heartbeat: ${new Date(worker.last_heartbeat_at).toLocaleTimeString()}`}
|
||||
>
|
||||
{index + 1}
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
})}
|
||||
</div>
|
||||
|
||||
{/* Pod stats */}
|
||||
<div className="mt-12 text-center">
|
||||
<p className="text-xs text-gray-500">
|
||||
{busyCount}/{workers.length} busy
|
||||
</p>
|
||||
{isSelected && (
|
||||
<p className="text-xs text-purple-600 font-medium mt-1">Selected</p>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
// Group workers by pod
|
||||
/**
 * Buckets workers by the pod they report.
 *
 * Workers with a falsy `pod_name` are collected under the key 'Unknown'.
 * Map keys appear in the order each pod is first seen, and the workers in
 * each bucket keep their input order.
 *
 * @param workers - Workers to group.
 * @returns Map from pod name to the workers reporting that pod.
 */
function groupWorkersByPod(workers: Worker[]): Map<string, Worker[]> {
  return workers.reduce((byPod, worker) => {
    const key = worker.pod_name || 'Unknown';
    const members = byPod.get(key);
    if (members === undefined) {
      byPod.set(key, [worker]);
    } else {
      members.push(worker);
    }
    return byPod;
  }, new Map<string, Worker[]>());
}
|
||||
|
||||
// Format estimated time remaining
|
||||
/**
 * Renders a duration given in hours as a short human-readable estimate,
 * choosing minutes, hours, days or weeks.
 *
 * Each unit threshold is checked against the value as it will be displayed
 * (after rounding), so a value just under a boundary rolls over to the next
 * unit instead of printing "60 minutes", "24.0 hours" or "7.0 days".
 *
 * @param hours - Estimated duration in hours.
 * @returns Whole minutes below one hour; otherwise one decimal place of
 *          hours, days or weeks.
 */
function formatEstimatedTime(hours: number): string {
  const wholeMinutes = Math.round(hours * 60);
  if (wholeMinutes < 60) {
    return `${wholeMinutes} minutes`;
  }

  // Compare using the rounded text so the label never shows a full next unit.
  const hoursLabel = hours.toFixed(1);
  if (Number(hoursLabel) < 24) {
    return `${hoursLabel} hours`;
  }

  const days = hours / 24;
  const daysLabel = days.toFixed(1);
  if (Number(daysLabel) < 7) {
    return `${daysLabel} days`;
  }

  return `${(days / 7).toFixed(1)} weeks`;
}
|
||||
|
||||
export function WorkersDashboard() {
|
||||
const [workers, setWorkers] = useState<Worker[]>([]);
|
||||
const [tasks, setTasks] = useState<Task[]>([]);
|
||||
const [pendingTaskCount, setPendingTaskCount] = useState<number>(0);
|
||||
const [loading, setLoading] = useState(true);
|
||||
const [error, setError] = useState<string | null>(null);
|
||||
|
||||
// Pod selection state
|
||||
const [selectedPod, setSelectedPod] = useState<string | null>(null);
|
||||
|
||||
// Pagination
|
||||
const [page, setPage] = useState(0);
|
||||
const workersPerPage = 15;
|
||||
|
||||
const fetchData = useCallback(async () => {
|
||||
try {
|
||||
// Fetch workers from registry, running tasks, and task counts
|
||||
const [workersRes, tasksRes, countsRes] = await Promise.all([
|
||||
api.get('/api/worker-registry/workers'),
|
||||
api.get('/api/tasks?status=running&limit=100'),
|
||||
api.get('/api/tasks/counts'),
|
||||
]);
|
||||
// Fetch workers from registry
|
||||
const workersRes = await api.get('/api/worker-registry/workers');
|
||||
|
||||
// Fetch running tasks to get current task details
|
||||
const tasksRes = await api.get('/api/tasks?status=running&limit=100');
|
||||
|
||||
setWorkers(workersRes.data.workers || []);
|
||||
setTasks(tasksRes.data.tasks || []);
|
||||
setPendingTaskCount(countsRes.data?.pending || 0);
|
||||
setError(null);
|
||||
} catch (err: any) {
|
||||
console.error('Fetch error:', err);
|
||||
@@ -485,6 +238,16 @@ export function WorkersDashboard() {
|
||||
}
|
||||
}, []);
|
||||
|
||||
// Cleanup stale workers
|
||||
// Posts a cleanup request with a 2-minute stale threshold to the worker
// registry, then reloads the dashboard data.
const handleCleanupStale = async () => {
  try {
    const payload = { stale_threshold_minutes: 2 };
    await api.post('/api/worker-registry/cleanup', payload);
    fetchData();
  } catch (err: any) {
    // Failures are only logged; nothing is shown to the user.
    console.error('Cleanup error:', err);
  }
};
|
||||
|
||||
// Remove a single worker
|
||||
const handleRemoveWorker = async (workerId: string) => {
|
||||
if (!confirm('Remove this worker from the registry?')) return;
|
||||
@@ -496,46 +259,6 @@ export function WorkersDashboard() {
|
||||
}
|
||||
};
|
||||
|
||||
// Decommission a worker (graceful shutdown after current task)
|
||||
// Asks for confirmation, then posts a decommission request for the worker.
// Reloads the dashboard data only when the response reports success; on
// error, logs it and alerts the user.
const handleDecommissionWorker = async (workerId: string, friendlyName: string) => {
  const confirmed = confirm(`Decommission ${friendlyName}? Worker will stop after completing its current task.`);
  if (!confirmed) {
    return;
  }

  try {
    const { data } = await api.post(`/api/worker-registry/workers/${workerId}/decommission`, {
      reason: 'Manual decommission from admin UI'
    });
    if (data.success) {
      fetchData();
    }
  } catch (err: any) {
    console.error('Decommission error:', err);
    // Prefer the server-provided message when the response carries one.
    alert(err.response?.data?.error || 'Failed to decommission worker');
  }
};
|
||||
|
||||
// Cancel decommission
|
||||
// Posts a cancel-decommission request for the worker, then reloads the
// dashboard data.
const handleCancelDecommission = async (workerId: string) => {
  try {
    const endpoint = `/api/worker-registry/workers/${workerId}/cancel-decommission`;
    await api.post(endpoint);
    fetchData();
  } catch (err: any) {
    // Failures are only logged; nothing is shown to the user.
    console.error('Cancel decommission error:', err);
  }
};
|
||||
|
||||
// Add a worker by scaling up the K8s deployment
|
||||
// Posts a scale-up request to the K8s workers endpoint. When the response
// reports success, schedules a data reload 2 seconds later; on error, logs
// it and alerts the user.
const handleAddWorker = async () => {
  try {
    const response = await api.post('/api/workers/k8s/scale-up');
    if (!response.data.success) {
      return;
    }
    // Refresh after a short delay to see the new worker
    setTimeout(fetchData, 2000);
  } catch (err: any) {
    console.error('Add worker error:', err);
    alert(err.response?.data?.error || 'Failed to add worker. K8s scaling may not be available.');
  }
};
|
||||
|
||||
useEffect(() => {
|
||||
fetchData();
|
||||
const interval = setInterval(fetchData, 5000);
|
||||
@@ -580,9 +303,25 @@ export function WorkersDashboard() {
|
||||
<h1 className="text-2xl font-bold text-gray-900">Workers</h1>
|
||||
<p className="text-gray-500 mt-1">
|
||||
{workers.length} registered workers ({busyWorkers.length} busy, {idleWorkers.length} idle)
|
||||
<span className="text-xs text-gray-400 ml-2">(auto-refresh 5s)</span>
|
||||
</p>
|
||||
</div>
|
||||
<div className="flex items-center gap-2">
|
||||
<button
|
||||
onClick={handleCleanupStale}
|
||||
className="flex items-center gap-2 px-4 py-2 bg-gray-100 text-gray-700 rounded-lg hover:bg-gray-200 transition-colors"
|
||||
title="Mark stale workers (no heartbeat > 2 min) as offline"
|
||||
>
|
||||
<Trash2 className="w-4 h-4" />
|
||||
Cleanup Stale
|
||||
</button>
|
||||
<button
|
||||
onClick={() => fetchData()}
|
||||
className="flex items-center gap-2 px-4 py-2 bg-emerald-600 text-white rounded-lg hover:bg-emerald-700 transition-colors"
|
||||
>
|
||||
<RefreshCw className="w-4 h-4" />
|
||||
Refresh
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{error && (
|
||||
@@ -650,197 +389,6 @@ export function WorkersDashboard() {
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Estimated Completion Time Card */}
|
||||
{pendingTaskCount > 0 && activeWorkers.length > 0 && (() => {
|
||||
// Calculate average task rate across all workers
|
||||
const totalHoursUp = activeWorkers.reduce((sum, w) => {
|
||||
if (!w.started_at) return sum;
|
||||
const start = new Date(w.started_at);
|
||||
const now = new Date();
|
||||
return sum + (now.getTime() - start.getTime()) / (1000 * 60 * 60);
|
||||
}, 0);
|
||||
|
||||
const totalTasksDone = totalCompleted + totalFailed;
|
||||
const avgTasksPerHour = totalHoursUp > 0.1 ? totalTasksDone / totalHoursUp : 0;
|
||||
const estimatedHours = avgTasksPerHour > 0 ? pendingTaskCount / avgTasksPerHour : null;
|
||||
|
||||
return (
|
||||
<div className="bg-gradient-to-r from-amber-50 to-orange-50 rounded-lg border border-amber-200 p-4">
|
||||
<div className="flex items-center justify-between">
|
||||
<div className="flex items-center gap-3">
|
||||
<div className="w-10 h-10 bg-amber-100 rounded-lg flex items-center justify-center">
|
||||
<Clock className="w-5 h-5 text-amber-600" />
|
||||
</div>
|
||||
<div>
|
||||
<p className="text-sm text-amber-700 font-medium">Estimated Time to Complete Queue</p>
|
||||
<p className="text-2xl font-bold text-amber-900">
|
||||
{estimatedHours !== null ? formatEstimatedTime(estimatedHours) : 'Calculating...'}
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
<div className="text-right text-sm text-amber-700">
|
||||
<p><span className="font-semibold">{pendingTaskCount}</span> pending tasks</p>
|
||||
<p><span className="font-semibold">{activeWorkers.length}</span> active workers</p>
|
||||
{avgTasksPerHour > 0 && (
|
||||
<p className="text-xs text-amber-600 mt-1">
|
||||
~{avgTasksPerHour.toFixed(1)} tasks/hour
|
||||
</p>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
})()}
|
||||
|
||||
{/* Worker Pods Visualization */}
|
||||
<div className="bg-white rounded-lg border border-gray-200 overflow-hidden">
|
||||
<div className="px-4 py-3 border-b border-gray-200 bg-gray-50">
|
||||
<div className="flex items-center justify-between">
|
||||
<div>
|
||||
<h3 className="text-sm font-semibold text-gray-900 flex items-center gap-2">
|
||||
<Zap className="w-4 h-4 text-emerald-500" />
|
||||
Worker Pods ({Array.from(groupWorkersByPod(workers)).length} pods, {activeWorkers.length} workers)
|
||||
</h3>
|
||||
<p className="text-xs text-gray-500 mt-0.5">
|
||||
<span className="inline-flex items-center gap-1"><span className="w-2 h-2 rounded-full bg-emerald-500"></span> ready</span>
|
||||
<span className="mx-2">|</span>
|
||||
<span className="inline-flex items-center gap-1"><span className="w-2 h-2 rounded-full bg-blue-500"></span> busy</span>
|
||||
<span className="mx-2">|</span>
|
||||
<span className="inline-flex items-center gap-1"><span className="w-2 h-2 rounded-full bg-yellow-500"></span> backing off</span>
|
||||
<span className="mx-2">|</span>
|
||||
<span className="inline-flex items-center gap-1"><span className="w-2 h-2 rounded-full bg-orange-500"></span> stopping</span>
|
||||
</p>
|
||||
</div>
|
||||
<div className="text-sm text-gray-500">
|
||||
{busyWorkers.length} busy, {activeWorkers.length - busyWorkers.length} idle
|
||||
{selectedPod && (
|
||||
<button
|
||||
onClick={() => setSelectedPod(null)}
|
||||
className="ml-3 text-xs text-purple-600 hover:text-purple-800 underline"
|
||||
>
|
||||
Clear selection
|
||||
</button>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{workers.length === 0 ? (
|
||||
<div className="px-4 py-12 text-center text-gray-500">
|
||||
<Users className="w-12 h-12 mx-auto mb-3 text-gray-300" />
|
||||
<p className="font-medium">No worker pods running</p>
|
||||
<p className="text-xs mt-1">Start pods to process tasks from the queue</p>
|
||||
</div>
|
||||
) : (
|
||||
<div className="p-6">
|
||||
<div className="flex flex-wrap justify-center gap-8">
|
||||
{Array.from(groupWorkersByPod(workers)).map(([podName, podWorkers]) => (
|
||||
<PodVisualization
|
||||
key={podName}
|
||||
podName={podName}
|
||||
workers={podWorkers}
|
||||
isSelected={selectedPod === podName}
|
||||
onSelect={() => setSelectedPod(selectedPod === podName ? null : podName)}
|
||||
/>
|
||||
))}
|
||||
</div>
|
||||
|
||||
{/* Selected Pod Control Panel */}
|
||||
{selectedPod && (() => {
|
||||
const podWorkers = groupWorkersByPod(workers).get(selectedPod) || [];
|
||||
const busyInPod = podWorkers.filter(w => w.current_task_id !== null).length;
|
||||
const idleInPod = podWorkers.filter(w => w.current_task_id === null && !w.decommission_requested).length;
|
||||
const stoppingInPod = podWorkers.filter(w => w.decommission_requested).length;
|
||||
|
||||
return (
|
||||
<div className="mt-6 border-t border-gray-200 pt-6">
|
||||
<div className="bg-purple-50 rounded-lg border border-purple-200 p-4">
|
||||
<div className="flex items-center justify-between mb-4">
|
||||
<div className="flex items-center gap-3">
|
||||
<div className="w-10 h-10 bg-purple-100 rounded-lg flex items-center justify-center">
|
||||
<Server className="w-5 h-5 text-purple-600" />
|
||||
</div>
|
||||
<div>
|
||||
<h4 className="font-semibold text-purple-900">{selectedPod}</h4>
|
||||
<p className="text-xs text-purple-600">
|
||||
{podWorkers.length} workers: {busyInPod} busy, {idleInPod} idle{stoppingInPod > 0 && `, ${stoppingInPod} stopping`}
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Worker list in selected pod */}
|
||||
<div className="space-y-2">
|
||||
{podWorkers.map((worker) => {
|
||||
const isBusy = worker.current_task_id !== null;
|
||||
const isDecommissioning = worker.decommission_requested;
|
||||
|
||||
return (
|
||||
<div key={worker.id} className="flex items-center justify-between bg-white rounded-lg px-3 py-2 border border-purple-100">
|
||||
<div className="flex items-center gap-3">
|
||||
<div className={`w-8 h-8 rounded-full flex items-center justify-center text-white text-sm font-bold ${
|
||||
isDecommissioning ? 'bg-orange-500' :
|
||||
isBusy ? 'bg-blue-500' : 'bg-emerald-500'
|
||||
}`}>
|
||||
{worker.friendly_name?.charAt(0) || '?'}
|
||||
</div>
|
||||
<div>
|
||||
<p className="text-sm font-medium text-gray-900">{worker.friendly_name}</p>
|
||||
<p className="text-xs text-gray-500">
|
||||
{isDecommissioning ? (
|
||||
<span className="text-orange-600">Stopping after current task...</span>
|
||||
) : isBusy ? (
|
||||
<span className="text-blue-600">Working on task #{worker.current_task_id}</span>
|
||||
) : (
|
||||
<span className="text-emerald-600">Idle - ready for tasks</span>
|
||||
)}
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
<div className="flex items-center gap-2">
|
||||
{isDecommissioning ? (
|
||||
<button
|
||||
onClick={() => handleCancelDecommission(worker.worker_id)}
|
||||
className="flex items-center gap-1.5 px-3 py-1.5 text-sm bg-white border border-gray-300 text-gray-700 rounded-lg hover:bg-gray-50 transition-colors"
|
||||
title="Cancel decommission"
|
||||
>
|
||||
<Undo2 className="w-4 h-4" />
|
||||
Cancel
|
||||
</button>
|
||||
) : (
|
||||
<button
|
||||
onClick={() => handleDecommissionWorker(worker.worker_id, worker.friendly_name)}
|
||||
className="flex items-center gap-1.5 px-3 py-1.5 text-sm bg-orange-100 text-orange-700 rounded-lg hover:bg-orange-200 transition-colors"
|
||||
title={isBusy ? 'Worker will stop after completing current task' : 'Remove idle worker'}
|
||||
>
|
||||
<PowerOff className="w-4 h-4" />
|
||||
{isBusy ? 'Stop after task' : 'Remove'}
|
||||
</button>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
})}
|
||||
</div>
|
||||
|
||||
{/* Add Worker button */}
|
||||
<div className="mt-4 pt-4 border-t border-purple-200">
|
||||
<button
|
||||
onClick={handleAddWorker}
|
||||
className="flex items-center gap-1.5 px-3 py-2 text-sm bg-emerald-100 text-emerald-700 rounded-lg hover:bg-emerald-200 transition-colors"
|
||||
>
|
||||
<Plus className="w-4 h-4" />
|
||||
Add Worker
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
})()}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
|
||||
{/* Workers Table */}
|
||||
<div className="bg-white rounded-lg border border-gray-200 overflow-hidden">
|
||||
<div className="px-4 py-3 border-b border-gray-200 bg-gray-50 flex items-center justify-between">
|
||||
@@ -883,10 +431,10 @@ export function WorkersDashboard() {
|
||||
<th className="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase">Worker</th>
|
||||
<th className="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase">Role</th>
|
||||
<th className="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase">Status</th>
|
||||
<th className="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase">Resources</th>
|
||||
<th className="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase">Tasks</th>
|
||||
<th className="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase">Exit Location</th>
|
||||
<th className="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase">Current Task</th>
|
||||
<th className="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase">Duration</th>
|
||||
<th className="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase">Throughput</th>
|
||||
<th className="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase">Utilization</th>
|
||||
<th className="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase">Heartbeat</th>
|
||||
<th className="px-4 py-3 text-left text-xs font-medium text-gray-500 uppercase"></th>
|
||||
</tr>
|
||||
@@ -901,29 +449,16 @@ export function WorkersDashboard() {
|
||||
<tr key={worker.id} className="hover:bg-gray-50">
|
||||
<td className="px-4 py-3">
|
||||
<div className="flex items-center gap-3">
|
||||
<div className={`w-10 h-10 rounded-full flex items-center justify-center text-white font-bold text-sm relative ${
|
||||
worker.decommission_requested ? 'bg-orange-500' :
|
||||
<div className={`w-10 h-10 rounded-full flex items-center justify-center text-white font-bold text-sm ${
|
||||
worker.health_status === 'offline' ? 'bg-gray-400' :
|
||||
worker.health_status === 'stale' ? 'bg-yellow-500' :
|
||||
worker.health_status === 'busy' ? 'bg-blue-500' :
|
||||
'bg-emerald-500'
|
||||
}`}>
|
||||
{worker.friendly_name?.charAt(0) || '?'}
|
||||
{worker.decommission_requested && (
|
||||
<div className="absolute -top-1 -right-1 w-4 h-4 bg-red-500 rounded-full flex items-center justify-center">
|
||||
<PowerOff className="w-2.5 h-2.5 text-white" />
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
<div>
|
||||
<p className="font-medium text-gray-900 flex items-center gap-1.5">
|
||||
{worker.friendly_name}
|
||||
{worker.decommission_requested && (
|
||||
<span className="text-xs text-orange-600 bg-orange-100 px-1.5 py-0.5 rounded" title={worker.decommission_reason || 'Pending decommission'}>
|
||||
stopping
|
||||
</span>
|
||||
)}
|
||||
</p>
|
||||
<p className="font-medium text-gray-900">{worker.friendly_name}</p>
|
||||
<p className="text-xs text-gray-400 font-mono">{worker.worker_id.slice(0, 20)}...</p>
|
||||
</div>
|
||||
</div>
|
||||
@@ -935,10 +470,45 @@ export function WorkersDashboard() {
|
||||
<HealthBadge status={worker.status} healthStatus={worker.health_status} />
|
||||
</td>
|
||||
<td className="px-4 py-3">
|
||||
<ResourceBadge worker={worker} />
|
||||
{(() => {
|
||||
const loc = worker.metadata?.proxy_location;
|
||||
if (!loc) {
|
||||
return <span className="text-gray-400 text-sm">-</span>;
|
||||
}
|
||||
const parts = [loc.city, loc.state, loc.country].filter(Boolean);
|
||||
if (parts.length === 0) {
|
||||
return loc.isRotating ? (
|
||||
<span className="text-xs text-purple-600 font-medium" title="Rotating proxy - exit location varies per request">
|
||||
Rotating
|
||||
</span>
|
||||
) : (
|
||||
<span className="text-gray-400 text-sm">Unknown</span>
|
||||
);
|
||||
}
|
||||
return (
|
||||
<div className="flex items-center gap-1.5" title={loc.timezone || ''}>
|
||||
<MapPin className="w-3 h-3 text-gray-400" />
|
||||
<span className="text-sm text-gray-700">
|
||||
{parts.join(', ')}
|
||||
</span>
|
||||
{loc.isRotating && (
|
||||
<span className="text-xs text-purple-500" title="Rotating proxy">*</span>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
})()}
|
||||
</td>
|
||||
<td className="px-4 py-3">
|
||||
<TaskCountBadge worker={worker} tasks={tasks} />
|
||||
{worker.current_task_id ? (
|
||||
<div>
|
||||
<span className="text-sm text-gray-900">Task #{worker.current_task_id}</span>
|
||||
{currentTask?.dispensary_name && (
|
||||
<p className="text-xs text-gray-500">{currentTask.dispensary_name}</p>
|
||||
)}
|
||||
</div>
|
||||
) : (
|
||||
<span className="text-gray-400 text-sm">Idle</span>
|
||||
)}
|
||||
</td>
|
||||
<td className="px-4 py-3">
|
||||
{currentTask?.started_at ? (
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
|
||||
import { useState, useEffect } from 'react';
|
||||
import { api } from '../../../lib/api';
|
||||
import { Building2, Tag, Globe, Target, FileText, RefreshCw, Sparkles, Loader2, AlertCircle } from 'lucide-react';
|
||||
import { Building2, Tag, Globe, Target, FileText, RefreshCw, Sparkles, Loader2 } from 'lucide-react';
|
||||
|
||||
interface SeoPage {
|
||||
id: number;
|
||||
@@ -47,31 +47,11 @@ export function PagesTab() {
|
||||
const [search, setSearch] = useState('');
|
||||
const [syncing, setSyncing] = useState(false);
|
||||
const [generatingId, setGeneratingId] = useState<number | null>(null);
|
||||
const [hasActiveAiProvider, setHasActiveAiProvider] = useState<boolean | null>(null);
|
||||
|
||||
useEffect(() => {
|
||||
loadPages();
|
||||
checkAiProvider();
|
||||
}, [typeFilter, search]);
|
||||
|
||||
async function checkAiProvider() {
|
||||
try {
|
||||
const data = await api.getSettings();
|
||||
const settings = data.settings || [];
|
||||
// Check if either Anthropic or OpenAI is configured with an API key AND enabled
|
||||
const anthropicKey = settings.find((s: any) => s.key === 'anthropic_api_key')?.value;
|
||||
const anthropicEnabled = settings.find((s: any) => s.key === 'anthropic_enabled')?.value === 'true';
|
||||
const openaiKey = settings.find((s: any) => s.key === 'openai_api_key')?.value;
|
||||
const openaiEnabled = settings.find((s: any) => s.key === 'openai_enabled')?.value === 'true';
|
||||
|
||||
const hasProvider = (anthropicKey && anthropicEnabled) || (openaiKey && openaiEnabled);
|
||||
setHasActiveAiProvider(!!hasProvider);
|
||||
} catch (error) {
|
||||
console.error('Failed to check AI provider:', error);
|
||||
setHasActiveAiProvider(false);
|
||||
}
|
||||
}
|
||||
|
||||
async function loadPages() {
|
||||
setLoading(true);
|
||||
try {
|
||||
@@ -208,18 +188,12 @@ export function PagesTab() {
|
||||
<td className="px-3 sm:px-4 py-3">
|
||||
<button
|
||||
onClick={() => handleGenerate(page.id)}
|
||||
disabled={generatingId === page.id || hasActiveAiProvider === false}
|
||||
className={`flex items-center gap-1 px-2 sm:px-3 py-1.5 text-xs font-medium rounded-lg disabled:cursor-not-allowed ${
|
||||
hasActiveAiProvider === false
|
||||
? 'bg-gray-100 text-gray-400'
|
||||
: 'bg-purple-50 text-purple-700 hover:bg-purple-100 disabled:opacity-50'
|
||||
}`}
|
||||
title={hasActiveAiProvider === false ? 'No Active AI Provider' : 'Generate content'}
|
||||
disabled={generatingId === page.id}
|
||||
className="flex items-center gap-1 px-2 sm:px-3 py-1.5 text-xs font-medium bg-purple-50 text-purple-700 rounded-lg hover:bg-purple-100 disabled:opacity-50"
|
||||
title="Generate content"
|
||||
>
|
||||
{generatingId === page.id ? (
|
||||
<Loader2 className="w-3.5 h-3.5 animate-spin" />
|
||||
) : hasActiveAiProvider === false ? (
|
||||
<AlertCircle className="w-3.5 h-3.5" />
|
||||
) : (
|
||||
<Sparkles className="w-3.5 h-3.5" />
|
||||
)}
|
||||
|
||||
@@ -7,6 +7,16 @@
|
||||
"src": "favicon.ico",
|
||||
"sizes": "64x64 32x32 24x24 16x16",
|
||||
"type": "image/x-icon"
|
||||
},
|
||||
{
|
||||
"src": "logo192.png",
|
||||
"type": "image/png",
|
||||
"sizes": "192x192"
|
||||
},
|
||||
{
|
||||
"src": "logo512.png",
|
||||
"type": "image/png",
|
||||
"sizes": "512x512"
|
||||
}
|
||||
],
|
||||
"start_url": ".",
|
||||
|
||||
@@ -373,12 +373,10 @@ export function mapCategoryForUI(apiCategory) {
|
||||
* Map API brand to UI-compatible format
|
||||
*/
|
||||
export function mapBrandForUI(apiBrand) {
|
||||
// API returns 'brand' field (see /api/v1/brands endpoint)
|
||||
const brandName = apiBrand.brand || apiBrand.brand_name || '';
|
||||
return {
|
||||
id: brandName,
|
||||
name: brandName,
|
||||
slug: brandName ? brandName.toLowerCase().replace(/\s+/g, '-') : '',
|
||||
id: apiBrand.brand_name,
|
||||
name: apiBrand.brand_name,
|
||||
slug: apiBrand.brand_name?.toLowerCase().replace(/\s+/g, '-'),
|
||||
logo: apiBrand.brand_logo_url || null,
|
||||
productCount: parseInt(apiBrand.product_count || 0, 10),
|
||||
dispensaryCount: parseInt(apiBrand.dispensary_count || 0, 10),
|
||||
|
||||
@@ -27,7 +27,7 @@ const Brands = () => {
|
||||
}, []);
|
||||
|
||||
const filteredBrands = brands.filter((brand) =>
|
||||
brand.name && brand.name.toLowerCase().includes(searchQuery.toLowerCase())
|
||||
brand.name.toLowerCase().includes(searchQuery.toLowerCase())
|
||||
);
|
||||
|
||||
// Group brands alphabetically
|
||||
|
||||
@@ -1,36 +0,0 @@
|
||||
# RBAC configuration for scraper pod to control worker scaling
|
||||
# Allows the scraper to read and scale the scraper-worker statefulset
|
||||
apiVersion: v1
|
||||
kind: ServiceAccount
|
||||
metadata:
|
||||
name: scraper-sa
|
||||
namespace: dispensary-scraper
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: Role
|
||||
metadata:
|
||||
name: worker-scaler
|
||||
namespace: dispensary-scraper
|
||||
rules:
|
||||
# Allow reading deployment and statefulset status
|
||||
- apiGroups: ["apps"]
|
||||
resources: ["deployments", "statefulsets"]
|
||||
verbs: ["get", "list"]
|
||||
# Allow scaling deployments and statefulsets
|
||||
- apiGroups: ["apps"]
|
||||
resources: ["deployments/scale", "statefulsets/scale"]
|
||||
verbs: ["get", "patch", "update"]
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: RoleBinding
|
||||
metadata:
|
||||
name: scraper-worker-scaler
|
||||
namespace: dispensary-scraper
|
||||
subjects:
|
||||
- kind: ServiceAccount
|
||||
name: scraper-sa
|
||||
namespace: dispensary-scraper
|
||||
roleRef:
|
||||
kind: Role
|
||||
name: worker-scaler
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
@@ -1,71 +1,4 @@
|
||||
# Task Worker Deployment
|
||||
#
|
||||
# Simple Deployment that runs task-worker.js to process tasks from worker_tasks queue.
|
||||
# Workers pull tasks using DB-level locking (FOR UPDATE SKIP LOCKED).
|
||||
#
|
||||
# The worker will wait up to 60 minutes for active proxies to be added before failing.
|
||||
# This allows deployment to succeed even if proxies aren't configured yet.
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: scraper-worker
|
||||
namespace: dispensary-scraper
|
||||
spec:
|
||||
replicas: 25
|
||||
selector:
|
||||
matchLabels:
|
||||
app: scraper-worker
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: scraper-worker
|
||||
spec:
|
||||
imagePullSecrets:
|
||||
- name: regcred
|
||||
containers:
|
||||
- name: worker
|
||||
image: code.cannabrands.app/creationshop/dispensary-scraper:latest
|
||||
command: ["node"]
|
||||
args: ["dist/tasks/task-worker.js"]
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: scraper-config
|
||||
- secretRef:
|
||||
name: scraper-secrets
|
||||
env:
|
||||
- name: WORKER_MODE
|
||||
value: "true"
|
||||
- name: POD_NAME
|
||||
valueFrom:
|
||||
fieldRef:
|
||||
fieldPath: metadata.name
|
||||
- name: API_BASE_URL
|
||||
value: "http://scraper"
|
||||
- name: NODE_OPTIONS
|
||||
value: "--max-old-space-size=1500"
|
||||
resources:
|
||||
requests:
|
||||
memory: "1Gi"
|
||||
cpu: "100m"
|
||||
limits:
|
||||
memory: "2Gi"
|
||||
cpu: "500m"
|
||||
livenessProbe:
|
||||
exec:
|
||||
command:
|
||||
- /bin/sh
|
||||
- -c
|
||||
- "pgrep -f 'task-worker' > /dev/null"
|
||||
initialDelaySeconds: 60
|
||||
periodSeconds: 30
|
||||
failureThreshold: 3
|
||||
terminationGracePeriodSeconds: 60
|
||||
---
|
||||
# =============================================================================
|
||||
# ALTERNATIVE: StatefulSet with multiple workers per pod (not currently used)
|
||||
# =============================================================================
|
||||
# Task Worker Pods (StatefulSet)
|
||||
# Task Worker Pods
|
||||
# Each pod runs 5 role-agnostic workers that pull tasks from worker_tasks queue.
|
||||
#
|
||||
# Architecture:
|
||||
|
||||
@@ -25,7 +25,6 @@ spec:
|
||||
labels:
|
||||
app: scraper
|
||||
spec:
|
||||
serviceAccountName: scraper-sa
|
||||
imagePullSecrets:
|
||||
- name: regcred
|
||||
containers:
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user