Compare commits


1 Commit

Author   SHA1         Message                                               Date
kelly    2e22b439e0   Merge branch 'master' into fix/analytics-v2-queries   2025-12-11 02:55:13 +00:00
143 changed files with 4335 additions and 14744 deletions


@@ -1,3 +1,6 @@
+when:
+  - event: [push, pull_request]
 steps:
   # ===========================================
   # PR VALIDATION: Parallel type checks (PRs only)
@@ -69,7 +72,6 @@ steps:
   # ===========================================
   # MASTER DEPLOY: Parallel Docker builds
-  # NOTE: cache_from/cache_to removed due to plugin bug splitting on commas
   # ===========================================
   docker-backend:
     image: woodpeckerci/plugin-docker-buildx
@@ -161,7 +163,7 @@ steps:
       event: push
   # ===========================================
-  # STAGE 3: Deploy and Run Migrations
+  # STAGE 3: Deploy (after Docker builds)
   # ===========================================
   deploy:
     image: bitnami/kubectl:latest
@@ -172,17 +174,12 @@ steps:
       - mkdir -p ~/.kube
       - echo "$KUBECONFIG_CONTENT" | tr -d '[:space:]' | base64 -d > ~/.kube/config
       - chmod 600 ~/.kube/config
-      # Deploy backend first
       - kubectl set image deployment/scraper scraper=code.cannabrands.app/creationshop/dispensary-scraper:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
-      - kubectl rollout status deployment/scraper -n dispensary-scraper --timeout=300s
-      # Note: Migrations run automatically at startup via auto-migrate
-      # Deploy remaining services
-      # Resilience: ensure workers are scaled up if at 0
-      - REPLICAS=$(kubectl get deployment scraper-worker -n dispensary-scraper -o jsonpath='{.spec.replicas}'); if [ "$REPLICAS" = "0" ]; then echo "Scaling workers from 0 to 5"; kubectl scale deployment/scraper-worker --replicas=5 -n dispensary-scraper; fi
       - kubectl set image deployment/scraper-worker worker=code.cannabrands.app/creationshop/dispensary-scraper:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
       - kubectl set image deployment/cannaiq-frontend cannaiq-frontend=code.cannabrands.app/creationshop/cannaiq-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
       - kubectl set image deployment/findadispo-frontend findadispo-frontend=code.cannabrands.app/creationshop/findadispo-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
       - kubectl set image deployment/findagram-frontend findagram-frontend=code.cannabrands.app/creationshop/findagram-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
+      - kubectl rollout status deployment/scraper -n dispensary-scraper --timeout=300s
       - kubectl rollout status deployment/cannaiq-frontend -n dispensary-scraper --timeout=120s
     depends_on:
       - docker-backend


@@ -1,191 +0,0 @@
steps:
  # ===========================================
  # PR VALIDATION: Only typecheck changed projects
  # ===========================================
  typecheck-backend:
    image: code.cannabrands.app/creationshop/node:20
    commands:
      - npm config set cache /npm-cache/backend --global
      - cd backend
      - npm ci --prefer-offline
      - npx tsc --noEmit
    volumes:
      - npm-cache:/npm-cache
    depends_on: []
    when:
      event: pull_request
      path:
        include: ['backend/**']
  typecheck-cannaiq:
    image: code.cannabrands.app/creationshop/node:20
    commands:
      - npm config set cache /npm-cache/cannaiq --global
      - cd cannaiq
      - npm ci --prefer-offline
      - npx tsc --noEmit
    volumes:
      - npm-cache:/npm-cache
    depends_on: []
    when:
      event: pull_request
      path:
        include: ['cannaiq/**']
  # findadispo/findagram typechecks skipped - they have || true anyway
  # ===========================================
  # AUTO-MERGE: Merge PR after all checks pass
  # ===========================================
  auto-merge:
    image: alpine:latest
    environment:
      GITEA_TOKEN:
        from_secret: gitea_token
    commands:
      - apk add --no-cache curl
      - |
        echo "Merging PR #${CI_COMMIT_PULL_REQUEST}..."
        curl -s -X POST \
          -H "Authorization: token $GITEA_TOKEN" \
          -H "Content-Type: application/json" \
          -d '{"Do":"merge"}' \
          "https://code.cannabrands.app/api/v1/repos/Creationshop/dispensary-scraper/pulls/${CI_COMMIT_PULL_REQUEST}/merge"
    depends_on:
      - typecheck-backend
      - typecheck-cannaiq
    when:
      event: pull_request
  # ===========================================
  # MASTER DEPLOY: Parallel Docker builds
  # ===========================================
  docker-backend:
    image: woodpeckerci/plugin-docker-buildx
    settings:
      registry: code.cannabrands.app
      repo: code.cannabrands.app/creationshop/dispensary-scraper
      tags:
        - latest
        - ${CI_COMMIT_SHA:0:8}
      dockerfile: backend/Dockerfile
      context: backend
      username:
        from_secret: registry_username
      password:
        from_secret: registry_password
      platforms: linux/amd64
      provenance: false
      cache_from: type=registry,ref=code.cannabrands.app/creationshop/dispensary-scraper:cache
      cache_to: type=registry,ref=code.cannabrands.app/creationshop/dispensary-scraper:cache,mode=max
      build_args:
        APP_BUILD_VERSION: ${CI_COMMIT_SHA:0:8}
        APP_GIT_SHA: ${CI_COMMIT_SHA}
        APP_BUILD_TIME: ${CI_PIPELINE_CREATED}
        CONTAINER_IMAGE_TAG: ${CI_COMMIT_SHA:0:8}
    depends_on: []
    when:
      branch: master
      event: push
  docker-cannaiq:
    image: woodpeckerci/plugin-docker-buildx
    settings:
      registry: code.cannabrands.app
      repo: code.cannabrands.app/creationshop/cannaiq-frontend
      tags:
        - latest
        - ${CI_COMMIT_SHA:0:8}
      dockerfile: cannaiq/Dockerfile
      context: cannaiq
      username:
        from_secret: registry_username
      password:
        from_secret: registry_password
      platforms: linux/amd64
      provenance: false
      cache_from: type=registry,ref=code.cannabrands.app/creationshop/cannaiq-frontend:cache
      cache_to: type=registry,ref=code.cannabrands.app/creationshop/cannaiq-frontend:cache,mode=max
    depends_on: []
    when:
      branch: master
      event: push
  docker-findadispo:
    image: woodpeckerci/plugin-docker-buildx
    settings:
      registry: code.cannabrands.app
      repo: code.cannabrands.app/creationshop/findadispo-frontend
      tags:
        - latest
        - ${CI_COMMIT_SHA:0:8}
      dockerfile: findadispo/frontend/Dockerfile
      context: findadispo/frontend
      username:
        from_secret: registry_username
      password:
        from_secret: registry_password
      platforms: linux/amd64
      provenance: false
      cache_from: type=registry,ref=code.cannabrands.app/creationshop/findadispo-frontend:cache
      cache_to: type=registry,ref=code.cannabrands.app/creationshop/findadispo-frontend:cache,mode=max
    depends_on: []
    when:
      branch: master
      event: push
  docker-findagram:
    image: woodpeckerci/plugin-docker-buildx
    settings:
      registry: code.cannabrands.app
      repo: code.cannabrands.app/creationshop/findagram-frontend
      tags:
        - latest
        - ${CI_COMMIT_SHA:0:8}
      dockerfile: findagram/frontend/Dockerfile
      context: findagram/frontend
      username:
        from_secret: registry_username
      password:
        from_secret: registry_password
      platforms: linux/amd64
      provenance: false
      cache_from: type=registry,ref=code.cannabrands.app/creationshop/findagram-frontend:cache
      cache_to: type=registry,ref=code.cannabrands.app/creationshop/findagram-frontend:cache,mode=max
    depends_on: []
    when:
      branch: master
      event: push
  # ===========================================
  # STAGE 3: Deploy and Run Migrations
  # ===========================================
  deploy:
    image: bitnami/kubectl:latest
    environment:
      KUBECONFIG_CONTENT:
        from_secret: kubeconfig_data
    commands:
      - mkdir -p ~/.kube
      - echo "$KUBECONFIG_CONTENT" | tr -d '[:space:]' | base64 -d > ~/.kube/config
      - chmod 600 ~/.kube/config
      # Deploy backend first
      - kubectl set image deployment/scraper scraper=code.cannabrands.app/creationshop/dispensary-scraper:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
      - kubectl rollout status deployment/scraper -n dispensary-scraper --timeout=300s
      # Note: Migrations run automatically at startup via auto-migrate
      # Deploy remaining services
      # Resilience: ensure workers are scaled up if at 0
      - REPLICAS=$(kubectl get deployment scraper-worker -n dispensary-scraper -o jsonpath='{.spec.replicas}'); if [ "$REPLICAS" = "0" ]; then echo "Scaling workers from 0 to 5"; kubectl scale deployment/scraper-worker --replicas=5 -n dispensary-scraper; fi
      - kubectl set image deployment/scraper-worker worker=code.cannabrands.app/creationshop/dispensary-scraper:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
      - kubectl set image deployment/cannaiq-frontend cannaiq-frontend=code.cannabrands.app/creationshop/cannaiq-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
      - kubectl set image deployment/findadispo-frontend findadispo-frontend=code.cannabrands.app/creationshop/findadispo-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
      - kubectl set image deployment/findagram-frontend findagram-frontend=code.cannabrands.app/creationshop/findagram-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
      - kubectl rollout status deployment/cannaiq-frontend -n dispensary-scraper --timeout=120s
    depends_on:
      - docker-backend
      - docker-cannaiq
      - docker-findadispo
      - docker-findagram
    when:
      branch: master
      event: push

CLAUDE.md (1535 changes)

File diff suppressed because it is too large.


@@ -25,9 +25,8 @@ ENV APP_GIT_SHA=${APP_GIT_SHA}
 ENV APP_BUILD_TIME=${APP_BUILD_TIME}
 ENV CONTAINER_IMAGE_TAG=${CONTAINER_IMAGE_TAG}
-# Install Chromium dependencies and curl for HTTP requests
+# Install Chromium dependencies
 RUN apt-get update && apt-get install -y \
-    curl \
     chromium \
     fonts-liberation \
     libnss3 \


@@ -1,218 +0,0 @@
# CannaiQ Backend Codebase Map
**Last Updated:** 2025-12-12
**Purpose:** Help Claude and developers understand which code is current vs deprecated
---
## Quick Reference: What to Use
### For Crawling/Scraping
| Task | Use This | NOT This |
|------|----------|----------|
| Fetch products | `src/tasks/handlers/payload-fetch.ts` | `src/hydration/*` |
| Process products | `src/tasks/handlers/product-refresh.ts` | `src/scraper-v2/*` |
| GraphQL client | `src/platforms/dutchie/client.ts` | `src/dutchie-az/services/graphql-client.ts` |
| Worker system | `src/tasks/task-worker.ts` | `src/dutchie-az/services/worker.ts` |
### For Database
| Task | Use This | NOT This |
|------|----------|----------|
| Get DB pool | `src/db/pool.ts` | `src/dutchie-az/db/connection.ts` |
| Run migrations | `src/db/migrate.ts` (CLI only) | Never import at runtime |
| Query products | `store_products` table | `products`, `dutchie_products` |
| Query stores | `dispensaries` table | `stores` table |
### For Discovery
| Task | Use This |
|------|----------|
| Discover stores | `src/discovery/*.ts` |
| Run discovery | `npx tsx src/scripts/run-discovery.ts` |
---
## Directory Status
### ACTIVE DIRECTORIES (Use These)
```
src/
├── auth/ # JWT/session auth, middleware
├── db/ # Database pool, migrations
├── discovery/ # Dutchie store discovery pipeline
├── middleware/ # Express middleware
├── multi-state/ # Multi-state query support
├── platforms/ # Platform-specific clients (Dutchie, Jane, etc)
│ └── dutchie/ # THE Dutchie client - use this one
├── routes/ # Express API routes
├── services/ # Core services (logger, scheduler, etc)
├── tasks/ # Task system (workers, handlers, scheduler)
│ └── handlers/ # Task handlers (payload_fetch, product_refresh, etc)
├── types/ # TypeScript types
└── utils/ # Utilities (storage, image processing)
```
### DEPRECATED DIRECTORIES (DO NOT USE)
```
src/
├── hydration/ # DEPRECATED - Old pipeline approach
├── scraper-v2/ # DEPRECATED - Old scraper engine
├── canonical-hydration/# DEPRECATED - Merged into tasks/handlers
├── dutchie-az/ # PARTIAL - Some parts deprecated, some active
│ ├── db/ # DEPRECATED - Use src/db/pool.ts
│ └── services/ # PARTIAL - worker.ts still runs, graphql-client.ts deprecated
├── portals/ # FUTURE - Not yet implemented
├── seo/ # PARTIAL - Settings work, templates WIP
└── system/ # DEPRECATED - Old orchestration system
```
### DEPRECATED FILES (DO NOT USE)
```
src/dutchie-az/db/connection.ts # Use src/db/pool.ts instead
src/dutchie-az/services/graphql-client.ts # Use src/platforms/dutchie/client.ts
src/hydration/*.ts # Entire directory deprecated
src/scraper-v2/*.ts # Entire directory deprecated
```
---
## Key Files Reference
### Entry Points
| File | Purpose | Status |
|------|---------|--------|
| `src/index.ts` | Main Express server | ACTIVE |
| `src/dutchie-az/services/worker.ts` | Worker process entry | ACTIVE |
| `src/tasks/task-worker.ts` | Task worker (new system) | ACTIVE |
### Dutchie Integration
| File | Purpose | Status |
|------|---------|--------|
| `src/platforms/dutchie/client.ts` | GraphQL client, hashes, curl | **PRIMARY** |
| `src/platforms/dutchie/queries.ts` | High-level query functions | ACTIVE |
| `src/platforms/dutchie/index.ts` | Re-exports | ACTIVE |
### Task Handlers
| File | Purpose | Status |
|------|---------|--------|
| `src/tasks/handlers/payload-fetch.ts` | Fetch products from Dutchie | **PRIMARY** |
| `src/tasks/handlers/product-refresh.ts` | Process payload into DB | **PRIMARY** |
| `src/tasks/handlers/menu-detection.ts` | Detect menu type | ACTIVE |
| `src/tasks/handlers/id-resolution.ts` | Resolve platform IDs | ACTIVE |
| `src/tasks/handlers/image-download.ts` | Download product images | ACTIVE |
### Database
| File | Purpose | Status |
|------|---------|--------|
| `src/db/pool.ts` | Canonical DB pool | **PRIMARY** |
| `src/db/migrate.ts` | Migration runner (CLI only) | CLI ONLY |
| `src/db/auto-migrate.ts` | Auto-run migrations on startup | ACTIVE |
### Configuration
| File | Purpose | Status |
|------|---------|--------|
| `.env` | Environment variables | ACTIVE |
| `package.json` | Dependencies | ACTIVE |
| `tsconfig.json` | TypeScript config | ACTIVE |
---
## GraphQL Hashes (CRITICAL)
The correct hashes are in `src/platforms/dutchie/client.ts`:
```typescript
export const GRAPHQL_HASHES = {
  FilteredProducts: 'ee29c060826dc41c527e470e9ae502c9b2c169720faa0a9f5d25e1b9a530a4a0',
  GetAddressBasedDispensaryData: '13461f73abf7268770dfd05fe7e10c523084b2bb916a929c08efe3d87531977b',
  ConsumerDispensaries: '0a5bfa6ca1d64ae47bcccb7c8077c87147cbc4e6982c17ceec97a2a4948b311b',
  GetAllCitiesByState: 'ae547a0466ace5a48f91e55bf6699eacd87e3a42841560f0c0eabed5a0a920e6',
};
```
**ALWAYS** use `Status: 'Active'` for FilteredProducts (not `null` or `'All'`).
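For illustration, a minimal sketch of a persisted-query request built from these hashes. The request shape mirrors the organic scraping guide; the import path is illustrative, and the production client in `src/platforms/dutchie/client.ts` remains the source of truth:
```typescript
// Sketch only - endpoint and parameters follow examples elsewhere in these docs.
import { GRAPHQL_HASHES } from './client';

async function fetchFilteredProductsPage(platformId: string, page = 0) {
  const variables = {
    productsFilter: {
      dispensaryId: platformId,
      pricingType: 'rec',
      Status: 'Active', // never null (0 products) or 'All' (HTTP 400)
    },
    page,
    perPage: 100,
  };
  const extensions = {
    persistedQuery: { version: 1, sha256Hash: GRAPHQL_HASHES.FilteredProducts },
  };
  const qs = new URLSearchParams({
    operationName: 'FilteredProducts',
    variables: JSON.stringify(variables),
    extensions: JSON.stringify(extensions),
  });
  const res = await fetch(`https://dutchie.com/api-3/graphql?${qs}`, {
    headers: { Accept: 'application/json' },
  });
  if (!res.ok) throw new Error(`FilteredProducts failed: HTTP ${res.status}`);
  return (await res.json())?.data?.filteredProducts;
}
```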
---
## Scripts Reference
### Useful Scripts (in `src/scripts/`)
| Script | Purpose |
|--------|---------|
| `run-discovery.ts` | Run Dutchie discovery |
| `crawl-single-store.ts` | Test crawl a single store |
| `test-dutchie-graphql.ts` | Test GraphQL queries |
### One-Off Scripts (probably don't need)
| Script | Purpose |
|--------|---------|
| `harmonize-az-dispensaries.ts` | One-time data cleanup |
| `bootstrap-stores-for-dispensaries.ts` | One-time migration |
| `backfill-*.ts` | Historical backfill scripts |
---
## API Routes
### Active Routes (in `src/routes/`)
| Route File | Mount Point | Purpose |
|------------|-------------|---------|
| `auth.ts` | `/api/auth` | Login/logout/session |
| `stores.ts` | `/api/stores` | Store CRUD |
| `dashboard.ts` | `/api/dashboard` | Dashboard stats |
| `workers.ts` | `/api/workers` | Worker monitoring |
| `pipeline.ts` | `/api/pipeline` | Crawl triggers |
| `discovery.ts` | `/api/discovery` | Discovery management |
| `analytics.ts` | `/api/analytics` | Analytics queries |
| `wordpress.ts` | `/api/v1/wordpress` | WordPress plugin API |
---
## Documentation Files
### Current Docs (in `backend/docs/`)
| Doc | Purpose | Currency |
|-----|---------|----------|
| `TASK_WORKFLOW_2024-12-10.md` | Task system architecture | CURRENT |
| `WORKER_TASK_ARCHITECTURE.md` | Worker/task design | CURRENT |
| `CRAWL_PIPELINE.md` | Crawl pipeline overview | CURRENT |
| `ORGANIC_SCRAPING_GUIDE.md` | Browser-based scraping | CURRENT |
| `CODEBASE_MAP.md` | This file | CURRENT |
| `ANALYTICS_V2_EXAMPLES.md` | Analytics API examples | CURRENT |
| `BRAND_INTELLIGENCE_API.md` | Brand API docs | CURRENT |
### Root Docs
| Doc | Purpose | Currency |
|-----|---------|----------|
| `CLAUDE.md` | Claude instructions | **PRIMARY** |
| `README.md` | Project overview | NEEDS UPDATE |
---
## Common Mistakes to Avoid
1. **Don't use `src/hydration/`** - It's an old approach that was superseded by the task system
2. **Don't use `src/dutchie-az/db/connection.ts`** - Use `src/db/pool.ts` instead (see the sketch after this list)
3. **Don't import `src/db/migrate.ts` at runtime** - It will crash. Only use for CLI migrations.
4. **Don't query `stores` table** - It's empty. Use `dispensaries`.
5. **Don't query `products` table** - It's empty. Use `store_products`.
6. **Don't use wrong GraphQL hash** - Always get hash from `GRAPHQL_HASHES` in client.ts
7. **Don't use `Status: null`** - It returns 0 products. Use `Status: 'Active'`.
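Putting mistakes 2, 4, and 5 together, a correct product query goes through the canonical pool and the populated tables. A minimal sketch, assuming `src/db/pool.ts` exports a node-postgres `Pool` named `pool` and that `store_products` carries a `dispensary_id` column:
```typescript
import { pool } from './db/pool'; // assumed export; adjust to the actual module shape

// Query store_products (not products) joined to dispensaries (not stores).
async function productsForDispensary(dispensaryId: number) {
  const { rows } = await pool.query(
    `SELECT sp.*
       FROM store_products sp
       JOIN dispensaries d ON d.id = sp.dispensary_id
      WHERE d.id = $1`,
    [dispensaryId],
  );
  return rows;
}
```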
---
## When in Doubt
1. Check if the file is imported in `src/index.ts` - if not, it may be deprecated
2. Check the last modified date - older files may be stale
3. Look for `DEPRECATED` comments in the code
4. Ask: "Is there a newer version of this in `src/tasks/` or `src/platforms/`?"
5. Read the relevant doc in `docs/` before modifying code


@@ -500,18 +500,17 @@ CREATE TABLE proxies (
 Proxies are mandatory. There is no environment variable to disable them. Workers will refuse to start without active proxies in the database.
-### User-Agent Generation
-See `workflow-12102025.md` for full specification.
-**Summary:**
-- Uses `intoli/user-agents` library (daily-updated market share data)
-- Device distribution: Mobile 62%, Desktop 36%, Tablet 2%
-- Browser whitelist: Chrome, Safari, Edge, Firefox only
-- UA sticks until IP rotates (403 or manual rotation)
-- Failure = alert admin + stop crawl (no fallback)
+### Fingerprints Available
+The client includes 6 browser fingerprints:
+- Chrome 131 on Windows
+- Chrome 131 on macOS
+- Chrome 120 on Windows
+- Firefox 133 on Windows
+- Safari 17.2 on macOS
+- Edge 131 on Windows
+Each fingerprint includes proper `sec-ch-ua`, `sec-ch-ua-platform`, and `sec-ch-ua-mobile` headers.
 ---


@@ -362,148 +362,6 @@ SET status = 'pending', retry_count = retry_count + 1
WHERE status = 'failed' AND retry_count < max_retries;
```
## Concurrent Task Processing (Added 2024-12)
Workers can now process multiple tasks concurrently within a single worker instance. This improves throughput by utilizing async I/O efficiently.
### Architecture
```
┌─────────────────────────────────────────────────────────────┐
│                          Pod (K8s)                          │
│                                                             │
│  ┌─────────────────────────────────────────────────────┐    │
│  │                     TaskWorker                      │    │
│  │                                                     │    │
│  │  ┌─────────┐  ┌─────────┐  ┌─────────┐              │    │
│  │  │ Task 1  │  │ Task 2  │  │ Task 3  │ (concurrent) │    │
│  │  └─────────┘  └─────────┘  └─────────┘              │    │
│  │                                                     │    │
│  │  Resource Monitor                                   │    │
│  │  ├── Memory: 65% (threshold: 85%)                   │    │
│  │  ├── CPU: 45% (threshold: 90%)                      │    │
│  │  └── Status: Normal                                 │    │
│  └─────────────────────────────────────────────────────┘    │
└─────────────────────────────────────────────────────────────┘
```
### Environment Variables
| Variable | Default | Description |
|----------|---------|-------------|
| `MAX_CONCURRENT_TASKS` | 3 | Maximum tasks a worker will run concurrently |
| `MEMORY_BACKOFF_THRESHOLD` | 0.85 | Back off when heap memory exceeds 85% |
| `CPU_BACKOFF_THRESHOLD` | 0.90 | Back off when CPU exceeds 90% |
| `BACKOFF_DURATION_MS` | 10000 | How long to wait when backing off (10s) |
### How It Works
1. **Main Loop**: Worker continuously tries to fill up to `MAX_CONCURRENT_TASKS` (sketched after this list)
2. **Resource Monitoring**: Before claiming a new task, worker checks memory and CPU
3. **Backoff**: If resources exceed thresholds, worker pauses and stops claiming new tasks
4. **Concurrent Execution**: Tasks run concurrently as unawaited Promises, so they don't block each other
5. **Graceful Shutdown**: On SIGTERM/decommission, worker stops claiming but waits for active tasks
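A condensed sketch of that loop (names are illustrative; the real implementation is `src/tasks/task-worker.ts:462-516`):
```typescript
const MAX_CONCURRENT_TASKS = Number(process.env.MAX_CONCURRENT_TASKS ?? 3);
const BACKOFF_DURATION_MS = Number(process.env.BACKOFF_DURATION_MS ?? 10_000);

// Assumed helpers defined elsewhere in the worker:
declare function claimNextTask(): Promise<{ id: number } | null>;
declare function executeTask(task: { id: number }): Promise<void>;
declare function shouldBackOff(): boolean;

const sleep = (ms: number) => new Promise<void>((r) => setTimeout(r, ms));
const active = new Set<Promise<void>>();
let shuttingDown = false;

async function mainLoop(): Promise<void> {
  while (!shuttingDown) {
    if (shouldBackOff()) {
      await sleep(BACKOFF_DURATION_MS); // stop claiming; running tasks continue
      continue;
    }
    // Fill up to the concurrency limit; each task runs as an unawaited promise.
    while (active.size < MAX_CONCURRENT_TASKS) {
      const task = await claimNextTask(); // SELECT ... FOR UPDATE SKIP LOCKED
      if (!task) break;
      const run = executeTask(task).finally(() => active.delete(run));
      active.add(run);
    }
    await sleep(1_000); // poll interval between fill attempts
  }
  await Promise.allSettled([...active]); // graceful shutdown: drain active tasks
}
```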
### Resource Monitoring
```typescript
// ResourceStats interface
interface ResourceStats {
  memoryPercent: number;  // Current heap usage as decimal (0.0-1.0)
  memoryMb: number;       // Current heap used in MB
  memoryTotalMb: number;  // Total heap available in MB
  cpuPercent: number;     // CPU usage as percentage (0-100)
  isBackingOff: boolean;  // True if worker is in backoff state
  backoffReason: string;  // Why the worker is backing off
}
```
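The memory half of these stats maps directly onto Node's `process.memoryUsage()`; a plausible sketch (the real method is `src/tasks/task-worker.ts:149-179`):
```typescript
function getMemoryStats(): Pick<ResourceStats, 'memoryPercent' | 'memoryMb' | 'memoryTotalMb'> {
  const { heapUsed, heapTotal } = process.memoryUsage();
  return {
    memoryPercent: heapUsed / heapTotal, // 0.0-1.0, compared against MEMORY_BACKOFF_THRESHOLD
    memoryMb: Math.round(heapUsed / 1024 / 1024),
    memoryTotalMb: Math.round(heapTotal / 1024 / 1024),
  };
}
```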
### Heartbeat Data
Workers report the following in their heartbeat:
```json
{
  "worker_id": "worker-abc123",
  "current_task_id": 456,
  "current_task_ids": [456, 457, 458],
  "active_task_count": 3,
  "max_concurrent_tasks": 3,
  "status": "active",
  "resources": {
    "memory_mb": 256,
    "memory_total_mb": 512,
    "memory_rss_mb": 320,
    "memory_percent": 50,
    "cpu_user_ms": 12500,
    "cpu_system_ms": 3200,
    "cpu_percent": 45,
    "is_backing_off": false,
    "backoff_reason": null
  }
}
```
### Backoff Behavior
When resources exceed thresholds:
1. Worker logs the backoff reason:
```
[TaskWorker] MyWorker backing off: Memory at 87.3% (threshold: 85%)
```
2. Worker stops claiming new tasks but continues existing tasks
3. After `BACKOFF_DURATION_MS`, worker rechecks resources
4. When resources return to normal:
```
[TaskWorker] MyWorker resuming normal operation
```
### UI Display
The Workers Dashboard shows:
- **Tasks Column**: `2/3 tasks` (active/max concurrent)
- **Resources Column**: Memory % and CPU % with color coding
- Green: < 50%
- Yellow: 50-74%
- Amber: 75-89%
- Red: 90%+
- **Backing Off**: Orange warning badge when worker is in backoff state
### Task Count Badge Details
```
┌─────────────────────────────────────────────┐
│ Worker: "MyWorker"                          │
│ Tasks: 2/3 tasks  #456, #457                │
│ Resources: 🧠 65%  💻 45%                   │
│ Status: ● Active                            │
└─────────────────────────────────────────────┘
```
### Best Practices
1. **Start Conservative**: Use `MAX_CONCURRENT_TASKS=3` initially
2. **Monitor Resources**: Watch for frequent backoffs in logs
3. **Tune Per Workload**: I/O-bound tasks benefit from higher concurrency
4. **Scale Horizontally**: Add more pods rather than cranking concurrency too high
### Code References
| File | Purpose |
|------|---------|
| `src/tasks/task-worker.ts:68-71` | Concurrency environment variables |
| `src/tasks/task-worker.ts:104-111` | ResourceStats interface |
| `src/tasks/task-worker.ts:149-179` | getResourceStats() method |
| `src/tasks/task-worker.ts:184-196` | shouldBackOff() method |
| `src/tasks/task-worker.ts:462-516` | mainLoop() with concurrent claiming |
| `src/routes/worker-registry.ts:148-195` | Heartbeat endpoint handling |
| `cannaiq/src/pages/WorkersDashboard.tsx:233-305` | UI components for resources |
## Monitoring
### Logs


@@ -1,297 +0,0 @@
# Organic Browser-Based Scraping Guide
**Last Updated:** 2025-12-12
**Status:** Production-ready proof of concept
---
## Overview
This document describes the "organic" browser-based approach to scraping Dutchie dispensary menus. Unlike direct curl/axios requests, this method uses a real browser session to make API calls, making requests appear natural and reducing detection risk.
---
## Why Organic Scraping?
| Approach | Detection Risk | Speed | Complexity |
|----------|---------------|-------|------------|
| Direct curl | Higher | Fast | Low |
| curl-impersonate | Medium | Fast | Medium |
| **Browser-based (organic)** | **Lowest** | Slower | Higher |
Direct curl requests can be fingerprinted via:
- TLS fingerprint (cipher suites, extensions)
- Header order and values
- Missing cookies/session data
- Request patterns
Browser-based requests inherit:
- Real Chrome TLS fingerprint
- Session cookies from page visit
- Natural header order
- JavaScript execution environment
---
## Implementation
### Dependencies
```bash
npm install puppeteer puppeteer-extra puppeteer-extra-plugin-stealth
```
### Core Script: `test-intercept.js`
Located at: `backend/test-intercept.js`
```javascript
const puppeteer = require('puppeteer-extra');
const StealthPlugin = require('puppeteer-extra-plugin-stealth');
const fs = require('fs');

puppeteer.use(StealthPlugin());

async function capturePayload(config) {
  const { dispensaryId, platformId, cName, outputPath } = config;
  const browser = await puppeteer.launch({
    headless: 'new',
    args: ['--no-sandbox', '--disable-setuid-sandbox']
  });
  const page = await browser.newPage();

  // STEP 1: Establish session by visiting the menu
  const embedUrl = `https://dutchie.com/embedded-menu/${cName}?menuType=rec`;
  await page.goto(embedUrl, { waitUntil: 'networkidle2', timeout: 60000 });

  // STEP 2: Fetch ALL products using GraphQL from browser context
  const result = await page.evaluate(async (platformId) => {
    const allProducts = [];
    let pageNum = 0;
    const perPage = 100;
    let totalCount = 0;
    const sessionId = 'browser-session-' + Date.now();
    while (pageNum < 30) {
      const variables = {
        includeEnterpriseSpecials: false,
        productsFilter: {
          dispensaryId: platformId,
          pricingType: 'rec',
          Status: 'Active', // CRITICAL: Must be 'Active', not null
          types: [],
          useCache: true,
          isDefaultSort: true,
          sortBy: 'popularSortIdx',
          sortDirection: 1,
          bypassOnlineThresholds: true,
          isKioskMenu: false,
          removeProductsBelowOptionThresholds: false,
        },
        page: pageNum,
        perPage: perPage,
      };
      const extensions = {
        persistedQuery: {
          version: 1,
          sha256Hash: 'ee29c060826dc41c527e470e9ae502c9b2c169720faa0a9f5d25e1b9a530a4a0'
        }
      };
      const qs = new URLSearchParams({
        operationName: 'FilteredProducts',
        variables: JSON.stringify(variables),
        extensions: JSON.stringify(extensions)
      });
      const response = await fetch(`https://dutchie.com/api-3/graphql?${qs}`, {
        method: 'GET',
        headers: {
          'Accept': 'application/json',
          'content-type': 'application/json',
          'x-dutchie-session': sessionId,
          'apollographql-client-name': 'Marketplace (production)',
        },
        credentials: 'include'
      });
      const json = await response.json();
      const data = json?.data?.filteredProducts;
      if (!data?.products) break;
      allProducts.push(...data.products);
      if (pageNum === 0) totalCount = data.queryInfo?.totalCount || 0;
      if (allProducts.length >= totalCount) break;
      pageNum++;
      await new Promise(r => setTimeout(r, 200)); // Polite delay
    }
    return { products: allProducts, totalCount };
  }, platformId);

  await browser.close();

  // STEP 3: Save payload
  const payload = {
    dispensaryId,
    platformId,
    cName,
    fetchedAt: new Date().toISOString(),
    productCount: result.products.length,
    products: result.products,
  };
  fs.writeFileSync(outputPath, JSON.stringify(payload, null, 2));
  return payload;
}
```
---
## Critical Parameters
### GraphQL Hash (FilteredProducts)
```
ee29c060826dc41c527e470e9ae502c9b2c169720faa0a9f5d25e1b9a530a4a0
```
**WARNING:** Using the wrong hash returns HTTP 400.
### Status Parameter
| Value | Result |
|-------|--------|
| `'Active'` | Returns in-stock products (1019 in test) |
| `null` | Returns 0 products |
| `'All'` | Returns HTTP 400 |
**ALWAYS use `Status: 'Active'`**
### Required Headers
```javascript
{
  'Accept': 'application/json',
  'content-type': 'application/json',
  'x-dutchie-session': 'unique-session-id',
  'apollographql-client-name': 'Marketplace (production)',
}
```
### Endpoint
```
https://dutchie.com/api-3/graphql
```
---
## Performance Benchmarks
Test store: AZ-Deeply-Rooted (1019 products)
| Metric | Value |
|--------|-------|
| Total products | 1019 |
| Time | 18.5 seconds |
| Payload size | 11.8 MB |
| Pages fetched | 11 (100 per page) |
| Success rate | 100% |
---
## Payload Format
The output matches the existing `payload-fetch.ts` handler format:
```json
{
  "dispensaryId": 123,
  "platformId": "6405ef617056e8014d79101b",
  "cName": "AZ-Deeply-Rooted",
  "fetchedAt": "2025-12-12T05:05:19.837Z",
  "productCount": 1019,
  "products": [
    {
      "id": "6927508db4851262f629a869",
      "Name": "Product Name",
      "brand": { "name": "Brand Name", ... },
      "type": "Flower",
      "THC": "25%",
      "Prices": [...],
      "Options": [...],
      ...
    }
  ]
}
```
---
## Integration Points
### As a Task Handler
The organic approach can be integrated as an alternative to curl-based fetching:
```typescript
// In src/tasks/handlers/organic-payload-fetch.ts
export async function handleOrganicPayloadFetch(ctx: TaskContext): Promise<TaskResult> {
  // Use puppeteer-based capture
  // Save to same payload storage
  // Queue product_refresh task
}
```
### Worker Configuration
Add to job_schedules:
```sql
INSERT INTO job_schedules (name, role, cron_expression)
VALUES ('organic_product_crawl', 'organic_payload_fetch', '0 */6 * * *');
```
---
## Troubleshooting
### HTTP 400 Bad Request
- Check hash is correct: `ee29c060...`
- Verify Status is `'Active'` (string, not null)
### 0 Products Returned
- Status was likely `null` or `'All'` - use `'Active'`
- Check platformId is valid MongoDB ObjectId
### Session Not Established
- Increase timeout on initial page.goto()
- Check cName is valid (matches embedded-menu URL)
### Detection/Blocking
- StealthPlugin should handle most cases
- Add random delays between pages
- Use headless: 'new' (not true/false)
---
## Files Reference
| File | Purpose |
|------|---------|
| `backend/test-intercept.js` | Proof of concept script |
| `backend/src/platforms/dutchie/client.ts` | GraphQL hashes, curl implementation |
| `backend/src/tasks/handlers/payload-fetch.ts` | Current curl-based handler |
| `backend/src/utils/payload-storage.ts` | Payload save/load utilities |
---
## See Also
- `DUTCHIE_CRAWL_WORKFLOW.md` - Full crawl pipeline documentation
- `TASK_WORKFLOW_2024-12-10.md` - Task system architecture
- `CLAUDE.md` - Project rules and constraints


@@ -1,25 +0,0 @@
# ARCHIVED DOCUMENTATION
**WARNING: These docs may be outdated or inaccurate.**
The code has evolved significantly. These docs are kept for historical reference only.
## What to Use Instead
**The single source of truth is:**
- `CLAUDE.md` (root) - Essential rules and quick reference
- `docs/CODEBASE_MAP.md` - Current file/directory reference
## Why Archive?
These docs were written during development iterations and may reference:
- Old file paths that no longer exist
- Deprecated approaches (hydration, scraper-v2)
- APIs that have changed
- Database schemas that evolved
## If You Need Details
1. First check CODEBASE_MAP.md for current file locations
2. Then read the actual source code
3. Only use archive docs as a last resort for historical context


@@ -1,584 +0,0 @@
# Task Workflow Documentation
**Date: 2024-12-10**
This document describes the complete task/job processing architecture after the 2024-12-10 rewrite.
---
## Complete Architecture
```
┌─────────────────────────────────────────────────────────────────────────────────┐
│ KUBERNETES CLUSTER │
├─────────────────────────────────────────────────────────────────────────────────┤
│ │
│ ┌─────────────────────────────────────────────────────────────────────────┐ │
│ │ API SERVER POD (scraper) │ │
│ │ │ │
│ │ ┌──────────────────┐ ┌────────────────────────────────────────┐ │ │
│ │ │ Express API │ │ TaskScheduler │ │ │
│ │ │ │ │ (src/services/task-scheduler.ts) │ │ │
│ │ │ /api/job-queue │ │ │ │ │
│ │ │ /api/tasks │ │ • Polls every 60s │ │ │
│ │ │ /api/schedules │ │ • Checks task_schedules table │ │ │
│ │ └────────┬─────────┘ │ • SELECT FOR UPDATE SKIP LOCKED │ │ │
│ │ │ │ • Generates tasks when due │ │ │
│ │ │ └──────────────────┬─────────────────────┘ │ │
│ │ │ │ │ │
│ └────────────┼──────────────────────────────────┼──────────────────────────┘ │
│ │ │ │
│ │ ┌────────────────────────┘ │
│ │ │ │
│ ▼ ▼ │
│ ┌─────────────────────────────────────────────────────────────────────────┐ │
│ │ POSTGRESQL DATABASE │ │
│ │ │ │
│ │ ┌─────────────────────┐ ┌─────────────────────┐ │ │
│ │ │ task_schedules │ │ worker_tasks │ │ │
│ │ │ │ │ │ │ │
│ │ │ • product_refresh │───────►│ • pending tasks │ │ │
│ │ │ • store_discovery │ create │ • claimed tasks │ │ │
│ │ │ • analytics_refresh │ tasks │ • running tasks │ │ │
│ │ │ │ │ • completed tasks │ │ │
│ │ │ next_run_at │ │ │ │ │
│ │ │ last_run_at │ │ role, dispensary_id │ │ │
│ │ │ interval_hours │ │ priority, status │ │ │
│ │ └─────────────────────┘ └──────────┬──────────┘ │ │
│ │ │ │ │
│ └─────────────────────────────────────────────┼────────────────────────────┘ │
│ │ │
│ ┌──────────────────────┘ │
│ │ Workers poll for tasks │
│ │ (SELECT FOR UPDATE SKIP LOCKED) │
│ ▼ │
│ ┌─────────────────────────────────────────────────────────────────────────┐ │
│ │ WORKER PODS (StatefulSet: scraper-worker) │ │
│ │ │ │
│ │ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │ │
│ │ │ Worker 0 │ │ Worker 1 │ │ Worker 2 │ │ Worker N │ │ │
│ │ │ │ │ │ │ │ │ │ │ │
│ │ │ task-worker │ │ task-worker │ │ task-worker │ │ task-worker │ │ │
│ │ │ .ts │ │ .ts │ │ .ts │ │ .ts │ │ │
│ │ └─────────────┘ └─────────────┘ └─────────────┘ └─────────────┘ │ │
│ │ │ │
│ └──────────────────────────────────────────────────────────────────────────┘ │
│ │
└──────────────────────────────────────────────────────────────────────────────────┘
```
---
## Startup Sequence
```
┌─────────────────────────────────────────────────────────────────────────────┐
│ API SERVER STARTUP │
├─────────────────────────────────────────────────────────────────────────────┤
│ │
│ 1. Express app initializes │
│ │ │
│ ▼ │
│ 2. runAutoMigrations() │
│ • Runs pending migrations (including 079_task_schedules.sql) │
│ │ │
│ ▼ │
│ 3. initializeMinio() / initializeImageStorage() │
│ │ │
│ ▼ │
│ 4. cleanupOrphanedJobs() │
│ │ │
│ ▼ │
│ 5. taskScheduler.start() ◄─── NEW (per TASK_WORKFLOW_2024-12-10.md) │
│ │ │
│ ├── Recover stale tasks (workers that died) │
│ ├── Ensure default schedules exist in task_schedules │
│ ├── Check and run any due schedules immediately │
│ └── Start 60-second poll interval │
│ │ │
│ ▼ │
│ 6. app.listen(PORT) │
│ │
└─────────────────────────────────────────────────────────────────────────────┘
┌─────────────────────────────────────────────────────────────────────────────┐
│ WORKER POD STARTUP │
├─────────────────────────────────────────────────────────────────────────────┤
│ │
│ 1. K8s starts pod from StatefulSet │
│ │ │
│ ▼ │
│ 2. TaskWorker.constructor() │
│ • Create DB pool │
│ • Create CrawlRotator │
│ │ │
│ ▼ │
│ 3. initializeStealth() │
│ • Load proxies from DB (REQUIRED - fails if none) │
│ • Wire rotator to Dutchie client │
│ │ │
│ ▼ │
│ 4. register() with API │
│ • Optional - continues if fails │
│ │ │
│ ▼ │
│ 5. startRegistryHeartbeat() every 30s │
│ │ │
│ ▼ │
│ 6. processNextTask() loop │
│ │ │
│ ├── Poll for pending task (FOR UPDATE SKIP LOCKED) │
│ ├── Claim task atomically │
│ ├── Execute handler (product_refresh, store_discovery, etc.) │
│ ├── Mark complete/failed │
│ ├── Chain next task if applicable │
│ └── Loop │
│ │
└─────────────────────────────────────────────────────────────────────────────┘
```
---
## Schedule Flow
```
┌─────────────────────────────────────────────────────────────────────────────┐
│ SCHEDULER POLL (every 60 seconds) │
├─────────────────────────────────────────────────────────────────────────────┤
│ │
│ BEGIN TRANSACTION │
│ │ │
│ ▼ │
│ SELECT * FROM task_schedules │
│ WHERE enabled = true AND next_run_at <= NOW() │
│ FOR UPDATE SKIP LOCKED ◄─── Prevents duplicate execution across replicas │
│ │ │
│ ▼ │
│ For each due schedule: │
│ │ │
│ ├── product_refresh_all │
│ │ └─► Query dispensaries needing crawl │
│ │ └─► Create product_refresh tasks in worker_tasks │
│ │ │
│ ├── store_discovery_dutchie │
│ │ └─► Create single store_discovery task │
│ │ │
│ └── analytics_refresh │
│ └─► Create single analytics_refresh task │
│ │ │
│ ▼ │
│ UPDATE task_schedules SET │
│ last_run_at = NOW(), │
│ next_run_at = NOW() + interval_hours │
│ │ │
│ ▼ │
│ COMMIT │
│ │
└─────────────────────────────────────────────────────────────────────────────┘
```
---
## Task Lifecycle
```
┌──────────┐
│ SCHEDULE │
│   DUE    │
└────┬─────┘
     │ creates tasks
     ▼
┌──────────────┐    claim    ┌──────────────┐    start    ┌──────────────┐
│   PENDING    │────────────►│   CLAIMED    │────────────►│   RUNNING    │
└──────────────┘             └──────────────┘             └──────┬───────┘
       ▲                                                         │
       │ retry                             ┌──────────────┬──────┴──────┐
       │ (if retries < max)                │              │             │
       │                                   ▼              ▼             ▼
       │                             ┌──────────┐   ┌──────────┐   ┌──────────┐
       └─────────────────────────────│  FAILED  │   │ COMPLETED│   │  STALE   │
                                     └──────────┘   └──────────┘   └────┬─────┘
                                                                        │ recover_stale_tasks()
                                                                        ▼
                                                                  ┌──────────┐
                                                                  │ PENDING  │
                                                                  └──────────┘
```
---
## Database Tables
### task_schedules (NEW - migration 079)
Stores schedule definitions. Survives restarts.
```sql
CREATE TABLE task_schedules (
id SERIAL PRIMARY KEY,
name VARCHAR(100) NOT NULL UNIQUE,
role VARCHAR(50) NOT NULL, -- product_refresh, store_discovery, etc.
enabled BOOLEAN DEFAULT TRUE,
interval_hours INTEGER NOT NULL, -- How often to run
priority INTEGER DEFAULT 0, -- Task priority when created
state_code VARCHAR(2), -- Optional filter
last_run_at TIMESTAMPTZ, -- When it last ran
next_run_at TIMESTAMPTZ, -- When it's due next
last_task_count INTEGER, -- Tasks created last run
last_error TEXT -- Error message if failed
);
```
### worker_tasks (migration 074)
The task queue. Workers pull from here.
```sql
CREATE TABLE worker_tasks (
id SERIAL PRIMARY KEY,
role task_role NOT NULL, -- What type of work
dispensary_id INTEGER, -- Which store (if applicable)
platform VARCHAR(50), -- Which platform
status task_status DEFAULT 'pending',
priority INTEGER DEFAULT 0, -- Higher = process first
scheduled_for TIMESTAMP, -- Don't process before this time
worker_id VARCHAR(100), -- Which worker claimed it
claimed_at TIMESTAMP,
started_at TIMESTAMP,
completed_at TIMESTAMP,
last_heartbeat_at TIMESTAMP, -- For stale detection
result JSONB,
error_message TEXT,
retry_count INTEGER DEFAULT 0,
max_retries INTEGER DEFAULT 3
);
```
---
## Default Schedules
| Name | Role | Interval | Priority | Description |
|------|------|----------|----------|-------------|
| `payload_fetch_all` | payload_fetch | 4 hours | 0 | Fetch payloads from Dutchie API (chains to product_refresh) |
| `store_discovery_dutchie` | store_discovery | 24 hours | 5 | Find new Dutchie stores |
| `analytics_refresh` | analytics_refresh | 6 hours | 0 | Refresh materialized views |
---
## Task Roles
| Role | Description | Creates Tasks For |
|------|-------------|-------------------|
| `payload_fetch` | **NEW** - Fetch from Dutchie API, save to disk | Each dispensary needing crawl |
| `product_refresh` | **CHANGED** - Read local payload, normalize, upsert to DB | Chained from payload_fetch |
| `store_discovery` | Find new dispensaries, returns newStoreIds[] | Single task per platform |
| `entry_point_discovery` | **DEPRECATED** - Resolve platform IDs | No longer used |
| `product_discovery` | Initial product fetch for new stores | Chained from store_discovery |
| `analytics_refresh` | Refresh materialized views | Single global task |
### Payload/Refresh Separation (2024-12-10)
The crawl workflow is now split into two phases:
```
payload_fetch (scheduled every 4h)
└─► Hit Dutchie GraphQL API
└─► Save raw JSON to /storage/payloads/{year}/{month}/{day}/store_{id}_{ts}.json.gz
└─► Record metadata in raw_crawl_payloads table
└─► Queue product_refresh task with payload_id
product_refresh (chained from payload_fetch)
└─► Load payload from filesystem (NOT from API)
└─► Normalize via DutchieNormalizer
└─► Upsert to store_products
└─► Create snapshots
└─► Track missing products
└─► Download images
```
**Benefits:**
- **Retry-friendly**: If normalize fails, re-run product_refresh without re-crawling
- **Replay-able**: Run product_refresh against any historical payload
- **Faster refreshes**: Local file read vs network call
- **Historical diffs**: Compare payloads to see what changed between crawls
- **Less API pressure**: Only payload_fetch hits Dutchie
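In code, the payload_fetch half reduces to roughly the following sketch. Helper names and the insert shape are illustrative; the real handler is `src/tasks/handlers/payload-fetch.ts`, the metadata columns come from migration 080, and the `payload` chaining column from migration 081:
```typescript
import { gzipSync } from 'node:zlib';
import { mkdirSync, writeFileSync } from 'node:fs';
import path from 'node:path';

declare const pool: import('pg').Pool;                              // assumed DB pool
declare function fetchAllProducts(id: number): Promise<unknown[]>;  // hits Dutchie GraphQL
declare const taskService: { createTask(t: object): Promise<void> };

async function handlePayloadFetch(dispensaryId: number): Promise<void> {
  const products = await fetchAllProducts(dispensaryId);
  const now = new Date();
  const dir = path.join(
    '/storage/payloads',
    String(now.getUTCFullYear()),
    String(now.getUTCMonth() + 1).padStart(2, '0'),
    String(now.getUTCDate()).padStart(2, '0'),
  );
  mkdirSync(dir, { recursive: true });
  const file = path.join(dir, `store_${dispensaryId}_${Math.floor(now.getTime() / 1000)}.json.gz`);
  writeFileSync(file, gzipSync(JSON.stringify({ dispensaryId, fetchedAt: now.toISOString(), products })));

  // Record metadata, then chain product_refresh via the payload column.
  const { rows } = await pool.query(
    `INSERT INTO raw_crawl_payloads (dispensary_id, storage_path, product_count)
     VALUES ($1, $2, $3) RETURNING id`,
    [dispensaryId, file, products.length],
  );
  await taskService.createTask({
    role: 'product_refresh',
    dispensary_id: dispensaryId,
    payload: { payload_id: rows[0].id },
  });
}
```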
---
## Task Chaining
Tasks automatically queue follow-up tasks upon successful completion. This creates two main flows:
### Discovery Flow (New Stores)
When `store_discovery` finds new dispensaries, they automatically get their initial product data:
```
store_discovery
└─► Discovers new locations via Dutchie GraphQL
└─► Auto-promotes valid locations to dispensaries table
└─► Collects newDispensaryIds[] from promotions
└─► Returns { newStoreIds: [...] } in result
chainNextTask() detects newStoreIds
└─► Creates product_discovery task for each new store
product_discovery
└─► Calls handlePayloadFetch() internally
└─► payload_fetch hits Dutchie API
└─► Saves raw JSON to /storage/payloads/
└─► Queues product_refresh task with payload_id
product_refresh
└─► Loads payload from filesystem
└─► Normalizes and upserts to store_products
└─► Creates snapshots, downloads images
```
**Complete Discovery Chain:**
```
store_discovery → product_discovery → payload_fetch → product_refresh
(internal call) (queues next)
```
### Scheduled Flow (Existing Stores)
For existing stores, `payload_fetch_all` schedule runs every 4 hours:
```
TaskScheduler (every 60s)
└─► Checks task_schedules for due schedules
└─► payload_fetch_all is due
└─► Generates payload_fetch task for each dispensary
payload_fetch
└─► Hits Dutchie GraphQL API
└─► Saves raw JSON to /storage/payloads/
└─► Queues product_refresh task with payload_id
product_refresh
└─► Loads payload from filesystem (NOT API)
└─► Normalizes via DutchieNormalizer
└─► Upserts to store_products
└─► Creates snapshots
```
**Complete Scheduled Chain:**
```
payload_fetch → product_refresh
(queues) (reads local)
```
### Chaining Implementation
Task chaining is handled in three places (a sketch of the external case follows this list):
1. **Internal chaining (handler calls handler):**
- `product_discovery` calls `handlePayloadFetch()` directly
2. **External chaining (chainNextTask() in task-service.ts):**
- Called after task completion
- `store_discovery` → queues `product_discovery` for each newStoreId
3. **Queue-based chaining (taskService.createTask):**
- `payload_fetch` queues `product_refresh` with `payload: { payload_id }`
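A sketch of the external case (2); the actual logic lives in `chainNextTask()` in `task-service.ts`:
```typescript
interface CompletedTask {
  role: string;
  result?: { newStoreIds?: number[] };
}

declare const taskService: { createTask(t: object): Promise<void> };

// Called after a task is marked complete; queues follow-up work.
async function chainNextTask(task: CompletedTask): Promise<void> {
  if (task.role === 'store_discovery') {
    for (const dispensaryId of task.result?.newStoreIds ?? []) {
      await taskService.createTask({ role: 'product_discovery', dispensary_id: dispensaryId });
    }
  }
}
```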
---
## Payload API Endpoints
Raw crawl payloads can be accessed via the Payloads API:
| Endpoint | Method | Description |
|----------|--------|-------------|
| `GET /api/payloads` | GET | List payload metadata (paginated) |
| `GET /api/payloads/:id` | GET | Get payload metadata by ID |
| `GET /api/payloads/:id/data` | GET | Get full payload JSON (decompressed) |
| `GET /api/payloads/store/:dispensaryId` | GET | List payloads for a store |
| `GET /api/payloads/store/:dispensaryId/latest` | GET | Get latest payload for a store |
| `GET /api/payloads/store/:dispensaryId/diff` | GET | Diff two payloads for changes |
### Payload Diff Response
The diff endpoint returns:
```json
{
  "success": true,
  "from": { "id": 123, "fetchedAt": "...", "productCount": 100 },
  "to": { "id": 456, "fetchedAt": "...", "productCount": 105 },
  "diff": {
    "added": 10,
    "removed": 5,
    "priceChanges": 8,
    "stockChanges": 12
  },
  "details": {
    "added": [...],
    "removed": [...],
    "priceChanges": [...],
    "stockChanges": [...]
  }
}
```
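A sketch of how such a diff can be computed from two stored payloads. Field names follow the payload format documented above; the endpoint's actual implementation may differ, and stock changes would compare `Options` the same way prices are compared here:
```typescript
type Product = { id: string; Prices?: unknown };

function diffPayloads(from: Product[], to: Product[]) {
  const fromById = new Map(from.map((p) => [p.id, p]));
  const toById = new Map(to.map((p) => [p.id, p]));
  const added = to.filter((p) => !fromById.has(p.id));
  const removed = from.filter((p) => !toById.has(p.id));
  const priceChanges = to.filter((p) => {
    const prev = fromById.get(p.id);
    return !!prev && JSON.stringify(prev.Prices) !== JSON.stringify(p.Prices);
  });
  return {
    diff: { added: added.length, removed: removed.length, priceChanges: priceChanges.length },
    details: { added, removed, priceChanges },
  };
}
```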
---
## API Endpoints
### Schedules (NEW)
| Endpoint | Method | Description |
|----------|--------|-------------|
| `GET /api/schedules` | GET | List all schedules |
| `PUT /api/schedules/:id` | PUT | Update schedule |
| `POST /api/schedules/:id/trigger` | POST | Run schedule immediately |
### Task Creation (rewired 2024-12-10)
| Endpoint | Method | Description |
|----------|--------|-------------|
| `POST /api/job-queue/enqueue` | POST | Create single task |
| `POST /api/job-queue/enqueue-batch` | POST | Create batch tasks |
| `POST /api/job-queue/enqueue-state` | POST | Create tasks for state |
| `POST /api/tasks` | POST | Direct task creation |
### Task Management
| Endpoint | Method | Description |
|----------|--------|-------------|
| `GET /api/tasks` | GET | List tasks |
| `GET /api/tasks/:id` | GET | Get single task |
| `GET /api/tasks/counts` | GET | Task counts by status |
| `POST /api/tasks/recover-stale` | POST | Recover stale tasks |
---
## Key Files
| File | Purpose |
|------|---------|
| `src/services/task-scheduler.ts` | **NEW** - DB-driven scheduler |
| `src/tasks/task-worker.ts` | Worker that processes tasks |
| `src/tasks/task-service.ts` | Task CRUD operations |
| `src/tasks/handlers/payload-fetch.ts` | **NEW** - Fetches from API, saves to disk |
| `src/tasks/handlers/product-refresh.ts` | **CHANGED** - Reads from disk, processes to DB |
| `src/utils/payload-storage.ts` | **NEW** - Payload save/load utilities |
| `src/routes/tasks.ts` | Task API endpoints |
| `src/routes/job-queue.ts` | Job Queue UI endpoints (rewired) |
| `migrations/079_task_schedules.sql` | Schedule table |
| `migrations/080_raw_crawl_payloads.sql` | Payload metadata table |
| `migrations/081_payload_fetch_columns.sql` | payload, last_fetch_at columns |
| `migrations/074_worker_task_queue.sql` | Task queue table |
---
## Legacy Code (DEPRECATED)
| File | Status | Replacement |
|------|--------|-------------|
| `src/services/scheduler.ts` | DEPRECATED | `task-scheduler.ts` |
| `dispensary_crawl_jobs` table | ORPHANED | `worker_tasks` |
| `job_schedules` table | LEGACY | `task_schedules` |
---
## Dashboard Integration
Both pages remain wired to the dashboard:
| Page | Data Source | Actions |
|------|-------------|---------|
| **Job Queue** | `worker_tasks`, `task_schedules` | Create tasks, view schedules |
| **Task Queue** | `worker_tasks` | View tasks, recover stale |
---
## Multi-Replica Safety
The scheduler uses `SELECT FOR UPDATE SKIP LOCKED` to ensure:
1. **Only one replica** executes a schedule at a time
2. **No duplicate tasks** created
3. **Survives pod restarts** - state in DB, not memory
4. **Self-healing** - recovers stale tasks on startup
```sql
-- This query is atomic across all API server replicas
SELECT * FROM task_schedules
WHERE enabled = true AND next_run_at <= NOW()
FOR UPDATE SKIP LOCKED
```
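Wrapped in a transaction, the scheduler's poll comes down to roughly this sketch (node-postgres assumed; the real code is `src/services/task-scheduler.ts`):
```typescript
import type { Pool } from 'pg';

declare const pool: Pool;
declare function generateTasksFor(schedule: { id: number }): Promise<number>;

async function pollSchedules(): Promise<void> {
  const client = await pool.connect();
  try {
    await client.query('BEGIN');
    // Rows locked here are skipped by other replicas until COMMIT.
    const { rows: due } = await client.query(
      `SELECT * FROM task_schedules
        WHERE enabled = true AND next_run_at <= NOW()
        FOR UPDATE SKIP LOCKED`,
    );
    for (const schedule of due) {
      const count = await generateTasksFor(schedule); // inserts into worker_tasks
      await client.query(
        `UPDATE task_schedules
            SET last_run_at = NOW(),
                next_run_at = NOW() + make_interval(hours => interval_hours),
                last_task_count = $2
          WHERE id = $1`,
        [schedule.id, count],
      );
    }
    await client.query('COMMIT');
  } catch (err) {
    await client.query('ROLLBACK');
    throw err;
  } finally {
    client.release();
  }
}
```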
---
## Worker Scaling (K8s)
Workers run as a StatefulSet in Kubernetes. You can scale from the admin UI or CLI.
### From Admin UI
The Workers page (`/admin/workers`) provides:
- Current replica count display
- Scale up/down buttons
- Target replica input
### API Endpoints
| Endpoint | Method | Description |
|----------|--------|-------------|
| `GET /api/workers/k8s/replicas` | GET | Get current/desired replica counts |
| `POST /api/workers/k8s/scale` | POST | Scale to N replicas (body: `{ replicas: N }`) |
### From CLI
```bash
# View current replicas
kubectl get statefulset scraper-worker -n dispensary-scraper
# Scale to 10 workers
kubectl scale statefulset scraper-worker -n dispensary-scraper --replicas=10
# Scale down to 3 workers
kubectl scale statefulset scraper-worker -n dispensary-scraper --replicas=3
```
### Configuration
Environment variables for the API server:
| Variable | Default | Description |
|----------|---------|-------------|
| `K8S_NAMESPACE` | `dispensary-scraper` | Kubernetes namespace |
| `K8S_WORKER_STATEFULSET` | `scraper-worker` | StatefulSet name |
### RBAC Requirements
The API server pod needs these K8s permissions:
```yaml
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: worker-scaler
  namespace: dispensary-scraper
rules:
  - apiGroups: ["apps"]
    resources: ["statefulsets"]
    verbs: ["get", "patch"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: scraper-worker-scaler
  namespace: dispensary-scraper
subjects:
  - kind: ServiceAccount
    name: default
    namespace: dispensary-scraper
roleRef:
  kind: Role
  name: worker-scaler
  apiGroup: rbac.authorization.k8s.io
```


@@ -1,77 +0,0 @@
apiVersion: v1
kind: Service
metadata:
  name: scraper-worker
  namespace: dispensary-scraper
  labels:
    app: scraper-worker
spec:
  clusterIP: None  # Headless service required for StatefulSet
  selector:
    app: scraper-worker
  ports:
    - port: 3010
      name: http
---
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: scraper-worker
  namespace: dispensary-scraper
spec:
  serviceName: scraper-worker
  replicas: 8
  podManagementPolicy: Parallel  # Start all pods at once
  updateStrategy:
    type: OnDelete  # Pods only update when manually deleted - no automatic restarts
  selector:
    matchLabels:
      app: scraper-worker
  template:
    metadata:
      labels:
        app: scraper-worker
    spec:
      terminationGracePeriodSeconds: 60
      imagePullSecrets:
        - name: regcred
      containers:
        - name: worker
          image: code.cannabrands.app/creationshop/dispensary-scraper:latest
          imagePullPolicy: Always
          command: ["node"]
          args: ["dist/tasks/task-worker.js"]
          env:
            - name: WORKER_MODE
              value: "true"
            - name: POD_NAME
              valueFrom:
                fieldRef:
                  fieldPath: metadata.name
            - name: MAX_CONCURRENT_TASKS
              value: "50"
            - name: API_BASE_URL
              value: http://scraper
            - name: NODE_OPTIONS
              value: --max-old-space-size=1500
          envFrom:
            - configMapRef:
                name: scraper-config
            - secretRef:
                name: scraper-secrets
          resources:
            requests:
              cpu: 100m
              memory: 1Gi
            limits:
              cpu: 500m
              memory: 2Gi
          livenessProbe:
            exec:
              command:
                - /bin/sh
                - -c
                - pgrep -f 'task-worker' > /dev/null
            initialDelaySeconds: 10
            periodSeconds: 30
            failureThreshold: 3


@@ -1,27 +0,0 @@
-- Migration: Worker Commands Table
-- Purpose: Store commands for workers (decommission, etc.)
-- Workers poll this table after each task to check for commands
CREATE TABLE IF NOT EXISTS worker_commands (
id SERIAL PRIMARY KEY,
worker_id TEXT NOT NULL,
command TEXT NOT NULL, -- 'decommission', 'pause', 'resume'
reason TEXT,
issued_by TEXT,
issued_at TIMESTAMPTZ DEFAULT NOW(),
acknowledged_at TIMESTAMPTZ,
executed_at TIMESTAMPTZ,
status TEXT DEFAULT 'pending' -- 'pending', 'acknowledged', 'executed', 'cancelled'
);
-- Index for worker lookups
CREATE INDEX IF NOT EXISTS idx_worker_commands_worker_id ON worker_commands(worker_id);
CREATE INDEX IF NOT EXISTS idx_worker_commands_pending ON worker_commands(worker_id, status) WHERE status = 'pending';
-- Add decommission_requested column to worker_registry for quick checks
ALTER TABLE worker_registry ADD COLUMN IF NOT EXISTS decommission_requested BOOLEAN DEFAULT FALSE;
ALTER TABLE worker_registry ADD COLUMN IF NOT EXISTS decommission_reason TEXT;
ALTER TABLE worker_registry ADD COLUMN IF NOT EXISTS decommission_requested_at TIMESTAMPTZ;
-- Comment
COMMENT ON TABLE worker_commands IS 'Commands issued to workers (decommission after task, pause, etc.)';
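A sketch of the poll a worker might run between tasks against this table (column names as above; helper names illustrative):
```typescript
import type { Pool } from 'pg';

declare const pool: Pool;
declare function beginGracefulShutdown(reason: string | null): void;

async function checkForCommands(workerId: string): Promise<void> {
  // Claim the oldest pending command and mark it acknowledged in one statement.
  const { rows } = await pool.query(
    `UPDATE worker_commands
        SET status = 'acknowledged', acknowledged_at = NOW()
      WHERE id = (
        SELECT id FROM worker_commands
         WHERE worker_id = $1 AND status = 'pending'
         ORDER BY issued_at
         LIMIT 1)
      RETURNING command, reason`,
    [workerId],
  );
  if (rows[0]?.command === 'decommission') {
    beginGracefulShutdown(rows[0].reason); // finish active tasks, then exit
  }
}
```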


@@ -1,8 +0,0 @@
-- Migration 078: Add consecutive_403_count to proxies table
-- Per workflow-12102025.md: Track consecutive 403s per proxy
-- After 3 consecutive 403s with different fingerprints → disable proxy
ALTER TABLE proxies ADD COLUMN IF NOT EXISTS consecutive_403_count INTEGER DEFAULT 0;
-- Add comment explaining the column
COMMENT ON COLUMN proxies.consecutive_403_count IS 'Tracks consecutive 403 blocks. Reset to 0 on success. Proxy disabled at 3.';


@@ -1,49 +0,0 @@
-- Migration 079: Task Schedules for Database-Driven Scheduler
-- Per TASK_WORKFLOW_2024-12-10.md: Replaces node-cron with DB-driven scheduling
--
-- 2024-12-10: Created for reliable, multi-replica-safe task scheduling
-- task_schedules: Stores schedule definitions and state
CREATE TABLE IF NOT EXISTS task_schedules (
id SERIAL PRIMARY KEY,
name VARCHAR(100) NOT NULL UNIQUE,
role VARCHAR(50) NOT NULL, -- TaskRole: product_refresh, store_discovery, etc.
description TEXT,
-- Schedule configuration
enabled BOOLEAN DEFAULT TRUE,
interval_hours INTEGER NOT NULL DEFAULT 4,
priority INTEGER DEFAULT 0,
-- Optional scope filters
state_code VARCHAR(2), -- NULL = all states
platform VARCHAR(50), -- NULL = all platforms
-- Execution state (updated by scheduler)
last_run_at TIMESTAMPTZ,
next_run_at TIMESTAMPTZ,
last_task_count INTEGER DEFAULT 0,
last_error TEXT,
created_at TIMESTAMPTZ DEFAULT NOW(),
updated_at TIMESTAMPTZ DEFAULT NOW()
);
-- Indexes for scheduler queries
CREATE INDEX IF NOT EXISTS idx_task_schedules_enabled ON task_schedules(enabled) WHERE enabled = TRUE;
CREATE INDEX IF NOT EXISTS idx_task_schedules_next_run ON task_schedules(next_run_at) WHERE enabled = TRUE;
-- Insert default schedules
INSERT INTO task_schedules (name, role, interval_hours, priority, description, next_run_at)
VALUES
('product_refresh_all', 'product_refresh', 4, 0, 'Generate product refresh tasks for all crawl-enabled stores every 4 hours', NOW()),
('store_discovery_dutchie', 'store_discovery', 24, 5, 'Discover new Dutchie stores daily', NOW()),
('analytics_refresh', 'analytics_refresh', 6, 0, 'Refresh analytics materialized views every 6 hours', NOW())
ON CONFLICT (name) DO NOTHING;
-- Comment for documentation
COMMENT ON TABLE task_schedules IS 'Database-driven task scheduler configuration. Per TASK_WORKFLOW_2024-12-10.md:
- Schedules persist in DB (survive restarts)
- Uses SELECT FOR UPDATE SKIP LOCKED for multi-replica safety
- Scheduler polls every 60s and executes due schedules
- Creates tasks in worker_tasks for task-worker.ts to process';


@@ -1,58 +0,0 @@
-- Migration 080: Raw Crawl Payloads Metadata Table
-- Per TASK_WORKFLOW_2024-12-10.md: Store full GraphQL payloads for historical analysis
--
-- Design Pattern: Metadata/Payload Separation
-- - Metadata (this table): Small, indexed, queryable
-- - Payload (filesystem): Gzipped JSON at storage_path
--
-- Benefits:
-- - Compare any two crawls to see what changed
-- - Replay/re-normalize historical data if logic changes
-- - Debug issues by seeing exactly what the API returned
-- - DB stays small, backups stay fast
--
-- Storage location: /storage/payloads/{year}/{month}/{day}/store_{id}_{timestamp}.json.gz
-- Compression: ~90% reduction (1.5MB -> 150KB per crawl)
CREATE TABLE IF NOT EXISTS raw_crawl_payloads (
id SERIAL PRIMARY KEY,
-- Links to crawl tracking
crawl_run_id INTEGER REFERENCES crawl_runs(id) ON DELETE SET NULL,
dispensary_id INTEGER NOT NULL REFERENCES dispensaries(id) ON DELETE CASCADE,
-- File location (gzipped JSON)
storage_path TEXT NOT NULL,
-- Metadata for quick queries without loading file
product_count INTEGER NOT NULL DEFAULT 0,
size_bytes INTEGER, -- Compressed size
size_bytes_raw INTEGER, -- Uncompressed size
-- Timestamps
fetched_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
-- Optional: checksum for integrity verification
checksum_sha256 VARCHAR(64)
);
-- Indexes for common queries
CREATE INDEX IF NOT EXISTS idx_raw_crawl_payloads_dispensary
ON raw_crawl_payloads(dispensary_id);
CREATE INDEX IF NOT EXISTS idx_raw_crawl_payloads_dispensary_fetched
ON raw_crawl_payloads(dispensary_id, fetched_at DESC);
CREATE INDEX IF NOT EXISTS idx_raw_crawl_payloads_fetched
ON raw_crawl_payloads(fetched_at DESC);
CREATE INDEX IF NOT EXISTS idx_raw_crawl_payloads_crawl_run
ON raw_crawl_payloads(crawl_run_id)
WHERE crawl_run_id IS NOT NULL;
-- Comments
COMMENT ON TABLE raw_crawl_payloads IS 'Metadata for raw GraphQL payloads stored on filesystem. Per TASK_WORKFLOW_2024-12-10.md: Full payloads enable historical diffs and replay.';
COMMENT ON COLUMN raw_crawl_payloads.storage_path IS 'Path to gzipped JSON file, e.g. /storage/payloads/2024/12/10/store_123_1702234567.json.gz';
COMMENT ON COLUMN raw_crawl_payloads.size_bytes IS 'Compressed file size in bytes';
COMMENT ON COLUMN raw_crawl_payloads.size_bytes_raw IS 'Uncompressed payload size in bytes';


@@ -1,37 +0,0 @@
-- Migration 081: Payload Fetch Columns
-- Per TASK_WORKFLOW_2024-12-10.md: Separates API fetch from data processing
--
-- New architecture:
-- - payload_fetch: Hits Dutchie API, saves raw payload to disk
-- - product_refresh: Reads local payload, normalizes, upserts to DB
--
-- This migration adds:
-- 1. payload column to worker_tasks (for task chaining data)
-- 2. processed_at column to raw_crawl_payloads (track when payload was processed)
-- 3. last_fetch_at column to dispensaries (track when last payload was fetched)
-- Add payload column to worker_tasks for task chaining
-- Used by payload_fetch to pass payload_id to product_refresh
ALTER TABLE worker_tasks
ADD COLUMN IF NOT EXISTS payload JSONB DEFAULT NULL;
COMMENT ON COLUMN worker_tasks.payload IS 'Per TASK_WORKFLOW_2024-12-10.md: Task chaining data (e.g., payload_id from payload_fetch to product_refresh)';
-- Add processed_at to raw_crawl_payloads
-- Tracks when the payload was processed by product_refresh
ALTER TABLE raw_crawl_payloads
ADD COLUMN IF NOT EXISTS processed_at TIMESTAMPTZ DEFAULT NULL;
COMMENT ON COLUMN raw_crawl_payloads.processed_at IS 'When this payload was processed by product_refresh handler';
-- Index for finding unprocessed payloads
CREATE INDEX IF NOT EXISTS idx_raw_crawl_payloads_unprocessed
ON raw_crawl_payloads(dispensary_id, fetched_at DESC)
WHERE processed_at IS NULL;
-- Add last_fetch_at to dispensaries
-- Tracks when the last payload was fetched (separate from last_crawl_at which is when processing completed)
ALTER TABLE dispensaries
ADD COLUMN IF NOT EXISTS last_fetch_at TIMESTAMPTZ DEFAULT NULL;
COMMENT ON COLUMN dispensaries.last_fetch_at IS 'Per TASK_WORKFLOW_2024-12-10.md: When last payload was fetched from API (separate from last_crawl_at which is when processing completed)';
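
Taken together, these three columns enable a fetch-then-process chain. A sketch of both halves, assuming a `pg` Pool (the enqueue shape is illustrative, not the repo's handler code):

```typescript
import { Pool } from "pg";

const pool = new Pool();

// End of a payload_fetch task: record the fetch time and chain a
// product_refresh task that carries payload_id via the new payload column.
async function chainProductRefresh(dispensaryId: number, payloadId: number): Promise<void> {
  await pool.query(
    `INSERT INTO worker_tasks (role, dispensary_id, status, payload)
     VALUES ('product_refresh', $1, 'pending', $2::jsonb)`,
    [dispensaryId, JSON.stringify({ payload_id: payloadId })]
  );
  await pool.query(`UPDATE dispensaries SET last_fetch_at = NOW() WHERE id = $1`, [dispensaryId]);
}

// The partial index makes "what still needs processing?" a cheap query.
async function unprocessedPayloads(dispensaryId: number) {
  const { rows } = await pool.query(
    `SELECT id, storage_path
       FROM raw_crawl_payloads
      WHERE dispensary_id = $1 AND processed_at IS NULL
      ORDER BY fetched_at DESC`,
    [dispensaryId]
  );
  return rows;
}
```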

@@ -1,27 +0,0 @@
-- Migration: 082_proxy_notification_trigger
-- Date: 2024-12-11
-- Description: Add PostgreSQL NOTIFY trigger to alert workers when proxies are added
-- Create function to notify workers when active proxy is added/activated
CREATE OR REPLACE FUNCTION notify_proxy_added()
RETURNS TRIGGER AS $$
BEGIN
-- Only notify if proxy is active
IF NEW.active = true THEN
PERFORM pg_notify('proxy_added', NEW.id::text);
END IF;
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
-- Drop existing trigger if any
DROP TRIGGER IF EXISTS proxy_added_trigger ON proxies;
-- Create trigger on insert and update of active column
CREATE TRIGGER proxy_added_trigger
AFTER INSERT OR UPDATE OF active ON proxies
FOR EACH ROW
EXECUTE FUNCTION notify_proxy_added();
COMMENT ON FUNCTION notify_proxy_added() IS
'Sends PostgreSQL NOTIFY to proxy_added channel when an active proxy is added or activated. Workers LISTEN on this channel to wake up immediately.';
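
On the worker side, consuming this trigger means one dedicated connection that stays open and LISTENs; a minimal sketch with `pg`:

```typescript
import { Client } from "pg";

// One dedicated, long-lived connection per process; pooled connections
// are unsuitable for LISTEN because the session must stay open.
async function listenForProxies(onProxy: (id: number) => void): Promise<void> {
  const client = new Client();
  await client.connect();
  await client.query("LISTEN proxy_added");
  client.on("notification", (msg) => {
    if (msg.channel === "proxy_added" && msg.payload) {
      onProxy(Number(msg.payload)); // payload is the new proxy's id
    }
  });
}

listenForProxies((id) => console.log(`proxy ${id} is active, waking up`)).catch(console.error);
```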

@@ -1,88 +0,0 @@
-- Migration 083: Discovery Run Tracking
-- Tracks progress of store discovery runs step-by-step
-- Main discovery runs table
CREATE TABLE IF NOT EXISTS discovery_runs (
id SERIAL PRIMARY KEY,
platform VARCHAR(50) NOT NULL DEFAULT 'dutchie',
status VARCHAR(20) NOT NULL DEFAULT 'running', -- running, completed, failed
started_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
finished_at TIMESTAMPTZ,
task_id INTEGER REFERENCES worker_task_queue(id),
-- Totals
states_total INTEGER DEFAULT 0,
states_completed INTEGER DEFAULT 0,
locations_discovered INTEGER DEFAULT 0,
locations_promoted INTEGER DEFAULT 0,
new_store_ids INTEGER[] DEFAULT '{}',
-- Error info
error_message TEXT,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
-- Per-state progress within a run
CREATE TABLE IF NOT EXISTS discovery_run_states (
id SERIAL PRIMARY KEY,
run_id INTEGER NOT NULL REFERENCES discovery_runs(id) ON DELETE CASCADE,
state_code VARCHAR(2) NOT NULL,
status VARCHAR(20) NOT NULL DEFAULT 'pending', -- pending, running, completed, failed
started_at TIMESTAMPTZ,
finished_at TIMESTAMPTZ,
-- Results
cities_found INTEGER DEFAULT 0,
locations_found INTEGER DEFAULT 0,
locations_upserted INTEGER DEFAULT 0,
new_dispensary_ids INTEGER[] DEFAULT '{}',
-- Error info
error_message TEXT,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
UNIQUE(run_id, state_code)
);
-- Step-by-step log for detailed progress tracking
CREATE TABLE IF NOT EXISTS discovery_run_steps (
id SERIAL PRIMARY KEY,
run_id INTEGER NOT NULL REFERENCES discovery_runs(id) ON DELETE CASCADE,
state_code VARCHAR(2),
step_name VARCHAR(100) NOT NULL,
status VARCHAR(20) NOT NULL DEFAULT 'started', -- started, completed, failed
started_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
finished_at TIMESTAMPTZ,
-- Details (JSON for flexibility)
details JSONB DEFAULT '{}',
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
-- Indexes for querying
CREATE INDEX IF NOT EXISTS idx_discovery_runs_status ON discovery_runs(status);
CREATE INDEX IF NOT EXISTS idx_discovery_runs_platform ON discovery_runs(platform);
CREATE INDEX IF NOT EXISTS idx_discovery_runs_started_at ON discovery_runs(started_at DESC);
CREATE INDEX IF NOT EXISTS idx_discovery_run_states_run_id ON discovery_run_states(run_id);
CREATE INDEX IF NOT EXISTS idx_discovery_run_steps_run_id ON discovery_run_steps(run_id);
-- View for latest run status per platform
CREATE OR REPLACE VIEW v_latest_discovery_runs AS
SELECT DISTINCT ON (platform)
id,
platform,
status,
started_at,
finished_at,
states_total,
states_completed,
locations_discovered,
locations_promoted,
array_length(new_store_ids, 1) as new_stores_count,
error_message,
EXTRACT(EPOCH FROM (COALESCE(finished_at, NOW()) - started_at)) as duration_seconds
FROM discovery_runs
ORDER BY platform, started_at DESC;
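
Dashboards can read run progress straight from the view; for example (assuming a `pg` Pool):

```typescript
import { Pool } from "pg";

const pool = new Pool();

// DISTINCT ON in the view guarantees at most one row per platform.
async function latestDiscoveryRun(platform = "dutchie") {
  const { rows } = await pool.query(
    `SELECT status, states_completed, states_total,
            locations_discovered, new_stores_count, duration_seconds
       FROM v_latest_discovery_runs
      WHERE platform = $1`,
    [platform]
  );
  return rows[0] ?? null;
}
```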

@@ -1,253 +0,0 @@
-- Migration 084: Dual Transport Preflight System
-- Workers run both curl and http (Puppeteer) preflights on startup
-- Tasks can require a specific transport method
-- ===================================================================
-- PART 1: Add preflight columns to worker_registry
-- ===================================================================
-- Preflight status for curl/axios transport (proxy-based)
ALTER TABLE worker_registry
ADD COLUMN IF NOT EXISTS preflight_curl_status VARCHAR(20) DEFAULT 'pending';
-- Preflight status for http/Puppeteer transport (browser-based)
ALTER TABLE worker_registry
ADD COLUMN IF NOT EXISTS preflight_http_status VARCHAR(20) DEFAULT 'pending';
-- Timestamps for when each preflight completed
ALTER TABLE worker_registry
ADD COLUMN IF NOT EXISTS preflight_curl_at TIMESTAMPTZ;
ALTER TABLE worker_registry
ADD COLUMN IF NOT EXISTS preflight_http_at TIMESTAMPTZ;
-- Error messages for failed preflights
ALTER TABLE worker_registry
ADD COLUMN IF NOT EXISTS preflight_curl_error TEXT;
ALTER TABLE worker_registry
ADD COLUMN IF NOT EXISTS preflight_http_error TEXT;
-- Response time for successful preflights (ms)
ALTER TABLE worker_registry
ADD COLUMN IF NOT EXISTS preflight_curl_ms INTEGER;
ALTER TABLE worker_registry
ADD COLUMN IF NOT EXISTS preflight_http_ms INTEGER;
-- Constraints for preflight status values
ALTER TABLE worker_registry
DROP CONSTRAINT IF EXISTS valid_preflight_curl_status;
ALTER TABLE worker_registry
ADD CONSTRAINT valid_preflight_curl_status
CHECK (preflight_curl_status IN ('pending', 'passed', 'failed', 'skipped'));
ALTER TABLE worker_registry
DROP CONSTRAINT IF EXISTS valid_preflight_http_status;
ALTER TABLE worker_registry
ADD CONSTRAINT valid_preflight_http_status
CHECK (preflight_http_status IN ('pending', 'passed', 'failed', 'skipped'));
-- ===================================================================
-- PART 2: Add method column to worker_tasks
-- ===================================================================
-- Transport method requirement for the task
-- NULL = no preference (any worker can claim)
-- 'curl' = requires curl/axios transport (proxy-based, fast)
-- 'http' = requires http/Puppeteer transport (browser-based, anti-detect)
ALTER TABLE worker_tasks
ADD COLUMN IF NOT EXISTS method VARCHAR(10);
-- Constraint for valid method values
ALTER TABLE worker_tasks
DROP CONSTRAINT IF EXISTS valid_task_method;
ALTER TABLE worker_tasks
ADD CONSTRAINT valid_task_method
CHECK (method IS NULL OR method IN ('curl', 'http'));
-- Index for method-based task claiming
CREATE INDEX IF NOT EXISTS idx_worker_tasks_method
ON worker_tasks(method)
WHERE status = 'pending';
-- Set default method for all existing pending tasks to 'http'
-- ALL current tasks require Puppeteer/browser-based transport
UPDATE worker_tasks
SET method = 'http'
WHERE method IS NULL;
-- ===================================================================
-- PART 3: Update claim_task function for method compatibility
-- ===================================================================
CREATE OR REPLACE FUNCTION claim_task(
p_role VARCHAR(50),
p_worker_id VARCHAR(100),
p_curl_passed BOOLEAN DEFAULT TRUE,
p_http_passed BOOLEAN DEFAULT FALSE
) RETURNS worker_tasks AS $$
DECLARE
claimed_task worker_tasks;
BEGIN
UPDATE worker_tasks
SET
status = 'claimed',
worker_id = p_worker_id,
claimed_at = NOW(),
updated_at = NOW()
WHERE id = (
SELECT id FROM worker_tasks
WHERE role = p_role
AND status = 'pending'
AND (scheduled_for IS NULL OR scheduled_for <= NOW())
-- Method compatibility: worker must have passed the required preflight
AND (
method IS NULL -- No preference, any worker can claim
OR (method = 'curl' AND p_curl_passed = TRUE)
OR (method = 'http' AND p_http_passed = TRUE)
)
-- Exclude stores that already have an active task
AND (dispensary_id IS NULL OR dispensary_id NOT IN (
SELECT dispensary_id FROM worker_tasks
WHERE status IN ('claimed', 'running')
AND dispensary_id IS NOT NULL
))
ORDER BY priority DESC, created_at ASC
LIMIT 1
FOR UPDATE SKIP LOCKED
)
RETURNING * INTO claimed_task;
RETURN claimed_task;
END;
$$ LANGUAGE plpgsql;
-- ===================================================================
-- PART 4: Update v_active_workers view
-- ===================================================================
DROP VIEW IF EXISTS v_active_workers;
CREATE VIEW v_active_workers AS
SELECT
wr.id,
wr.worker_id,
wr.friendly_name,
wr.role,
wr.status,
wr.pod_name,
wr.hostname,
wr.started_at,
wr.last_heartbeat_at,
wr.last_task_at,
wr.tasks_completed,
wr.tasks_failed,
wr.current_task_id,
-- Preflight status
wr.preflight_curl_status,
wr.preflight_http_status,
wr.preflight_curl_at,
wr.preflight_http_at,
wr.preflight_curl_error,
wr.preflight_http_error,
wr.preflight_curl_ms,
wr.preflight_http_ms,
-- Computed fields
EXTRACT(EPOCH FROM (NOW() - wr.last_heartbeat_at)) as seconds_since_heartbeat,
CASE
WHEN wr.status = 'offline' THEN 'offline'
WHEN wr.last_heartbeat_at < NOW() - INTERVAL '2 minutes' THEN 'stale'
WHEN wr.current_task_id IS NOT NULL THEN 'busy'
ELSE 'ready'
END as health_status,
-- Capability flags (can this worker handle curl/http tasks?)
(wr.preflight_curl_status = 'passed') as can_curl,
(wr.preflight_http_status = 'passed') as can_http
FROM worker_registry wr
WHERE wr.status != 'terminated'
ORDER BY wr.status = 'active' DESC, wr.last_heartbeat_at DESC;
-- ===================================================================
-- PART 5: View for task queue with method info
-- ===================================================================
DROP VIEW IF EXISTS v_task_history;
CREATE VIEW v_task_history AS
SELECT
t.id,
t.role,
t.dispensary_id,
d.name as dispensary_name,
t.platform,
t.status,
t.priority,
t.method,
t.worker_id,
t.scheduled_for,
t.claimed_at,
t.started_at,
t.completed_at,
t.error_message,
t.retry_count,
t.created_at,
EXTRACT(EPOCH FROM (t.completed_at - t.started_at)) as duration_sec
FROM worker_tasks t
LEFT JOIN dispensaries d ON d.id = t.dispensary_id
ORDER BY t.created_at DESC;
-- ===================================================================
-- PART 6: Helper function to update worker preflight status
-- ===================================================================
CREATE OR REPLACE FUNCTION update_worker_preflight(
p_worker_id VARCHAR(100),
p_transport VARCHAR(10), -- 'curl' or 'http'
p_status VARCHAR(20), -- 'passed', 'failed', 'skipped'
p_response_ms INTEGER DEFAULT NULL,
p_error TEXT DEFAULT NULL
) RETURNS VOID AS $$
BEGIN
IF p_transport = 'curl' THEN
UPDATE worker_registry
SET
preflight_curl_status = p_status,
preflight_curl_at = NOW(),
preflight_curl_ms = p_response_ms,
preflight_curl_error = p_error,
updated_at = NOW()
WHERE worker_id = p_worker_id;
ELSIF p_transport = 'http' THEN
UPDATE worker_registry
SET
preflight_http_status = p_status,
preflight_http_at = NOW(),
preflight_http_ms = p_response_ms,
preflight_http_error = p_error,
updated_at = NOW()
WHERE worker_id = p_worker_id;
END IF;
END;
$$ LANGUAGE plpgsql;
-- ===================================================================
-- Comments
-- ===================================================================
COMMENT ON COLUMN worker_registry.preflight_curl_status IS 'Status of curl/axios preflight: pending, passed, failed, skipped';
COMMENT ON COLUMN worker_registry.preflight_http_status IS 'Status of http/Puppeteer preflight: pending, passed, failed, skipped';
COMMENT ON COLUMN worker_registry.preflight_curl_at IS 'When curl preflight completed';
COMMENT ON COLUMN worker_registry.preflight_http_at IS 'When http preflight completed';
COMMENT ON COLUMN worker_registry.preflight_curl_error IS 'Error message if curl preflight failed';
COMMENT ON COLUMN worker_registry.preflight_http_error IS 'Error message if http preflight failed';
COMMENT ON COLUMN worker_registry.preflight_curl_ms IS 'Response time of successful curl preflight (ms)';
COMMENT ON COLUMN worker_registry.preflight_http_ms IS 'Response time of successful http preflight (ms)';
COMMENT ON COLUMN worker_tasks.method IS 'Transport method required: NULL=any, curl=proxy-based, http=browser-based';
COMMENT ON FUNCTION claim_task IS 'Atomically claim a task, respecting method requirements and per-store locking';
COMMENT ON FUNCTION update_worker_preflight IS 'Update a worker''s preflight status for a given transport';
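
A worker's claim loop would pass its own preflight results into `claim_task`, so it is only handed tasks it can serve; a sketch (the role string and return handling are illustrative):

```typescript
import { Pool } from "pg";

const pool = new Pool();

// claim_task only hands back tasks whose required method this worker passed
// preflight for (or tasks with no method preference at all).
async function claimNext(role: string, workerId: string, canCurl: boolean, canHttp: boolean) {
  const { rows } = await pool.query(
    `SELECT * FROM claim_task($1, $2, $3, $4)`,
    [role, workerId, canCurl, canHttp]
  );
  // claim_task returns a worker_tasks row; it is all-NULL when nothing was claimable.
  return rows[0]?.id != null ? rows[0] : null;
}
```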

@@ -1,168 +0,0 @@
-- Migration 085: Add IP and fingerprint columns for preflight reporting
-- These columns were missing from migration 084
-- ===================================================================
-- PART 1: Add IP address columns to worker_registry
-- ===================================================================
-- IP address detected during curl/axios preflight
ALTER TABLE worker_registry
ADD COLUMN IF NOT EXISTS curl_ip VARCHAR(45);
-- IP address detected during http/Puppeteer preflight
ALTER TABLE worker_registry
ADD COLUMN IF NOT EXISTS http_ip VARCHAR(45);
-- ===================================================================
-- PART 2: Add fingerprint data column
-- ===================================================================
-- Browser fingerprint data captured during Puppeteer preflight
ALTER TABLE worker_registry
ADD COLUMN IF NOT EXISTS fingerprint_data JSONB;
-- ===================================================================
-- PART 3: Add combined preflight status/timestamp for convenience
-- ===================================================================
-- Overall preflight status (computed from both transports)
-- Values: 'pending', 'passed', 'partial', 'failed'
-- - 'pending': neither transport tested
-- - 'passed': both transports passed (or http passed for browser-only)
-- - 'partial': at least one passed
-- - 'failed': no transport passed
ALTER TABLE worker_registry
ADD COLUMN IF NOT EXISTS preflight_status VARCHAR(20) DEFAULT 'pending';
-- Most recent preflight completion timestamp
ALTER TABLE worker_registry
ADD COLUMN IF NOT EXISTS preflight_at TIMESTAMPTZ;
-- ===================================================================
-- PART 4: Update function to set preflight status
-- ===================================================================
CREATE OR REPLACE FUNCTION update_worker_preflight(
p_worker_id VARCHAR(100),
p_transport VARCHAR(10), -- 'curl' or 'http'
p_status VARCHAR(20), -- 'passed', 'failed', 'skipped'
p_ip VARCHAR(45) DEFAULT NULL,
p_response_ms INTEGER DEFAULT NULL,
p_error TEXT DEFAULT NULL,
p_fingerprint JSONB DEFAULT NULL
) RETURNS VOID AS $$
DECLARE
v_curl_status VARCHAR(20);
v_http_status VARCHAR(20);
v_overall_status VARCHAR(20);
BEGIN
IF p_transport = 'curl' THEN
UPDATE worker_registry
SET
preflight_curl_status = p_status,
preflight_curl_at = NOW(),
preflight_curl_ms = p_response_ms,
preflight_curl_error = p_error,
curl_ip = p_ip,
updated_at = NOW()
WHERE worker_id = p_worker_id;
ELSIF p_transport = 'http' THEN
UPDATE worker_registry
SET
preflight_http_status = p_status,
preflight_http_at = NOW(),
preflight_http_ms = p_response_ms,
preflight_http_error = p_error,
http_ip = p_ip,
fingerprint_data = COALESCE(p_fingerprint, fingerprint_data),
updated_at = NOW()
WHERE worker_id = p_worker_id;
END IF;
-- Update overall preflight status
SELECT preflight_curl_status, preflight_http_status
INTO v_curl_status, v_http_status
FROM worker_registry
WHERE worker_id = p_worker_id;
-- Compute overall status
IF v_curl_status = 'passed' AND v_http_status = 'passed' THEN
v_overall_status := 'passed';
ELSIF v_curl_status = 'passed' OR v_http_status = 'passed' THEN
v_overall_status := 'partial';
ELSIF v_curl_status = 'failed' OR v_http_status = 'failed' THEN
v_overall_status := 'failed';
ELSE
v_overall_status := 'pending';
END IF;
UPDATE worker_registry
SET
preflight_status = v_overall_status,
preflight_at = NOW()
WHERE worker_id = p_worker_id;
END;
$$ LANGUAGE plpgsql;
-- ===================================================================
-- PART 5: Update v_active_workers view
-- ===================================================================
DROP VIEW IF EXISTS v_active_workers;
CREATE VIEW v_active_workers AS
SELECT
wr.id,
wr.worker_id,
wr.friendly_name,
wr.role,
wr.status,
wr.pod_name,
wr.hostname,
wr.started_at,
wr.last_heartbeat_at,
wr.last_task_at,
wr.tasks_completed,
wr.tasks_failed,
wr.current_task_id,
-- IP addresses from preflights
wr.curl_ip,
wr.http_ip,
-- Combined preflight status
wr.preflight_status,
wr.preflight_at,
-- Detailed preflight status per transport
wr.preflight_curl_status,
wr.preflight_http_status,
wr.preflight_curl_at,
wr.preflight_http_at,
wr.preflight_curl_error,
wr.preflight_http_error,
wr.preflight_curl_ms,
wr.preflight_http_ms,
-- Fingerprint data
wr.fingerprint_data,
-- Computed fields
EXTRACT(EPOCH FROM (NOW() - wr.last_heartbeat_at)) as seconds_since_heartbeat,
CASE
WHEN wr.status = 'offline' THEN 'offline'
WHEN wr.last_heartbeat_at < NOW() - INTERVAL '2 minutes' THEN 'stale'
WHEN wr.current_task_id IS NOT NULL THEN 'busy'
ELSE 'ready'
END as health_status,
-- Capability flags (can this worker handle curl/http tasks?)
(wr.preflight_curl_status = 'passed') as can_curl,
(wr.preflight_http_status = 'passed') as can_http
FROM worker_registry wr
WHERE wr.status != 'terminated'
ORDER BY wr.status = 'active' DESC, wr.last_heartbeat_at DESC;
-- ===================================================================
-- Comments
-- ===================================================================
COMMENT ON COLUMN worker_registry.curl_ip IS 'IP address detected during curl/axios preflight';
COMMENT ON COLUMN worker_registry.http_ip IS 'IP address detected during Puppeteer preflight';
COMMENT ON COLUMN worker_registry.fingerprint_data IS 'Browser fingerprint captured during Puppeteer preflight';
COMMENT ON COLUMN worker_registry.preflight_status IS 'Overall preflight status: pending, passed, partial, failed';
COMMENT ON COLUMN worker_registry.preflight_at IS 'Most recent preflight completion timestamp';
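
Reporting a preflight result from a worker is then a single function call; a sketch matching the signature above:

```typescript
import { Pool } from "pg";

const pool = new Pool();

async function reportPreflight(
  workerId: string,
  transport: "curl" | "http",
  ok: boolean,
  ip: string | null,
  responseMs: number | null,
  error: string | null = null,
  fingerprint: Record<string, unknown> | null = null
): Promise<void> {
  // Updates the per-transport columns and recomputes the combined
  // preflight_status (pending / passed / partial / failed).
  await pool.query(
    `SELECT update_worker_preflight($1, $2, $3, $4, $5, $6, $7)`,
    [workerId, transport, ok ? "passed" : "failed", ip, responseMs, error,
     fingerprint ? JSON.stringify(fingerprint) : null]
  );
}
```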

@@ -1,10 +0,0 @@
-- Migration 086: Add proxy_url column for alternative URL formats
-- Some proxy providers use non-standard URL formats (e.g., host:port:user:pass)
-- This column allows storing the raw URL directly
-- Add proxy_url column - if set, used directly instead of constructing from parts
ALTER TABLE proxies
ADD COLUMN IF NOT EXISTS proxy_url TEXT;
-- Add comment
COMMENT ON COLUMN proxies.proxy_url IS 'Raw proxy URL (if provider uses non-standard format). Takes precedence over constructed URL from host/port/user/pass.';
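
Normalizing a provider's `host:port:user:pass` string into something storable in `proxy_url` is straightforward; a hypothetical helper (not repo code):

```typescript
// Hypothetical helper: normalize a provider's host:port:user:pass string
// into a standard URL before storing it in proxies.proxy_url.
function toProxyUrl(raw: string, scheme = "http"): string {
  const [host, port, user, pass] = raw.split(":");
  return user && pass
    ? `${scheme}://${encodeURIComponent(user)}:${encodeURIComponent(pass)}@${host}:${port}`
    : `${scheme}://${host}:${port}`;
}

// toProxyUrl("proxy1.example.net:8080:alice:s3cret")
//   -> "http://alice:s3cret@proxy1.example.net:8080"
```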

@@ -1,30 +0,0 @@
-- Migration 088: Extend raw_crawl_payloads for discovery payloads
--
-- Enables saving raw store data from Dutchie discovery crawls.
-- Store discovery returns raw dispensary objects - save them for historical analysis.
-- Add payload_type to distinguish product crawls from discovery crawls
ALTER TABLE raw_crawl_payloads
ADD COLUMN IF NOT EXISTS payload_type VARCHAR(32) NOT NULL DEFAULT 'product';
-- Add state_code for discovery payloads (null for product payloads)
ALTER TABLE raw_crawl_payloads
ADD COLUMN IF NOT EXISTS state_code VARCHAR(10);
-- Add store_count for discovery payloads (alternative to product_count)
ALTER TABLE raw_crawl_payloads
ADD COLUMN IF NOT EXISTS store_count INTEGER;
-- Make dispensary_id nullable for discovery payloads
ALTER TABLE raw_crawl_payloads
ALTER COLUMN dispensary_id DROP NOT NULL;
-- Add index for discovery payload queries
CREATE INDEX IF NOT EXISTS idx_raw_crawl_payloads_type_state
ON raw_crawl_payloads(payload_type, state_code)
WHERE payload_type = 'store_discovery';
-- Comments
COMMENT ON COLUMN raw_crawl_payloads.payload_type IS 'Type: product (default), store_discovery';
COMMENT ON COLUMN raw_crawl_payloads.state_code IS 'State code for discovery payloads (e.g., AZ, MI)';
COMMENT ON COLUMN raw_crawl_payloads.store_count IS 'Number of stores in discovery payload';
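
The partial index targets lookups of the newest discovery snapshot per state, e.g. (assuming a `pg` Pool):

```typescript
import { Pool } from "pg";

const pool = new Pool();

// The shape of query the partial index above is built for.
async function latestDiscoveryPayload(stateCode: string) {
  const { rows } = await pool.query(
    `SELECT id, storage_path, store_count, fetched_at
       FROM raw_crawl_payloads
      WHERE payload_type = 'store_discovery' AND state_code = $1
      ORDER BY fetched_at DESC
      LIMIT 1`,
    [stateCode]
  );
  return rows[0] ?? null;
}
```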

@@ -1,105 +0,0 @@
-- Migration 089: Immutable Schedules with Per-State Product Discovery
--
-- Key changes:
-- 1. Add is_immutable column - schedules can be edited but not deleted
-- 2. Add method column - all tasks use 'http' (Puppeteer transport)
-- 3. Store discovery weekly (168h)
-- 4. Per-state product_discovery schedules (4h default)
-- 5. Remove old payload_fetch schedules
-- =====================================================
-- 1) Add new columns to task_schedules
-- =====================================================
ALTER TABLE task_schedules
ADD COLUMN IF NOT EXISTS is_immutable BOOLEAN DEFAULT FALSE;
ALTER TABLE task_schedules
ADD COLUMN IF NOT EXISTS method VARCHAR(10) DEFAULT 'http';
-- =====================================================
-- 2) Update store_discovery to weekly and immutable
-- =====================================================
UPDATE task_schedules
SET interval_hours = 168, -- 7 days
is_immutable = TRUE,
method = 'http',
description = 'Discover new Dutchie stores weekly (HTTP transport)'
WHERE name = 'store_discovery_dutchie';
-- Insert if it doesn't exist
INSERT INTO task_schedules (name, role, interval_hours, priority, description, is_immutable, method, platform, next_run_at)
VALUES ('store_discovery_dutchie', 'store_discovery', 168, 5, 'Discover new Dutchie stores weekly (HTTP transport)', TRUE, 'http', 'dutchie', NOW())
ON CONFLICT (name) DO UPDATE SET
interval_hours = 168,
is_immutable = TRUE,
method = 'http',
description = 'Discover new Dutchie stores weekly (HTTP transport)';
-- =====================================================
-- 3) Remove old payload_fetch and product_refresh_all schedules
-- =====================================================
DELETE FROM task_schedules WHERE name IN ('payload_fetch_all', 'product_refresh_all');
-- =====================================================
-- 4) Create per-state product_discovery schedules
-- =====================================================
-- One schedule per state that has dispensaries with active cannabis programs
INSERT INTO task_schedules (name, role, state_code, interval_hours, priority, description, is_immutable, method, enabled, next_run_at)
SELECT
'product_discovery_' || lower(s.code) AS name,
'product_discovery' AS role,
s.code AS state_code,
4 AS interval_hours, -- 4 hours default, editable
10 AS priority,
'Product discovery for ' || s.name || ' dispensaries (HTTP transport)' AS description,
TRUE AS is_immutable, -- Can edit but not delete
'http' AS method,
CASE WHEN s.is_active THEN TRUE ELSE FALSE END AS enabled,
-- Stagger start times: each state starts 5 minutes after the previous
NOW() + (ROW_NUMBER() OVER (ORDER BY s.code) * INTERVAL '5 minutes') AS next_run_at
FROM states s
WHERE EXISTS (
SELECT 1 FROM dispensaries d
WHERE d.state_id = s.id AND d.crawl_enabled = true
)
ON CONFLICT (name) DO UPDATE SET
is_immutable = TRUE,
method = 'http',
description = EXCLUDED.description;
-- Also create schedules for states that might have stores discovered later
INSERT INTO task_schedules (name, role, state_code, interval_hours, priority, description, is_immutable, method, enabled, next_run_at)
SELECT
'product_discovery_' || lower(s.code) AS name,
'product_discovery' AS role,
s.code AS state_code,
4 AS interval_hours,
10 AS priority,
'Product discovery for ' || s.name || ' dispensaries (HTTP transport)' AS description,
TRUE AS is_immutable,
'http' AS method,
FALSE AS enabled, -- Disabled until stores exist
NOW() + INTERVAL '1 hour'
FROM states s
WHERE NOT EXISTS (
SELECT 1 FROM task_schedules ts WHERE ts.name = 'product_discovery_' || lower(s.code)
)
ON CONFLICT (name) DO NOTHING;
-- =====================================================
-- 5) Make analytics_refresh immutable
-- =====================================================
UPDATE task_schedules
SET is_immutable = TRUE, method = 'http'
WHERE name = 'analytics_refresh';
-- =====================================================
-- 6) Add index for schedule lookups
-- =====================================================
CREATE INDEX IF NOT EXISTS idx_task_schedules_state_code
ON task_schedules(state_code)
WHERE state_code IS NOT NULL;
-- Comments
COMMENT ON COLUMN task_schedules.is_immutable IS 'If TRUE, schedule cannot be deleted (only edited)';
COMMENT ON COLUMN task_schedules.method IS 'Transport method: http (Puppeteer/browser) or curl (axios)';
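
Note that `is_immutable` is advisory: Postgres itself does not block the DELETE, so the API layer has to. A hypothetical service-layer guard:

```typescript
import { Pool } from "pg";

const pool = new Pool();

async function deleteSchedule(id: number): Promise<void> {
  const { rows } = await pool.query(
    `SELECT is_immutable FROM task_schedules WHERE id = $1`,
    [id]
  );
  if (!rows[0]) throw new Error("schedule not found");
  if (rows[0].is_immutable) {
    throw new Error("schedule is immutable: edit its interval or enabled flag instead of deleting");
  }
  await pool.query(`DELETE FROM task_schedules WHERE id = $1`, [id]);
}
```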

@@ -1,6 +1,6 @@
{
"name": "dutchie-menus-backend",
- "version": "1.6.0",
+ "version": "1.5.1",
"lockfileVersion": 3,
"requires": true,
"packages": {
@@ -46,97 +46,6 @@
"resolved": "https://registry.npmjs.org/@ioredis/commands/-/commands-1.4.0.tgz", "resolved": "https://registry.npmjs.org/@ioredis/commands/-/commands-1.4.0.tgz",
"integrity": "sha512-aFT2yemJJo+TZCmieA7qnYGQooOS7QfNmYrzGtsYd3g9j5iDP8AimYYAesf79ohjbLG12XxC4nG5DyEnC88AsQ==" "integrity": "sha512-aFT2yemJJo+TZCmieA7qnYGQooOS7QfNmYrzGtsYd3g9j5iDP8AimYYAesf79ohjbLG12XxC4nG5DyEnC88AsQ=="
}, },
"node_modules/@jsep-plugin/assignment": {
"version": "1.3.0",
"resolved": "https://registry.npmjs.org/@jsep-plugin/assignment/-/assignment-1.3.0.tgz",
"integrity": "sha512-VVgV+CXrhbMI3aSusQyclHkenWSAm95WaiKrMxRFam3JSUiIaQjoMIw2sEs/OX4XifnqeQUN4DYbJjlA8EfktQ==",
"engines": {
"node": ">= 10.16.0"
},
"peerDependencies": {
"jsep": "^0.4.0||^1.0.0"
}
},
"node_modules/@jsep-plugin/regex": {
"version": "1.0.4",
"resolved": "https://registry.npmjs.org/@jsep-plugin/regex/-/regex-1.0.4.tgz",
"integrity": "sha512-q7qL4Mgjs1vByCaTnDFcBnV9HS7GVPJX5vyVoCgZHNSC9rjwIlmbXG5sUuorR5ndfHAIlJ8pVStxvjXHbNvtUg==",
"engines": {
"node": ">= 10.16.0"
},
"peerDependencies": {
"jsep": "^0.4.0||^1.0.0"
}
},
"node_modules/@kubernetes/client-node": {
"version": "1.4.0",
"resolved": "https://registry.npmjs.org/@kubernetes/client-node/-/client-node-1.4.0.tgz",
"integrity": "sha512-Zge3YvF7DJi264dU1b3wb/GmzR99JhUpqTvp+VGHfwZT+g7EOOYNScDJNZwXy9cszyIGPIs0VHr+kk8e95qqrA==",
"dependencies": {
"@types/js-yaml": "^4.0.1",
"@types/node": "^24.0.0",
"@types/node-fetch": "^2.6.13",
"@types/stream-buffers": "^3.0.3",
"form-data": "^4.0.0",
"hpagent": "^1.2.0",
"isomorphic-ws": "^5.0.0",
"js-yaml": "^4.1.0",
"jsonpath-plus": "^10.3.0",
"node-fetch": "^2.7.0",
"openid-client": "^6.1.3",
"rfc4648": "^1.3.0",
"socks-proxy-agent": "^8.0.4",
"stream-buffers": "^3.0.2",
"tar-fs": "^3.0.9",
"ws": "^8.18.2"
}
},
"node_modules/@kubernetes/client-node/node_modules/@types/node": {
"version": "24.10.3",
"resolved": "https://registry.npmjs.org/@types/node/-/node-24.10.3.tgz",
"integrity": "sha512-gqkrWUsS8hcm0r44yn7/xZeV1ERva/nLgrLxFRUGb7aoNMIJfZJ3AC261zDQuOAKC7MiXai1WCpYc48jAHoShQ==",
"dependencies": {
"undici-types": "~7.16.0"
}
},
"node_modules/@kubernetes/client-node/node_modules/tar-fs": {
"version": "3.1.1",
"resolved": "https://registry.npmjs.org/tar-fs/-/tar-fs-3.1.1.tgz",
"integrity": "sha512-LZA0oaPOc2fVo82Txf3gw+AkEd38szODlptMYejQUhndHMLQ9M059uXR+AfS7DNo0NpINvSqDsvyaCrBVkptWg==",
"dependencies": {
"pump": "^3.0.0",
"tar-stream": "^3.1.5"
},
"optionalDependencies": {
"bare-fs": "^4.0.1",
"bare-path": "^3.0.0"
}
},
"node_modules/@kubernetes/client-node/node_modules/undici-types": {
"version": "7.16.0",
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.16.0.tgz",
"integrity": "sha512-Zz+aZWSj8LE6zoxD+xrjh4VfkIG8Ya6LvYkZqtUQGJPZjYl53ypCaUwWqo7eI0x66KBGeRo+mlBEkMSeSZ38Nw=="
},
"node_modules/@kubernetes/client-node/node_modules/ws": {
"version": "8.18.3",
"resolved": "https://registry.npmjs.org/ws/-/ws-8.18.3.tgz",
"integrity": "sha512-PEIGCY5tSlUt50cqyMXfCzX+oOPqN0vuGqWzbcJ2xvnkzkq46oOpz7dQaTDBdfICb4N14+GARUDw2XV2N4tvzg==",
"engines": {
"node": ">=10.0.0"
},
"peerDependencies": {
"bufferutil": "^4.0.1",
"utf-8-validate": ">=5.0.2"
},
"peerDependenciesMeta": {
"bufferutil": {
"optional": true
},
"utf-8-validate": {
"optional": true
}
}
},
"node_modules/@mapbox/node-pre-gyp": { "node_modules/@mapbox/node-pre-gyp": {
"version": "1.0.11", "version": "1.0.11",
"resolved": "https://registry.npmjs.org/@mapbox/node-pre-gyp/-/node-pre-gyp-1.0.11.tgz", "resolved": "https://registry.npmjs.org/@mapbox/node-pre-gyp/-/node-pre-gyp-1.0.11.tgz",
@@ -342,11 +251,6 @@
"integrity": "sha512-r8Tayk8HJnX0FztbZN7oVqGccWgw98T/0neJphO91KkmOzug1KkofZURD4UaD5uH8AqcFLfdPErnBod0u71/qg==", "integrity": "sha512-r8Tayk8HJnX0FztbZN7oVqGccWgw98T/0neJphO91KkmOzug1KkofZURD4UaD5uH8AqcFLfdPErnBod0u71/qg==",
"dev": true "dev": true
}, },
"node_modules/@types/js-yaml": {
"version": "4.0.9",
"resolved": "https://registry.npmjs.org/@types/js-yaml/-/js-yaml-4.0.9.tgz",
"integrity": "sha512-k4MGaQl5TGo/iipqb2UDG2UwjXziSWkh0uysQelTlJpX1qGlpUZYm8PnO4DxG1qBomtJUdYJ6qR6xdIah10JLg=="
},
"node_modules/@types/jsonwebtoken": { "node_modules/@types/jsonwebtoken": {
"version": "9.0.10", "version": "9.0.10",
"resolved": "https://registry.npmjs.org/@types/jsonwebtoken/-/jsonwebtoken-9.0.10.tgz", "resolved": "https://registry.npmjs.org/@types/jsonwebtoken/-/jsonwebtoken-9.0.10.tgz",
@@ -372,6 +276,7 @@
"version": "20.19.25", "version": "20.19.25",
"resolved": "https://registry.npmjs.org/@types/node/-/node-20.19.25.tgz", "resolved": "https://registry.npmjs.org/@types/node/-/node-20.19.25.tgz",
"integrity": "sha512-ZsJzA5thDQMSQO788d7IocwwQbI8B5OPzmqNvpf3NY/+MHDAS759Wo0gd2WQeXYt5AAAQjzcrTVC6SKCuYgoCQ==", "integrity": "sha512-ZsJzA5thDQMSQO788d7IocwwQbI8B5OPzmqNvpf3NY/+MHDAS759Wo0gd2WQeXYt5AAAQjzcrTVC6SKCuYgoCQ==",
"devOptional": true,
"dependencies": { "dependencies": {
"undici-types": "~6.21.0" "undici-types": "~6.21.0"
} }
@@ -382,15 +287,6 @@
"integrity": "sha512-0ikrnug3/IyneSHqCBeslAhlK2aBfYek1fGo4bP4QnZPmiqSGRK+Oy7ZMisLWkesffJvQ1cqAcBnJC+8+nxIAg==", "integrity": "sha512-0ikrnug3/IyneSHqCBeslAhlK2aBfYek1fGo4bP4QnZPmiqSGRK+Oy7ZMisLWkesffJvQ1cqAcBnJC+8+nxIAg==",
"dev": true "dev": true
}, },
"node_modules/@types/node-fetch": {
"version": "2.6.13",
"resolved": "https://registry.npmjs.org/@types/node-fetch/-/node-fetch-2.6.13.tgz",
"integrity": "sha512-QGpRVpzSaUs30JBSGPjOg4Uveu384erbHBoT1zeONvyCfwQxIkUshLAOqN/k9EjGviPRmWTTe6aH2qySWKTVSw==",
"dependencies": {
"@types/node": "*",
"form-data": "^4.0.4"
}
},
"node_modules/@types/pg": { "node_modules/@types/pg": {
"version": "8.15.6", "version": "8.15.6",
"resolved": "https://registry.npmjs.org/@types/pg/-/pg-8.15.6.tgz", "resolved": "https://registry.npmjs.org/@types/pg/-/pg-8.15.6.tgz",
@@ -444,14 +340,6 @@
"@types/node": "*" "@types/node": "*"
} }
}, },
"node_modules/@types/stream-buffers": {
"version": "3.0.8",
"resolved": "https://registry.npmjs.org/@types/stream-buffers/-/stream-buffers-3.0.8.tgz",
"integrity": "sha512-J+7VaHKNvlNPJPEJXX/fKa9DZtR/xPMwuIbe+yNOwp1YB+ApUOBv2aUpEoBJEi8nJgbgs1x8e73ttg0r1rSUdw==",
"dependencies": {
"@types/node": "*"
}
},
"node_modules/@types/uuid": { "node_modules/@types/uuid": {
"version": "9.0.8", "version": "9.0.8",
"resolved": "https://registry.npmjs.org/@types/uuid/-/uuid-9.0.8.tgz", "resolved": "https://registry.npmjs.org/@types/uuid/-/uuid-9.0.8.tgz",
@@ -632,78 +520,6 @@
}
}
},
"node_modules/bare-fs": {
"version": "4.5.2",
"resolved": "https://registry.npmjs.org/bare-fs/-/bare-fs-4.5.2.tgz",
"integrity": "sha512-veTnRzkb6aPHOvSKIOy60KzURfBdUflr5VReI+NSaPL6xf+XLdONQgZgpYvUuZLVQ8dCqxpBAudaOM1+KpAUxw==",
"optional": true,
"dependencies": {
"bare-events": "^2.5.4",
"bare-path": "^3.0.0",
"bare-stream": "^2.6.4",
"bare-url": "^2.2.2",
"fast-fifo": "^1.3.2"
},
"engines": {
"bare": ">=1.16.0"
},
"peerDependencies": {
"bare-buffer": "*"
},
"peerDependenciesMeta": {
"bare-buffer": {
"optional": true
}
}
},
"node_modules/bare-os": {
"version": "3.6.2",
"resolved": "https://registry.npmjs.org/bare-os/-/bare-os-3.6.2.tgz",
"integrity": "sha512-T+V1+1srU2qYNBmJCXZkUY5vQ0B4FSlL3QDROnKQYOqeiQR8UbjNHlPa+TIbM4cuidiN9GaTaOZgSEgsvPbh5A==",
"optional": true,
"engines": {
"bare": ">=1.14.0"
}
},
"node_modules/bare-path": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/bare-path/-/bare-path-3.0.0.tgz",
"integrity": "sha512-tyfW2cQcB5NN8Saijrhqn0Zh7AnFNsnczRcuWODH0eYAXBsJ5gVxAUuNr7tsHSC6IZ77cA0SitzT+s47kot8Mw==",
"optional": true,
"dependencies": {
"bare-os": "^3.0.1"
}
},
"node_modules/bare-stream": {
"version": "2.7.0",
"resolved": "https://registry.npmjs.org/bare-stream/-/bare-stream-2.7.0.tgz",
"integrity": "sha512-oyXQNicV1y8nc2aKffH+BUHFRXmx6VrPzlnaEvMhram0nPBrKcEdcyBg5r08D0i8VxngHFAiVyn1QKXpSG0B8A==",
"optional": true,
"dependencies": {
"streamx": "^2.21.0"
},
"peerDependencies": {
"bare-buffer": "*",
"bare-events": "*"
},
"peerDependenciesMeta": {
"bare-buffer": {
"optional": true
},
"bare-events": {
"optional": true
}
}
},
"node_modules/bare-url": {
"version": "2.3.2",
"resolved": "https://registry.npmjs.org/bare-url/-/bare-url-2.3.2.tgz",
"integrity": "sha512-ZMq4gd9ngV5aTMa5p9+UfY0b3skwhHELaDkhEHetMdX0LRkW9kzaym4oo/Eh+Ghm0CCDuMTsRIGM/ytUc1ZYmw==",
"optional": true,
"dependencies": {
"bare-path": "^3.0.0"
}
},
"node_modules/base64-js": { "node_modules/base64-js": {
"version": "1.5.1", "version": "1.5.1",
"resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz", "resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz",
@@ -2203,14 +2019,6 @@
"node": ">=16.0.0" "node": ">=16.0.0"
} }
}, },
"node_modules/hpagent": {
"version": "1.2.0",
"resolved": "https://registry.npmjs.org/hpagent/-/hpagent-1.2.0.tgz",
"integrity": "sha512-A91dYTeIB6NoXG+PxTQpCCDDnfHsW9kc06Lvpu1TEe9gnd6ZFeiBoRO9JvzEv6xK7EX97/dUE8g/vBMTqTS3CA==",
"engines": {
"node": ">=14"
}
},
"node_modules/htmlparser2": { "node_modules/htmlparser2": {
"version": "10.0.0", "version": "10.0.0",
"resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-10.0.0.tgz", "resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-10.0.0.tgz",
@@ -2574,22 +2382,6 @@
"node": ">=0.10.0" "node": ">=0.10.0"
} }
}, },
"node_modules/isomorphic-ws": {
"version": "5.0.0",
"resolved": "https://registry.npmjs.org/isomorphic-ws/-/isomorphic-ws-5.0.0.tgz",
"integrity": "sha512-muId7Zzn9ywDsyXgTIafTry2sV3nySZeUDe6YedVd1Hvuuep5AsIlqK+XefWpYTyJG5e503F2xIuT2lcU6rCSw==",
"peerDependencies": {
"ws": "*"
}
},
"node_modules/jose": {
"version": "6.1.3",
"resolved": "https://registry.npmjs.org/jose/-/jose-6.1.3.tgz",
"integrity": "sha512-0TpaTfihd4QMNwrz/ob2Bp7X04yuxJkjRGi4aKmOqwhov54i6u79oCv7T+C7lo70MKH6BesI3vscD1yb/yzKXQ==",
"funding": {
"url": "https://github.com/sponsors/panva"
}
},
"node_modules/js-tokens": { "node_modules/js-tokens": {
"version": "4.0.0", "version": "4.0.0",
"resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz", "resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz",
@@ -2606,14 +2398,6 @@
"js-yaml": "bin/js-yaml.js" "js-yaml": "bin/js-yaml.js"
} }
}, },
"node_modules/jsep": {
"version": "1.4.0",
"resolved": "https://registry.npmjs.org/jsep/-/jsep-1.4.0.tgz",
"integrity": "sha512-B7qPcEVE3NVkmSJbaYxvv4cHkVW7DQsZz13pUMrfS8z8Q/BuShN+gcTXrUlPiGqM2/t/EEaI030bpxMqY8gMlw==",
"engines": {
"node": ">= 10.16.0"
}
},
"node_modules/json-parse-even-better-errors": { "node_modules/json-parse-even-better-errors": {
"version": "2.3.1", "version": "2.3.1",
"resolved": "https://registry.npmjs.org/json-parse-even-better-errors/-/json-parse-even-better-errors-2.3.1.tgz", "resolved": "https://registry.npmjs.org/json-parse-even-better-errors/-/json-parse-even-better-errors-2.3.1.tgz",
@@ -2635,23 +2419,6 @@
"graceful-fs": "^4.1.6" "graceful-fs": "^4.1.6"
} }
}, },
"node_modules/jsonpath-plus": {
"version": "10.3.0",
"resolved": "https://registry.npmjs.org/jsonpath-plus/-/jsonpath-plus-10.3.0.tgz",
"integrity": "sha512-8TNmfeTCk2Le33A3vRRwtuworG/L5RrgMvdjhKZxvyShO+mBu2fP50OWUjRLNtvw344DdDarFh9buFAZs5ujeA==",
"dependencies": {
"@jsep-plugin/assignment": "^1.3.0",
"@jsep-plugin/regex": "^1.0.4",
"jsep": "^1.4.0"
},
"bin": {
"jsonpath": "bin/jsonpath-cli.js",
"jsonpath-plus": "bin/jsonpath-cli.js"
},
"engines": {
"node": ">=18.0.0"
}
},
"node_modules/jsonwebtoken": { "node_modules/jsonwebtoken": {
"version": "9.0.2", "version": "9.0.2",
"resolved": "https://registry.npmjs.org/jsonwebtoken/-/jsonwebtoken-9.0.2.tgz", "resolved": "https://registry.npmjs.org/jsonwebtoken/-/jsonwebtoken-9.0.2.tgz",
@@ -2726,11 +2493,6 @@
"resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz", "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz",
"integrity": "sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==" "integrity": "sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg=="
}, },
"node_modules/lodash.clonedeep": {
"version": "4.5.0",
"resolved": "https://registry.npmjs.org/lodash.clonedeep/-/lodash.clonedeep-4.5.0.tgz",
"integrity": "sha512-H5ZhCF25riFd9uB5UCkVKo61m3S/xZk1x4wA6yp/L3RFP6Z/eHH1ymQcGLo7J3GMPfm0V/7m1tryHuGVxpqEBQ=="
},
"node_modules/lodash.defaults": { "node_modules/lodash.defaults": {
"version": "4.2.0", "version": "4.2.0",
"resolved": "https://registry.npmjs.org/lodash.defaults/-/lodash.defaults-4.2.0.tgz", "resolved": "https://registry.npmjs.org/lodash.defaults/-/lodash.defaults-4.2.0.tgz",
@@ -3180,14 +2942,6 @@
"url": "https://github.com/fb55/nth-check?sponsor=1" "url": "https://github.com/fb55/nth-check?sponsor=1"
} }
}, },
"node_modules/oauth4webapi": {
"version": "3.8.3",
"resolved": "https://registry.npmjs.org/oauth4webapi/-/oauth4webapi-3.8.3.tgz",
"integrity": "sha512-pQ5BsX3QRTgnt5HxgHwgunIRaDXBdkT23tf8dfzmtTIL2LTpdmxgbpbBm0VgFWAIDlezQvQCTgnVIUmHupXHxw==",
"funding": {
"url": "https://github.com/sponsors/panva"
}
},
"node_modules/object-assign": { "node_modules/object-assign": {
"version": "4.1.1", "version": "4.1.1",
"resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz", "resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz",
@@ -3226,18 +2980,6 @@
"wrappy": "1" "wrappy": "1"
} }
}, },
"node_modules/openid-client": {
"version": "6.8.1",
"resolved": "https://registry.npmjs.org/openid-client/-/openid-client-6.8.1.tgz",
"integrity": "sha512-VoYT6enBo6Vj2j3Q5Ec0AezS+9YGzQo1f5Xc42lreMGlfP4ljiXPKVDvCADh+XHCV/bqPu/wWSiCVXbJKvrODw==",
"dependencies": {
"jose": "^6.1.0",
"oauth4webapi": "^3.8.2"
},
"funding": {
"url": "https://github.com/sponsors/panva"
}
},
"node_modules/pac-proxy-agent": { "node_modules/pac-proxy-agent": {
"version": "7.2.0", "version": "7.2.0",
"resolved": "https://registry.npmjs.org/pac-proxy-agent/-/pac-proxy-agent-7.2.0.tgz", "resolved": "https://registry.npmjs.org/pac-proxy-agent/-/pac-proxy-agent-7.2.0.tgz",
@@ -4141,11 +3883,6 @@
"url": "https://github.com/privatenumber/resolve-pkg-maps?sponsor=1" "url": "https://github.com/privatenumber/resolve-pkg-maps?sponsor=1"
} }
}, },
"node_modules/rfc4648": {
"version": "1.5.4",
"resolved": "https://registry.npmjs.org/rfc4648/-/rfc4648-1.5.4.tgz",
"integrity": "sha512-rRg/6Lb+IGfJqO05HZkN50UtY7K/JhxJag1kP23+zyMfrvoB0B7RWv06MbOzoc79RgCdNTiUaNsTT1AJZ7Z+cg=="
},
"node_modules/rimraf": { "node_modules/rimraf": {
"version": "3.0.2", "version": "3.0.2",
"resolved": "https://registry.npmjs.org/rimraf/-/rimraf-3.0.2.tgz", "resolved": "https://registry.npmjs.org/rimraf/-/rimraf-3.0.2.tgz",
@@ -4576,14 +4313,6 @@
"node": ">= 0.8" "node": ">= 0.8"
} }
}, },
"node_modules/stream-buffers": {
"version": "3.0.3",
"resolved": "https://registry.npmjs.org/stream-buffers/-/stream-buffers-3.0.3.tgz",
"integrity": "sha512-pqMqwQCso0PBJt2PQmDO0cFj0lyqmiwOMiMSkVtRokl7e+ZTRYgDHKnuZNbqjiJXgsg4nuqtD/zxuo9KqTp0Yw==",
"engines": {
"node": ">= 0.10.0"
}
},
"node_modules/streamx": { "node_modules/streamx": {
"version": "2.23.0", "version": "2.23.0",
"resolved": "https://registry.npmjs.org/streamx/-/streamx-2.23.0.tgz", "resolved": "https://registry.npmjs.org/streamx/-/streamx-2.23.0.tgz",
@@ -4803,7 +4532,8 @@
"node_modules/undici-types": { "node_modules/undici-types": {
"version": "6.21.0", "version": "6.21.0",
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.21.0.tgz", "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.21.0.tgz",
"integrity": "sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ==" "integrity": "sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ==",
"devOptional": true
}, },
"node_modules/universalify": { "node_modules/universalify": {
"version": "2.0.1", "version": "2.0.1",
@@ -4826,14 +4556,6 @@
"resolved": "https://registry.npmjs.org/urlpattern-polyfill/-/urlpattern-polyfill-10.0.0.tgz", "resolved": "https://registry.npmjs.org/urlpattern-polyfill/-/urlpattern-polyfill-10.0.0.tgz",
"integrity": "sha512-H/A06tKD7sS1O1X2SshBVeA5FLycRpjqiBeqGKmBwBDBy28EnRjORxTNe269KSSr5un5qyWi1iL61wLxpd+ZOg==" "integrity": "sha512-H/A06tKD7sS1O1X2SshBVeA5FLycRpjqiBeqGKmBwBDBy28EnRjORxTNe269KSSr5un5qyWi1iL61wLxpd+ZOg=="
}, },
"node_modules/user-agents": {
"version": "1.1.669",
"resolved": "https://registry.npmjs.org/user-agents/-/user-agents-1.1.669.tgz",
"integrity": "sha512-pbIzG+AOqCaIpySKJ4IAm1l0VyE4jMnK4y1thV8lm8PYxI+7X5uWcppOK7zY79TCKKTAnJH3/4gaVIZHsjrmJA==",
"dependencies": {
"lodash.clonedeep": "^4.5.0"
}
},
"node_modules/util": { "node_modules/util": {
"version": "0.12.5", "version": "0.12.5",
"resolved": "https://registry.npmjs.org/util/-/util-0.12.5.tgz", "resolved": "https://registry.npmjs.org/util/-/util-0.12.5.tgz",

@@ -1,14 +1,13 @@
{
"name": "dutchie-menus-backend",
- "version": "1.6.0",
+ "version": "1.5.1",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "dutchie-menus-backend",
- "version": "1.6.0",
+ "version": "1.5.1",
"dependencies": {
"@kubernetes/client-node": "^1.4.0",
"@types/bcryptjs": "^3.0.0", "@types/bcryptjs": "^3.0.0",
"axios": "^1.6.2", "axios": "^1.6.2",
"bcrypt": "^5.1.1", "bcrypt": "^5.1.1",
@@ -35,7 +34,6 @@
"puppeteer-extra-plugin-stealth": "^2.11.2", "puppeteer-extra-plugin-stealth": "^2.11.2",
"sharp": "^0.32.0", "sharp": "^0.32.0",
"socks-proxy-agent": "^8.0.2", "socks-proxy-agent": "^8.0.2",
"user-agents": "^1.1.669",
"uuid": "^9.0.1", "uuid": "^9.0.1",
"zod": "^3.22.4" "zod": "^3.22.4"
}, },
@@ -494,97 +492,6 @@
"resolved": "https://registry.npmjs.org/@ioredis/commands/-/commands-1.4.0.tgz", "resolved": "https://registry.npmjs.org/@ioredis/commands/-/commands-1.4.0.tgz",
"integrity": "sha512-aFT2yemJJo+TZCmieA7qnYGQooOS7QfNmYrzGtsYd3g9j5iDP8AimYYAesf79ohjbLG12XxC4nG5DyEnC88AsQ==" "integrity": "sha512-aFT2yemJJo+TZCmieA7qnYGQooOS7QfNmYrzGtsYd3g9j5iDP8AimYYAesf79ohjbLG12XxC4nG5DyEnC88AsQ=="
}, },
"node_modules/@jsep-plugin/assignment": {
"version": "1.3.0",
"resolved": "https://registry.npmjs.org/@jsep-plugin/assignment/-/assignment-1.3.0.tgz",
"integrity": "sha512-VVgV+CXrhbMI3aSusQyclHkenWSAm95WaiKrMxRFam3JSUiIaQjoMIw2sEs/OX4XifnqeQUN4DYbJjlA8EfktQ==",
"engines": {
"node": ">= 10.16.0"
},
"peerDependencies": {
"jsep": "^0.4.0||^1.0.0"
}
},
"node_modules/@jsep-plugin/regex": {
"version": "1.0.4",
"resolved": "https://registry.npmjs.org/@jsep-plugin/regex/-/regex-1.0.4.tgz",
"integrity": "sha512-q7qL4Mgjs1vByCaTnDFcBnV9HS7GVPJX5vyVoCgZHNSC9rjwIlmbXG5sUuorR5ndfHAIlJ8pVStxvjXHbNvtUg==",
"engines": {
"node": ">= 10.16.0"
},
"peerDependencies": {
"jsep": "^0.4.0||^1.0.0"
}
},
"node_modules/@kubernetes/client-node": {
"version": "1.4.0",
"resolved": "https://registry.npmjs.org/@kubernetes/client-node/-/client-node-1.4.0.tgz",
"integrity": "sha512-Zge3YvF7DJi264dU1b3wb/GmzR99JhUpqTvp+VGHfwZT+g7EOOYNScDJNZwXy9cszyIGPIs0VHr+kk8e95qqrA==",
"dependencies": {
"@types/js-yaml": "^4.0.1",
"@types/node": "^24.0.0",
"@types/node-fetch": "^2.6.13",
"@types/stream-buffers": "^3.0.3",
"form-data": "^4.0.0",
"hpagent": "^1.2.0",
"isomorphic-ws": "^5.0.0",
"js-yaml": "^4.1.0",
"jsonpath-plus": "^10.3.0",
"node-fetch": "^2.7.0",
"openid-client": "^6.1.3",
"rfc4648": "^1.3.0",
"socks-proxy-agent": "^8.0.4",
"stream-buffers": "^3.0.2",
"tar-fs": "^3.0.9",
"ws": "^8.18.2"
}
},
"node_modules/@kubernetes/client-node/node_modules/@types/node": {
"version": "24.10.3",
"resolved": "https://registry.npmjs.org/@types/node/-/node-24.10.3.tgz",
"integrity": "sha512-gqkrWUsS8hcm0r44yn7/xZeV1ERva/nLgrLxFRUGb7aoNMIJfZJ3AC261zDQuOAKC7MiXai1WCpYc48jAHoShQ==",
"dependencies": {
"undici-types": "~7.16.0"
}
},
"node_modules/@kubernetes/client-node/node_modules/tar-fs": {
"version": "3.1.1",
"resolved": "https://registry.npmjs.org/tar-fs/-/tar-fs-3.1.1.tgz",
"integrity": "sha512-LZA0oaPOc2fVo82Txf3gw+AkEd38szODlptMYejQUhndHMLQ9M059uXR+AfS7DNo0NpINvSqDsvyaCrBVkptWg==",
"dependencies": {
"pump": "^3.0.0",
"tar-stream": "^3.1.5"
},
"optionalDependencies": {
"bare-fs": "^4.0.1",
"bare-path": "^3.0.0"
}
},
"node_modules/@kubernetes/client-node/node_modules/undici-types": {
"version": "7.16.0",
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.16.0.tgz",
"integrity": "sha512-Zz+aZWSj8LE6zoxD+xrjh4VfkIG8Ya6LvYkZqtUQGJPZjYl53ypCaUwWqo7eI0x66KBGeRo+mlBEkMSeSZ38Nw=="
},
"node_modules/@kubernetes/client-node/node_modules/ws": {
"version": "8.18.3",
"resolved": "https://registry.npmjs.org/ws/-/ws-8.18.3.tgz",
"integrity": "sha512-PEIGCY5tSlUt50cqyMXfCzX+oOPqN0vuGqWzbcJ2xvnkzkq46oOpz7dQaTDBdfICb4N14+GARUDw2XV2N4tvzg==",
"engines": {
"node": ">=10.0.0"
},
"peerDependencies": {
"bufferutil": "^4.0.1",
"utf-8-validate": ">=5.0.2"
},
"peerDependenciesMeta": {
"bufferutil": {
"optional": true
},
"utf-8-validate": {
"optional": true
}
}
},
"node_modules/@mapbox/node-pre-gyp": { "node_modules/@mapbox/node-pre-gyp": {
"version": "1.0.11", "version": "1.0.11",
"resolved": "https://registry.npmjs.org/@mapbox/node-pre-gyp/-/node-pre-gyp-1.0.11.tgz", "resolved": "https://registry.npmjs.org/@mapbox/node-pre-gyp/-/node-pre-gyp-1.0.11.tgz",
@@ -850,11 +757,6 @@
"integrity": "sha512-r8Tayk8HJnX0FztbZN7oVqGccWgw98T/0neJphO91KkmOzug1KkofZURD4UaD5uH8AqcFLfdPErnBod0u71/qg==", "integrity": "sha512-r8Tayk8HJnX0FztbZN7oVqGccWgw98T/0neJphO91KkmOzug1KkofZURD4UaD5uH8AqcFLfdPErnBod0u71/qg==",
"dev": true "dev": true
}, },
"node_modules/@types/js-yaml": {
"version": "4.0.9",
"resolved": "https://registry.npmjs.org/@types/js-yaml/-/js-yaml-4.0.9.tgz",
"integrity": "sha512-k4MGaQl5TGo/iipqb2UDG2UwjXziSWkh0uysQelTlJpX1qGlpUZYm8PnO4DxG1qBomtJUdYJ6qR6xdIah10JLg=="
},
"node_modules/@types/jsonwebtoken": { "node_modules/@types/jsonwebtoken": {
"version": "9.0.10", "version": "9.0.10",
"resolved": "https://registry.npmjs.org/@types/jsonwebtoken/-/jsonwebtoken-9.0.10.tgz", "resolved": "https://registry.npmjs.org/@types/jsonwebtoken/-/jsonwebtoken-9.0.10.tgz",
@@ -880,6 +782,7 @@
"version": "20.19.25", "version": "20.19.25",
"resolved": "https://registry.npmjs.org/@types/node/-/node-20.19.25.tgz", "resolved": "https://registry.npmjs.org/@types/node/-/node-20.19.25.tgz",
"integrity": "sha512-ZsJzA5thDQMSQO788d7IocwwQbI8B5OPzmqNvpf3NY/+MHDAS759Wo0gd2WQeXYt5AAAQjzcrTVC6SKCuYgoCQ==", "integrity": "sha512-ZsJzA5thDQMSQO788d7IocwwQbI8B5OPzmqNvpf3NY/+MHDAS759Wo0gd2WQeXYt5AAAQjzcrTVC6SKCuYgoCQ==",
"devOptional": true,
"dependencies": { "dependencies": {
"undici-types": "~6.21.0" "undici-types": "~6.21.0"
} }
@@ -890,15 +793,6 @@
"integrity": "sha512-0ikrnug3/IyneSHqCBeslAhlK2aBfYek1fGo4bP4QnZPmiqSGRK+Oy7ZMisLWkesffJvQ1cqAcBnJC+8+nxIAg==", "integrity": "sha512-0ikrnug3/IyneSHqCBeslAhlK2aBfYek1fGo4bP4QnZPmiqSGRK+Oy7ZMisLWkesffJvQ1cqAcBnJC+8+nxIAg==",
"dev": true "dev": true
}, },
"node_modules/@types/node-fetch": {
"version": "2.6.13",
"resolved": "https://registry.npmjs.org/@types/node-fetch/-/node-fetch-2.6.13.tgz",
"integrity": "sha512-QGpRVpzSaUs30JBSGPjOg4Uveu384erbHBoT1zeONvyCfwQxIkUshLAOqN/k9EjGviPRmWTTe6aH2qySWKTVSw==",
"dependencies": {
"@types/node": "*",
"form-data": "^4.0.4"
}
},
"node_modules/@types/pg": { "node_modules/@types/pg": {
"version": "8.15.6", "version": "8.15.6",
"resolved": "https://registry.npmjs.org/@types/pg/-/pg-8.15.6.tgz", "resolved": "https://registry.npmjs.org/@types/pg/-/pg-8.15.6.tgz",
@@ -952,14 +846,6 @@
"@types/node": "*" "@types/node": "*"
} }
}, },
"node_modules/@types/stream-buffers": {
"version": "3.0.8",
"resolved": "https://registry.npmjs.org/@types/stream-buffers/-/stream-buffers-3.0.8.tgz",
"integrity": "sha512-J+7VaHKNvlNPJPEJXX/fKa9DZtR/xPMwuIbe+yNOwp1YB+ApUOBv2aUpEoBJEi8nJgbgs1x8e73ttg0r1rSUdw==",
"dependencies": {
"@types/node": "*"
}
},
"node_modules/@types/uuid": { "node_modules/@types/uuid": {
"version": "9.0.8", "version": "9.0.8",
"resolved": "https://registry.npmjs.org/@types/uuid/-/uuid-9.0.8.tgz", "resolved": "https://registry.npmjs.org/@types/uuid/-/uuid-9.0.8.tgz",
@@ -1140,78 +1026,6 @@
}
}
},
"node_modules/bare-fs": {
"version": "4.5.2",
"resolved": "https://registry.npmjs.org/bare-fs/-/bare-fs-4.5.2.tgz",
"integrity": "sha512-veTnRzkb6aPHOvSKIOy60KzURfBdUflr5VReI+NSaPL6xf+XLdONQgZgpYvUuZLVQ8dCqxpBAudaOM1+KpAUxw==",
"optional": true,
"dependencies": {
"bare-events": "^2.5.4",
"bare-path": "^3.0.0",
"bare-stream": "^2.6.4",
"bare-url": "^2.2.2",
"fast-fifo": "^1.3.2"
},
"engines": {
"bare": ">=1.16.0"
},
"peerDependencies": {
"bare-buffer": "*"
},
"peerDependenciesMeta": {
"bare-buffer": {
"optional": true
}
}
},
"node_modules/bare-os": {
"version": "3.6.2",
"resolved": "https://registry.npmjs.org/bare-os/-/bare-os-3.6.2.tgz",
"integrity": "sha512-T+V1+1srU2qYNBmJCXZkUY5vQ0B4FSlL3QDROnKQYOqeiQR8UbjNHlPa+TIbM4cuidiN9GaTaOZgSEgsvPbh5A==",
"optional": true,
"engines": {
"bare": ">=1.14.0"
}
},
"node_modules/bare-path": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/bare-path/-/bare-path-3.0.0.tgz",
"integrity": "sha512-tyfW2cQcB5NN8Saijrhqn0Zh7AnFNsnczRcuWODH0eYAXBsJ5gVxAUuNr7tsHSC6IZ77cA0SitzT+s47kot8Mw==",
"optional": true,
"dependencies": {
"bare-os": "^3.0.1"
}
},
"node_modules/bare-stream": {
"version": "2.7.0",
"resolved": "https://registry.npmjs.org/bare-stream/-/bare-stream-2.7.0.tgz",
"integrity": "sha512-oyXQNicV1y8nc2aKffH+BUHFRXmx6VrPzlnaEvMhram0nPBrKcEdcyBg5r08D0i8VxngHFAiVyn1QKXpSG0B8A==",
"optional": true,
"dependencies": {
"streamx": "^2.21.0"
},
"peerDependencies": {
"bare-buffer": "*",
"bare-events": "*"
},
"peerDependenciesMeta": {
"bare-buffer": {
"optional": true
},
"bare-events": {
"optional": true
}
}
},
"node_modules/bare-url": {
"version": "2.3.2",
"resolved": "https://registry.npmjs.org/bare-url/-/bare-url-2.3.2.tgz",
"integrity": "sha512-ZMq4gd9ngV5aTMa5p9+UfY0b3skwhHELaDkhEHetMdX0LRkW9kzaym4oo/Eh+Ghm0CCDuMTsRIGM/ytUc1ZYmw==",
"optional": true,
"dependencies": {
"bare-path": "^3.0.0"
}
},
"node_modules/base64-js": { "node_modules/base64-js": {
"version": "1.5.1", "version": "1.5.1",
"resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz", "resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz",
@@ -2725,14 +2539,6 @@
"node": ">=16.0.0" "node": ">=16.0.0"
} }
}, },
"node_modules/hpagent": {
"version": "1.2.0",
"resolved": "https://registry.npmjs.org/hpagent/-/hpagent-1.2.0.tgz",
"integrity": "sha512-A91dYTeIB6NoXG+PxTQpCCDDnfHsW9kc06Lvpu1TEe9gnd6ZFeiBoRO9JvzEv6xK7EX97/dUE8g/vBMTqTS3CA==",
"engines": {
"node": ">=14"
}
},
"node_modules/htmlparser2": { "node_modules/htmlparser2": {
"version": "10.0.0", "version": "10.0.0",
"resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-10.0.0.tgz", "resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-10.0.0.tgz",
@@ -3096,22 +2902,6 @@
"node": ">=0.10.0" "node": ">=0.10.0"
} }
}, },
"node_modules/isomorphic-ws": {
"version": "5.0.0",
"resolved": "https://registry.npmjs.org/isomorphic-ws/-/isomorphic-ws-5.0.0.tgz",
"integrity": "sha512-muId7Zzn9ywDsyXgTIafTry2sV3nySZeUDe6YedVd1Hvuuep5AsIlqK+XefWpYTyJG5e503F2xIuT2lcU6rCSw==",
"peerDependencies": {
"ws": "*"
}
},
"node_modules/jose": {
"version": "6.1.3",
"resolved": "https://registry.npmjs.org/jose/-/jose-6.1.3.tgz",
"integrity": "sha512-0TpaTfihd4QMNwrz/ob2Bp7X04yuxJkjRGi4aKmOqwhov54i6u79oCv7T+C7lo70MKH6BesI3vscD1yb/yzKXQ==",
"funding": {
"url": "https://github.com/sponsors/panva"
}
},
"node_modules/js-tokens": { "node_modules/js-tokens": {
"version": "4.0.0", "version": "4.0.0",
"resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz", "resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz",
@@ -3128,14 +2918,6 @@
"js-yaml": "bin/js-yaml.js" "js-yaml": "bin/js-yaml.js"
} }
}, },
"node_modules/jsep": {
"version": "1.4.0",
"resolved": "https://registry.npmjs.org/jsep/-/jsep-1.4.0.tgz",
"integrity": "sha512-B7qPcEVE3NVkmSJbaYxvv4cHkVW7DQsZz13pUMrfS8z8Q/BuShN+gcTXrUlPiGqM2/t/EEaI030bpxMqY8gMlw==",
"engines": {
"node": ">= 10.16.0"
}
},
"node_modules/json-parse-even-better-errors": { "node_modules/json-parse-even-better-errors": {
"version": "2.3.1", "version": "2.3.1",
"resolved": "https://registry.npmjs.org/json-parse-even-better-errors/-/json-parse-even-better-errors-2.3.1.tgz", "resolved": "https://registry.npmjs.org/json-parse-even-better-errors/-/json-parse-even-better-errors-2.3.1.tgz",
@@ -3157,23 +2939,6 @@
"graceful-fs": "^4.1.6" "graceful-fs": "^4.1.6"
} }
}, },
"node_modules/jsonpath-plus": {
"version": "10.3.0",
"resolved": "https://registry.npmjs.org/jsonpath-plus/-/jsonpath-plus-10.3.0.tgz",
"integrity": "sha512-8TNmfeTCk2Le33A3vRRwtuworG/L5RrgMvdjhKZxvyShO+mBu2fP50OWUjRLNtvw344DdDarFh9buFAZs5ujeA==",
"dependencies": {
"@jsep-plugin/assignment": "^1.3.0",
"@jsep-plugin/regex": "^1.0.4",
"jsep": "^1.4.0"
},
"bin": {
"jsonpath": "bin/jsonpath-cli.js",
"jsonpath-plus": "bin/jsonpath-cli.js"
},
"engines": {
"node": ">=18.0.0"
}
},
"node_modules/jsonwebtoken": { "node_modules/jsonwebtoken": {
"version": "9.0.2", "version": "9.0.2",
"resolved": "https://registry.npmjs.org/jsonwebtoken/-/jsonwebtoken-9.0.2.tgz", "resolved": "https://registry.npmjs.org/jsonwebtoken/-/jsonwebtoken-9.0.2.tgz",
@@ -3248,11 +3013,6 @@
"resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz", "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz",
"integrity": "sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==" "integrity": "sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg=="
}, },
"node_modules/lodash.clonedeep": {
"version": "4.5.0",
"resolved": "https://registry.npmjs.org/lodash.clonedeep/-/lodash.clonedeep-4.5.0.tgz",
"integrity": "sha512-H5ZhCF25riFd9uB5UCkVKo61m3S/xZk1x4wA6yp/L3RFP6Z/eHH1ymQcGLo7J3GMPfm0V/7m1tryHuGVxpqEBQ=="
},
"node_modules/lodash.defaults": { "node_modules/lodash.defaults": {
"version": "4.2.0", "version": "4.2.0",
"resolved": "https://registry.npmjs.org/lodash.defaults/-/lodash.defaults-4.2.0.tgz", "resolved": "https://registry.npmjs.org/lodash.defaults/-/lodash.defaults-4.2.0.tgz",
@@ -3702,14 +3462,6 @@
"url": "https://github.com/fb55/nth-check?sponsor=1" "url": "https://github.com/fb55/nth-check?sponsor=1"
} }
}, },
"node_modules/oauth4webapi": {
"version": "3.8.3",
"resolved": "https://registry.npmjs.org/oauth4webapi/-/oauth4webapi-3.8.3.tgz",
"integrity": "sha512-pQ5BsX3QRTgnt5HxgHwgunIRaDXBdkT23tf8dfzmtTIL2LTpdmxgbpbBm0VgFWAIDlezQvQCTgnVIUmHupXHxw==",
"funding": {
"url": "https://github.com/sponsors/panva"
}
},
"node_modules/object-assign": { "node_modules/object-assign": {
"version": "4.1.1", "version": "4.1.1",
"resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz", "resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz",
@@ -3748,18 +3500,6 @@
"wrappy": "1" "wrappy": "1"
} }
}, },
"node_modules/openid-client": {
"version": "6.8.1",
"resolved": "https://registry.npmjs.org/openid-client/-/openid-client-6.8.1.tgz",
"integrity": "sha512-VoYT6enBo6Vj2j3Q5Ec0AezS+9YGzQo1f5Xc42lreMGlfP4ljiXPKVDvCADh+XHCV/bqPu/wWSiCVXbJKvrODw==",
"dependencies": {
"jose": "^6.1.0",
"oauth4webapi": "^3.8.2"
},
"funding": {
"url": "https://github.com/sponsors/panva"
}
},
"node_modules/pac-proxy-agent": { "node_modules/pac-proxy-agent": {
"version": "7.2.0", "version": "7.2.0",
"resolved": "https://registry.npmjs.org/pac-proxy-agent/-/pac-proxy-agent-7.2.0.tgz", "resolved": "https://registry.npmjs.org/pac-proxy-agent/-/pac-proxy-agent-7.2.0.tgz",
@@ -4676,11 +4416,6 @@
"url": "https://github.com/privatenumber/resolve-pkg-maps?sponsor=1" "url": "https://github.com/privatenumber/resolve-pkg-maps?sponsor=1"
} }
}, },
"node_modules/rfc4648": {
"version": "1.5.4",
"resolved": "https://registry.npmjs.org/rfc4648/-/rfc4648-1.5.4.tgz",
"integrity": "sha512-rRg/6Lb+IGfJqO05HZkN50UtY7K/JhxJag1kP23+zyMfrvoB0B7RWv06MbOzoc79RgCdNTiUaNsTT1AJZ7Z+cg=="
},
"node_modules/rimraf": { "node_modules/rimraf": {
"version": "3.0.2", "version": "3.0.2",
"resolved": "https://registry.npmjs.org/rimraf/-/rimraf-3.0.2.tgz", "resolved": "https://registry.npmjs.org/rimraf/-/rimraf-3.0.2.tgz",
@@ -5111,14 +4846,6 @@
"node": ">= 0.8" "node": ">= 0.8"
} }
}, },
"node_modules/stream-buffers": {
"version": "3.0.3",
"resolved": "https://registry.npmjs.org/stream-buffers/-/stream-buffers-3.0.3.tgz",
"integrity": "sha512-pqMqwQCso0PBJt2PQmDO0cFj0lyqmiwOMiMSkVtRokl7e+ZTRYgDHKnuZNbqjiJXgsg4nuqtD/zxuo9KqTp0Yw==",
"engines": {
"node": ">= 0.10.0"
}
},
"node_modules/streamx": { "node_modules/streamx": {
"version": "2.23.0", "version": "2.23.0",
"resolved": "https://registry.npmjs.org/streamx/-/streamx-2.23.0.tgz", "resolved": "https://registry.npmjs.org/streamx/-/streamx-2.23.0.tgz",
@@ -5338,7 +5065,8 @@
"node_modules/undici-types": { "node_modules/undici-types": {
"version": "6.21.0", "version": "6.21.0",
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.21.0.tgz", "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.21.0.tgz",
"integrity": "sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ==" "integrity": "sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ==",
"devOptional": true
}, },
"node_modules/universalify": { "node_modules/universalify": {
"version": "2.0.1", "version": "2.0.1",
@@ -5361,14 +5089,6 @@
"resolved": "https://registry.npmjs.org/urlpattern-polyfill/-/urlpattern-polyfill-10.0.0.tgz", "resolved": "https://registry.npmjs.org/urlpattern-polyfill/-/urlpattern-polyfill-10.0.0.tgz",
"integrity": "sha512-H/A06tKD7sS1O1X2SshBVeA5FLycRpjqiBeqGKmBwBDBy28EnRjORxTNe269KSSr5un5qyWi1iL61wLxpd+ZOg==" "integrity": "sha512-H/A06tKD7sS1O1X2SshBVeA5FLycRpjqiBeqGKmBwBDBy28EnRjORxTNe269KSSr5un5qyWi1iL61wLxpd+ZOg=="
}, },
"node_modules/user-agents": {
"version": "1.1.669",
"resolved": "https://registry.npmjs.org/user-agents/-/user-agents-1.1.669.tgz",
"integrity": "sha512-pbIzG+AOqCaIpySKJ4IAm1l0VyE4jMnK4y1thV8lm8PYxI+7X5uWcppOK7zY79TCKKTAnJH3/4gaVIZHsjrmJA==",
"dependencies": {
"lodash.clonedeep": "^4.5.0"
}
},
"node_modules/util": { "node_modules/util": {
"version": "0.12.5", "version": "0.12.5",
"resolved": "https://registry.npmjs.org/util/-/util-0.12.5.tgz", "resolved": "https://registry.npmjs.org/util/-/util-0.12.5.tgz",

View File

@@ -22,7 +22,6 @@
     "seed:dt:cities:bulk": "tsx src/scripts/seed-dt-cities-bulk.ts"
   },
   "dependencies": {
-    "@kubernetes/client-node": "^1.4.0",
     "@types/bcryptjs": "^3.0.0",
     "axios": "^1.6.2",
     "bcrypt": "^5.1.1",
@@ -49,7 +48,6 @@
     "puppeteer-extra-plugin-stealth": "^2.11.2",
     "sharp": "^0.32.0",
     "socks-proxy-agent": "^8.0.2",
-    "user-agents": "^1.1.669",
     "uuid": "^9.0.1",
     "zod": "^3.22.4"
   },

View File

@@ -1,46 +0,0 @@
# DEPRECATED CODE - DO NOT USE
**These directories contain OLD, ABANDONED code.**
## What's Here
| Directory | What It Was | Why Deprecated |
|-----------|-------------|----------------|
| `hydration/` | Old pipeline for processing crawl data | Replaced by `src/tasks/handlers/` |
| `scraper-v2/` | Old Puppeteer-based scraper engine | Replaced by curl-based `src/platforms/dutchie/client.ts` |
| `canonical-hydration/` | Intermediate step toward canonical schema | Merged into task handlers |
## What to Use Instead
| Old (DON'T USE) | New (USE THIS) |
|----------------|----------------|
| `hydration/normalizers/dutchie.ts` | `src/tasks/handlers/product-refresh.ts` |
| `hydration/producer.ts` | `src/tasks/handlers/payload-fetch.ts` |
| `scraper-v2/engine.ts` | `src/platforms/dutchie/client.ts` |
| `scraper-v2/scheduler.ts` | `src/services/task-scheduler.ts` |
## Why Keep This Code?
- Historical reference only
- Some patterns may be useful for debugging
- Will be deleted once confirmed not needed
## Claude Instructions
**IF YOU ARE CLAUDE:**
1. NEVER import from `src/_deprecated/`
2. NEVER reference these files as examples
3. NEVER try to "fix" or "update" code in here
4. If you see imports from these directories, suggest replacing them
**Correct imports:**
```typescript
// GOOD
import { executeGraphQL } from '../platforms/dutchie/client';
import { pool } from '../db/pool';
// BAD - DO NOT USE
import { something } from '../_deprecated/hydration/...';
import { something } from '../_deprecated/scraper-v2/...';
```
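
For orientation, a minimal sketch of the replacement path, assuming the `executeGraphQL` and `GRAPHQL_HASHES` signatures shown in `src/platforms/dutchie/client.ts` (the variables shape is illustrative):

```typescript
import { executeGraphQL, GRAPHQL_HASHES } from '../platforms/dutchie/client';

// Fetch city data for one state through the canonical curl-based client.
export async function listCitiesForState(state: string): Promise<unknown> {
  return executeGraphQL(
    'GetAllCitiesByState',
    { state },                            // illustrative variables shape
    GRAPHQL_HASHES.GetAllCitiesByState,
    { maxRetries: 3, retryOn403: true }
  );
}
```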

View File

@@ -1,584 +0,0 @@
/**
* System API Routes
*
* Provides REST API endpoints for system monitoring and control:
* - /api/system/sync/* - Sync orchestrator
* - /api/system/dlq/* - Dead-letter queue
* - /api/system/integrity/* - Integrity checks
* - /api/system/fix/* - Auto-fix routines
* - /api/system/alerts/* - System alerts
* - /metrics - Prometheus metrics
*
* Phase 5: Full Production Sync + Monitoring
*/
import { Router, Request, Response } from 'express';
import { Pool } from 'pg';
import {
SyncOrchestrator,
MetricsService,
DLQService,
AlertService,
IntegrityService,
AutoFixService,
} from '../services';
export function createSystemRouter(pool: Pool): Router {
const router = Router();
// Initialize services
const metrics = new MetricsService(pool);
const dlq = new DLQService(pool);
const alerts = new AlertService(pool);
const integrity = new IntegrityService(pool, alerts);
const autoFix = new AutoFixService(pool, alerts);
const orchestrator = new SyncOrchestrator(pool, metrics, dlq, alerts);
// ============================================================
// SYNC ORCHESTRATOR ENDPOINTS
// ============================================================
/**
* GET /api/system/sync/status
* Get current sync status
*/
router.get('/sync/status', async (_req: Request, res: Response) => {
try {
const status = await orchestrator.getStatus();
res.json(status);
} catch (error) {
console.error('[System] Sync status error:', error);
res.status(500).json({ error: 'Failed to get sync status' });
}
});
/**
* POST /api/system/sync/run
* Trigger a sync run
*/
router.post('/sync/run', async (req: Request, res: Response) => {
try {
const triggeredBy = req.body.triggeredBy || 'api';
const result = await orchestrator.runSync();
res.json({
success: true,
triggeredBy,
metrics: result,
});
} catch (error) {
console.error('[System] Sync run error:', error);
res.status(500).json({
success: false,
error: error instanceof Error ? error.message : 'Sync run failed',
});
}
});
/**
* GET /api/system/sync/queue-depth
* Get queue depth information
*/
router.get('/sync/queue-depth', async (_req: Request, res: Response) => {
try {
const depth = await orchestrator.getQueueDepth();
res.json(depth);
} catch (error) {
console.error('[System] Queue depth error:', error);
res.status(500).json({ error: 'Failed to get queue depth' });
}
});
/**
* GET /api/system/sync/health
* Get sync health status
*/
router.get('/sync/health', async (_req: Request, res: Response) => {
try {
const health = await orchestrator.getHealth();
res.status(health.healthy ? 200 : 503).json(health);
} catch (error) {
console.error('[System] Health check error:', error);
res.status(500).json({ healthy: false, error: 'Health check failed' });
}
});
/**
* POST /api/system/sync/pause
* Pause the orchestrator
*/
router.post('/sync/pause', async (req: Request, res: Response) => {
try {
const reason = req.body.reason || 'Manual pause';
await orchestrator.pause(reason);
res.json({ success: true, message: 'Orchestrator paused' });
} catch (error) {
console.error('[System] Pause error:', error);
res.status(500).json({ error: 'Failed to pause orchestrator' });
}
});
/**
* POST /api/system/sync/resume
* Resume the orchestrator
*/
router.post('/sync/resume', async (_req: Request, res: Response) => {
try {
await orchestrator.resume();
res.json({ success: true, message: 'Orchestrator resumed' });
} catch (error) {
console.error('[System] Resume error:', error);
res.status(500).json({ error: 'Failed to resume orchestrator' });
}
});
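// Illustrative operator calls for the sync endpoints above (host/port and
// auth are deployment-specific assumptions, not defined in this file):
//   curl localhost:3000/api/system/sync/status
//   curl -X POST localhost:3000/api/system/sync/run -H 'Content-Type: application/json' -d '{"triggeredBy":"ops"}'
//   curl -X POST localhost:3000/api/system/sync/pause -H 'Content-Type: application/json' -d '{"reason":"maintenance"}'
//   curl -X POST localhost:3000/api/system/sync/resume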
// ============================================================
// DLQ ENDPOINTS
// ============================================================
/**
* GET /api/system/dlq
* List DLQ payloads
*/
router.get('/dlq', async (req: Request, res: Response) => {
try {
const options = {
status: req.query.status as string,
errorType: req.query.errorType as string,
dispensaryId: req.query.dispensaryId ? parseInt(req.query.dispensaryId as string) : undefined,
limit: req.query.limit ? parseInt(req.query.limit as string) : 50,
offset: req.query.offset ? parseInt(req.query.offset as string) : 0,
};
const result = await dlq.listPayloads(options);
res.json(result);
} catch (error) {
console.error('[System] DLQ list error:', error);
res.status(500).json({ error: 'Failed to list DLQ payloads' });
}
});
/**
* GET /api/system/dlq/stats
* Get DLQ statistics
*/
router.get('/dlq/stats', async (_req: Request, res: Response) => {
try {
const stats = await dlq.getStats();
res.json(stats);
} catch (error) {
console.error('[System] DLQ stats error:', error);
res.status(500).json({ error: 'Failed to get DLQ stats' });
}
});
/**
* GET /api/system/dlq/summary
* Get DLQ summary by error type
*/
router.get('/dlq/summary', async (_req: Request, res: Response) => {
try {
const summary = await dlq.getSummary();
res.json(summary);
} catch (error) {
console.error('[System] DLQ summary error:', error);
res.status(500).json({ error: 'Failed to get DLQ summary' });
}
});
/**
* GET /api/system/dlq/:id
* Get a specific DLQ payload
*/
router.get('/dlq/:id', async (req: Request, res: Response) => {
try {
const payload = await dlq.getPayload(req.params.id);
if (!payload) {
return res.status(404).json({ error: 'Payload not found' });
}
res.json(payload);
} catch (error) {
console.error('[System] DLQ get error:', error);
res.status(500).json({ error: 'Failed to get DLQ payload' });
}
});
/**
* POST /api/system/dlq/:id/retry
* Retry a DLQ payload
*/
router.post('/dlq/:id/retry', async (req: Request, res: Response) => {
try {
const result = await dlq.retryPayload(req.params.id);
if (result.success) {
res.json(result);
} else {
res.status(400).json(result);
}
} catch (error) {
console.error('[System] DLQ retry error:', error);
res.status(500).json({ error: 'Failed to retry payload' });
}
});
/**
* POST /api/system/dlq/:id/abandon
* Abandon a DLQ payload
*/
router.post('/dlq/:id/abandon', async (req: Request, res: Response) => {
try {
const reason = req.body.reason || 'Manually abandoned';
const abandonedBy = req.body.abandonedBy || 'api';
const success = await dlq.abandonPayload(req.params.id, reason, abandonedBy);
res.json({ success });
} catch (error) {
console.error('[System] DLQ abandon error:', error);
res.status(500).json({ error: 'Failed to abandon payload' });
}
});
/**
* POST /api/system/dlq/bulk-retry
* Bulk retry payloads by error type
*/
router.post('/dlq/bulk-retry', async (req: Request, res: Response) => {
try {
const { errorType } = req.body;
if (!errorType) {
return res.status(400).json({ error: 'errorType is required' });
}
const result = await dlq.bulkRetryByErrorType(errorType);
res.json(result);
} catch (error) {
console.error('[System] DLQ bulk retry error:', error);
res.status(500).json({ error: 'Failed to bulk retry' });
}
});
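// Illustrative DLQ triage flow using the endpoints above (host/port assumed;
// the payload id and errorType values are examples only):
//   curl localhost:3000/api/system/dlq/stats
//   curl 'localhost:3000/api/system/dlq?status=pending&limit=10'
//   curl -X POST localhost:3000/api/system/dlq/<payload-id>/retry
//   curl -X POST localhost:3000/api/system/dlq/bulk-retry -H 'Content-Type: application/json' -d '{"errorType":"validation_error"}'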
// ============================================================
// INTEGRITY CHECK ENDPOINTS
// ============================================================
/**
* POST /api/system/integrity/run
* Run all integrity checks
*/
router.post('/integrity/run', async (req: Request, res: Response) => {
try {
const triggeredBy = req.body.triggeredBy || 'api';
const result = await integrity.runAllChecks(triggeredBy);
res.json(result);
} catch (error) {
console.error('[System] Integrity run error:', error);
res.status(500).json({ error: 'Failed to run integrity checks' });
}
});
/**
* GET /api/system/integrity/runs
* Get recent integrity check runs
*/
router.get('/integrity/runs', async (req: Request, res: Response) => {
try {
const limit = req.query.limit ? parseInt(req.query.limit as string) : 10;
const runs = await integrity.getRecentRuns(limit);
res.json(runs);
} catch (error) {
console.error('[System] Integrity runs error:', error);
res.status(500).json({ error: 'Failed to get integrity runs' });
}
});
/**
* GET /api/system/integrity/runs/:runId
* Get results for a specific integrity run
*/
router.get('/integrity/runs/:runId', async (req: Request, res: Response) => {
try {
const results = await integrity.getRunResults(req.params.runId);
res.json(results);
} catch (error) {
console.error('[System] Integrity run results error:', error);
res.status(500).json({ error: 'Failed to get run results' });
}
});
// ============================================================
// AUTO-FIX ENDPOINTS
// ============================================================
/**
* GET /api/system/fix/routines
* Get available fix routines
*/
router.get('/fix/routines', (_req: Request, res: Response) => {
try {
const routines = autoFix.getAvailableRoutines();
res.json(routines);
} catch (error) {
console.error('[System] Get routines error:', error);
res.status(500).json({ error: 'Failed to get routines' });
}
});
/**
* POST /api/system/fix/:routine
* Run a fix routine
*/
router.post('/fix/:routine', async (req: Request, res: Response) => {
try {
const routineName = req.params.routine;
const dryRun = req.body.dryRun === true;
const triggeredBy = req.body.triggeredBy || 'api';
const result = await autoFix.runRoutine(routineName as any, triggeredBy, { dryRun });
res.json(result);
} catch (error) {
console.error('[System] Fix routine error:', error);
res.status(500).json({ error: 'Failed to run fix routine' });
}
});
/**
* GET /api/system/fix/runs
* Get recent fix runs
*/
router.get('/fix/runs', async (req: Request, res: Response) => {
try {
const limit = req.query.limit ? parseInt(req.query.limit as string) : 20;
const runs = await autoFix.getRecentRuns(limit);
res.json(runs);
} catch (error) {
console.error('[System] Fix runs error:', error);
res.status(500).json({ error: 'Failed to get fix runs' });
}
});
// ============================================================
// ALERTS ENDPOINTS
// ============================================================
/**
* GET /api/system/alerts
* List alerts
*/
router.get('/alerts', async (req: Request, res: Response) => {
try {
const options = {
status: req.query.status as any,
severity: req.query.severity as any,
type: req.query.type as string,
limit: req.query.limit ? parseInt(req.query.limit as string) : 50,
offset: req.query.offset ? parseInt(req.query.offset as string) : 0,
};
const result = await alerts.listAlerts(options);
res.json(result);
} catch (error) {
console.error('[System] Alerts list error:', error);
res.status(500).json({ error: 'Failed to list alerts' });
}
});
/**
* GET /api/system/alerts/active
* Get active alerts
*/
router.get('/alerts/active', async (_req: Request, res: Response) => {
try {
const activeAlerts = await alerts.getActiveAlerts();
res.json(activeAlerts);
} catch (error) {
console.error('[System] Active alerts error:', error);
res.status(500).json({ error: 'Failed to get active alerts' });
}
});
/**
* GET /api/system/alerts/summary
* Get alert summary
*/
router.get('/alerts/summary', async (_req: Request, res: Response) => {
try {
const summary = await alerts.getSummary();
res.json(summary);
} catch (error) {
console.error('[System] Alerts summary error:', error);
res.status(500).json({ error: 'Failed to get alerts summary' });
}
});
/**
* POST /api/system/alerts/:id/acknowledge
* Acknowledge an alert
*/
router.post('/alerts/:id/acknowledge', async (req: Request, res: Response) => {
try {
const alertId = parseInt(req.params.id);
const acknowledgedBy = req.body.acknowledgedBy || 'api';
const success = await alerts.acknowledgeAlert(alertId, acknowledgedBy);
res.json({ success });
} catch (error) {
console.error('[System] Acknowledge alert error:', error);
res.status(500).json({ error: 'Failed to acknowledge alert' });
}
});
/**
* POST /api/system/alerts/:id/resolve
* Resolve an alert
*/
router.post('/alerts/:id/resolve', async (req: Request, res: Response) => {
try {
const alertId = parseInt(req.params.id);
const resolvedBy = req.body.resolvedBy || 'api';
const success = await alerts.resolveAlert(alertId, resolvedBy);
res.json({ success });
} catch (error) {
console.error('[System] Resolve alert error:', error);
res.status(500).json({ error: 'Failed to resolve alert' });
}
});
/**
* POST /api/system/alerts/bulk-acknowledge
* Bulk acknowledge alerts
*/
router.post('/alerts/bulk-acknowledge', async (req: Request, res: Response) => {
try {
const { ids, acknowledgedBy } = req.body;
if (!ids || !Array.isArray(ids)) {
return res.status(400).json({ error: 'ids array is required' });
}
const count = await alerts.bulkAcknowledge(ids, acknowledgedBy || 'api');
res.json({ acknowledged: count });
} catch (error) {
console.error('[System] Bulk acknowledge error:', error);
res.status(500).json({ error: 'Failed to bulk acknowledge' });
}
});
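// Illustrative alert lifecycle against the endpoints above (host and the
// alert id 42 are assumptions):
//   curl localhost:3000/api/system/alerts/active
//   curl -X POST localhost:3000/api/system/alerts/42/acknowledge -H 'Content-Type: application/json' -d '{"acknowledgedBy":"ops"}'
//   curl -X POST localhost:3000/api/system/alerts/42/resolve -H 'Content-Type: application/json' -d '{"resolvedBy":"ops"}'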
// ============================================================
// METRICS ENDPOINTS
// ============================================================
/**
* GET /api/system/metrics
* Get all current metrics
*/
router.get('/metrics', async (_req: Request, res: Response) => {
try {
const allMetrics = await metrics.getAllMetrics();
res.json(allMetrics);
} catch (error) {
console.error('[System] Metrics error:', error);
res.status(500).json({ error: 'Failed to get metrics' });
}
});
/**
* GET /api/system/metrics/:name
* Get a specific metric
*/
router.get('/metrics/:name', async (req: Request, res: Response) => {
try {
const metric = await metrics.getMetric(req.params.name);
if (!metric) {
return res.status(404).json({ error: 'Metric not found' });
}
res.json(metric);
} catch (error) {
console.error('[System] Metric error:', error);
res.status(500).json({ error: 'Failed to get metric' });
}
});
/**
* GET /api/system/metrics/:name/history
* Get metric time series
*/
router.get('/metrics/:name/history', async (req: Request, res: Response) => {
try {
const hours = req.query.hours ? parseInt(req.query.hours as string) : 24;
const history = await metrics.getMetricHistory(req.params.name, hours);
res.json(history);
} catch (error) {
console.error('[System] Metric history error:', error);
res.status(500).json({ error: 'Failed to get metric history' });
}
});
/**
* GET /api/system/errors
* Get error summary
*/
router.get('/errors', async (_req: Request, res: Response) => {
try {
const summary = await metrics.getErrorSummary();
res.json(summary);
} catch (error) {
console.error('[System] Error summary error:', error);
res.status(500).json({ error: 'Failed to get error summary' });
}
});
/**
* GET /api/system/errors/recent
* Get recent errors
*/
router.get('/errors/recent', async (req: Request, res: Response) => {
try {
const limit = req.query.limit ? parseInt(req.query.limit as string) : 50;
const errorType = req.query.type as string;
const errors = await metrics.getRecentErrors(limit, errorType);
res.json(errors);
} catch (error) {
console.error('[System] Recent errors error:', error);
res.status(500).json({ error: 'Failed to get recent errors' });
}
});
/**
* POST /api/system/errors/acknowledge
* Acknowledge errors
*/
router.post('/errors/acknowledge', async (req: Request, res: Response) => {
try {
const { ids, acknowledgedBy } = req.body;
if (!ids || !Array.isArray(ids)) {
return res.status(400).json({ error: 'ids array is required' });
}
const count = await metrics.acknowledgeErrors(ids, acknowledgedBy || 'api');
res.json({ acknowledged: count });
} catch (error) {
console.error('[System] Acknowledge errors error:', error);
res.status(500).json({ error: 'Failed to acknowledge errors' });
}
});
return router;
}
/**
* Create Prometheus metrics endpoint (standalone)
*/
export function createPrometheusRouter(pool: Pool): Router {
const router = Router();
const metrics = new MetricsService(pool);
/**
* GET /metrics
* Prometheus-compatible metrics endpoint
*/
router.get('/', async (_req: Request, res: Response) => {
try {
const prometheusOutput = await metrics.getPrometheusMetrics();
res.set('Content-Type', 'text/plain; version=0.0.4');
res.send(prometheusOutput);
} catch (error) {
console.error('[Prometheus] Metrics error:', error);
res.status(500).send('# Error generating metrics');
}
});
return router;
}
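
For context, wiring these routers into the main Express app would look roughly like the sketch below (the module path and pool construction are assumptions, not shown in this file):

```typescript
import express from 'express';
import { Pool } from 'pg';
import { createSystemRouter, createPrometheusRouter } from './routes/system';

const app = express();
app.use(express.json());
const pool = new Pool({ connectionString: process.env.DATABASE_URL });

// Monitoring/control REST API under /api/system/*, as documented above
app.use('/api/system', createSystemRouter(pool));
// Standalone Prometheus scrape target at /metrics
app.use('/metrics', createPrometheusRouter(pool));
```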

View File

@@ -172,9 +172,6 @@ export async function runFullDiscovery(
     console.log(`Errors: ${totalErrors}`);
   }

-  // Per TASK_WORKFLOW_2024-12-10.md: Track new dispensary IDs for task chaining
-  let newDispensaryIds: number[] = [];
-
   // Step 4: Auto-validate and promote discovered locations
   if (!dryRun && totalLocationsUpserted > 0) {
     console.log('\n[Discovery] Step 4: Auto-promoting discovered locations...');
@@ -183,13 +180,6 @@
     console.log(`  Created: ${promotionResult.created} new dispensaries`);
     console.log(`  Updated: ${promotionResult.updated} existing dispensaries`);
     console.log(`  Rejected: ${promotionResult.rejected} (validation failed)`);
-
-    // Per TASK_WORKFLOW_2024-12-10.md: Capture new IDs for task chaining
-    newDispensaryIds = promotionResult.newDispensaryIds;
-    if (newDispensaryIds.length > 0) {
-      console.log(`  New store IDs for crawl: [${newDispensaryIds.join(', ')}]`);
-    }
-
     if (promotionResult.rejectedRecords.length > 0) {
       console.log(`  Rejection reasons:`);
       promotionResult.rejectedRecords.slice(0, 5).forEach(r => {
@@ -224,8 +214,6 @@
     totalLocationsFound,
     totalLocationsUpserted,
     durationMs,
-    // Per TASK_WORKFLOW_2024-12-10.md: Return new IDs for task chaining
-    newDispensaryIds,
   };
 }

View File

@@ -127,8 +127,6 @@ export interface PromotionSummary {
     errors: string[];
   }>;
   durationMs: number;
-  // Per TASK_WORKFLOW_2024-12-10.md: Track new dispensary IDs for task chaining
-  newDispensaryIds: number[];
 }

 /**
@@ -471,8 +469,6 @@ export async function promoteDiscoveredLocations(
   const results: PromotionResult[] = [];
   const rejectedRecords: PromotionSummary['rejectedRecords'] = [];

-  // Per TASK_WORKFLOW_2024-12-10.md: Track new dispensary IDs for task chaining
-  const newDispensaryIds: number[] = [];
-
   let created = 0;
   let updated = 0;
   let skipped = 0;
@@ -529,8 +525,6 @@
       if (promotionResult.action === 'created') {
         created++;
-        // Per TASK_WORKFLOW_2024-12-10.md: Track new IDs for task chaining
-        newDispensaryIds.push(promotionResult.dispensaryId);
       } else {
         updated++;
       }
@@ -554,8 +548,6 @@
     results,
     rejectedRecords,
     durationMs: Date.now() - startTime,
-    // Per TASK_WORKFLOW_2024-12-10.md: Return new IDs for task chaining
-    newDispensaryIds,
   };
 }

View File

@@ -211,8 +211,6 @@ export interface FullDiscoveryResult {
   totalLocationsFound: number;
   totalLocationsUpserted: number;
   durationMs: number;
-  // Per TASK_WORKFLOW_2024-12-10.md: Track new dispensary IDs for task chaining
-  newDispensaryIds?: number[];
 }

 // ============================================================

View File

@@ -6,8 +6,6 @@ import { initializeMinio, isMinioEnabled } from './utils/minio';
 import { initializeImageStorage } from './utils/image-storage';
 import { logger } from './services/logger';
 import { cleanupOrphanedJobs } from './services/proxyTestQueue';
-// Per TASK_WORKFLOW_2024-12-10.md: Database-driven task scheduler
-import { taskScheduler } from './services/task-scheduler';
 import { runAutoMigrations } from './db/auto-migrate';
 import { getPool } from './db/pool';
 import healthRoutes from './routes/health';
@@ -109,7 +107,7 @@ import scraperMonitorRoutes from './routes/scraper-monitor';
 import apiTokensRoutes from './routes/api-tokens';
 import apiPermissionsRoutes from './routes/api-permissions';
 import parallelScrapeRoutes from './routes/parallel-scrape';
-// crawler-sandbox moved to _deprecated
+import crawlerSandboxRoutes from './routes/crawler-sandbox';
 import versionRoutes from './routes/version';
 import deployStatusRoutes from './routes/deploy-status';
 import publicApiRoutes from './routes/public-api';
@@ -144,9 +142,6 @@ import seoRoutes from './routes/seo';
 import priceAnalyticsRoutes from './routes/price-analytics';
 import tasksRoutes from './routes/tasks';
 import workerRegistryRoutes from './routes/worker-registry';
-// Per TASK_WORKFLOW_2024-12-10.md: Raw payload access API
-import payloadsRoutes from './routes/payloads';
-import k8sRoutes from './routes/k8s';

 // Mark requests from trusted domains (cannaiq.co, findagram.co, findadispo.com)
 // These domains can access the API without authentication
@@ -187,7 +182,7 @@ app.use('/api/scraper-monitor', scraperMonitorRoutes);
 app.use('/api/api-tokens', apiTokensRoutes);
 app.use('/api/api-permissions', apiPermissionsRoutes);
 app.use('/api/parallel-scrape', parallelScrapeRoutes);
-// crawler-sandbox moved to _deprecated
+app.use('/api/crawler-sandbox', crawlerSandboxRoutes);
 app.use('/api/version', versionRoutes);
 app.use('/api/admin/deploy-status', deployStatusRoutes);
 console.log('[DeployStatus] Routes registered at /api/admin/deploy-status');
@@ -227,14 +222,6 @@ console.log('[Tasks] Routes registered at /api/tasks');
 app.use('/api/worker-registry', workerRegistryRoutes);
 console.log('[WorkerRegistry] Routes registered at /api/worker-registry');

-// Per TASK_WORKFLOW_2024-12-10.md: Raw payload access API
-app.use('/api/payloads', payloadsRoutes);
-console.log('[Payloads] Routes registered at /api/payloads');
-
-// K8s control routes - worker scaling from admin UI
-app.use('/api/k8s', k8sRoutes);
-console.log('[K8s] Routes registered at /api/k8s');
-
 // Phase 3: Analytics V2 - Enhanced analytics with rec/med state segmentation
 try {
   const analyticsV2Router = createAnalyticsV2Router(getPool());
@@ -339,17 +326,6 @@ async function startServer() {
   // Clean up any orphaned proxy test jobs from previous server runs
   await cleanupOrphanedJobs();

-  // Per TASK_WORKFLOW_2024-12-10.md: Start database-driven task scheduler
-  // This replaces node-cron - schedules are stored in DB and survive restarts
-  // Uses SELECT FOR UPDATE SKIP LOCKED for multi-replica safety
-  try {
-    await taskScheduler.start();
-    logger.info('system', 'Task scheduler started');
-  } catch (err: any) {
-    // Non-fatal - scheduler can recover on next poll
-    logger.warn('system', `Task scheduler startup warning: ${err.message}`);
-  }
-
   app.listen(PORT, () => {
     logger.info('system', `Server running on port ${PORT}`);
     console.log(`🚀 Server running on port ${PORT}`);

View File

@@ -702,10 +702,12 @@ export class StateQueryService {
   async getNationalSummary(): Promise<NationalSummary> {
     const stateMetrics = await this.getAllStateMetrics();

-    // Get all states count and aggregate metrics
     const result = await this.pool.query(`
       SELECT
         COUNT(DISTINCT s.code) AS total_states,
+        COUNT(DISTINCT CASE WHEN EXISTS (
+          SELECT 1 FROM dispensaries d WHERE d.state = s.code AND d.menu_type IS NOT NULL
+        ) THEN s.code END) AS active_states,
         (SELECT COUNT(*) FROM dispensaries WHERE state IS NOT NULL) AS total_stores,
         (SELECT COUNT(*) FROM store_products sp
          JOIN dispensaries d ON sp.dispensary_id = d.id
@@ -723,7 +725,7 @@
     return {
       totalStates: parseInt(data.total_states),
-      activeStates: parseInt(data.total_states), // Same as totalStates - all states shown
+      activeStates: parseInt(data.active_states),
       totalStores: parseInt(data.total_stores),
       totalProducts: parseInt(data.total_products),
       totalBrands: parseInt(data.total_brands),

View File

@@ -5,35 +5,22 @@
  *
  * DO NOT MODIFY THIS FILE WITHOUT EXPLICIT AUTHORIZATION.
  *
- * Updated: 2025-12-10 per workflow-12102025.md
- *
- * KEY BEHAVIORS (per workflow-12102025.md):
- * 1. startSession() gets identity from PROXY LOCATION, not task params
- * 2. On 403: immediately get new IP + new fingerprint, then retry
- * 3. After 3 consecutive 403s on same proxy → disable it (burned)
- * 4. Language is always English (en-US)
+ * This is the canonical HTTP client for all Dutchie communication.
+ * All Dutchie workers (Alice, Bella, etc.) MUST use this client.
  *
  * IMPLEMENTATION:
  * - Uses curl via child_process.execSync (bypasses TLS fingerprinting)
  * - NO Puppeteer, NO axios, NO fetch
- * - Uses intoli/user-agents via CrawlRotator for realistic fingerprints
+ * - Fingerprint rotation on 403
  * - Residential IP compatible
  *
  * USAGE:
- * import { curlPost, curlGet, executeGraphQL, startSession } from '@dutchie/client';
+ * import { curlPost, curlGet, executeGraphQL } from '@dutchie/client';
  *
  * ============================================================
 */

 import { execSync } from 'child_process';
-import {
-  buildOrderedHeaders,
-  buildRefererFromMenuUrl,
-  getCurlBinary,
-  isCurlImpersonateAvailable,
-  HeaderContext,
-  BrowserType,
-} from '../../services/http-fingerprint';

 // ============================================================
 // TYPES
@@ -45,8 +32,6 @@ export interface CurlResponse {
   error?: string;
 }

-// Per workflow-12102025.md: fingerprint comes from CrawlRotator's BrowserFingerprint
-// We keep a simplified interface here for header building
 export interface Fingerprint {
   userAgent: string;
   acceptLanguage: string;
@@ -72,13 +57,15 @@ export const DUTCHIE_CONFIG = {
 // ============================================================
 // PROXY SUPPORT
-// Per workflow-12102025.md:
-// - On 403: recordBlock() → increment consecutive_403_count
-// - After 3 consecutive 403s → proxy disabled
-// - Immediately rotate to new IP + new fingerprint on 403
+// ============================================================
+// Integrates with the CrawlRotator system from proxy-rotator.ts
+// On 403 errors:
+// 1. Record failure on current proxy
+// 2. Rotate to next proxy
+// 3. Retry with new proxy
 // ============================================================

-import type { CrawlRotator, BrowserFingerprint } from '../../services/crawl-rotator';
+import type { CrawlRotator, Proxy } from '../../services/crawl-rotator';

 let currentProxy: string | null = null;
 let crawlRotator: CrawlRotator | null = null;
@@ -105,12 +92,13 @@ export function getProxy(): string | null {
 /**
  * Set CrawlRotator for proxy rotation on 403s
- * Per workflow-12102025.md: enables automatic rotation when blocked
+ * This enables automatic proxy rotation when blocked
  */
 export function setCrawlRotator(rotator: CrawlRotator | null): void {
   crawlRotator = rotator;
   if (rotator) {
     console.log('[Dutchie Client] CrawlRotator attached - proxy rotation enabled');
+    // Set initial proxy from rotator
     const proxy = rotator.proxy.getCurrent();
     if (proxy) {
       currentProxy = rotator.proxy.getProxyUrl(proxy);
@@ -127,41 +115,30 @@
 }

 /**
- * Handle 403 block - per workflow-12102025.md:
- * 1. Record block on current proxy (increments consecutive_403_count)
- * 2. Immediately rotate to new proxy (new IP)
- * 3. Rotate fingerprint
- * Returns false if no more proxies available
+ * Rotate to next proxy (called on 403)
 */
-async function handle403Block(): Promise<boolean> {
+async function rotateProxyOn403(error?: string): Promise<boolean> {
   if (!crawlRotator) {
-    console.warn('[Dutchie Client] No CrawlRotator - cannot handle 403');
     return false;
   }

-  // Per workflow-12102025.md: record block (tracks consecutive 403s)
-  const wasDisabled = await crawlRotator.recordBlock();
-  if (wasDisabled) {
-    console.log('[Dutchie Client] Current proxy was disabled (3 consecutive 403s)');
-  }
-
-  // Per workflow-12102025.md: immediately get new IP + new fingerprint
-  const { proxy: nextProxy, fingerprint } = crawlRotator.rotateBoth();
+  // Record failure on current proxy
+  await crawlRotator.recordFailure(error || '403 Forbidden');
+
+  // Rotate to next proxy
+  const nextProxy = crawlRotator.rotateProxy();
   if (nextProxy) {
     currentProxy = crawlRotator.proxy.getProxyUrl(nextProxy);
-    console.log(`[Dutchie Client] Rotated to new proxy: ${currentProxy.replace(/:[^:@]+@/, ':***@')}`);
-    console.log(`[Dutchie Client] New fingerprint: ${fingerprint.userAgent.slice(0, 50)}...`);
+    console.log(`[Dutchie Client] Rotated proxy: ${currentProxy.replace(/:[^:@]+@/, ':***@')}`);
     return true;
   }

-  console.error('[Dutchie Client] No more proxies available!');
+  console.warn('[Dutchie Client] No more proxies available');
   return false;
 }

 /**
  * Record success on current proxy
- * Per workflow-12102025.md: resets consecutive_403_count
 */
 async function recordProxySuccess(responseTimeMs?: number): Promise<void> {
   if (crawlRotator) {
@@ -185,69 +162,163 @@ export const GRAPHQL_HASHES = {
   GetAllCitiesByState: 'ae547a0466ace5a48f91e55bf6699eacd87e3a42841560f0c0eabed5a0a920e6',
 };

+// ============================================================
+// FINGERPRINTS - Browser profiles for anti-detect
+// ============================================================
+
+const FINGERPRINTS: Fingerprint[] = [
+  // Chrome Windows (latest) - typical residential user, use first
+  {
+    userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
+    acceptLanguage: 'en-US,en;q=0.9',
+    secChUa: '"Google Chrome";v="131", "Chromium";v="131", "Not_A Brand";v="24"',
+    secChUaPlatform: '"Windows"',
+    secChUaMobile: '?0',
+  },
+  // Chrome Mac (latest)
+  {
+    userAgent: 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
+    acceptLanguage: 'en-US,en;q=0.9',
+    secChUa: '"Google Chrome";v="131", "Chromium";v="131", "Not_A Brand";v="24"',
+    secChUaPlatform: '"macOS"',
+    secChUaMobile: '?0',
+  },
+  // Chrome Windows (120)
+  {
+    userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
+    acceptLanguage: 'en-US,en;q=0.9',
+    secChUa: '"Chromium";v="120", "Google Chrome";v="120", "Not-A.Brand";v="99"',
+    secChUaPlatform: '"Windows"',
+    secChUaMobile: '?0',
+  },
+  // Firefox Windows
+  {
+    userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:133.0) Gecko/20100101 Firefox/133.0',
+    acceptLanguage: 'en-US,en;q=0.5',
+  },
+  // Safari Mac
+  {
+    userAgent: 'Mozilla/5.0 (Macintosh; Intel Mac OS X 14_2) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Safari/605.1.15',
+    acceptLanguage: 'en-US,en;q=0.9',
+  },
+  // Edge Windows
+  {
+    userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 Edg/131.0.0.0',
+    acceptLanguage: 'en-US,en;q=0.9',
+    secChUa: '"Microsoft Edge";v="131", "Chromium";v="131", "Not_A Brand";v="24"',
+    secChUaPlatform: '"Windows"',
+    secChUaMobile: '?0',
+  },
+];
+
+let currentFingerprintIndex = 0;
+
+// Forward declaration for session (actual CrawlSession interface defined later)
+let currentSession: {
+  sessionId: string;
+  fingerprint: Fingerprint;
+  proxyUrl: string | null;
+  stateCode?: string;
+  timezone?: string;
+  startedAt: Date;
+} | null = null;
+
+/**
+ * Get current fingerprint - returns session fingerprint if active, otherwise default
+ */
+export function getFingerprint(): Fingerprint {
+  // Use session fingerprint if a session is active
+  if (currentSession) {
+    return currentSession.fingerprint;
+  }
+  return FINGERPRINTS[currentFingerprintIndex];
+}
+
+export function rotateFingerprint(): Fingerprint {
+  currentFingerprintIndex = (currentFingerprintIndex + 1) % FINGERPRINTS.length;
+  const fp = FINGERPRINTS[currentFingerprintIndex];
+  console.log(`[Dutchie Client] Rotated to fingerprint: ${fp.userAgent.slice(0, 50)}...`);
+  return fp;
+}
+
+export function resetFingerprint(): void {
+  currentFingerprintIndex = 0;
+}
+
+/**
+ * Get a random fingerprint from the pool
+ */
+export function getRandomFingerprint(): Fingerprint {
+  const index = Math.floor(Math.random() * FINGERPRINTS.length);
+  return FINGERPRINTS[index];
+}
+
 // ============================================================
 // SESSION MANAGEMENT
-// Per workflow-12102025.md:
-// - Session identity comes from PROXY LOCATION
-// - NOT from task params (no stateCode/timezone params)
-// - Language is always English
+// Per-session fingerprint rotation for stealth
 // ============================================================

 export interface CrawlSession {
   sessionId: string;
-  fingerprint: BrowserFingerprint;
+  fingerprint: Fingerprint;
   proxyUrl: string | null;
-  proxyTimezone?: string;
-  proxyState?: string;
+  stateCode?: string;
+  timezone?: string;
   startedAt: Date;
-  // Per workflow-12102025.md: Dynamic Referer per dispensary
-  menuUrl?: string;
-  referer: string;
 }

-let currentSession: CrawlSession | null = null;
+// Note: currentSession variable declared earlier in file for proper scoping

 /**
- * Start a new crawl session
- *
- * Per workflow-12102025.md:
- * - NO state/timezone params - identity comes from proxy location
- * - Gets fingerprint from CrawlRotator (uses intoli/user-agents)
- * - Language is always English (en-US)
- * - Dynamic Referer per dispensary (from menuUrl)
- *
- * @param menuUrl - The dispensary's menu URL for dynamic Referer header
+ * Timezone to Accept-Language mapping
+ * US timezones all use en-US but this can be extended for international
 */
-export function startSession(menuUrl?: string): CrawlSession {
-  if (!crawlRotator) {
-    throw new Error('[Dutchie Client] Cannot start session without CrawlRotator');
-  }
+const TIMEZONE_TO_LOCALE: Record<string, string> = {
+  'America/Phoenix': 'en-US,en;q=0.9',
+  'America/Los_Angeles': 'en-US,en;q=0.9',
+  'America/Denver': 'en-US,en;q=0.9',
+  'America/Chicago': 'en-US,en;q=0.9',
+  'America/New_York': 'en-US,en;q=0.9',
+  'America/Detroit': 'en-US,en;q=0.9',
+  'America/Anchorage': 'en-US,en;q=0.9',
+  'Pacific/Honolulu': 'en-US,en;q=0.9',
+};

-  // Per workflow-12102025.md: get identity from proxy location
-  const proxyLocation = crawlRotator.getProxyLocation();
-  const fingerprint = crawlRotator.userAgent.getCurrent();
+/**
+ * Get Accept-Language header for a given timezone
+ */
+export function getLocaleForTimezone(timezone?: string): string {
+  if (!timezone) return 'en-US,en;q=0.9';
+  return TIMEZONE_TO_LOCALE[timezone] || 'en-US,en;q=0.9';
+}

-  // Per workflow-12102025.md: Dynamic Referer per dispensary
-  const referer = buildRefererFromMenuUrl(menuUrl);
+/**
+ * Start a new crawl session with a random fingerprint
+ * Call this before crawling a store to get a fresh identity
+ */
+export function startSession(stateCode?: string, timezone?: string): CrawlSession {
+  const baseFp = getRandomFingerprint();
+  // Override Accept-Language based on timezone for geographic consistency
+  const fingerprint: Fingerprint = {
+    ...baseFp,
+    acceptLanguage: getLocaleForTimezone(timezone),
+  };

   currentSession = {
     sessionId: `session_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`,
     fingerprint,
     proxyUrl: currentProxy,
-    proxyTimezone: proxyLocation?.timezone,
-    proxyState: proxyLocation?.state,
+    stateCode,
+    timezone,
     startedAt: new Date(),
-    menuUrl,
-    referer,
   };

   console.log(`[Dutchie Client] Started session ${currentSession.sessionId}`);
-  console.log(`[Dutchie Client] Browser: ${fingerprint.browserName} (${fingerprint.deviceCategory})`);
-  console.log(`[Dutchie Client] DNT: ${fingerprint.httpFingerprint.hasDNT ? 'enabled' : 'disabled'}`);
-  console.log(`[Dutchie Client] TLS: ${fingerprint.httpFingerprint.curlImpersonateBinary}`);
-  console.log(`[Dutchie Client] Referer: ${referer}`);
-  if (proxyLocation?.timezone) {
-    console.log(`[Dutchie Client] Proxy: ${proxyLocation.state || 'unknown'} (${proxyLocation.timezone})`);
+  console.log(`[Dutchie Client] Fingerprint: ${fingerprint.userAgent.slice(0, 50)}...`);
+  console.log(`[Dutchie Client] Accept-Language: ${fingerprint.acceptLanguage}`);
+  if (timezone) {
+    console.log(`[Dutchie Client] Timezone: ${timezone}`);
   }

   return currentSession;
@@ -276,80 +347,48 @@ export function getCurrentSession(): CrawlSession | null {
 // ============================================================

 /**
- * Per workflow-12102025.md: Build headers using HTTP fingerprint system
- * Returns headers in browser-specific order with all natural variations
+ * Build headers for Dutchie requests
 */
-export function buildHeaders(isPost: boolean, contentLength?: number): { headers: Record<string, string>; orderedHeaders: string[] } {
-  if (!currentSession || !crawlRotator) {
-    throw new Error('[Dutchie Client] Cannot build headers without active session');
-  }
-
-  const fp = currentSession.fingerprint;
-  const httpFp = fp.httpFingerprint;
-
-  // Per workflow-12102025.md: Build context for ordered headers
-  const context: HeaderContext = {
-    userAgent: fp.userAgent,
-    secChUa: fp.secChUa,
-    secChUaPlatform: fp.secChUaPlatform,
-    secChUaMobile: fp.secChUaMobile,
-    referer: currentSession.referer,
-    isPost,
-    contentLength,
-  };
-
-  // Per workflow-12102025.md: Get ordered headers from HTTP fingerprint service
-  return buildOrderedHeaders(httpFp, context);
+export function buildHeaders(refererPath: string, fingerprint?: Fingerprint): Record<string, string> {
+  const fp = fingerprint || getFingerprint();
+  const refererUrl = `https://dutchie.com${refererPath}`;
+
+  const headers: Record<string, string> = {
+    'accept': 'application/json, text/plain, */*',
+    'accept-language': fp.acceptLanguage,
+    'content-type': 'application/json',
+    'origin': 'https://dutchie.com',
+    'referer': refererUrl,
+    'user-agent': fp.userAgent,
+    'apollographql-client-name': 'Marketplace (production)',
+  };
+
+  if (fp.secChUa) {
+    headers['sec-ch-ua'] = fp.secChUa;
+    headers['sec-ch-ua-mobile'] = fp.secChUaMobile || '?0';
+    headers['sec-ch-ua-platform'] = fp.secChUaPlatform || '"Windows"';
+    headers['sec-fetch-dest'] = 'empty';
+    headers['sec-fetch-mode'] = 'cors';
+    headers['sec-fetch-site'] = 'same-site';
+  }
+
+  return headers;
 }

 /**
- * Per workflow-12102025.md: Get curl binary for current session's browser
- * Uses curl-impersonate for TLS fingerprint matching
+ * Execute HTTP POST using curl (bypasses TLS fingerprinting)
 */
-function getCurlBinaryForSession(): string {
-  if (!currentSession) {
-    return 'curl'; // Fallback to standard curl
-  }
-
-  const browserType = currentSession.fingerprint.browserName as BrowserType;
-
-  // Per workflow-12102025.md: Check if curl-impersonate is available
-  if (isCurlImpersonateAvailable(browserType)) {
-    return getCurlBinary(browserType);
-  }
-
-  // Fallback to standard curl with warning
-  console.warn(`[Dutchie Client] curl-impersonate not available for ${browserType}, using standard curl`);
-  return 'curl';
-}
-
-/**
- * Per workflow-12102025.md: Execute HTTP POST using curl/curl-impersonate
- * - Uses browser-specific TLS fingerprint via curl-impersonate
- * - Headers sent in browser-specific order
- * - Dynamic Referer per dispensary
- */
-export function curlPost(url: string, body: any, timeout = 30000): CurlResponse {
-  const bodyJson = JSON.stringify(body);
-
-  // Per workflow-12102025.md: Build ordered headers for POST request
-  const { headers, orderedHeaders } = buildHeaders(true, bodyJson.length);
-
-  // Per workflow-12102025.md: Build header args in browser-specific order
-  const headerArgs = orderedHeaders
-    .filter(h => h !== 'Host' && h !== 'Content-Length') // curl handles these
-    .map(h => `-H '${h}: ${headers[h]}'`)
+export function curlPost(url: string, body: any, headers: Record<string, string>, timeout = 30000): CurlResponse {
+  const filteredHeaders = Object.entries(headers)
+    .filter(([k]) => k.toLowerCase() !== 'accept-encoding')
+    .map(([k, v]) => `-H '${k}: ${v}'`)
     .join(' ');

-  const bodyEscaped = bodyJson.replace(/'/g, "'\\''");
+  const bodyJson = JSON.stringify(body).replace(/'/g, "'\\''");
   const timeoutSec = Math.ceil(timeout / 1000);
   const separator = '___HTTP_STATUS___';
   const proxyArg = getProxyArg();
-
-  // Per workflow-12102025.md: Use curl-impersonate for TLS fingerprint matching
-  const curlBinary = getCurlBinaryForSession();
-  const cmd = `${curlBinary} -s --compressed ${proxyArg} -w '${separator}%{http_code}' --max-time ${timeoutSec} ${headerArgs} -d '${bodyEscaped}' '${url}'`;
+  const cmd = `curl -s --compressed ${proxyArg} -w '${separator}%{http_code}' --max-time ${timeoutSec} ${filteredHeaders} -d '${bodyJson}' '${url}'`;

   try {
     const output = execSync(cmd, {
@@ -388,29 +427,19 @@ export function curlPost(url: string, body: any, timeout = 30000): CurlResponse
 }

 /**
- * Per workflow-12102025.md: Execute HTTP GET using curl/curl-impersonate
- * - Uses browser-specific TLS fingerprint via curl-impersonate
- * - Headers sent in browser-specific order
- * - Dynamic Referer per dispensary
+ * Execute HTTP GET using curl (bypasses TLS fingerprinting)
+ * Returns HTML or JSON depending on response content-type
 */
-export function curlGet(url: string, timeout = 30000): CurlResponse {
-  // Per workflow-12102025.md: Build ordered headers for GET request
-  const { headers, orderedHeaders } = buildHeaders(false);
-
-  // Per workflow-12102025.md: Build header args in browser-specific order
-  const headerArgs = orderedHeaders
-    .filter(h => h !== 'Host' && h !== 'Content-Length') // curl handles these
-    .map(h => `-H '${h}: ${headers[h]}'`)
+export function curlGet(url: string, headers: Record<string, string>, timeout = 30000): CurlResponse {
+  const filteredHeaders = Object.entries(headers)
+    .filter(([k]) => k.toLowerCase() !== 'accept-encoding')
+    .map(([k, v]) => `-H '${k}: ${v}'`)
     .join(' ');

   const timeoutSec = Math.ceil(timeout / 1000);
   const separator = '___HTTP_STATUS___';
   const proxyArg = getProxyArg();
-
-  // Per workflow-12102025.md: Use curl-impersonate for TLS fingerprint matching
-  const curlBinary = getCurlBinaryForSession();
-  const cmd = `${curlBinary} -s --compressed ${proxyArg} -w '${separator}%{http_code}' --max-time ${timeoutSec} ${headerArgs} '${url}'`;
+  const cmd = `curl -s --compressed ${proxyArg} -w '${separator}%{http_code}' --max-time ${timeoutSec} ${filteredHeaders} '${url}'`;

   try {
     const output = execSync(cmd, {
@@ -430,6 +459,7 @@ export function curlGet(url: string, timeout = 30000): CurlResponse {
   const responseBody = output.slice(0, separatorIndex);
   const statusCode = parseInt(output.slice(separatorIndex + separator.length).trim(), 10);

+  // Try to parse as JSON, otherwise return as string (HTML)
   try {
     return { status: statusCode, data: JSON.parse(responseBody) };
   } catch {
@@ -446,22 +476,16 @@
 // ============================================================
 // GRAPHQL EXECUTION
-// Per workflow-12102025.md:
-// - On 403: immediately rotate IP + fingerprint (no delay first)
-// - Then retry
 // ============================================================

 export interface ExecuteGraphQLOptions {
   maxRetries?: number;
   retryOn403?: boolean;
-  cName?: string;
+  cName?: string; // Optional - used for Referer header, defaults to 'cities'
 }

 /**
- * Per workflow-12102025.md: Execute GraphQL query with curl/curl-impersonate
- * - Uses browser-specific TLS fingerprint
- * - Headers in browser-specific order
- * - On 403: immediately rotate IP + fingerprint, then retry
+ * Execute GraphQL query with curl (bypasses TLS fingerprinting)
 */
 export async function executeGraphQL(
   operationName: string,
@@ -469,12 +493,7 @@
   hash: string,
   options: ExecuteGraphQLOptions
 ): Promise<any> {
-  const { maxRetries = 3, retryOn403 = true } = options;
-
-  // Per workflow-12102025.md: Session must be active for requests
-  if (!currentSession) {
-    throw new Error('[Dutchie Client] Cannot execute GraphQL without active session - call startSession() first');
-  }
+  const { maxRetries = 3, retryOn403 = true, cName = 'cities' } = options;

   const body = {
     operationName,
@@ -488,14 +507,14 @@
   let attempt = 0;

   while (attempt <= maxRetries) {
+    const fingerprint = getFingerprint();
+    const headers = buildHeaders(`/embedded-menu/${cName}`, fingerprint);
     console.log(`[Dutchie Client] curl POST ${operationName} (attempt ${attempt + 1}/${maxRetries + 1})`);

-    const startTime = Date.now();
-    // Per workflow-12102025.md: curlPost now uses ordered headers and curl-impersonate
-    const response = curlPost(DUTCHIE_CONFIG.graphqlEndpoint, body, DUTCHIE_CONFIG.timeout);
-    const responseTime = Date.now() - startTime;
+    const response = curlPost(DUTCHIE_CONFIG.graphqlEndpoint, body, headers, DUTCHIE_CONFIG.timeout);

-    console.log(`[Dutchie Client] Response status: ${response.status} (${responseTime}ms)`);
+    console.log(`[Dutchie Client] Response status: ${response.status}`);

     if (response.error) {
       console.error(`[Dutchie Client] curl error: ${response.error}`);
@@ -508,9 +527,6 @@
     }

     if (response.status === 200) {
-      // Per workflow-12102025.md: success resets consecutive 403 count
-      await recordProxySuccess(responseTime);
-
       if (response.data?.errors?.length > 0) {
         console.warn(`[Dutchie Client] GraphQL errors: ${JSON.stringify(response.data.errors[0])}`);
       }
@@ -518,20 +534,11 @@
     }

     if (response.status === 403 && retryOn403) {
-      // Per workflow-12102025.md: immediately rotate IP + fingerprint
-      console.warn(`[Dutchie Client] 403 blocked - immediately rotating proxy + fingerprint...`);
-      const hasMoreProxies = await handle403Block();
-      if (!hasMoreProxies) {
-        throw new Error('All proxies exhausted - no more IPs available');
-      }
-
-      // Per workflow-12102025.md: Update session referer after rotation
-      currentSession.referer = buildRefererFromMenuUrl(currentSession.menuUrl);
+      console.warn(`[Dutchie Client] 403 blocked - rotating proxy and fingerprint...`);
+      await rotateProxyOn403('403 Forbidden on GraphQL');
+      rotateFingerprint();

       attempt++;
-      // Per workflow-12102025.md: small backoff after rotation
-      await sleep(500);
+      await sleep(1000 * attempt);
       continue;
     }
@@ -560,10 +567,8 @@ export interface FetchPageOptions {
}

/**
- * Per workflow-12102025.md: Fetch HTML page from Dutchie
- * - Uses browser-specific TLS fingerprint
- * - Headers in browser-specific order
- * - Same 403 handling as GraphQL
+ * Fetch HTML page from Dutchie (for city pages, dispensary pages, etc.)
+ * Returns raw HTML string
 */
export async function fetchPage(
  path: string,

@@ -572,22 +577,32 @@ export async function fetchPage(
  const { maxRetries = 3, retryOn403 = true } = options;
  const url = `${DUTCHIE_CONFIG.baseUrl}${path}`;

-  // Per workflow-12102025.md: Session must be active for requests
-  if (!currentSession) {
-    throw new Error('[Dutchie Client] Cannot fetch page without active session - call startSession() first');
-  }

  let attempt = 0;
  while (attempt <= maxRetries) {
-    // Per workflow-12102025.md: curlGet now uses ordered headers and curl-impersonate
+    const fingerprint = getFingerprint();
+    const headers: Record<string, string> = {
+      'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',
+      'accept-language': fingerprint.acceptLanguage,
+      'user-agent': fingerprint.userAgent,
+    };
+    if (fingerprint.secChUa) {
+      headers['sec-ch-ua'] = fingerprint.secChUa;
+      headers['sec-ch-ua-mobile'] = fingerprint.secChUaMobile || '?0';
+      headers['sec-ch-ua-platform'] = fingerprint.secChUaPlatform || '"Windows"';
+      headers['sec-fetch-dest'] = 'document';
+      headers['sec-fetch-mode'] = 'navigate';
+      headers['sec-fetch-site'] = 'none';
+      headers['sec-fetch-user'] = '?1';
+      headers['upgrade-insecure-requests'] = '1';
+    }

    console.log(`[Dutchie Client] curl GET ${path} (attempt ${attempt + 1}/${maxRetries + 1})`);
-    const startTime = Date.now();
-    const response = curlGet(url, DUTCHIE_CONFIG.timeout);
-    const responseTime = Date.now() - startTime;
-    console.log(`[Dutchie Client] Response status: ${response.status} (${responseTime}ms)`);
+    const response = curlGet(url, headers, DUTCHIE_CONFIG.timeout);
+    console.log(`[Dutchie Client] Response status: ${response.status}`);

    if (response.error) {
      console.error(`[Dutchie Client] curl error: ${response.error}`);

@@ -599,26 +614,15 @@ export async function fetchPage(
    }

    if (response.status === 200) {
-      // Per workflow-12102025.md: success resets consecutive 403 count
-      await recordProxySuccess(responseTime);
      return { html: response.data, status: response.status };
    }

    if (response.status === 403 && retryOn403) {
-      // Per workflow-12102025.md: immediately rotate IP + fingerprint
-      console.warn(`[Dutchie Client] 403 blocked - immediately rotating proxy + fingerprint...`);
-      const hasMoreProxies = await handle403Block();
-      if (!hasMoreProxies) {
-        throw new Error('All proxies exhausted - no more IPs available');
-      }
-      // Per workflow-12102025.md: Update session after rotation
-      currentSession.referer = buildRefererFromMenuUrl(currentSession.menuUrl);
+      console.warn(`[Dutchie Client] 403 blocked - rotating proxy and fingerprint...`);
+      await rotateProxyOn403('403 Forbidden on page fetch');
+      rotateFingerprint();
      attempt++;
-      // Per workflow-12102025.md: small backoff after rotation
-      await sleep(500);
+      await sleep(1000 * attempt);
      continue;
    }
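Reviewer note: both `executeGraphQL` and `fetchPage` now converge on the same 403 recovery shape - rotate the proxy, rotate the fingerprint, back off linearly, retry. A minimal sketch of that loop, assuming this module's `rotateProxyOn403`/`rotateFingerprint` helpers and its curl response shape:

```typescript
// Sketch only - helper names, import path, and the response shape are assumed from this module.
import { rotateProxyOn403, rotateFingerprint } from './dutchie-client';

interface CurlResponse<T> { status: number; data: T; error?: string }
const sleep = (ms: number) => new Promise<void>((resolve) => setTimeout(resolve, ms));

async function requestWithRotation<T>(
  doRequest: () => CurlResponse<T>,
  maxRetries = 3
): Promise<T> {
  let attempt = 0;
  while (attempt <= maxRetries) {
    const response = doRequest();
    if (response.status === 200) return response.data;
    if (response.status === 403) {
      await rotateProxyOn403('403 Forbidden'); // new exit IP
      rotateFingerprint();                     // new UA/TLS identity
      attempt++;
      await sleep(1000 * attempt);             // linear backoff: 1s, 2s, 3s
      continue;
    }
    throw new Error(`Unexpected status ${response.status}`);
  }
  throw new Error(`Still blocked after ${maxRetries} retries`);
}
```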

View File

@@ -6,17 +6,22 @@
 */
export {
-  // HTTP Client (per workflow-12102025.md: uses curl-impersonate + ordered headers)
+  // HTTP Client
  curlPost,
  curlGet,
  executeGraphQL,
  fetchPage,
  extractNextData,
-  // Headers (per workflow-12102025.md: browser-specific ordering)
+  // Headers & Fingerprints
  buildHeaders,
+  getFingerprint,
+  rotateFingerprint,
+  resetFingerprint,
+  getRandomFingerprint,
+  getLocaleForTimezone,
-  // Session Management (per workflow-12102025.md: menuUrl for dynamic Referer)
+  // Session Management (per-store fingerprint rotation)
  startSession,
  endSession,
  getCurrentSession,
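Reviewer note: the newly exported fingerprint helpers compose like this (illustrative consumer; the import path is assumed):

```typescript
// Illustrative consumer of the new exports - import path assumed.
import { rotateFingerprint, getFingerprint, buildHeaders } from './platforms/dutchie';

function headersForStore(cName: string): Record<string, string> {
  rotateFingerprint(); // fresh per-store identity
  const fingerprint = getFingerprint();
  return buildHeaders(`/embedded-menu/${cName}`, fingerprint);
}
```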

View File

@@ -47,27 +47,4 @@ router.post('/refresh', authMiddleware, async (req: AuthRequest, res) => {
  res.json({ token });
});

-// Verify password for sensitive actions (requires current user to be authenticated)
-router.post('/verify-password', authMiddleware, async (req: AuthRequest, res) => {
-  try {
-    const { password } = req.body;
-    if (!password) {
-      return res.status(400).json({ error: 'Password required' });
-    }
-    // Re-authenticate the current user with the provided password
-    const user = await authenticateUser(req.user!.email, password);
-    if (!user) {
-      return res.status(401).json({ error: 'Invalid password', verified: false });
-    }
-    res.json({ verified: true });
-  } catch (error) {
-    console.error('Password verification error:', error);
-    res.status(500).json({ error: 'Internal server error' });
-  }
-});

export default router;
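Reviewer note: for the record, a client exercised the now-removed endpoint roughly like this (illustrative only; the `/api/auth` mount path is assumed, and a 401 means the password did not match):

```typescript
// Illustrative only - the route is removed in this diff; /api/auth mount path assumed.
async function verifyPassword(token: string, password: string): Promise<boolean> {
  const res = await fetch('/api/auth/verify-password', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json', Authorization: `Bearer ${token}` },
    body: JSON.stringify({ password }),
  });
  if (res.status === 401) return false; // wrong password
  if (!res.ok) throw new Error(`verify-password failed: ${res.status}`);
  const { verified } = await res.json();
  return verified === true;
}
```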

View File

@@ -14,25 +14,13 @@ router.use(authMiddleware);
/**
 * GET /api/admin/intelligence/brands
 * List all brands with state presence, store counts, and pricing
- * Query params:
- * - state: Filter by state (e.g., "AZ")
- * - limit: Max results (default 500)
- * - offset: Pagination offset
 */
router.get('/brands', async (req: Request, res: Response) => {
  try {
-    const { limit = '500', offset = '0', state } = req.query;
+    const { limit = '500', offset = '0' } = req.query;
    const limitNum = Math.min(parseInt(limit as string, 10), 1000);
    const offsetNum = parseInt(offset as string, 10);

-    // Build WHERE clause based on state filter
-    let stateFilter = '';
-    const params: any[] = [limitNum, offsetNum];
-    if (state && state !== 'all') {
-      stateFilter = 'AND d.state = $3';
-      params.push(state);
-    }

    const { rows } = await pool.query(`
      SELECT
        sp.brand_name_raw as brand_name,

@@ -44,26 +32,17 @@ router.get('/brands', async (req: Request, res: Response) => {
      FROM store_products sp
      JOIN dispensaries d ON sp.dispensary_id = d.id
      WHERE sp.brand_name_raw IS NOT NULL AND sp.brand_name_raw != ''
-        ${stateFilter}
      GROUP BY sp.brand_name_raw
      ORDER BY store_count DESC, sku_count DESC
      LIMIT $1 OFFSET $2
-    `, params);
+    `, [limitNum, offsetNum]);

-    // Get total count with same state filter
-    const countParams: any[] = [];
-    let countStateFilter = '';
-    if (state && state !== 'all') {
-      countStateFilter = 'AND d.state = $1';
-      countParams.push(state);
-    }
+    // Get total count
    const { rows: countRows } = await pool.query(`
-      SELECT COUNT(DISTINCT sp.brand_name_raw) as total
-      FROM store_products sp
-      JOIN dispensaries d ON sp.dispensary_id = d.id
-      WHERE sp.brand_name_raw IS NOT NULL AND sp.brand_name_raw != ''
-        ${countStateFilter}
-    `, countParams);
+      SELECT COUNT(DISTINCT brand_name_raw) as total
+      FROM store_products
+      WHERE brand_name_raw IS NOT NULL AND brand_name_raw != ''
+    `);

    res.json({
      brands: rows.map((r: any) => ({
@@ -168,42 +147,10 @@ router.get('/brands/:brandName/penetration', async (req: Request, res: Response)
/**
 * GET /api/admin/intelligence/pricing
 * Get pricing analytics by category
- * Query params:
- * - state: Filter by state (e.g., "AZ")
 */
router.get('/pricing', async (req: Request, res: Response) => {
  try {
-    const { state } = req.query;
-
-    // Build WHERE clause based on state filter
-    let stateFilter = '';
-    const categoryParams: any[] = [];
-    const stateQueryParams: any[] = [];
-    const overallParams: any[] = [];
-    if (state && state !== 'all') {
-      stateFilter = 'AND d.state = $1';
-      categoryParams.push(state);
-      overallParams.push(state);
-    }
-
-    // Category pricing with optional state filter
-    const categoryQuery = state && state !== 'all'
-      ? `
-        SELECT
-          sp.category_raw as category,
-          ROUND(AVG(sp.price_rec)::numeric, 2) as avg_price,
-          MIN(sp.price_rec) as min_price,
-          MAX(sp.price_rec) as max_price,
-          ROUND(PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY sp.price_rec)::numeric, 2) as median_price,
-          COUNT(*) as product_count
-        FROM store_products sp
-        JOIN dispensaries d ON sp.dispensary_id = d.id
-        WHERE sp.category_raw IS NOT NULL AND sp.price_rec > 0 ${stateFilter}
-        GROUP BY sp.category_raw
-        ORDER BY product_count DESC
-      `
-      : `
+    const { rows: categoryRows } = await pool.query(`
      SELECT
        sp.category_raw as category,
        ROUND(AVG(sp.price_rec)::numeric, 2) as avg_price,

@@ -215,11 +162,8 @@ router.get('/pricing', async (req: Request, res: Response) => {
      WHERE sp.category_raw IS NOT NULL AND sp.price_rec > 0
      GROUP BY sp.category_raw
      ORDER BY product_count DESC
-    `;
-    const { rows: categoryRows } = await pool.query(categoryQuery, categoryParams);
-
-    // State pricing
+    `);

    const { rows: stateRows } = await pool.query(`
      SELECT
        d.state,

@@ -234,31 +178,6 @@ router.get('/pricing', async (req: Request, res: Response) => {
      ORDER BY avg_price DESC
    `);

-    // Overall stats with optional state filter
-    const overallQuery = state && state !== 'all'
-      ? `
-        SELECT
-          ROUND(AVG(sp.price_rec)::numeric, 2) as avg_price,
-          MIN(sp.price_rec) as min_price,
-          MAX(sp.price_rec) as max_price,
-          COUNT(*) as total_products
-        FROM store_products sp
-        JOIN dispensaries d ON sp.dispensary_id = d.id
-        WHERE sp.price_rec > 0 ${stateFilter}
-      `
-      : `
-        SELECT
-          ROUND(AVG(sp.price_rec)::numeric, 2) as avg_price,
-          MIN(sp.price_rec) as min_price,
-          MAX(sp.price_rec) as max_price,
-          COUNT(*) as total_products
-        FROM store_products sp
-        WHERE sp.price_rec > 0
-      `;
-    const { rows: overallRows } = await pool.query(overallQuery, overallParams);
-    const overall = overallRows[0];

    res.json({
      byCategory: categoryRows.map((r: any) => ({
        category: r.category,

@@ -275,12 +194,6 @@ router.get('/pricing', async (req: Request, res: Response) => {
        maxPrice: r.max_price ? parseFloat(r.max_price) : null,
        productCount: parseInt(r.product_count, 10),
      })),
-      overall: {
-        avgPrice: overall?.avg_price ? parseFloat(overall.avg_price) : null,
-        minPrice: overall?.min_price ? parseFloat(overall.min_price) : null,
-        maxPrice: overall?.max_price ? parseFloat(overall.max_price) : null,
-        totalProducts: parseInt(overall?.total_products || '0', 10),
-      },
    });
  } catch (error: any) {
    console.error('[Intelligence] Error fetching pricing:', error.message);
@@ -291,23 +204,9 @@
/**
 * GET /api/admin/intelligence/stores
 * Get store intelligence summary
- * Query params:
- * - state: Filter by state (e.g., "AZ")
- * - limit: Max results (default 200)
 */
router.get('/stores', async (req: Request, res: Response) => {
  try {
-    const { state, limit = '200' } = req.query;
-    const limitNum = Math.min(parseInt(limit as string, 10), 500);
-
-    // Build WHERE clause based on state filter
-    let stateFilter = '';
-    const params: any[] = [limitNum];
-    if (state && state !== 'all') {
-      stateFilter = 'AND d.state = $2';
-      params.push(state);
-    }

    const { rows: storeRows } = await pool.query(`
      SELECT
        d.id,

@@ -317,22 +216,17 @@
        d.state,
        d.menu_type,
        d.crawl_enabled,
-        c.name as chain_name,
-        COUNT(DISTINCT sp.id) as sku_count,
+        COUNT(DISTINCT sp.id) as product_count,
        COUNT(DISTINCT sp.brand_name_raw) as brand_count,
        ROUND(AVG(sp.price_rec)::numeric, 2) as avg_price,
-        MAX(sp.updated_at) as last_crawl,
-        (SELECT COUNT(*) FROM store_product_snapshots sps
-         WHERE sps.store_product_id IN (SELECT id FROM store_products WHERE dispensary_id = d.id)) as snapshot_count
+        MAX(sp.updated_at) as last_product_update
      FROM dispensaries d
      LEFT JOIN store_products sp ON sp.dispensary_id = d.id
-      LEFT JOIN chains c ON d.chain_id = c.id
-      WHERE d.state IS NOT NULL AND d.crawl_enabled = true
-        ${stateFilter}
-      GROUP BY d.id, d.name, d.dba_name, d.city, d.state, d.menu_type, d.crawl_enabled, c.name
-      ORDER BY sku_count DESC
-      LIMIT $1
-    `, params);
+      WHERE d.state IS NOT NULL
+      GROUP BY d.id, d.name, d.dba_name, d.city, d.state, d.menu_type, d.crawl_enabled
+      ORDER BY product_count DESC
+      LIMIT 200
+    `);

    res.json({
      stores: storeRows.map((r: any) => ({

@@ -343,13 +237,10 @@
        state: r.state,
        menuType: r.menu_type,
        crawlEnabled: r.crawl_enabled,
-        chainName: r.chain_name || null,
-        skuCount: parseInt(r.sku_count || '0', 10),
-        snapshotCount: parseInt(r.snapshot_count || '0', 10),
+        productCount: parseInt(r.product_count || '0', 10),
        brandCount: parseInt(r.brand_count || '0', 10),
        avgPrice: r.avg_price ? parseFloat(r.avg_price) : null,
-        lastCrawl: r.last_crawl,
-        crawlFrequencyHours: 4, // Default crawl frequency
+        lastProductUpdate: r.last_product_update,
      })),
      total: storeRows.length,
    });
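Reviewer note: all three removed filters followed the same positional-parameter pattern, where the `AND d.state = $n` fragment and the values array must grow in lockstep. A small sketch of that pattern in isolation (names are illustrative):

```typescript
// Illustrative reconstruction of the removed optional-filter pattern.
function stateFilter(state: string | undefined, nextIndex: number) {
  if (state && state !== 'all') {
    return { clause: `AND d.state = $${nextIndex}`, params: [state] };
  }
  return { clause: '', params: [] as string[] };
}

// Base params [limit, offset] take $1/$2, so the filter binds at $3:
const { clause, params } = stateFilter('AZ', 3);
const values = [500, 0, ...params];
// `... WHERE ... ${clause} LIMIT $1 OFFSET $2` is then queried with `values`.
```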

View File

@@ -543,9 +543,6 @@ router.post('/bulk-priority', async (req: Request, res: Response) => {
/**
 * POST /api/job-queue/enqueue - Add a new job to the queue
- *
- * 2024-12-10: Rewired to use worker_tasks via taskService.
- * Legacy dispensary_crawl_jobs code commented out below.
 */
router.post('/enqueue', async (req: Request, res: Response) => {
  try {

@@ -555,59 +552,6 @@ router.post('/enqueue', async (req: Request, res: Response) => {
      return res.status(400).json({ success: false, error: 'dispensary_id is required' });
    }

-    // 2024-12-10: Map legacy job_type to new task role
-    const roleMap: Record<string, string> = {
-      'dutchie_product_crawl': 'product_refresh',
-      'menu_detection': 'entry_point_discovery',
-      'menu_detection_single': 'entry_point_discovery',
-      'product_discovery': 'product_discovery',
-      'store_discovery': 'store_discovery',
-    };
-    const role = roleMap[job_type] || 'product_refresh';
-
-    // 2024-12-10: Use taskService to create task in worker_tasks table
-    const { taskService } = await import('../tasks/task-service');
-
-    // Check if task already pending for this dispensary
-    const existingTasks = await taskService.listTasks({
-      dispensary_id,
-      role: role as any,
-      status: ['pending', 'claimed', 'running'],
-      limit: 1,
-    });
-
-    if (existingTasks.length > 0) {
-      return res.json({
-        success: true,
-        task_id: existingTasks[0].id,
-        message: 'Task already queued'
-      });
-    }
-
-    const task = await taskService.createTask({
-      role: role as any,
-      dispensary_id,
-      priority,
-    });
-
-    res.json({ success: true, task_id: task.id, message: 'Task enqueued' });
-  } catch (error: any) {
-    console.error('[JobQueue] Error enqueuing task:', error);
-    res.status(500).json({ success: false, error: error.message });
-  }
-});
-
-/*
- * LEGACY CODE - 2024-12-10: Commented out, was using orphaned dispensary_crawl_jobs table
- *
-router.post('/enqueue', async (req: Request, res: Response) => {
-  try {
-    const { dispensary_id, job_type = 'dutchie_product_crawl', priority = 0 } = req.body;
-    if (!dispensary_id) {
-      return res.status(400).json({ success: false, error: 'dispensary_id is required' });
-    }

    // Check if job already pending for this dispensary
    const existing = await pool.query(`
      SELECT id FROM dispensary_crawl_jobs

@@ -641,7 +585,6 @@ router.post('/enqueue', async (req: Request, res: Response) => {
      res.status(500).json({ success: false, error: error.message });
    }
  });
-*/
/**
 * POST /api/job-queue/pause - Pause queue processing

@@ -669,8 +612,6 @@ router.get('/paused', async (_req: Request, res: Response) => {
/**
 * POST /api/job-queue/enqueue-batch - Queue multiple dispensaries at once
 * Body: { dispensary_ids: number[], job_type?: string, priority?: number }
- *
- * 2024-12-10: Rewired to use worker_tasks via taskService.
 */
router.post('/enqueue-batch', async (req: Request, res: Response) => {
  try {

@@ -684,30 +625,35 @@ router.post('/enqueue-batch', async (req: Request, res: Response) => {
      return res.status(400).json({ success: false, error: 'Maximum 500 dispensaries per batch' });
    }

-    // 2024-12-10: Map legacy job_type to new task role
-    const roleMap: Record<string, string> = {
-      'dutchie_product_crawl': 'product_refresh',
-      'menu_detection': 'entry_point_discovery',
-      'product_discovery': 'product_discovery',
-    };
-    const role = roleMap[job_type] || 'product_refresh';
-
-    // 2024-12-10: Use taskService to create tasks in worker_tasks table
-    const { taskService } = await import('../tasks/task-service');
-
-    const tasks = dispensary_ids.map(dispensary_id => ({
-      role: role as any,
-      dispensary_id,
-      priority,
-    }));
-
-    const createdCount = await taskService.createTasks(tasks);
+    // Insert jobs, skipping duplicates
+    const { rows } = await pool.query(`
+      INSERT INTO dispensary_crawl_jobs (dispensary_id, job_type, priority, trigger_type, status, created_at)
+      SELECT
+        d.id,
+        $2::text,
+        $3::integer,
+        'api_batch',
+        'pending',
+        NOW()
+      FROM dispensaries d
+      WHERE d.id = ANY($1::int[])
+        AND d.crawl_enabled = true
+        AND d.platform_dispensary_id IS NOT NULL
+        AND NOT EXISTS (
+          SELECT 1 FROM dispensary_crawl_jobs cj
+          WHERE cj.dispensary_id = d.id
+            AND cj.job_type = $2::text
+            AND cj.status IN ('pending', 'running')
+        )
+      RETURNING id, dispensary_id
+    `, [dispensary_ids, job_type, priority]);

    res.json({
      success: true,
-      queued: createdCount,
+      queued: rows.length,
      requested: dispensary_ids.length,
-      message: `Queued ${createdCount} of ${dispensary_ids.length} dispensaries`
+      job_ids: rows.map(r => r.id),
+      message: `Queued ${rows.length} of ${dispensary_ids.length} dispensaries`
    });
  } catch (error: any) {
    console.error('[JobQueue] Error batch enqueuing:', error);
@@ -718,8 +664,6 @@ router.post('/enqueue-batch', async (req: Request, res: Response) => {
/**
 * POST /api/job-queue/enqueue-state - Queue all crawl-enabled dispensaries for a state
 * Body: { state_code: string, job_type?: string, priority?: number, limit?: number }
- *
- * 2024-12-10: Rewired to use worker_tasks via taskService.
 */
router.post('/enqueue-state', async (req: Request, res: Response) => {
  try {

@@ -729,55 +673,52 @@ router.post('/enqueue-state', async (req: Request, res: Response) => {
      return res.status(400).json({ success: false, error: 'state_code is required (e.g., "AZ")' });
    }

-    // 2024-12-10: Map legacy job_type to new task role
-    const roleMap: Record<string, string> = {
-      'dutchie_product_crawl': 'product_refresh',
-      'menu_detection': 'entry_point_discovery',
-      'product_discovery': 'product_discovery',
-    };
-    const role = roleMap[job_type] || 'product_refresh';
-
-    // Get dispensary IDs for the state
-    const dispensaryResult = await pool.query(`
-      SELECT d.id
-      FROM dispensaries d
-      JOIN states s ON s.id = d.state_id
-      WHERE s.code = $1
-        AND d.crawl_enabled = true
-        AND d.platform_dispensary_id IS NOT NULL
-      LIMIT $2
-    `, [state_code.toUpperCase(), limit]);
-
-    const dispensary_ids = dispensaryResult.rows.map((r: any) => r.id);
-
-    // 2024-12-10: Use taskService to create tasks in worker_tasks table
-    const { taskService } = await import('../tasks/task-service');
-
-    const tasks = dispensary_ids.map((dispensary_id: number) => ({
-      role: role as any,
-      dispensary_id,
-      priority,
-    }));
-
-    const createdCount = await taskService.createTasks(tasks);
+    // Get state_id and queue jobs
+    const { rows } = await pool.query(`
+      WITH target_state AS (
+        SELECT id FROM states WHERE code = $1
+      )
+      INSERT INTO dispensary_crawl_jobs (dispensary_id, job_type, priority, trigger_type, status, created_at)
+      SELECT
+        d.id,
+        $2::text,
+        $3::integer,
+        'api_state',
+        'pending',
+        NOW()
+      FROM dispensaries d, target_state
+      WHERE d.state_id = target_state.id
+        AND d.crawl_enabled = true
+        AND d.platform_dispensary_id IS NOT NULL
+        AND NOT EXISTS (
+          SELECT 1 FROM dispensary_crawl_jobs cj
+          WHERE cj.dispensary_id = d.id
+            AND cj.job_type = $2::text
+            AND cj.status IN ('pending', 'running')
+        )
+      LIMIT $4::integer
+      RETURNING id, dispensary_id
+    `, [state_code.toUpperCase(), job_type, priority, limit]);

    // Get total available count
    const countResult = await pool.query(`
+      WITH target_state AS (
+        SELECT id FROM states WHERE code = $1
+      )
      SELECT COUNT(*) as total
-      FROM dispensaries d
-      JOIN states s ON s.id = d.state_id
-      WHERE s.code = $1
+      FROM dispensaries d, target_state
+      WHERE d.state_id = target_state.id
        AND d.crawl_enabled = true
        AND d.platform_dispensary_id IS NOT NULL
    `, [state_code.toUpperCase()]);

    res.json({
      success: true,
-      queued: createdCount,
+      queued: rows.length,
      total_available: parseInt(countResult.rows[0].total),
      state: state_code.toUpperCase(),
-      role,
-      message: `Queued ${createdCount} dispensaries for ${state_code.toUpperCase()}`
+      job_type,
+      message: `Queued ${rows.length} dispensaries for ${state_code.toUpperCase()}`
    });
  } catch (error: any) {
    console.error('[JobQueue] Error enqueuing state:', error);
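Reviewer note: the restored SQL path can be exercised like this (illustrative call; duplicates already pending or running are skipped server-side, so `queued` can be less than `requested`):

```typescript
// Illustrative call against the restored batch endpoint.
const res = await fetch('/api/job-queue/enqueue-batch', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({
    dispensary_ids: [101, 102, 103], // hypothetical IDs
    job_type: 'dutchie_product_crawl',
    priority: 5,
  }),
});
const { queued, requested, job_ids } = await res.json();
console.log(`queued ${queued}/${requested}`, job_ids);
```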

View File

@@ -1,140 +0,0 @@
/**
* Kubernetes Control Routes
*
* Provides admin UI control over k8s resources like worker scaling.
* Uses in-cluster config when running in k8s, or kubeconfig locally.
*/
import { Router, Request, Response } from 'express';
import * as k8s from '@kubernetes/client-node';
const router = Router();
// K8s client setup - lazy initialization
let appsApi: k8s.AppsV1Api | null = null;
let k8sError: string | null = null;
function getK8sClient(): k8s.AppsV1Api | null {
if (appsApi) return appsApi;
if (k8sError) return null;
try {
const kc = new k8s.KubeConfig();
// Try in-cluster config first (when running in k8s)
try {
kc.loadFromCluster();
console.log('[K8s] Loaded in-cluster config');
} catch {
// Fall back to default kubeconfig (local dev)
try {
kc.loadFromDefault();
console.log('[K8s] Loaded default kubeconfig');
} catch (e) {
k8sError = 'No k8s config available';
console.log('[K8s] No config available - k8s routes disabled');
return null;
}
}
appsApi = kc.makeApiClient(k8s.AppsV1Api);
return appsApi;
} catch (e: any) {
k8sError = e.message;
console.error('[K8s] Failed to initialize client:', e.message);
return null;
}
}
const NAMESPACE = process.env.K8S_NAMESPACE || 'dispensary-scraper';
const WORKER_DEPLOYMENT = 'scraper-worker';
/**
* GET /api/k8s/workers
* Get current worker deployment status
*/
router.get('/workers', async (_req: Request, res: Response) => {
const client = getK8sClient();
if (!client) {
return res.json({
success: true,
available: false,
error: k8sError || 'K8s not available',
replicas: 0,
readyReplicas: 0,
});
}
try {
const deployment = await client.readNamespacedDeployment({
name: WORKER_DEPLOYMENT,
namespace: NAMESPACE,
});
res.json({
success: true,
available: true,
replicas: deployment.spec?.replicas || 0,
readyReplicas: deployment.status?.readyReplicas || 0,
availableReplicas: deployment.status?.availableReplicas || 0,
updatedReplicas: deployment.status?.updatedReplicas || 0,
});
} catch (e: any) {
console.error('[K8s] Error getting deployment:', e.message);
res.status(500).json({
success: false,
error: e.message,
});
}
});
/**
* POST /api/k8s/workers/scale
* Scale worker deployment
* Body: { replicas: number }
*/
router.post('/workers/scale', async (req: Request, res: Response) => {
const client = getK8sClient();
if (!client) {
return res.status(503).json({
success: false,
error: k8sError || 'K8s not available',
});
}
const { replicas } = req.body;
if (typeof replicas !== 'number' || replicas < 0 || replicas > 50) {
return res.status(400).json({
success: false,
error: 'replicas must be a number between 0 and 50',
});
}
try {
// Patch the deployment to set replicas
await client.patchNamespacedDeploymentScale({
name: WORKER_DEPLOYMENT,
namespace: NAMESPACE,
body: { spec: { replicas } },
});
console.log(`[K8s] Scaled ${WORKER_DEPLOYMENT} to ${replicas} replicas`);
res.json({
success: true,
replicas,
message: `Scaled to ${replicas} workers`,
});
} catch (e: any) {
console.error('[K8s] Error scaling deployment:', e.message);
res.status(500).json({
success: false,
error: e.message,
});
}
});
export default router;
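Reviewer note: while this router was mounted, the admin UI drove scaling with a call shaped like the following (illustrative; the `/api/k8s` mount path is assumed from the route names, and the server rejects `replicas` outside 0-50):

```typescript
// Illustrative only - this router is removed in the diff; /api/k8s mount path assumed.
async function scaleWorkers(replicas: number): Promise<void> {
  const res = await fetch('/api/k8s/workers/scale', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ replicas }), // server validates 0-50
  });
  if (!res.ok) throw new Error(`Scale failed: ${res.status}`);
}
```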

View File

@@ -291,107 +291,6 @@ router.get('/stores/:id/summary', async (req: Request, res: Response) => {
  }
});
/**
* GET /api/markets/stores/:id/crawl-history
* Get crawl history for a specific store
*/
router.get('/stores/:id/crawl-history', async (req: Request, res: Response) => {
try {
const { id } = req.params;
const { limit = '50' } = req.query;
const dispensaryId = parseInt(id, 10);
const limitNum = Math.min(parseInt(limit as string, 10), 100);
// Get crawl history from crawl_orchestration_traces
const { rows: historyRows } = await pool.query(`
SELECT
id,
run_id,
profile_key,
crawler_module,
state_at_start,
state_at_end,
total_steps,
duration_ms,
success,
error_message,
products_found,
started_at,
completed_at
FROM crawl_orchestration_traces
WHERE dispensary_id = $1
ORDER BY started_at DESC
LIMIT $2
`, [dispensaryId, limitNum]);
// Get next scheduled crawl if available
const { rows: scheduleRows } = await pool.query(`
SELECT
js.id as schedule_id,
js.job_name,
js.enabled,
js.base_interval_minutes,
js.jitter_minutes,
js.next_run_at,
js.last_run_at,
js.last_status
FROM job_schedules js
WHERE js.enabled = true
AND js.job_config->>'dispensaryId' = $1::text
ORDER BY js.next_run_at
LIMIT 1
`, [dispensaryId.toString()]);
// Get dispensary info for slug
const { rows: dispRows } = await pool.query(`
SELECT
id,
name,
dba_name,
slug,
state,
city,
menu_type,
platform_dispensary_id,
last_menu_scrape
FROM dispensaries
WHERE id = $1
`, [dispensaryId]);
res.json({
dispensary: dispRows[0] || null,
history: historyRows.map(row => ({
id: row.id,
runId: row.run_id,
profileKey: row.profile_key,
crawlerModule: row.crawler_module,
stateAtStart: row.state_at_start,
stateAtEnd: row.state_at_end,
totalSteps: row.total_steps,
durationMs: row.duration_ms,
success: row.success,
errorMessage: row.error_message,
productsFound: row.products_found,
startedAt: row.started_at?.toISOString() || null,
completedAt: row.completed_at?.toISOString() || null,
})),
nextSchedule: scheduleRows[0] ? {
scheduleId: scheduleRows[0].schedule_id,
jobName: scheduleRows[0].job_name,
enabled: scheduleRows[0].enabled,
baseIntervalMinutes: scheduleRows[0].base_interval_minutes,
jitterMinutes: scheduleRows[0].jitter_minutes,
nextRunAt: scheduleRows[0].next_run_at?.toISOString() || null,
lastRunAt: scheduleRows[0].last_run_at?.toISOString() || null,
lastStatus: scheduleRows[0].last_status,
} : null,
});
} catch (error: any) {
console.error('[Markets] Error fetching crawl history:', error.message);
res.status(500).json({ error: error.message });
}
});
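Reviewer note: the response shape this removed route produced, reconstructed from the `res.json()` call above (field types inferred, not authoritative):

```typescript
// Inferred from the removed handler above - best-effort types, not authoritative.
interface CrawlHistoryResponse {
  dispensary: Record<string, unknown> | null;
  history: Array<{
    id: number;
    runId: string;
    profileKey: string | null;
    crawlerModule: string | null;
    stateAtStart: string | null;
    stateAtEnd: string | null;
    totalSteps: number | null;
    durationMs: number | null;
    success: boolean;
    errorMessage: string | null;
    productsFound: number | null;
    startedAt: string | null;   // ISO timestamp
    completedAt: string | null; // ISO timestamp
  }>;
  nextSchedule: {
    scheduleId: number;
    jobName: string;
    enabled: boolean;
    baseIntervalMinutes: number;
    jitterMinutes: number;
    nextRunAt: string | null;
    lastRunAt: string | null;
    lastStatus: string | null;
  } | null;
}
```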
/**
 * GET /api/markets/stores/:id/products
 * Get products for a store with filtering and pagination

View File

@@ -78,14 +78,14 @@ router.get('/metrics', async (_req: Request, res: Response) => {
/**
 * GET /api/admin/orchestrator/states
- * Returns array of states with at least one crawl-enabled dispensary
+ * Returns array of states with at least one known dispensary
 */
router.get('/states', async (_req: Request, res: Response) => {
  try {
    const { rows } = await pool.query(`
      SELECT DISTINCT state, COUNT(*) as store_count
      FROM dispensaries
-      WHERE state IS NOT NULL AND crawl_enabled = true
+      WHERE state IS NOT NULL
      GROUP BY state
      ORDER BY state
    `);

View File

@@ -1,334 +0,0 @@
/**
* Payload Routes
*
* Per TASK_WORKFLOW_2024-12-10.md: API access to raw crawl payloads.
*
* Endpoints:
* - GET /api/payloads - List payload metadata (paginated)
* - GET /api/payloads/:id - Get payload metadata by ID
* - GET /api/payloads/:id/data - Get full payload JSON
* - GET /api/payloads/store/:dispensaryId - List payloads for a store
* - GET /api/payloads/store/:dispensaryId/latest - Get latest payload for a store
* - GET /api/payloads/store/:dispensaryId/diff - Diff two payloads
*/
import { Router, Request, Response } from 'express';
import { getPool } from '../db/pool';
import {
loadRawPayloadById,
getLatestPayload,
getRecentPayloads,
listPayloadMetadata,
} from '../utils/payload-storage';
import { Pool } from 'pg';
const router = Router();
// Get pool instance for queries
const getDbPool = (): Pool => getPool() as unknown as Pool;
/**
* GET /api/payloads
* List payload metadata (paginated)
*/
router.get('/', async (req: Request, res: Response) => {
try {
const pool = getDbPool();
const limit = Math.min(parseInt(req.query.limit as string) || 50, 100);
const offset = parseInt(req.query.offset as string) || 0;
const dispensaryId = req.query.dispensary_id ? parseInt(req.query.dispensary_id as string) : undefined;
const payloads = await listPayloadMetadata(pool, {
dispensaryId,
limit,
offset,
});
res.json({
success: true,
payloads,
pagination: { limit, offset },
});
} catch (error: any) {
console.error('[Payloads] List error:', error.message);
res.status(500).json({ success: false, error: error.message });
}
});
/**
* GET /api/payloads/:id
* Get payload metadata by ID
*/
router.get('/:id', async (req: Request, res: Response) => {
try {
const pool = getDbPool();
const id = parseInt(req.params.id);
const result = await pool.query(`
SELECT
p.id,
p.dispensary_id,
p.crawl_run_id,
p.storage_path,
p.product_count,
p.size_bytes,
p.size_bytes_raw,
p.fetched_at,
p.processed_at,
p.checksum_sha256,
d.name as dispensary_name
FROM raw_crawl_payloads p
LEFT JOIN dispensaries d ON d.id = p.dispensary_id
WHERE p.id = $1
`, [id]);
if (result.rows.length === 0) {
return res.status(404).json({ success: false, error: 'Payload not found' });
}
res.json({
success: true,
payload: result.rows[0],
});
} catch (error: any) {
console.error('[Payloads] Get error:', error.message);
res.status(500).json({ success: false, error: error.message });
}
});
/**
* GET /api/payloads/:id/data
* Get full payload JSON (decompressed from disk)
*/
router.get('/:id/data', async (req: Request, res: Response) => {
try {
const pool = getDbPool();
const id = parseInt(req.params.id);
const result = await loadRawPayloadById(pool, id);
if (!result) {
return res.status(404).json({ success: false, error: 'Payload not found' });
}
res.json({
success: true,
metadata: result.metadata,
data: result.payload,
});
} catch (error: any) {
console.error('[Payloads] Get data error:', error.message);
res.status(500).json({ success: false, error: error.message });
}
});
/**
* GET /api/payloads/store/:dispensaryId
* List payloads for a specific store
*/
router.get('/store/:dispensaryId', async (req: Request, res: Response) => {
try {
const pool = getDbPool();
const dispensaryId = parseInt(req.params.dispensaryId);
const limit = Math.min(parseInt(req.query.limit as string) || 20, 100);
const offset = parseInt(req.query.offset as string) || 0;
const payloads = await listPayloadMetadata(pool, {
dispensaryId,
limit,
offset,
});
res.json({
success: true,
dispensaryId,
payloads,
pagination: { limit, offset },
});
} catch (error: any) {
console.error('[Payloads] Store list error:', error.message);
res.status(500).json({ success: false, error: error.message });
}
});
/**
* GET /api/payloads/store/:dispensaryId/latest
* Get the latest payload for a store (with full data)
*/
router.get('/store/:dispensaryId/latest', async (req: Request, res: Response) => {
try {
const pool = getDbPool();
const dispensaryId = parseInt(req.params.dispensaryId);
const result = await getLatestPayload(pool, dispensaryId);
if (!result) {
return res.status(404).json({
success: false,
error: `No payloads found for dispensary ${dispensaryId}`,
});
}
res.json({
success: true,
metadata: result.metadata,
data: result.payload,
});
} catch (error: any) {
console.error('[Payloads] Latest error:', error.message);
res.status(500).json({ success: false, error: error.message });
}
});
/**
* GET /api/payloads/store/:dispensaryId/diff
* Compare two payloads for a store
*
* Query params:
* - from: payload ID (older)
* - to: payload ID (newer) - optional, defaults to latest
*/
router.get('/store/:dispensaryId/diff', async (req: Request, res: Response) => {
try {
const pool = getDbPool();
const dispensaryId = parseInt(req.params.dispensaryId);
const fromId = req.query.from ? parseInt(req.query.from as string) : undefined;
const toId = req.query.to ? parseInt(req.query.to as string) : undefined;
let fromPayload: any;
let toPayload: any;
if (fromId && toId) {
// Load specific payloads
const [from, to] = await Promise.all([
loadRawPayloadById(pool, fromId),
loadRawPayloadById(pool, toId),
]);
fromPayload = from;
toPayload = to;
} else {
// Load two most recent
const recent = await getRecentPayloads(pool, dispensaryId, 2);
if (recent.length < 2) {
return res.status(400).json({
success: false,
error: 'Need at least 2 payloads to diff. Only found ' + recent.length,
});
}
toPayload = recent[0]; // Most recent
fromPayload = recent[1]; // Previous
}
if (!fromPayload || !toPayload) {
return res.status(404).json({ success: false, error: 'One or both payloads not found' });
}
// Build product maps by ID
const fromProducts = new Map<string, any>();
const toProducts = new Map<string, any>();
for (const p of fromPayload.payload.products || []) {
const id = p._id || p.id;
if (id) fromProducts.set(id, p);
}
for (const p of toPayload.payload.products || []) {
const id = p._id || p.id;
if (id) toProducts.set(id, p);
}
// Find differences
const added: any[] = [];
const removed: any[] = [];
const priceChanges: any[] = [];
const stockChanges: any[] = [];
// Products in "to" but not in "from" = added
for (const [id, product] of toProducts) {
if (!fromProducts.has(id)) {
added.push({
id,
name: product.name,
brand: product.brand?.name,
price: product.Prices?.[0]?.price,
});
}
}
// Products in "from" but not in "to" = removed
for (const [id, product] of fromProducts) {
if (!toProducts.has(id)) {
removed.push({
id,
name: product.name,
brand: product.brand?.name,
price: product.Prices?.[0]?.price,
});
}
}
// Products in both - check for changes
for (const [id, toProduct] of toProducts) {
const fromProduct = fromProducts.get(id);
if (!fromProduct) continue;
const fromPrice = fromProduct.Prices?.[0]?.price;
const toPrice = toProduct.Prices?.[0]?.price;
if (fromPrice !== toPrice) {
priceChanges.push({
id,
name: toProduct.name,
brand: toProduct.brand?.name,
oldPrice: fromPrice,
newPrice: toPrice,
change: toPrice && fromPrice ? toPrice - fromPrice : null,
});
}
const fromStock = fromProduct.Status || fromProduct.status;
const toStock = toProduct.Status || toProduct.status;
if (fromStock !== toStock) {
stockChanges.push({
id,
name: toProduct.name,
brand: toProduct.brand?.name,
oldStatus: fromStock,
newStatus: toStock,
});
}
}
res.json({
success: true,
from: {
id: fromPayload.metadata.id,
fetchedAt: fromPayload.metadata.fetchedAt,
productCount: fromPayload.metadata.productCount,
},
to: {
id: toPayload.metadata.id,
fetchedAt: toPayload.metadata.fetchedAt,
productCount: toPayload.metadata.productCount,
},
diff: {
added: added.length,
removed: removed.length,
priceChanges: priceChanges.length,
stockChanges: stockChanges.length,
},
details: {
added,
removed,
priceChanges,
stockChanges,
},
});
} catch (error: any) {
console.error('[Payloads] Diff error:', error.message);
res.status(500).json({ success: false, error: error.message });
}
});
export default router;
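Reviewer note: before its removal, the diff endpoint was the most involved of these routes; an illustrative client call (the `/api/payloads` mount path is assumed):

```typescript
// Illustrative only - router removed in this diff; /api/payloads mount path assumed.
async function diffPayloads(dispensaryId: number, fromId?: number, toId?: number) {
  const qs = new URLSearchParams();
  if (fromId !== undefined) qs.set('from', String(fromId));
  if (toId !== undefined) qs.set('to', String(toId));
  const res = await fetch(`/api/payloads/store/${dispensaryId}/diff?${qs}`);
  if (!res.ok) throw new Error(`Diff failed: ${res.status}`);
  const { diff, details } = await res.json();
  return { diff, details }; // diff = counts; details = per-product added/removed/price/stock changes
}
```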

View File

@@ -278,7 +278,7 @@ router.post('/update-locations', requireRole('superadmin', 'admin'), async (req,
    // Run in background
    updateAllProxyLocations().catch(err => {
      console.error('Location update failed:', err);
    });

    res.json({ message: 'Location update job started' });

View File

@@ -3,24 +3,6 @@
 *
 * Endpoints for managing worker tasks, viewing capacity metrics,
 * and generating batch tasks.
*
* SCHEDULE MANAGEMENT (added 2025-12-12):
* This file now contains the canonical schedule management endpoints.
* The job_schedules table has been deprecated and all schedule management
* is now consolidated into task_schedules:
*
* Schedule endpoints:
* GET /api/tasks/schedules - List all schedules
* POST /api/tasks/schedules - Create new schedule
* GET /api/tasks/schedules/:id - Get schedule by ID
* PUT /api/tasks/schedules/:id - Update schedule
* DELETE /api/tasks/schedules/:id - Delete schedule
* DELETE /api/tasks/schedules - Bulk delete schedules
* POST /api/tasks/schedules/:id/run-now - Trigger schedule immediately
* POST /api/tasks/schedules/:id/toggle - Toggle schedule enabled/disabled
*
* Note: Schedule routes are defined BEFORE /:id to avoid route conflicts
* (Express matches routes in order, and "schedules" would match /:id otherwise)
*/ */
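Reviewer note: the route-ordering caveat in the removed comment is worth keeping in mind even after the consolidation is undone; the hazard in miniature (sketch only, handlers are stand-ins):

```typescript
// Sketch of the ordering hazard the removed comment describes; handlers are stand-ins.
import { Router, Request, Response } from 'express';
const demo = Router();

demo.get('/schedules', (_req: Request, res: Response) => res.json({ schedules: [] })); // literal segment first
demo.get('/:id', (req: Request, res: Response) => res.json({ id: req.params.id }));
// Registered the other way around, GET /schedules would match /:id
// with req.params.id === 'schedules'.
```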
import { Router, Request, Response } from 'express';
@@ -31,12 +13,6 @@ import {
  TaskFilter,
} from '../tasks/task-service';
import { pool } from '../db/pool';
import {
isTaskPoolPaused,
pauseTaskPool,
resumeTaskPool,
getTaskPoolStatus,
} from '../tasks/task-pool-state';
const router = Router();
@@ -149,464 +125,6 @@ router.get('/capacity/:role', async (req: Request, res: Response) => {
  }
});
// ============================================================
// SCHEDULE MANAGEMENT ROUTES
// (Must be before /:id to avoid route conflicts)
// ============================================================
/**
* GET /api/tasks/schedules
* List all task schedules
*
* Returns schedules with is_immutable flag - immutable schedules can only
* have their interval_hours, priority, and enabled fields updated (not deleted).
*/
router.get('/schedules', async (req: Request, res: Response) => {
try {
const enabledOnly = req.query.enabled === 'true';
let query = `
SELECT id, name, role, description, enabled, interval_hours,
priority, state_code, platform, method,
COALESCE(is_immutable, false) as is_immutable,
last_run_at, next_run_at,
last_task_count, last_error, created_at, updated_at
FROM task_schedules
`;
if (enabledOnly) {
query += ` WHERE enabled = true`;
}
query += ` ORDER BY
CASE role
WHEN 'store_discovery' THEN 1
WHEN 'product_discovery' THEN 2
WHEN 'analytics_refresh' THEN 3
ELSE 4
END,
state_code NULLS FIRST,
name`;
const result = await pool.query(query);
res.json({ schedules: result.rows });
} catch (error: unknown) {
console.error('Error listing schedules:', error);
res.status(500).json({ error: 'Failed to list schedules' });
}
});
/**
* DELETE /api/tasks/schedules
* Bulk delete schedules
*
* Immutable schedules are automatically skipped (not deleted).
*
* Body:
* - ids: number[] (required) - array of schedule IDs to delete
* - all: boolean (optional) - if true, delete all non-immutable schedules (ids ignored)
*/
router.delete('/schedules', async (req: Request, res: Response) => {
try {
const { ids, all } = req.body;
let result;
let skippedImmutable: { id: number; name: string }[] = [];
if (all === true) {
// First, find immutable schedules that will be skipped
const immutableResult = await pool.query(`
SELECT id, name FROM task_schedules WHERE is_immutable = true
`);
skippedImmutable = immutableResult.rows;
// Delete all non-immutable schedules
result = await pool.query(`
DELETE FROM task_schedules
WHERE COALESCE(is_immutable, false) = false
RETURNING id, name
`);
} else if (Array.isArray(ids) && ids.length > 0) {
// First, find which of the requested IDs are immutable
const immutableResult = await pool.query(`
SELECT id, name FROM task_schedules
WHERE id = ANY($1) AND is_immutable = true
`, [ids]);
skippedImmutable = immutableResult.rows;
// Delete only non-immutable schedules from the requested IDs
result = await pool.query(`
DELETE FROM task_schedules
WHERE id = ANY($1) AND COALESCE(is_immutable, false) = false
RETURNING id, name
`, [ids]);
} else {
return res.status(400).json({
error: 'Either provide ids array or set all=true',
});
}
res.json({
success: true,
deleted_count: result.rowCount,
deleted: result.rows,
skipped_immutable_count: skippedImmutable.length,
skipped_immutable: skippedImmutable,
message: skippedImmutable.length > 0
? `Deleted ${result.rowCount} schedule(s), skipped ${skippedImmutable.length} immutable schedule(s)`
: `Deleted ${result.rowCount} schedule(s)`,
});
} catch (error: unknown) {
console.error('Error bulk deleting schedules:', error);
res.status(500).json({ error: 'Failed to delete schedules' });
}
});
/**
* POST /api/tasks/schedules
* Create a new schedule
*
* Body:
* - name: string (required, unique)
* - role: TaskRole (required)
* - description: string (optional)
* - enabled: boolean (default true)
* - interval_hours: number (required)
* - priority: number (default 0)
* - state_code: string (optional)
* - platform: string (optional)
*/
router.post('/schedules', async (req: Request, res: Response) => {
try {
const {
name,
role,
description,
enabled = true,
interval_hours,
priority = 0,
state_code,
platform,
} = req.body;
if (!name || !role || !interval_hours) {
return res.status(400).json({
error: 'name, role, and interval_hours are required',
});
}
// Calculate next_run_at based on interval
const nextRunAt = new Date(Date.now() + interval_hours * 60 * 60 * 1000);
const result = await pool.query(`
INSERT INTO task_schedules
(name, role, description, enabled, interval_hours, priority, state_code, platform, next_run_at)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)
RETURNING id, name, role, description, enabled, interval_hours,
priority, state_code, platform, last_run_at, next_run_at,
last_task_count, last_error, created_at, updated_at
`, [name, role, description, enabled, interval_hours, priority, state_code, platform, nextRunAt]);
res.status(201).json(result.rows[0]);
} catch (error: any) {
if (error.code === '23505') {
// Unique constraint violation
return res.status(409).json({ error: 'A schedule with this name already exists' });
}
console.error('Error creating schedule:', error);
res.status(500).json({ error: 'Failed to create schedule' });
}
});
/**
* GET /api/tasks/schedules/:id
* Get a specific schedule by ID
*/
router.get('/schedules/:id', async (req: Request, res: Response) => {
try {
const scheduleId = parseInt(req.params.id, 10);
const result = await pool.query(`
SELECT id, name, role, description, enabled, interval_hours,
priority, state_code, platform, last_run_at, next_run_at,
last_task_count, last_error, created_at, updated_at
FROM task_schedules
WHERE id = $1
`, [scheduleId]);
if (result.rows.length === 0) {
return res.status(404).json({ error: 'Schedule not found' });
}
res.json(result.rows[0]);
} catch (error: unknown) {
console.error('Error getting schedule:', error);
res.status(500).json({ error: 'Failed to get schedule' });
}
});
/**
* PUT /api/tasks/schedules/:id
* Update an existing schedule
*
* For IMMUTABLE schedules, only these fields can be updated:
* - enabled (turn on/off)
* - interval_hours (change frequency)
* - priority (change priority)
*
* For regular schedules, all fields can be updated.
*/
router.put('/schedules/:id', async (req: Request, res: Response) => {
try {
const scheduleId = parseInt(req.params.id, 10);
const {
name,
role,
description,
enabled,
interval_hours,
priority,
state_code,
platform,
} = req.body;
// First check if schedule exists and if it's immutable
const checkResult = await pool.query(`
SELECT id, name, COALESCE(is_immutable, false) as is_immutable
FROM task_schedules WHERE id = $1
`, [scheduleId]);
if (checkResult.rows.length === 0) {
return res.status(404).json({ error: 'Schedule not found' });
}
const schedule = checkResult.rows[0];
const isImmutable = schedule.is_immutable;
// For immutable schedules, reject attempts to change protected fields
if (isImmutable) {
const protectedFields: string[] = [];
if (name !== undefined) protectedFields.push('name');
if (role !== undefined) protectedFields.push('role');
if (description !== undefined) protectedFields.push('description');
if (state_code !== undefined) protectedFields.push('state_code');
if (platform !== undefined) protectedFields.push('platform');
if (protectedFields.length > 0) {
return res.status(403).json({
error: 'Cannot modify protected fields on immutable schedule',
message: `Schedule "${schedule.name}" is immutable. Only enabled, interval_hours, and priority can be changed.`,
protected_fields: protectedFields,
allowed_fields: ['enabled', 'interval_hours', 'priority'],
});
}
}
// Build dynamic update query
const updates: string[] = [];
const values: any[] = [];
let paramIndex = 1;
// These fields can only be updated on non-immutable schedules
if (!isImmutable) {
if (name !== undefined) {
updates.push(`name = $${paramIndex++}`);
values.push(name);
}
if (role !== undefined) {
updates.push(`role = $${paramIndex++}`);
values.push(role);
}
if (description !== undefined) {
updates.push(`description = $${paramIndex++}`);
values.push(description);
}
if (state_code !== undefined) {
updates.push(`state_code = $${paramIndex++}`);
values.push(state_code || null);
}
if (platform !== undefined) {
updates.push(`platform = $${paramIndex++}`);
values.push(platform || null);
}
}
// These fields can be updated on ALL schedules (including immutable)
if (enabled !== undefined) {
updates.push(`enabled = $${paramIndex++}`);
values.push(enabled);
}
if (interval_hours !== undefined) {
updates.push(`interval_hours = $${paramIndex++}`);
values.push(interval_hours);
// Recalculate next_run_at if interval changed
const nextRunAt = new Date(Date.now() + interval_hours * 60 * 60 * 1000);
updates.push(`next_run_at = $${paramIndex++}`);
values.push(nextRunAt);
}
if (priority !== undefined) {
updates.push(`priority = $${paramIndex++}`);
values.push(priority);
}
if (updates.length === 0) {
return res.status(400).json({ error: 'No fields to update' });
}
updates.push('updated_at = NOW()');
values.push(scheduleId);
const result = await pool.query(`
UPDATE task_schedules
SET ${updates.join(', ')}
WHERE id = $${paramIndex}
RETURNING id, name, role, description, enabled, interval_hours,
priority, state_code, platform, method,
COALESCE(is_immutable, false) as is_immutable,
last_run_at, next_run_at,
last_task_count, last_error, created_at, updated_at
`, values);
res.json(result.rows[0]);
} catch (error: any) {
if (error.code === '23505') {
return res.status(409).json({ error: 'A schedule with this name already exists' });
}
console.error('Error updating schedule:', error);
res.status(500).json({ error: 'Failed to update schedule' });
}
});
/**
* DELETE /api/tasks/schedules/:id
* Delete a schedule
*
* Immutable schedules cannot be deleted - they can only be disabled.
*/
router.delete('/schedules/:id', async (req: Request, res: Response) => {
try {
const scheduleId = parseInt(req.params.id, 10);
// First check if schedule exists and is immutable
const checkResult = await pool.query(`
SELECT id, name, COALESCE(is_immutable, false) as is_immutable
FROM task_schedules WHERE id = $1
`, [scheduleId]);
if (checkResult.rows.length === 0) {
return res.status(404).json({ error: 'Schedule not found' });
}
const schedule = checkResult.rows[0];
// Prevent deletion of immutable schedules
if (schedule.is_immutable) {
return res.status(403).json({
error: 'Cannot delete immutable schedule',
message: `Schedule "${schedule.name}" is immutable and cannot be deleted. You can disable it instead.`,
schedule_id: scheduleId,
is_immutable: true,
});
}
// Delete the schedule
await pool.query(`DELETE FROM task_schedules WHERE id = $1`, [scheduleId]);
res.json({
success: true,
message: `Schedule "${schedule.name}" deleted`,
});
} catch (error: unknown) {
console.error('Error deleting schedule:', error);
res.status(500).json({ error: 'Failed to delete schedule' });
}
});
/**
* POST /api/tasks/schedules/:id/run-now
* Manually trigger a scheduled task to run immediately
*/
router.post('/schedules/:id/run-now', async (req: Request, res: Response) => {
try {
const scheduleId = parseInt(req.params.id, 10);
// Get the schedule
const scheduleResult = await pool.query(`
SELECT id, name, role, state_code, platform, priority
FROM task_schedules WHERE id = $1
`, [scheduleId]);
if (scheduleResult.rows.length === 0) {
return res.status(404).json({ error: 'Schedule not found' });
}
const schedule = scheduleResult.rows[0];
// Create a task based on the schedule
const task = await taskService.createTask({
role: schedule.role,
platform: schedule.platform,
priority: schedule.priority + 10, // Boost priority for manual runs
});
// Update last_run_at on the schedule
await pool.query(`
UPDATE task_schedules
SET last_run_at = NOW(),
next_run_at = NOW() + (interval_hours || ' hours')::interval,
updated_at = NOW()
WHERE id = $1
`, [scheduleId]);
res.json({
success: true,
message: `Schedule "${schedule.name}" triggered`,
task,
});
} catch (error: unknown) {
console.error('Error running schedule:', error);
res.status(500).json({ error: 'Failed to run schedule' });
}
});
/**
* POST /api/tasks/schedules/:id/toggle
* Toggle a schedule's enabled status
*/
router.post('/schedules/:id/toggle', async (req: Request, res: Response) => {
try {
const scheduleId = parseInt(req.params.id, 10);
const result = await pool.query(`
UPDATE task_schedules
SET enabled = NOT enabled,
updated_at = NOW()
WHERE id = $1
RETURNING id, name, enabled
`, [scheduleId]);
if (result.rows.length === 0) {
return res.status(404).json({ error: 'Schedule not found' });
}
res.json({
success: true,
schedule: result.rows[0],
message: result.rows[0].enabled
? `Schedule "${result.rows[0].name}" enabled`
: `Schedule "${result.rows[0].name}" disabled`,
});
} catch (error: unknown) {
console.error('Error toggling schedule:', error);
res.status(500).json({ error: 'Failed to toggle schedule' });
}
});
// ============================================================
// TASK-SPECIFIC ROUTES (with :id parameter)
// ============================================================
/**
 * GET /api/tasks/:id
 * Get a specific task by ID
@@ -1074,159 +592,4 @@ router.post('/migration/full-migrate', async (req: Request, res: Response) => {
  }
});
// ============================================================
// STAGGERED BATCH TASK CREATION
// ============================================================
/**
* POST /api/tasks/batch/staggered
* Create multiple tasks with staggered start times
*
* This endpoint prevents resource contention when creating many tasks by
* staggering their scheduled_for timestamps. Each task becomes eligible
* for claiming only after its scheduled time.
*
* WORKFLOW:
* 1. Tasks created with scheduled_for = NOW() + (index * stagger_seconds)
* 2. Worker claims task only when scheduled_for <= NOW()
* 3. Worker runs preflight on EVERY task claim
* 4. If preflight passes, worker executes task
* 5. If preflight fails, task released back to pending for another worker
*
* Body:
* - dispensary_ids: number[] (required) - Array of dispensary IDs
* - role: TaskRole (required) - 'product_refresh' | 'product_discovery'
* - stagger_seconds: number (default: 15) - Seconds between each task start
* - platform: string (default: 'dutchie')
* - method: 'curl' | 'http' | null (default: null)
*/
router.post('/batch/staggered', async (req: Request, res: Response) => {
try {
const {
dispensary_ids,
role,
stagger_seconds = 15,
platform = 'dutchie',
method = null,
} = req.body;
if (!dispensary_ids || !Array.isArray(dispensary_ids) || dispensary_ids.length === 0) {
return res.status(400).json({ error: 'dispensary_ids array is required' });
}
if (!role) {
return res.status(400).json({ error: 'role is required' });
}
const result = await taskService.createStaggeredTasks(
dispensary_ids,
role as TaskRole,
stagger_seconds,
platform,
method
);
const totalDuration = (dispensary_ids.length - 1) * stagger_seconds;
const estimatedEndTime = new Date(Date.now() + totalDuration * 1000);
res.status(201).json({
success: true,
created: result.created,
task_ids: result.taskIds,
stagger_seconds,
total_duration_seconds: totalDuration,
estimated_completion: estimatedEndTime.toISOString(),
message: `Created ${result.created} staggered ${role} tasks (${stagger_seconds}s apart, ~${Math.ceil(totalDuration / 60)} min total)`,
});
} catch (error: unknown) {
console.error('Error creating staggered tasks:', error);
res.status(500).json({ error: 'Failed to create staggered tasks' });
}
});
/**
* POST /api/tasks/batch/az-stores
* Convenience endpoint to create staggered tasks for Arizona stores
*
* Body:
* - total_tasks: number (default: 24) - Total tasks to create
* - stagger_seconds: number (default: 15) - Seconds between each task
* - split_roles: boolean (default: true) - Split between product_refresh and product_discovery
*/
router.post('/batch/az-stores', async (req: Request, res: Response) => {
try {
const {
total_tasks = 24,
stagger_seconds = 15,
split_roles = true,
} = req.body;
const result = await taskService.createAZStoreTasks(
total_tasks,
stagger_seconds,
split_roles
);
const totalDuration = (result.total - 1) * stagger_seconds;
const estimatedEndTime = new Date(Date.now() + totalDuration * 1000);
res.status(201).json({
success: true,
total: result.total,
product_refresh: result.product_refresh,
product_discovery: result.product_discovery,
task_ids: result.taskIds,
stagger_seconds,
total_duration_seconds: totalDuration,
estimated_completion: estimatedEndTime.toISOString(),
message: `Created ${result.total} staggered tasks for AZ stores (${result.product_refresh} refresh, ${result.product_discovery} discovery)`,
});
} catch (error: unknown) {
console.error('Error creating AZ store tasks:', error);
res.status(500).json({ error: 'Failed to create AZ store tasks' });
}
});
// ============================================================
// TASK POOL MANAGEMENT
// ============================================================
/**
* GET /api/tasks/pool/status
* Check if task pool is paused
*/
router.get('/pool/status', async (_req: Request, res: Response) => {
const status = getTaskPoolStatus();
res.json({
success: true,
...status,
});
});
/**
* POST /api/tasks/pool/pause
* Pause the task pool - workers won't pick up new tasks
*/
router.post('/pool/pause', async (_req: Request, res: Response) => {
pauseTaskPool();
res.json({
success: true,
paused: true,
message: 'Task pool paused - workers will not pick up new tasks',
});
});
/**
* POST /api/tasks/pool/resume
* Resume the task pool - workers will pick up tasks again
*/
router.post('/pool/resume', async (_req: Request, res: Response) => {
resumeTaskPool();
res.json({
success: true,
paused: false,
message: 'Task pool resumed - workers will pick up new tasks',
});
});
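/*
 * Illustrative deploy-window flow (sketch; `base` is an assumed base URL):
 * pause the pool, roll out new worker images, then resume so idle workers
 * start claiming tasks again.
 *
 *   await fetch(`${base}/api/tasks/pool/pause`, { method: 'POST' });
 *   // ... roll out new images ...
 *   await fetch(`${base}/api/tasks/pool/resume`, { method: 'POST' });
 */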
export default router;

View File

@@ -23,8 +23,6 @@
import { Router, Request, Response } from 'express'; import { Router, Request, Response } from 'express';
import { pool } from '../db/pool'; import { pool } from '../db/pool';
import os from 'os'; import os from 'os';
import { runPuppeteerPreflightWithRetry } from '../services/puppeteer-preflight';
import { CrawlRotator } from '../services/crawl-rotator';
const router = Router(); const router = Router();
@@ -72,20 +70,21 @@ router.post('/register', async (req: Request, res: Response) => {
); );
if (existing.rows.length > 0) { if (existing.rows.length > 0) {
// Re-activate existing worker - keep existing pod_name (fantasy name), don't overwrite with K8s name // Re-activate existing worker
const { rows } = await pool.query(` const { rows } = await pool.query(`
UPDATE worker_registry UPDATE worker_registry
SET status = 'active', SET status = 'active',
role = $1, role = $1,
hostname = $2, pod_name = $2,
ip_address = $3, hostname = $3,
ip_address = $4,
last_heartbeat_at = NOW(), last_heartbeat_at = NOW(),
started_at = NOW(), started_at = NOW(),
metadata = $4, metadata = $5,
updated_at = NOW() updated_at = NOW()
WHERE worker_id = $5 WHERE worker_id = $6
RETURNING id, worker_id, friendly_name, pod_name, role RETURNING id, worker_id, friendly_name, role
`, [role, finalHostname, clientIp, metadata, finalWorkerId]); `, [role, pod_name, finalHostname, clientIp, metadata, finalWorkerId]);
const worker = rows[0]; const worker = rows[0];
const roleMsg = role ? `for ${role}` : 'as role-agnostic'; const roleMsg = role ? `for ${role}` : 'as role-agnostic';
@@ -106,13 +105,13 @@ router.post('/register', async (req: Request, res: Response) => {
const nameResult = await pool.query('SELECT assign_worker_name($1) as name', [finalWorkerId]); const nameResult = await pool.query('SELECT assign_worker_name($1) as name', [finalWorkerId]);
const friendlyName = nameResult.rows[0].name; const friendlyName = nameResult.rows[0].name;
// Register the worker - use friendlyName as pod_name (not K8s name) // Register the worker
const { rows } = await pool.query(` const { rows } = await pool.query(`
INSERT INTO worker_registry ( INSERT INTO worker_registry (
worker_id, friendly_name, role, pod_name, hostname, ip_address, status, metadata worker_id, friendly_name, role, pod_name, hostname, ip_address, status, metadata
) VALUES ($1, $2, $3, $4, $5, $6, 'active', $7) ) VALUES ($1, $2, $3, $4, $5, $6, 'active', $7)
RETURNING id, worker_id, friendly_name, pod_name, role RETURNING id, worker_id, friendly_name, role
`, [finalWorkerId, friendlyName, role, friendlyName, finalHostname, clientIp, metadata]); `, [finalWorkerId, friendlyName, role, pod_name, finalHostname, clientIp, metadata]);
const worker = rows[0]; const worker = rows[0];
const roleMsg = role ? `for ${role}` : 'as role-agnostic'; const roleMsg = role ? `for ${role}` : 'as role-agnostic';
@@ -139,36 +138,17 @@ router.post('/register', async (req: Request, res: Response) => {
* *
* Body: * Body:
* - worker_id: string (required) * - worker_id: string (required)
* - current_task_id: number (optional) - task currently being processed (primary task) * - current_task_id: number (optional) - task currently being processed
* - current_task_ids: number[] (optional) - all tasks currently being processed (concurrent)
* - active_task_count: number (optional) - number of tasks currently running
* - max_concurrent_tasks: number (optional) - max concurrent tasks this worker can handle
* - status: string (optional) - 'active', 'idle' * - status: string (optional) - 'active', 'idle'
* - resources: object (optional) - memory_mb, cpu_user_ms, cpu_system_ms, etc.
*/ */
router.post('/heartbeat', async (req: Request, res: Response) => { router.post('/heartbeat', async (req: Request, res: Response) => {
try { try {
const { const { worker_id, current_task_id, status = 'active', resources } = req.body;
worker_id,
current_task_id,
current_task_ids,
active_task_count,
max_concurrent_tasks,
status = 'active',
resources
} = req.body;
if (!worker_id) { if (!worker_id) {
return res.status(400).json({ success: false, error: 'worker_id is required' }); return res.status(400).json({ success: false, error: 'worker_id is required' });
} }
// Build metadata object with all the new fields
const metadata: Record<string, unknown> = {};
if (resources) Object.assign(metadata, resources);
if (current_task_ids) metadata.current_task_ids = current_task_ids;
if (active_task_count !== undefined) metadata.active_task_count = active_task_count;
if (max_concurrent_tasks !== undefined) metadata.max_concurrent_tasks = max_concurrent_tasks;
// Store resources in metadata jsonb column // Store resources in metadata jsonb column
const { rows } = await pool.query(` const { rows } = await pool.query(`
UPDATE worker_registry UPDATE worker_registry
@@ -179,7 +159,7 @@ router.post('/heartbeat', async (req: Request, res: Response) => {
updated_at = NOW() updated_at = NOW()
WHERE worker_id = $3 WHERE worker_id = $3
RETURNING id, friendly_name, status RETURNING id, friendly_name, status
`, [current_task_id || null, status, worker_id, Object.keys(metadata).length > 0 ? JSON.stringify(metadata) : null]); `, [current_task_id || null, status, worker_id, resources ? JSON.stringify(resources) : null]);
if (rows.length === 0) { if (rows.length === 0) {
return res.status(404).json({ success: false, error: 'Worker not found - please register first' }); return res.status(404).json({ success: false, error: 'Worker not found - please register first' });
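/*
 * Illustrative worker-side heartbeat (sketch): field names follow the doc
 * comment above; the interval, base URL, and `activeTasks` are assumptions.
 *
 *   setInterval(() => {
 *     fetch(`${base}/api/worker-registry/heartbeat`, {
 *       method: 'POST',
 *       headers: { 'Content-Type': 'application/json' },
 *       body: JSON.stringify({
 *         worker_id: workerId,
 *         current_task_id: activeTasks[0] ?? null,
 *         current_task_ids: activeTasks,           // concurrent-task fields
 *         active_task_count: activeTasks.length,   // stored in metadata jsonb
 *         status: activeTasks.length > 0 ? 'active' : 'idle',
 *       }),
 *     }).catch(() => {});  // heartbeat failures are non-fatal
 *   }, 30_000);
 */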
@@ -252,9 +232,12 @@ router.post('/deregister', async (req: Request, res: Response) => {
// Release the name back to the pool // Release the name back to the pool
await pool.query('SELECT release_worker_name($1)', [worker_id]); await pool.query('SELECT release_worker_name($1)', [worker_id]);
// Delete the worker entry (clean shutdown) // Mark as terminated
const { rows } = await pool.query(` const { rows } = await pool.query(`
DELETE FROM worker_registry UPDATE worker_registry
SET status = 'terminated',
current_task_id = NULL,
updated_at = NOW()
WHERE worker_id = $1 WHERE worker_id = $1
RETURNING id, friendly_name RETURNING id, friendly_name
`, [worker_id]); `, [worker_id]);
@@ -347,27 +330,12 @@ router.get('/workers', async (req: Request, res: Response) => {
tasks_completed, tasks_completed,
tasks_failed, tasks_failed,
current_task_id, current_task_id,
-- Concurrent task fields from metadata
(metadata->>'current_task_ids')::jsonb as current_task_ids,
(metadata->>'active_task_count')::int as active_task_count,
(metadata->>'max_concurrent_tasks')::int as max_concurrent_tasks,
-- Decommission fields
COALESCE(decommission_requested, false) as decommission_requested,
decommission_reason,
-- Preflight fields (dual-transport verification)
curl_ip,
http_ip,
preflight_status,
preflight_at,
fingerprint_data,
-- Full metadata for resources
metadata, metadata,
EXTRACT(EPOCH FROM (NOW() - last_heartbeat_at)) as seconds_since_heartbeat, EXTRACT(EPOCH FROM (NOW() - last_heartbeat_at)) as seconds_since_heartbeat,
CASE CASE
WHEN status = 'offline' OR status = 'terminated' THEN status WHEN status = 'offline' OR status = 'terminated' THEN status
WHEN last_heartbeat_at < NOW() - INTERVAL '2 minutes' THEN 'stale' WHEN last_heartbeat_at < NOW() - INTERVAL '2 minutes' THEN 'stale'
WHEN current_task_id IS NOT NULL THEN 'busy' WHEN current_task_id IS NOT NULL THEN 'busy'
WHEN (metadata->>'active_task_count')::int > 0 THEN 'busy'
ELSE 'ready' ELSE 'ready'
END as health_status, END as health_status,
created_at created_at
@@ -704,217 +672,4 @@ router.get('/capacity', async (_req: Request, res: Response) => {
} }
}); });
// ============================================================
// WORKER LIFECYCLE MANAGEMENT
// ============================================================
/**
* POST /api/worker-registry/workers/:workerId/decommission
* Request graceful decommission of a worker (will stop after current task)
*/
router.post('/workers/:workerId/decommission', async (req: Request, res: Response) => {
try {
const { workerId } = req.params;
const { reason, issued_by } = req.body;
// Update worker_registry to flag for decommission
const result = await pool.query(
`UPDATE worker_registry
SET decommission_requested = true,
decommission_reason = $2,
decommission_requested_at = NOW()
WHERE worker_id = $1
RETURNING friendly_name, status, current_task_id`,
[workerId, reason || 'Manual decommission from admin']
);
if (result.rows.length === 0) {
return res.status(404).json({ success: false, error: 'Worker not found' });
}
const worker = result.rows[0];
// Also log to worker_commands for audit trail
await pool.query(
`INSERT INTO worker_commands (worker_id, command, reason, issued_by)
VALUES ($1, 'decommission', $2, $3)
ON CONFLICT DO NOTHING`,
[workerId, reason || 'Manual decommission', issued_by || 'admin']
).catch(() => {
// Table might not exist yet - ignore
});
res.json({
success: true,
message: worker.current_task_id
? `Worker ${worker.friendly_name} will stop after completing task #${worker.current_task_id}`
: `Worker ${worker.friendly_name} will stop on next poll`,
worker: {
friendly_name: worker.friendly_name,
status: worker.status,
current_task_id: worker.current_task_id,
decommission_requested: true
}
});
} catch (error: any) {
res.status(500).json({ success: false, error: error.message });
}
});
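/*
 * Illustrative worker-side check (sketch): a worker polls its own registry
 * row between tasks and honors the flag set above. Column names follow the
 * UPDATE above; the helpers and cadence are assumptions.
 *
 *   const { rows } = await pool.query(
 *     'SELECT decommission_requested FROM worker_registry WHERE worker_id = $1',
 *     [workerId]
 *   );
 *   if (rows[0]?.decommission_requested) {
 *     await finishCurrentTask();   // hypothetical helper
 *     process.exit(0);             // clean shutdown after current task
 *   }
 */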
/**
* POST /api/worker-registry/workers/:workerId/cancel-decommission
* Cancel a pending decommission request
*/
router.post('/workers/:workerId/cancel-decommission', async (req: Request, res: Response) => {
try {
const { workerId } = req.params;
const result = await pool.query(
`UPDATE worker_registry
SET decommission_requested = false,
decommission_reason = NULL,
decommission_requested_at = NULL
WHERE worker_id = $1
RETURNING friendly_name`,
[workerId]
);
if (result.rows.length === 0) {
return res.status(404).json({ success: false, error: 'Worker not found' });
}
res.json({
success: true,
message: `Decommission cancelled for ${result.rows[0].friendly_name}`
});
} catch (error: any) {
res.status(500).json({ success: false, error: error.message });
}
});
/**
* POST /api/worker-registry/spawn
* Spawn a new worker in the current pod (only works in multi-worker-per-pod mode)
* For now, this is a placeholder - actual spawning requires the pod supervisor
*/
router.post('/spawn', async (req: Request, res: Response) => {
try {
const { pod_name, role } = req.body;
// For now, we can't actually spawn workers from the API
// This would require a supervisor process in each pod that listens for spawn commands
// Instead, return instructions for how to scale
res.json({
success: false,
error: 'Direct worker spawning not yet implemented',
instructions: 'To add workers, scale the K8s deployment: kubectl scale deployment/scraper-worker --replicas=N'
});
} catch (error: any) {
res.status(500).json({ success: false, error: error.message });
}
});
/**
* GET /api/worker-registry/pods
* Get workers grouped by pod
*/
router.get('/pods', async (_req: Request, res: Response) => {
try {
const { rows } = await pool.query(`
SELECT
COALESCE(pod_name, 'Unknown') as pod_name,
COUNT(*) as worker_count,
COUNT(*) FILTER (WHERE current_task_id IS NOT NULL) as busy_count,
COUNT(*) FILTER (WHERE current_task_id IS NULL) as idle_count,
SUM(tasks_completed) as total_completed,
SUM(tasks_failed) as total_failed,
SUM((metadata->>'memory_rss_mb')::int) as total_memory_mb,
array_agg(json_build_object(
'worker_id', worker_id,
'friendly_name', friendly_name,
'status', status,
'current_task_id', current_task_id,
'tasks_completed', tasks_completed,
'tasks_failed', tasks_failed,
'decommission_requested', COALESCE(decommission_requested, false),
'last_heartbeat_at', last_heartbeat_at
)) as workers
FROM worker_registry
WHERE status NOT IN ('offline', 'terminated')
GROUP BY pod_name
ORDER BY pod_name
`);
res.json({
success: true,
pods: rows.map(row => ({
pod_name: row.pod_name,
worker_count: parseInt(row.worker_count),
busy_count: parseInt(row.busy_count),
idle_count: parseInt(row.idle_count),
total_completed: parseInt(row.total_completed) || 0,
total_failed: parseInt(row.total_failed) || 0,
total_memory_mb: parseInt(row.total_memory_mb) || 0,
workers: row.workers
}))
});
} catch (error: any) {
res.status(500).json({ success: false, error: error.message });
}
});
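/*
 * Illustrative /pods response (sketch, values invented):
 *   { success: true, pods: [ { pod_name: 'scraper-worker-abc12',
 *       worker_count: 3, busy_count: 2, idle_count: 1,
 *       total_completed: 412, total_failed: 7, total_memory_mb: 1536,
 *       workers: [ ... ] } ] }
 */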
// ============================================================
// PREFLIGHT SMOKE TEST
// ============================================================
/**
* POST /api/worker-registry/preflight-test
* Run an HTTP (Puppeteer) preflight test and return results
*
* This is a smoke test endpoint to verify the preflight system works.
* Returns IP, fingerprint data, bot detection results, and products fetched.
*/
router.post('/preflight-test', async (_req: Request, res: Response) => {
try {
console.log('[PreflightTest] Starting HTTP preflight smoke test...');
// Create a temporary CrawlRotator for the test
const crawlRotator = new CrawlRotator();
// Run the Puppeteer preflight (with 1 retry)
const startTime = Date.now();
const result = await runPuppeteerPreflightWithRetry(crawlRotator, 1);
const duration = Date.now() - startTime;
console.log(`[PreflightTest] Completed in ${duration}ms - passed: ${result.passed}`);
res.json({
success: true,
test: 'http_preflight',
duration_ms: duration,
result: {
passed: result.passed,
proxy_ip: result.proxyIp,
fingerprint: result.fingerprint,
bot_detection: result.botDetection,
products_returned: result.productsReturned,
browser_user_agent: result.browserUserAgent,
ip_verified: result.ipVerified,
proxy_available: result.proxyAvailable,
proxy_connected: result.proxyConnected,
antidetect_ready: result.antidetectReady,
response_time_ms: result.responseTimeMs,
error: result.error
}
});
} catch (error: any) {
console.error('[PreflightTest] Error:', error.message);
res.status(500).json({
success: false,
test: 'http_preflight',
error: error.message
});
}
});
export default router;

View File

@@ -4,25 +4,10 @@
* Provider-agnostic worker management and job monitoring. * Provider-agnostic worker management and job monitoring.
* Replaces legacy /api/dutchie-az/admin/schedules and /api/dutchie-az/monitor/* routes. * Replaces legacy /api/dutchie-az/admin/schedules and /api/dutchie-az/monitor/* routes.
* *
* DEPRECATION NOTE (2025-12-12):
* This file still queries job_schedules for backwards compatibility with
* the /api/workers endpoints that display worker status. However, the
* job_schedules table is DEPRECATED - all entries have been disabled.
*
* Schedule management has been consolidated into task_schedules:
* - Use /api/tasks/schedules for schedule CRUD operations
* - Use TasksDashboard.tsx (/admin/tasks) for schedule management UI
* - task_schedules uses interval_hours (simpler than base_interval_minutes + jitter)
*
* The /api/workers endpoints remain useful for:
* - Monitoring active workers and job status
* - K8s scaling controls
* - Job history and logs
*
* Endpoints: * Endpoints:
* GET /api/workers - List all workers/schedules * GET /api/workers - List all workers/schedules
* GET /api/workers/active - List currently active workers * GET /api/workers/active - List currently active workers
* GET /api/workers/schedule - Get all job schedules (DEPRECATED - use /api/tasks/schedules) * GET /api/workers/schedule - Get all job schedules
* GET /api/workers/:workerName - Get specific worker details * GET /api/workers/:workerName - Get specific worker details
* GET /api/workers/:workerName/scope - Get worker's scope (states, etc.) * GET /api/workers/:workerName/scope - Get worker's scope (states, etc.)
* GET /api/workers/:workerName/stats - Get worker statistics * GET /api/workers/:workerName/stats - Get worker statistics
@@ -32,234 +17,13 @@
* GET /api/monitor/jobs - Get recent job history * GET /api/monitor/jobs - Get recent job history
* GET /api/monitor/active-jobs - Get currently running jobs * GET /api/monitor/active-jobs - Get currently running jobs
* GET /api/monitor/summary - Get monitoring summary * GET /api/monitor/summary - Get monitoring summary
*
* K8s Scaling (added 2024-12-10):
* GET /api/workers/k8s/replicas - Get current replica count
* POST /api/workers/k8s/scale - Scale worker replicas up/down
*/ */
import { Router, Request, Response } from 'express'; import { Router, Request, Response } from 'express';
import { pool } from '../db/pool'; import { pool } from '../db/pool';
import * as k8s from '@kubernetes/client-node';
const router = Router(); const router = Router();
// ============================================================
// K8S SCALING CONFIGURATION (added 2024-12-10)
// Per TASK_WORKFLOW_2024-12-10.md: Admin can scale workers from UI
// ============================================================
const K8S_NAMESPACE = process.env.K8S_NAMESPACE || 'dispensary-scraper';
const K8S_DEPLOYMENT_NAME = process.env.K8S_WORKER_DEPLOYMENT || 'scraper-worker';
// Initialize K8s client - uses in-cluster config when running in K8s,
// or kubeconfig when running locally
let k8sAppsApi: k8s.AppsV1Api | null = null;
function getK8sClient(): k8s.AppsV1Api | null {
if (k8sAppsApi) return k8sAppsApi;
try {
const kc = new k8s.KubeConfig();
// Try in-cluster config first (when running as a pod)
// Falls back to default kubeconfig (~/.kube/config) for local dev
try {
kc.loadFromCluster();
} catch {
kc.loadFromDefault();
}
k8sAppsApi = kc.makeApiClient(k8s.AppsV1Api);
return k8sAppsApi;
} catch (err: any) {
console.warn('[Workers] K8s client not available:', err.message);
return null;
}
}
// ============================================================
// K8S SCALING ROUTES (added 2024-12-10)
// Per TASK_WORKFLOW_2024-12-10.md: Admin can scale workers from UI
// ============================================================
/**
* GET /api/workers/k8s/replicas - Get current worker replica count
* Returns current and desired replica counts from the Deployment
*/
router.get('/k8s/replicas', async (_req: Request, res: Response) => {
const client = getK8sClient();
if (!client) {
return res.status(503).json({
success: false,
error: 'K8s client not available (not running in cluster or no kubeconfig)',
replicas: null,
});
}
try {
const response = await client.readNamespacedDeployment({
name: K8S_DEPLOYMENT_NAME,
namespace: K8S_NAMESPACE,
});
const deployment = response;
res.json({
success: true,
replicas: {
current: deployment.status?.readyReplicas || 0,
desired: deployment.spec?.replicas || 0,
available: deployment.status?.availableReplicas || 0,
updated: deployment.status?.updatedReplicas || 0,
},
deployment: K8S_DEPLOYMENT_NAME,
namespace: K8S_NAMESPACE,
});
} catch (err: any) {
console.error('[Workers] K8s replicas error:', err.body?.message || err.message);
res.status(500).json({
success: false,
error: err.body?.message || err.message,
});
}
});
/**
* POST /api/workers/k8s/scale - Scale worker replicas
* Body: { replicas: number } - desired replica count (0-20)
*/
router.post('/k8s/scale', async (req: Request, res: Response) => {
const client = getK8sClient();
if (!client) {
return res.status(503).json({
success: false,
error: 'K8s client not available (not running in cluster or no kubeconfig)',
});
}
const { replicas } = req.body;
// Validate replica count
if (typeof replicas !== 'number' || replicas < 0 || replicas > 20) {
return res.status(400).json({
success: false,
error: 'replicas must be a number between 0 and 20',
});
}
try {
// Get current state first
const currentResponse = await client.readNamespacedDeploymentScale({
name: K8S_DEPLOYMENT_NAME,
namespace: K8S_NAMESPACE,
});
const currentReplicas = currentResponse.spec?.replicas || 0;
// Update scale using replaceNamespacedDeploymentScale
await client.replaceNamespacedDeploymentScale({
name: K8S_DEPLOYMENT_NAME,
namespace: K8S_NAMESPACE,
body: {
apiVersion: 'autoscaling/v1',
kind: 'Scale',
metadata: {
name: K8S_DEPLOYMENT_NAME,
namespace: K8S_NAMESPACE,
},
spec: {
replicas: replicas,
},
},
});
console.log(`[Workers] Scaled ${K8S_DEPLOYMENT_NAME} from ${currentReplicas} to ${replicas} replicas`);
res.json({
success: true,
message: `Scaled from ${currentReplicas} to ${replicas} replicas`,
previous: currentReplicas,
desired: replicas,
deployment: K8S_DEPLOYMENT_NAME,
namespace: K8S_NAMESPACE,
});
} catch (err: any) {
console.error('[Workers] K8s scale error:', err.body?.message || err.message);
res.status(500).json({
success: false,
error: err.body?.message || err.message,
});
}
});
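/*
 * Illustrative admin call (sketch; host is an assumption). Requests outside
 * the validated 0-20 range are rejected with a 400 before touching K8s.
 *
 *   await fetch('http://localhost:3000/api/workers/k8s/scale', {
 *     method: 'POST',
 *     headers: { 'Content-Type': 'application/json' },
 *     body: JSON.stringify({ replicas: 5 }),
 *   });
 */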
/**
* POST /api/workers/k8s/scale-up - Scale up worker replicas by 1
* Convenience endpoint for adding a single worker
*/
router.post('/k8s/scale-up', async (_req: Request, res: Response) => {
const client = getK8sClient();
if (!client) {
return res.status(503).json({
success: false,
error: 'K8s client not available (not running in cluster or no kubeconfig)',
});
}
try {
// Get current replica count
const currentResponse = await client.readNamespacedDeploymentScale({
name: K8S_DEPLOYMENT_NAME,
namespace: K8S_NAMESPACE,
});
const currentReplicas = currentResponse.spec?.replicas || 0;
const newReplicas = currentReplicas + 1;
// Cap at 20 replicas
if (newReplicas > 20) {
return res.status(400).json({
success: false,
error: 'Maximum replica count (20) reached',
});
}
// Scale up by 1
await client.replaceNamespacedDeploymentScale({
name: K8S_DEPLOYMENT_NAME,
namespace: K8S_NAMESPACE,
body: {
apiVersion: 'autoscaling/v1',
kind: 'Scale',
metadata: {
name: K8S_DEPLOYMENT_NAME,
namespace: K8S_NAMESPACE,
},
spec: {
replicas: newReplicas,
},
},
});
console.log(`[Workers] Scaled up ${K8S_DEPLOYMENT_NAME} from ${currentReplicas} to ${newReplicas} replicas`);
res.json({
success: true,
      message: `Added worker (${currentReplicas} → ${newReplicas} replicas)`,
previous: currentReplicas,
desired: newReplicas,
deployment: K8S_DEPLOYMENT_NAME,
namespace: K8S_NAMESPACE,
});
} catch (err: any) {
console.error('[Workers] K8s scale-up error:', err.body?.message || err.message);
res.status(500).json({
success: false,
error: err.body?.message || err.message,
});
}
});
// ============================================================ // ============================================================
// STATIC ROUTES (must come before parameterized routes) // STATIC ROUTES (must come before parameterized routes)
// ============================================================ // ============================================================

View File

@@ -16,11 +16,10 @@ import {
executeGraphQL, executeGraphQL,
startSession, startSession,
endSession, endSession,
setCrawlRotator, getFingerprint,
GRAPHQL_HASHES, GRAPHQL_HASHES,
DUTCHIE_CONFIG, DUTCHIE_CONFIG,
} from '../platforms/dutchie'; } from '../platforms/dutchie';
import { CrawlRotator } from '../services/crawl-rotator';
dotenv.config(); dotenv.config();
@@ -109,27 +108,19 @@ async function main() {
// ============================================================ // ============================================================
// STEP 2: Start stealth session // STEP 2: Start stealth session
// Per workflow-12102025.md: Initialize CrawlRotator and start session with menuUrl
// ============================================================ // ============================================================
console.log('┌─────────────────────────────────────────────────────────────┐'); console.log('┌─────────────────────────────────────────────────────────────┐');
console.log('│ STEP 2: Start Stealth Session │'); console.log('│ STEP 2: Start Stealth Session │');
console.log('└─────────────────────────────────────────────────────────────┘'); console.log('└─────────────────────────────────────────────────────────────┘');
// Per workflow-12102025.md: Initialize CrawlRotator (required for sessions) // Use Arizona timezone for this store
const rotator = new CrawlRotator(); const session = startSession(disp.state || 'AZ', 'America/Phoenix');
setCrawlRotator(rotator);
// Per workflow-12102025.md: startSession takes menuUrl for dynamic Referer const fp = getFingerprint();
const session = startSession(disp.menu_url);
const fp = session.fingerprint;
console.log(` Session ID: ${session.sessionId}`); console.log(` Session ID: ${session.sessionId}`);
console.log(` Browser: ${fp.browserName} (${fp.deviceCategory})`);
console.log(` User-Agent: ${fp.userAgent.slice(0, 60)}...`); console.log(` User-Agent: ${fp.userAgent.slice(0, 60)}...`);
console.log(` Accept-Language: ${fp.acceptLanguage}`); console.log(` Accept-Language: ${fp.acceptLanguage}`);
console.log(` Referer: ${session.referer}`); console.log(` Sec-CH-UA: ${fp.secChUa || '(not set)'}`);
console.log(` DNT: ${fp.httpFingerprint.hasDNT ? 'enabled' : 'disabled'}`);
console.log(` TLS: ${fp.httpFingerprint.curlImpersonateBinary}`);
console.log(''); console.log('');
// ============================================================ // ============================================================

View File

@@ -1,284 +0,0 @@
/**
* Bulk Proxy Import Script
*
* Imports proxies from various formats into the proxies table.
* Supports:
* - Standard format: http://user:pass@host:port
* - Colon format: http://host:port:user:pass
* - Simple format: host:port:user:pass (defaults to http)
*
* Usage:
* npx tsx src/scripts/import-proxies.ts < proxies.txt
* echo "http://host:port:user:pass" | npx tsx src/scripts/import-proxies.ts
* npx tsx src/scripts/import-proxies.ts --file proxies.txt
* npx tsx src/scripts/import-proxies.ts --url "http://host:port:user:pass"
*
* Options:
* --file <path> Read proxies from file (one per line)
* --url <url> Import a single proxy URL
* --max-connections Set max_connections for all imported proxies (default: 1)
* --dry-run Parse and show what would be imported without inserting
*/
import { getPool } from '../db/pool';
import * as fs from 'fs';
import * as readline from 'readline';
interface ParsedProxy {
protocol: string;
host: string;
port: number;
username?: string;
password?: string;
rawUrl: string;
}
/**
* Parse a proxy URL in various formats
*/
function parseProxyUrl(input: string): ParsedProxy | null {
const trimmed = input.trim();
if (!trimmed || trimmed.startsWith('#')) return null;
// Format 1: Standard URL format - http://user:pass@host:port
const standardMatch = trimmed.match(/^(https?|socks5):\/\/([^:]+):([^@]+)@([^:]+):(\d+)$/);
if (standardMatch) {
return {
protocol: standardMatch[1],
username: standardMatch[2],
password: standardMatch[3],
host: standardMatch[4],
port: parseInt(standardMatch[5], 10),
rawUrl: trimmed,
};
}
// Format 2: Standard URL without auth - http://host:port
const noAuthMatch = trimmed.match(/^(https?|socks5):\/\/([^:]+):(\d+)$/);
if (noAuthMatch) {
return {
protocol: noAuthMatch[1],
host: noAuthMatch[2],
port: parseInt(noAuthMatch[3], 10),
rawUrl: trimmed,
};
}
// Format 3: Colon format with protocol - http://host:port:user:pass
const colonWithProtocolMatch = trimmed.match(/^(https?|socks5):\/\/([^:]+):(\d+):([^:]+):(.+)$/);
if (colonWithProtocolMatch) {
return {
protocol: colonWithProtocolMatch[1],
host: colonWithProtocolMatch[2],
port: parseInt(colonWithProtocolMatch[3], 10),
username: colonWithProtocolMatch[4],
password: colonWithProtocolMatch[5],
rawUrl: trimmed, // Keep raw URL for non-standard format
};
}
// Format 4: Colon format without protocol - host:port:user:pass
const colonMatch = trimmed.match(/^([^:]+):(\d+):([^:]+):(.+)$/);
if (colonMatch) {
return {
protocol: 'http',
host: colonMatch[1],
port: parseInt(colonMatch[2], 10),
username: colonMatch[3],
password: colonMatch[4],
rawUrl: `http://${trimmed}`, // Construct raw URL
};
}
// Format 5: Simple host:port
const simpleMatch = trimmed.match(/^([^:]+):(\d+)$/);
if (simpleMatch) {
return {
protocol: 'http',
host: simpleMatch[1],
port: parseInt(simpleMatch[2], 10),
rawUrl: `http://${trimmed}`,
};
}
console.error(`[ImportProxies] Could not parse: ${trimmed}`);
return null;
}
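/*
 * Illustrative parses of the formats above (inputs invented):
 *   parseProxyUrl('http://user:pass@10.0.0.1:8080')
 *     → { protocol: 'http', username: 'user', password: 'pass',
 *         host: '10.0.0.1', port: 8080, rawUrl: 'http://user:pass@10.0.0.1:8080' }
 *   parseProxyUrl('10.0.0.1:8080:user:pass')
 *     → { protocol: 'http', host: '10.0.0.1', port: 8080,
 *         username: 'user', password: 'pass', rawUrl: 'http://10.0.0.1:8080:user:pass' }
 */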
/**
* Check if proxy URL is in non-standard format (needs proxy_url column)
*/
function isNonStandardFormat(rawUrl: string): boolean {
// Colon format: protocol://host:port:user:pass
return /^(https?|socks5):\/\/[^:]+:\d+:[^:]+:.+$/.test(rawUrl);
}
async function importProxies(proxies: ParsedProxy[], maxConnections: number, dryRun: boolean) {
if (dryRun) {
console.log('\n[ImportProxies] DRY RUN - Would import:');
for (const p of proxies) {
const needsRawUrl = isNonStandardFormat(p.rawUrl);
console.log(` ${p.host}:${p.port} (${p.protocol}) user=${p.username || 'none'} needsProxyUrl=${needsRawUrl}`);
}
console.log(`\nTotal: ${proxies.length} proxies`);
return;
}
const pool = getPool();
let inserted = 0;
let skipped = 0;
for (const proxy of proxies) {
try {
// Determine if we need to store the raw URL (non-standard format)
const needsRawUrl = isNonStandardFormat(proxy.rawUrl);
// Use different conflict resolution based on format
// Non-standard format: unique by proxy_url (session-based residential proxies)
// Standard format: unique by host/port/protocol
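      // (xmax = 0) distinguishes inserts from upsert-updates: Postgres sets
      // xmax to a nonzero transaction id on rows touched by DO UPDATE.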
const query = needsRawUrl
? `
INSERT INTO proxies (host, port, protocol, username, password, max_connections, proxy_url, active)
VALUES ($1, $2, $3, $4, $5, $6, $7, true)
ON CONFLICT (proxy_url) WHERE proxy_url IS NOT NULL
DO UPDATE SET
max_connections = EXCLUDED.max_connections,
active = true,
updated_at = NOW()
RETURNING id, (xmax = 0) as is_insert
`
: `
INSERT INTO proxies (host, port, protocol, username, password, max_connections, proxy_url, active)
VALUES ($1, $2, $3, $4, $5, $6, $7, true)
ON CONFLICT (host, port, protocol)
DO UPDATE SET
username = EXCLUDED.username,
password = EXCLUDED.password,
max_connections = EXCLUDED.max_connections,
proxy_url = EXCLUDED.proxy_url,
active = true,
updated_at = NOW()
RETURNING id, (xmax = 0) as is_insert
`;
const result = await pool.query(query, [
proxy.host,
proxy.port,
proxy.protocol,
proxy.username || null,
proxy.password || null,
maxConnections,
needsRawUrl ? proxy.rawUrl : null,
]);
const isInsert = result.rows[0]?.is_insert;
const sessionId = proxy.password?.match(/session-([A-Z0-9]+)/)?.[1] || '';
const displayName = sessionId ? `session ${sessionId}` : `${proxy.host}:${proxy.port}`;
if (isInsert) {
inserted++;
console.log(`[ImportProxies] Inserted: ${displayName}`);
} else {
console.log(`[ImportProxies] Updated: ${displayName}`);
inserted++; // Count updates too
}
} catch (err: any) {
const sessionId = proxy.password?.match(/session-([A-Z0-9]+)/)?.[1] || '';
const displayName = sessionId ? `session ${sessionId}` : `${proxy.host}:${proxy.port}`;
console.error(`[ImportProxies] Error inserting ${displayName}: ${err.message}`);
skipped++;
}
}
console.log(`\n[ImportProxies] Complete: ${inserted} imported, ${skipped} skipped`);
// Notify any listening workers
try {
await pool.query(`NOTIFY proxy_added, 'bulk import'`);
console.log('[ImportProxies] Sent proxy_added notification to workers');
} catch {
// Ignore notification errors
}
}
async function readFromStdin(): Promise<string[]> {
return new Promise((resolve) => {
const lines: string[] = [];
const rl = readline.createInterface({
input: process.stdin,
output: process.stdout,
terminal: false,
});
rl.on('line', (line) => {
lines.push(line);
});
rl.on('close', () => {
resolve(lines);
});
});
}
async function main() {
const args = process.argv.slice(2);
let lines: string[] = [];
let maxConnections = 1;
let dryRun = false;
// Parse arguments
for (let i = 0; i < args.length; i++) {
if (args[i] === '--file' && args[i + 1]) {
const content = fs.readFileSync(args[i + 1], 'utf-8');
lines.push(...content.split('\n'));
i++;
} else if (args[i] === '--url' && args[i + 1]) {
lines.push(args[i + 1]);
i++;
} else if (args[i] === '--max-connections' && args[i + 1]) {
maxConnections = parseInt(args[i + 1], 10);
i++;
} else if (args[i] === '--dry-run') {
dryRun = true;
} else if (!args[i].startsWith('--')) {
// Treat as URL directly
lines.push(args[i]);
}
}
// If no lines yet, read from stdin
if (lines.length === 0) {
console.log('[ImportProxies] Reading from stdin...');
lines = await readFromStdin();
}
// Parse all lines
const proxies: ParsedProxy[] = [];
for (const line of lines) {
const parsed = parseProxyUrl(line);
if (parsed) {
proxies.push(parsed);
}
}
if (proxies.length === 0) {
console.error('[ImportProxies] No valid proxies found');
console.error('\nUsage:');
console.error(' npx tsx src/scripts/import-proxies.ts --url "http://host:port:user:pass"');
console.error(' npx tsx src/scripts/import-proxies.ts --file proxies.txt');
console.error(' echo "host:port:user:pass" | npx tsx src/scripts/import-proxies.ts');
console.error('\nSupported formats:');
console.error(' http://user:pass@host:port (standard)');
console.error(' http://host:port:user:pass (colon format)');
console.error(' host:port:user:pass (simple)');
process.exit(1);
}
console.log(`[ImportProxies] Parsed ${proxies.length} proxies (max_connections=${maxConnections})`);
await importProxies(proxies, maxConnections, dryRun);
}
main().catch((err) => {
console.error('[ImportProxies] Fatal error:', err);
process.exit(1);
});

View File

@@ -1,10 +1,10 @@
/** /**
* Test script for stealth session management * Test script for stealth session management
* *
* Per workflow-12102025.md: * Tests:
* - Tests HTTP fingerprinting (browser-specific headers + ordering) * 1. Per-session fingerprint rotation
* - Tests UA generation (device distribution, browser filtering) * 2. Geographic consistency (timezone → Accept-Language)
* - Tests dynamic Referer per dispensary * 3. Proxy location loading from database
* *
* Usage: * Usage:
* npx tsx src/scripts/test-stealth-session.ts * npx tsx src/scripts/test-stealth-session.ts
@@ -14,142 +14,104 @@ import {
startSession, startSession,
endSession, endSession,
getCurrentSession, getCurrentSession,
getFingerprint,
getRandomFingerprint,
getLocaleForTimezone,
buildHeaders, buildHeaders,
setCrawlRotator,
} from '../platforms/dutchie'; } from '../platforms/dutchie';
import { CrawlRotator } from '../services/crawl-rotator';
import {
generateHTTPFingerprint,
buildRefererFromMenuUrl,
BrowserType,
} from '../services/http-fingerprint';
console.log('='.repeat(60)); console.log('='.repeat(60));
console.log('STEALTH SESSION TEST (per workflow-12102025.md)'); console.log('STEALTH SESSION TEST');
console.log('='.repeat(60)); console.log('='.repeat(60));
// Initialize CrawlRotator (required for sessions) // Test 1: Timezone to Locale mapping
console.log('\n[Setup] Initializing CrawlRotator...'); console.log('\n[Test 1] Timezone to Locale Mapping:');
const rotator = new CrawlRotator(); const testTimezones = [
setCrawlRotator(rotator); 'America/Phoenix',
console.log(' CrawlRotator initialized'); 'America/Los_Angeles',
'America/New_York',
// Test 1: HTTP Fingerprint Generation 'America/Chicago',
console.log('\n[Test 1] HTTP Fingerprint Generation:');
const browsers: BrowserType[] = ['Chrome', 'Firefox', 'Safari', 'Edge'];
for (const browser of browsers) {
const httpFp = generateHTTPFingerprint(browser);
console.log(` ${browser}:`);
console.log(` TLS binary: ${httpFp.curlImpersonateBinary}`);
console.log(` DNT: ${httpFp.hasDNT ? 'enabled' : 'disabled'}`);
console.log(` Header order: ${httpFp.headerOrder.slice(0, 5).join(', ')}...`);
}
// Test 2: Dynamic Referer from menu URLs
console.log('\n[Test 2] Dynamic Referer from Menu URLs:');
const testUrls = [
'https://dutchie.com/embedded-menu/harvest-of-tempe',
'https://dutchie.com/dispensary/zen-leaf-mesa',
'/embedded-menu/deeply-rooted',
'/dispensary/curaleaf-phoenix',
null,
undefined, undefined,
'Invalid/Timezone',
]; ];
for (const url of testUrls) { for (const tz of testTimezones) {
const referer = buildRefererFromMenuUrl(url); const locale = getLocaleForTimezone(tz);
  console.log(`    ${url || '(null/undefined)'}`);  console.log(`  ${tz || '(undefined)'} → ${locale}`);
console.log(`${referer}`);
} }
// Test 3: Session with Dynamic Referer // Test 2: Random fingerprint selection
console.log('\n[Test 3] Session with Dynamic Referer:'); console.log('\n[Test 2] Random Fingerprint Selection (5 samples):');
const testMenuUrl = 'https://dutchie.com/dispensary/harvest-of-tempe'; for (let i = 0; i < 5; i++) {
console.log(` Starting session with menuUrl: ${testMenuUrl}`); const fp = getRandomFingerprint();
console.log(` ${i + 1}. ${fp.userAgent.slice(0, 60)}...`);
}
const session1 = startSession(testMenuUrl); // Test 3: Session Management
console.log('\n[Test 3] Session Management:');
// Before session - should use default fingerprint
console.log(' Before session:');
const beforeFp = getFingerprint();
console.log(` getFingerprint(): ${beforeFp.userAgent.slice(0, 50)}...`);
console.log(` getCurrentSession(): ${getCurrentSession()}`);
// Start session with Arizona timezone
console.log('\n Starting session (AZ, America/Phoenix):');
const session1 = startSession('AZ', 'America/Phoenix');
console.log(` Session ID: ${session1.sessionId}`); console.log(` Session ID: ${session1.sessionId}`);
console.log(` Browser: ${session1.fingerprint.browserName}`); console.log(` Fingerprint UA: ${session1.fingerprint.userAgent.slice(0, 50)}...`);
console.log(` Device: ${session1.fingerprint.deviceCategory}`); console.log(` Accept-Language: ${session1.fingerprint.acceptLanguage}`);
console.log(` Referer: ${session1.referer}`); console.log(` Timezone: ${session1.timezone}`);
console.log(` DNT: ${session1.fingerprint.httpFingerprint.hasDNT ? 'enabled' : 'disabled'}`);
console.log(` TLS: ${session1.fingerprint.httpFingerprint.curlImpersonateBinary}`);
// Test 4: Build Headers (browser-specific order) // During session - should use session fingerprint
console.log('\n[Test 4] Build Headers (browser-specific order):'); console.log('\n During session:');
const { headers, orderedHeaders } = buildHeaders(true, 1000); const duringFp = getFingerprint();
console.log(` Headers built for ${session1.fingerprint.browserName}:`); console.log(` getFingerprint(): ${duringFp.userAgent.slice(0, 50)}...`);
console.log(` Order: ${orderedHeaders.join(' → ')}`); console.log(` Same as session? ${duringFp.userAgent === session1.fingerprint.userAgent}`);
console.log(` Sample headers:`);
console.log(` User-Agent: ${headers['User-Agent']?.slice(0, 50)}...`);
console.log(` Accept: ${headers['Accept']}`);
console.log(` Accept-Language: ${headers['Accept-Language']}`);
console.log(` Referer: ${headers['Referer']}`);
if (headers['sec-ch-ua']) {
console.log(` sec-ch-ua: ${headers['sec-ch-ua']}`);
}
if (headers['DNT']) {
console.log(` DNT: ${headers['DNT']}`);
}
// Test buildHeaders with session
console.log('\n buildHeaders() during session:');
const headers = buildHeaders('/embedded-menu/test-store');
console.log(` User-Agent: ${headers['user-agent'].slice(0, 50)}...`);
console.log(` Accept-Language: ${headers['accept-language']}`);
console.log(` Origin: ${headers['origin']}`);
console.log(` Referer: ${headers['referer']}`);
// End session
console.log('\n Ending session:');
endSession(); endSession();
console.log(` getCurrentSession(): ${getCurrentSession()}`);
// Test 5: Multiple Sessions (UA variety) // Test 4: Multiple sessions should have different fingerprints
console.log('\n[Test 5] Multiple Sessions (UA & fingerprint variety):'); console.log('\n[Test 4] Multiple Sessions (fingerprint variety):');
const sessions: { const fingerprints: string[] = [];
browser: string;
device: string;
hasDNT: boolean;
}[] = [];
for (let i = 0; i < 10; i++) { for (let i = 0; i < 10; i++) {
const session = startSession(`/dispensary/store-${i}`); const session = startSession('CA', 'America/Los_Angeles');
sessions.push({ fingerprints.push(session.fingerprint.userAgent);
browser: session.fingerprint.browserName,
device: session.fingerprint.deviceCategory,
hasDNT: session.fingerprint.httpFingerprint.hasDNT,
});
endSession(); endSession();
} }
// Count distribution const uniqueCount = new Set(fingerprints).size;
const browserCounts: Record<string, number> = {}; console.log(` 10 sessions created, ${uniqueCount} unique fingerprints`);
const deviceCounts: Record<string, number> = {}; console.log(` Variety: ${uniqueCount >= 3 ? '✅ Good' : '⚠️ Low - may need more fingerprint options'}`);
let dntCount = 0;
for (const s of sessions) { // Test 5: Geographic consistency check
browserCounts[s.browser] = (browserCounts[s.browser] || 0) + 1; console.log('\n[Test 5] Geographic Consistency:');
deviceCounts[s.device] = (deviceCounts[s.device] || 0) + 1; const geoTests = [
if (s.hasDNT) dntCount++; { state: 'AZ', tz: 'America/Phoenix' },
} { state: 'CA', tz: 'America/Los_Angeles' },
{ state: 'NY', tz: 'America/New_York' },
{ state: 'IL', tz: 'America/Chicago' },
];
console.log(` 10 sessions created:`); for (const { state, tz } of geoTests) {
console.log(` Browsers: ${JSON.stringify(browserCounts)}`); const session = startSession(state, tz);
console.log(` Devices: ${JSON.stringify(deviceCounts)}`); const consistent = session.fingerprint.acceptLanguage.includes('en-US');
console.log(` DNT enabled: ${dntCount}/10 (expected ~30%)`); console.log(` ${state} (${tz}): Accept-Language=${session.fingerprint.acceptLanguage} ${consistent ? '✅' : '❌'}`);
// Test 6: Device distribution check (per workflow-12102025.md: 62/36/2)
console.log('\n[Test 6] Device Distribution (larger sample):');
const deviceSamples: string[] = [];
for (let i = 0; i < 100; i++) {
const session = startSession();
deviceSamples.push(session.fingerprint.deviceCategory);
endSession(); endSession();
} }
const mobileCount = deviceSamples.filter(d => d === 'mobile').length;
const desktopCount = deviceSamples.filter(d => d === 'desktop').length;
const tabletCount = deviceSamples.filter(d => d === 'tablet').length;
console.log(` 100 sessions (expected: 62% mobile, 36% desktop, 2% tablet):`);
console.log(` Mobile: ${mobileCount}%`);
console.log(` Desktop: ${desktopCount}%`);
console.log(` Tablet: ${tabletCount}%`);
console.log(` Distribution: ${Math.abs(mobileCount - 62) < 15 && Math.abs(desktopCount - 36) < 15 ? '✅ Reasonable' : '⚠️ Off target'}`);
console.log('\n' + '='.repeat(60)); console.log('\n' + '='.repeat(60));
console.log('TEST COMPLETE'); console.log('TEST COMPLETE');
console.log('='.repeat(60)); console.log('='.repeat(60));

View File

@@ -1,53 +1,49 @@
/** /**
* Crawl Rotator - Proxy & User Agent Rotation for Crawlers * Crawl Rotator - Proxy & User Agent Rotation for Crawlers
* *
* Updated: 2025-12-10 per workflow-12102025.md * Manages rotation of proxies and user agents to avoid blocks.
* * Used by platform-specific crawlers (Dutchie, Jane, etc.)
* KEY BEHAVIORS (per workflow-12102025.md):
* 1. Task determines WHAT work to do, proxy determines SESSION IDENTITY
* 2. Proxy location (timezone) sets Accept-Language headers (always English)
* 3. On 403: immediately get new IP, new fingerprint, retry
* 4. After 3 consecutive 403s on same proxy with different fingerprints → disable proxy
*
* USER-AGENT GENERATION (per workflow-12102025.md):
* - Device distribution: Mobile 62%, Desktop 36%, Tablet 2%
* - Browser whitelist: Chrome, Safari, Edge, Firefox only
* - UA sticks until IP rotates
* - Failure = alert admin + stop crawl (no fallback)
*
* Uses intoli/user-agents for realistic UA generation with daily-updated data.
* *
* Canonical location: src/services/crawl-rotator.ts * Canonical location: src/services/crawl-rotator.ts
*/ */
import { Pool } from 'pg'; import { Pool } from 'pg';
import UserAgent from 'user-agents';
import {
HTTPFingerprint,
generateHTTPFingerprint,
BrowserType,
} from './http-fingerprint';
// ============================================================ // ============================================================
// UA CONSTANTS (per workflow-12102025.md) // USER AGENT CONFIGURATION
// ============================================================ // ============================================================
/** /**
* Per workflow-12102025.md: Device category distribution (hardcoded) * Modern browser user agents (Chrome, Firefox, Safari, Edge on various platforms)
* Mobile: 62%, Desktop: 36%, Tablet: 2% * Updated: 2024
*/ */
const DEVICE_WEIGHTS = { export const USER_AGENTS = [
mobile: 62, // Chrome on Windows
desktop: 36, 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
tablet: 2, 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
} as const; 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36',
/** // Chrome on macOS
* Per workflow-12102025.md: Browser whitelist 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
* Only Chrome (67%), Safari (20%), Edge (6%), Firefox (3%) 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
* Samsung Internet, Opera, and other niche browsers are filtered out
*/ // Firefox on Windows
const ALLOWED_BROWSERS = ['Chrome', 'Safari', 'Edge', 'Firefox'] as const; 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:120.0) Gecko/20100101 Firefox/120.0',
// Firefox on macOS
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:121.0) Gecko/20100101 Firefox/121.0',
// Safari on macOS
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Safari/605.1.15',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.1 Safari/605.1.15',
// Edge on Windows
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0',
// Chrome on Linux
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
];
// ============================================================ // ============================================================
// PROXY TYPES // PROXY TYPES
@@ -65,23 +61,13 @@ export interface Proxy {
failureCount: number; failureCount: number;
successCount: number; successCount: number;
avgResponseTimeMs: number | null; avgResponseTimeMs: number | null;
maxConnections: number; maxConnections: number; // Number of concurrent connections allowed (for rotating proxies)
/** // Location info (if known)
* Per workflow-12102025.md: Track consecutive 403s with different fingerprints.
* After 3 consecutive 403s → disable proxy (it's burned).
*/
consecutive403Count: number;
// Location info - determines session headers per workflow-12102025.md
city?: string; city?: string;
state?: string; state?: string;
country?: string; country?: string;
countryCode?: string; countryCode?: string;
timezone?: string; timezone?: string;
/**
* Raw proxy URL override. If set, used directly instead of constructing from parts.
* Supports non-standard formats like: http://host:port:user:pass
*/
proxyUrl?: string;
} }
export interface ProxyStats { export interface ProxyStats {
@@ -91,40 +77,6 @@ export interface ProxyStats {
avgSuccessRate: number; avgSuccessRate: number;
} }
// ============================================================
// FINGERPRINT TYPE
// Per workflow-12102025.md: Full browser fingerprint from user-agents
// ============================================================
export interface BrowserFingerprint {
userAgent: string;
platform: string;
screenWidth: number;
screenHeight: number;
viewportWidth: number;
viewportHeight: number;
deviceCategory: string;
browserName: string; // Per workflow-12102025.md: for session logging
// Derived headers for anti-detect
acceptLanguage: string;
secChUa?: string;
secChUaPlatform?: string;
secChUaMobile?: string;
// Per workflow-12102025.md: HTTP Fingerprinting section
httpFingerprint: HTTPFingerprint;
}
/**
* Per workflow-12102025.md: Session log entry for debugging blocked sessions
*/
export interface UASessionLog {
deviceCategory: string;
browserName: string;
userAgent: string;
proxyIp: string | null;
sessionStartedAt: Date;
}
// ============================================================ // ============================================================
// PROXY ROTATOR CLASS // PROXY ROTATOR CLASS
// ============================================================ // ============================================================
@@ -134,26 +86,18 @@ export class ProxyRotator {
private proxies: Proxy[] = []; private proxies: Proxy[] = [];
private currentIndex: number = 0; private currentIndex: number = 0;
private lastRotation: Date = new Date(); private lastRotation: Date = new Date();
private lastReloadAt: Date = new Date();
// Proxy reload interval - how often to check for proxy changes (default: 60 seconds)
private reloadIntervalMs: number = 60000;
constructor(pool?: Pool) { constructor(pool?: Pool) {
this.pool = pool || null; this.pool = pool || null;
} }
/**
* Initialize with database pool
*/
setPool(pool: Pool): void { setPool(pool: Pool): void {
this.pool = pool; this.pool = pool;
} }
/**
* Set the reload interval for periodic proxy checks
*/
setReloadInterval(ms: number): void {
this.reloadIntervalMs = ms;
}
/** /**
* Load proxies from database * Load proxies from database
*/ */
@@ -178,87 +122,35 @@ export class ProxyRotator {
0 as "successCount", 0 as "successCount",
response_time_ms as "avgResponseTimeMs", response_time_ms as "avgResponseTimeMs",
COALESCE(max_connections, 1) as "maxConnections", COALESCE(max_connections, 1) as "maxConnections",
COALESCE(consecutive_403_count, 0) as "consecutive403Count",
city, city,
state, state,
country, country,
country_code as "countryCode", country_code as "countryCode",
timezone, timezone
proxy_url as "proxyUrl"
FROM proxies FROM proxies
WHERE active = true WHERE active = true
ORDER BY failure_count ASC, last_tested_at ASC NULLS FIRST ORDER BY failure_count ASC, last_tested_at ASC NULLS FIRST
`); `);
this.proxies = result.rows; this.proxies = result.rows;
this.lastReloadAt = new Date();
// Calculate total concurrent capacity
const totalCapacity = this.proxies.reduce((sum, p) => sum + p.maxConnections, 0); const totalCapacity = this.proxies.reduce((sum, p) => sum + p.maxConnections, 0);
console.log(`[ProxyRotator] Loaded ${this.proxies.length} active proxies (${totalCapacity} max concurrent connections / threads)`); console.log(`[ProxyRotator] Loaded ${this.proxies.length} active proxies (${totalCapacity} max concurrent connections)`);
} catch (error) { } catch (error) {
// Table might not exist - that's okay
console.warn(`[ProxyRotator] Could not load proxies: ${error}`); console.warn(`[ProxyRotator] Could not load proxies: ${error}`);
this.proxies = []; this.proxies = [];
} }
} }
/**
* Check if proxy list is stale and needs reload
*/
isStale(): boolean {
const elapsed = Date.now() - this.lastReloadAt.getTime();
return elapsed > this.reloadIntervalMs;
}
/**
* Reload proxies if the cache is stale.
* This ensures workers pick up new proxies or see disabled proxies removed.
* Returns true if proxies were reloaded.
*/
async reloadIfStale(): Promise<boolean> {
if (!this.isStale()) {
return false;
}
const oldCount = this.proxies.length;
const oldCapacity = this.proxies.reduce((sum, p) => sum + p.maxConnections, 0);
const oldIds = new Set(this.proxies.map(p => p.id));
await this.loadProxies();
const newCount = this.proxies.length;
const newCapacity = this.proxies.reduce((sum, p) => sum + p.maxConnections, 0);
const newIds = new Set(this.proxies.map(p => p.id));
// Log changes
const added = this.proxies.filter(p => !oldIds.has(p.id));
const removed = [...oldIds].filter(id => !newIds.has(id));
if (added.length > 0 || removed.length > 0 || oldCapacity !== newCapacity) {
      console.log(`[ProxyRotator] Reloaded proxies: ${oldCount} → ${newCount} proxies, ${oldCapacity} → ${newCapacity} threads`);
if (added.length > 0) {
console.log(`[ProxyRotator] Added: ${added.map(p => `${p.host}:${p.port} (${p.maxConnections} threads)`).join(', ')}`);
}
if (removed.length > 0) {
console.log(`[ProxyRotator] Removed: ${removed.join(', ')}`);
}
}
return true;
}
/**
* Get time since last reload in seconds
*/
getSecondsSinceReload(): number {
return Math.floor((Date.now() - this.lastReloadAt.getTime()) / 1000);
}
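/*
 * Illustrative use of the staleness helpers above (sketch): a worker calls
 * reloadIfStale() before claiming work so proxies added or disabled in the
 * database are picked up within one reload interval, without a restart.
 *
 *   await proxyRotator.reloadIfStale();   // no-op if reloaded recently
 *   const proxy = proxyRotator.getNext();
 */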
/** /**
* Get next proxy in rotation * Get next proxy in rotation
*/ */
getNext(): Proxy | null { getNext(): Proxy | null {
if (this.proxies.length === 0) return null; if (this.proxies.length === 0) return null;
// Round-robin rotation
this.currentIndex = (this.currentIndex + 1) % this.proxies.length; this.currentIndex = (this.currentIndex + 1) % this.proxies.length;
this.lastRotation = new Date(); this.lastRotation = new Date();
@@ -293,68 +185,23 @@ export class ProxyRotator {
} }
/** /**
* Mark proxy as blocked (403 received) * Mark proxy as failed (temporarily remove from rotation)
* Per workflow-12102025.md:
* - Increment consecutive_403_count
* - After 3 consecutive 403s with different fingerprints → disable proxy
* - This is separate from general failures (timeouts, etc.)
*/
async markBlocked(proxyId: number): Promise<boolean> {
const proxy = this.proxies.find(p => p.id === proxyId);
let shouldDisable = false;
if (proxy) {
proxy.consecutive403Count++;
// Per workflow-12102025.md: 3 consecutive 403s → proxy is burned
if (proxy.consecutive403Count >= 3) {
proxy.isActive = false;
this.proxies = this.proxies.filter(p => p.id !== proxyId);
console.log(`[ProxyRotator] Proxy ${proxyId} DISABLED after ${proxy.consecutive403Count} consecutive 403s (burned)`);
shouldDisable = true;
} else {
console.log(`[ProxyRotator] Proxy ${proxyId} blocked (403 #${proxy.consecutive403Count}/3)`);
}
}
// Update database
if (this.pool) {
try {
await this.pool.query(`
UPDATE proxies
SET
consecutive_403_count = COALESCE(consecutive_403_count, 0) + 1,
last_failure_at = NOW(),
test_result = '403 Forbidden',
active = CASE WHEN COALESCE(consecutive_403_count, 0) >= 2 THEN false ELSE active END,
updated_at = NOW()
WHERE id = $1
`, [proxyId]);
} catch (err) {
console.error(`[ProxyRotator] Failed to update proxy ${proxyId}:`, err);
}
}
return shouldDisable;
}
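/*
 * Illustrative 403 handling around markBlocked (sketch; `proxyRotator` and
 * the retry wiring are assumptions):
 *
 *   if (response.status === 403) {
 *     const burned = await proxyRotator.markBlocked(proxy.id);
 *     const nextProxy = burned ? proxyRotator.getNext() : proxy;
 *     // retry with a fresh fingerprint on nextProxy
 *   }
 */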
/**
* Mark proxy as failed (general error - timeout, connection error, etc.)
* Separate from 403 blocking per workflow-12102025.md
*/ */
async markFailed(proxyId: number, error?: string): Promise<void> { async markFailed(proxyId: number, error?: string): Promise<void> {
// Update in-memory
const proxy = this.proxies.find(p => p.id === proxyId); const proxy = this.proxies.find(p => p.id === proxyId);
if (proxy) { if (proxy) {
proxy.failureCount++; proxy.failureCount++;
// Deactivate if too many general failures // Deactivate if too many failures
if (proxy.failureCount >= 5) { if (proxy.failureCount >= 5) {
proxy.isActive = false; proxy.isActive = false;
this.proxies = this.proxies.filter(p => p.id !== proxyId); this.proxies = this.proxies.filter(p => p.id !== proxyId);
console.log(`[ProxyRotator] Proxy ${proxyId} deactivated after ${proxy.failureCount} general failures`); console.log(`[ProxyRotator] Proxy ${proxyId} deactivated after ${proxy.failureCount} failures`);
} }
} }
// Update database
if (this.pool) { if (this.pool) {
try { try {
await this.pool.query(` await this.pool.query(`
@@ -373,22 +220,23 @@ export class ProxyRotator {
} }
/** /**
* Mark proxy as successful - resets consecutive 403 count * Mark proxy as successful
* Per workflow-12102025.md: successful request clears the 403 counter
*/ */
async markSuccess(proxyId: number, responseTimeMs?: number): Promise<void> { async markSuccess(proxyId: number, responseTimeMs?: number): Promise<void> {
// Update in-memory
const proxy = this.proxies.find(p => p.id === proxyId); const proxy = this.proxies.find(p => p.id === proxyId);
if (proxy) { if (proxy) {
proxy.successCount++; proxy.successCount++;
proxy.consecutive403Count = 0; // Reset on success per workflow-12102025.md
proxy.lastUsedAt = new Date(); proxy.lastUsedAt = new Date();
if (responseTimeMs !== undefined) { if (responseTimeMs !== undefined) {
// Rolling average
proxy.avgResponseTimeMs = proxy.avgResponseTimeMs proxy.avgResponseTimeMs = proxy.avgResponseTimeMs
? (proxy.avgResponseTimeMs * 0.8) + (responseTimeMs * 0.2) ? (proxy.avgResponseTimeMs * 0.8) + (responseTimeMs * 0.2)
: responseTimeMs; : responseTimeMs;
} }
} }
// Update database
if (this.pool) { if (this.pool) {
try { try {
await this.pool.query(` await this.pool.query(`
@@ -396,7 +244,6 @@ export class ProxyRotator {
SET SET
last_tested_at = NOW(), last_tested_at = NOW(),
test_result = 'success', test_result = 'success',
consecutive_403_count = 0,
response_time_ms = CASE response_time_ms = CASE
WHEN response_time_ms IS NULL THEN $2 WHEN response_time_ms IS NULL THEN $2
ELSE (response_time_ms * 0.8 + $2 * 0.2)::integer ELSE (response_time_ms * 0.8 + $2 * 0.2)::integer
@@ -412,24 +259,8 @@ export class ProxyRotator {
/**
* Get proxy URL for HTTP client
* If proxy.proxyUrl is set, uses it directly (supports non-standard formats).
* Otherwise constructs standard format: protocol://user:pass@host:port
*/
getProxyUrl(proxy: Proxy): string {
// If proxyUrl is set, check if it needs conversion from non-standard format
if (proxy.proxyUrl) {
// Check if it's in non-standard format: http://host:port:user:pass
const colonFormatMatch = proxy.proxyUrl.match(/^(https?):\/\/([^:]+):(\d+):([^:]+):(.+)$/);
if (colonFormatMatch) {
// Convert to standard format: http://user:pass@host:port
const [, protocol, host, port, username, password] = colonFormatMatch;
return `${protocol}://${encodeURIComponent(username)}:${encodeURIComponent(password)}@${host}:${port}`;
}
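// Worked example (illustrative values): 'http://10.0.0.1:8080:alice:p@ss'
// is rewritten to 'http://alice:p%40ss@10.0.0.1:8080' (credentials URL-encoded).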
// Already in standard format or unknown format - return as-is
return proxy.proxyUrl;
}
// Construct standard format from individual fields
const auth = proxy.username && proxy.password
? `${proxy.username}:${proxy.password}@`
: '';
@@ -441,8 +272,8 @@ export class ProxyRotator {
*/
getStats(): ProxyStats {
const totalProxies = this.proxies.length;
const activeProxies = this.proxies.reduce((sum, p) => sum + p.maxConnections, 0); // Total concurrent capacity
const blockedProxies = this.proxies.filter(p => p.failureCount >= 5 || p.consecutive403Count >= 3).length;
const successRates = this.proxies
.filter(p => p.successCount + p.failureCount > 0)
@@ -454,12 +285,15 @@ export class ProxyRotator {
return {
totalProxies,
activeProxies, // Total concurrent capacity across all proxies
blockedProxies,
avgSuccessRate,
};
}
/**
* Check if proxy pool has available proxies
*/
hasAvailableProxies(): boolean {
return this.proxies.length > 0;
}
@@ -467,194 +301,53 @@ export class ProxyRotator {
// ============================================================
// USER AGENT ROTATOR CLASS
// Per workflow-12102025.md: Uses intoli/user-agents for realistic fingerprints
// ============================================================
export class UserAgentRotator {
private currentFingerprint: BrowserFingerprint | null = null;
private sessionLog: UASessionLog | null = null;
private lastRotation: Date = new Date();
constructor() {
// Per workflow-12102025.md: Initialize with first fingerprint
this.rotate();
}
/**
* Per workflow-12102025.md: Roll device category based on distribution
* Mobile: 62%, Desktop: 36%, Tablet: 2%
*/ */
private rollDeviceCategory(): 'mobile' | 'desktop' | 'tablet' {
const roll = Math.random() * 100;
if (roll < DEVICE_WEIGHTS.mobile) {
return 'mobile';
} else if (roll < DEVICE_WEIGHTS.mobile + DEVICE_WEIGHTS.desktop) {
return 'desktop';
} else {
return 'tablet';
}
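// Worked example (illustrative): with weights 62/36/2, a roll of 30
// returns 'mobile', 75 returns 'desktop' (62 <= 75 < 98), 99 returns 'tablet'.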
}
/**
* Per workflow-12102025.md: Extract browser name from UA string
*/
private extractBrowserName(userAgent: string): string {
if (userAgent.includes('Edg/')) return 'Edge';
if (userAgent.includes('Firefox/')) return 'Firefox';
if (userAgent.includes('Safari/') && !userAgent.includes('Chrome/')) return 'Safari';
if (userAgent.includes('Chrome/')) return 'Chrome';
return 'Unknown';
}
/**
* Per workflow-12102025.md: Check if browser is in whitelist
*/
private isAllowedBrowser(userAgent: string): boolean {
const browserName = this.extractBrowserName(userAgent);
return ALLOWED_BROWSERS.includes(browserName as typeof ALLOWED_BROWSERS[number]);
}
/**
* Generate a new random fingerprint
* Per workflow-12102025.md:
* - Roll device category (62/36/2)
* - Filter to top 4 browsers only
* - Failure = alert admin + stop (no fallback)
*/ */
rotate(proxyIp?: string): BrowserFingerprint {
// Per workflow-12102025.md: Roll device category
const deviceCategory = this.rollDeviceCategory();
// Per workflow-12102025.md: Generate UA filtered to device category
const generator = new UserAgent({ deviceCategory });
// Per workflow-12102025.md: Try to get an allowed browser (max 50 attempts)
let ua: ReturnType<typeof generator>;
let attempts = 0;
const maxAttempts = 50;
do {
ua = generator();
attempts++;
} while (!this.isAllowedBrowser(ua.data.userAgent) && attempts < maxAttempts);
// Per workflow-12102025.md: If we can't get allowed browser, this is a failure
if (!this.isAllowedBrowser(ua.data.userAgent)) {
const errorMsg = `[UserAgentRotator] CRITICAL: Failed to generate allowed browser after ${maxAttempts} attempts. Device: ${deviceCategory}. Last UA: ${ua.data.userAgent}`;
console.error(errorMsg);
// Per workflow-12102025.md: Alert admin + stop crawl
// TODO: Post alert to admin dashboard
throw new Error(errorMsg);
}
const data = ua.data;
const browserName = this.extractBrowserName(data.userAgent);
// Build sec-ch-ua headers from user agent string
const secChUa = this.buildSecChUa(data.userAgent, deviceCategory);
// Per workflow-12102025.md: HTTP Fingerprinting - generate full HTTP fingerprint
const httpFingerprint = generateHTTPFingerprint(browserName as BrowserType);
this.currentFingerprint = {
userAgent: data.userAgent,
platform: data.platform,
screenWidth: data.screenWidth,
screenHeight: data.screenHeight,
viewportWidth: data.viewportWidth,
viewportHeight: data.viewportHeight,
deviceCategory: data.deviceCategory,
browserName, // Per workflow-12102025.md: for session logging
// Per workflow-12102025.md: always English
acceptLanguage: 'en-US,en;q=0.9',
...secChUa,
// Per workflow-12102025.md: HTTP Fingerprinting section
httpFingerprint,
};
// Per workflow-12102025.md: Log session data
this.sessionLog = {
deviceCategory,
browserName,
userAgent: data.userAgent,
proxyIp: proxyIp || null,
sessionStartedAt: new Date(),
};
console.log(`[UserAgentRotator] New fingerprint: device=${deviceCategory}, browser=${browserName}, UA=${data.userAgent.slice(0, 50)}...`);
return this.currentFingerprint;
}
/**
* Get current fingerprint without rotating
*/
getCurrent(): BrowserFingerprint {
if (!this.currentFingerprint) {
return this.rotate();
}
return this.currentFingerprint;
}
/**
* Get a random fingerprint (rotates and returns)
*/
getRandom(proxyIp?: string): BrowserFingerprint {
return this.rotate(proxyIp);
}
/**
* Per workflow-12102025.md: Get session log for debugging
*/
getSessionLog(): UASessionLog | null {
return this.sessionLog;
}
/**
* Build sec-ch-ua headers from user agent string
* Per workflow-12102025.md: Include mobile indicator based on device category
*/
private buildSecChUa(userAgent: string, deviceCategory: string): { secChUa?: string; secChUaPlatform?: string; secChUaMobile?: string } {
const isMobile = deviceCategory === 'mobile' || deviceCategory === 'tablet';
// Extract Chrome version if present
const chromeMatch = userAgent.match(/Chrome\/(\d+)/);
const edgeMatch = userAgent.match(/Edg\/(\d+)/);
if (edgeMatch) {
const version = edgeMatch[1];
return {
secChUa: `"Microsoft Edge";v="${version}", "Chromium";v="${version}", "Not_A Brand";v="24"`,
secChUaPlatform: userAgent.includes('Windows') ? '"Windows"' : userAgent.includes('Android') ? '"Android"' : '"macOS"',
secChUaMobile: isMobile ? '?1' : '?0',
};
}
if (chromeMatch) {
const version = chromeMatch[1];
let platform = '"Linux"';
if (userAgent.includes('Windows')) platform = '"Windows"';
else if (userAgent.includes('Mac')) platform = '"macOS"';
else if (userAgent.includes('Android')) platform = '"Android"';
else if (userAgent.includes('iPhone') || userAgent.includes('iPad')) platform = '"iOS"';
return {
secChUa: `"Google Chrome";v="${version}", "Chromium";v="${version}", "Not_A Brand";v="24"`,
secChUaPlatform: platform,
secChUaMobile: isMobile ? '?1' : '?0',
};
}
// Firefox/Safari don't send sec-ch-ua
return {};
}
getCount(): number {
return 1; // user-agents generates dynamically
}
}
// ============================================================
// COMBINED ROTATOR
// Per workflow-12102025.md: Coordinates proxy + fingerprint rotation
// ============================================================
export class CrawlRotator {
@@ -666,68 +359,49 @@ export class CrawlRotator {
this.userAgent = new UserAgentRotator();
}
/**
* Initialize rotator (load proxies from DB)
*/
async initialize(): Promise<void> {
await this.proxy.loadProxies();
}
/**
* Reload proxy list if stale.
* Workers should call this periodically to pick up proxy changes.
* Returns true if proxies were reloaded.
*/
async reloadIfStale(): Promise<boolean> {
return this.proxy.reloadIfStale();
}
/**
* Set proxy reload interval in milliseconds.
* Default is 60 seconds.
*/
setProxyReloadInterval(ms: number): void {
this.proxy.setReloadInterval(ms);
}
/**
* Rotate proxy only (get new IP)
*/
rotateProxy(): Proxy | null {
return this.proxy.getNext();
}
/**
* Rotate fingerprint only (new UA, screen size, etc.)
*/
rotateFingerprint(): BrowserFingerprint {
return this.userAgent.rotate();
}
/**
* Rotate both proxy and fingerprint
* Per workflow-12102025.md: called on 403 for fresh identity
* Passes proxy IP to UA rotation for session logging
*/
rotateBoth(): { proxy: Proxy | null; fingerprint: BrowserFingerprint } {
const proxy = this.proxy.getNext();
const proxyIp = proxy ? proxy.host : undefined;
return {
proxy,
fingerprint: this.userAgent.rotate(proxyIp),
};
}
/**
* Get current proxy and fingerprint without rotating
*/
getCurrent(): { proxy: Proxy | null; fingerprint: BrowserFingerprint } {
return {
proxy: this.proxy.getCurrent(),
fingerprint: this.userAgent.getCurrent(),
};
}
/**
* Record success for current proxy
* Per workflow-12102025.md: resets consecutive 403 count
*/
async recordSuccess(responseTimeMs?: number): Promise<void> {
const current = this.proxy.getCurrent();
@@ -737,20 +411,7 @@ export class CrawlRotator {
}
/**
* Record 403 block for current proxy
* Per workflow-12102025.md: increments consecutive_403_count, disables after 3
* Returns true if proxy was disabled
*/
async recordBlock(): Promise<boolean> {
const current = this.proxy.getCurrent();
if (current) {
return await this.proxy.markBlocked(current.id);
}
return false;
}
/**
* Record general failure (not 403)
*/ */
async recordFailure(error?: string): Promise<void> {
const current = this.proxy.getCurrent();
@@ -760,13 +421,14 @@ export class CrawlRotator {
}
/**
* Get current proxy location info
* Per workflow-12102025.md: proxy location determines session headers
* Note: For rotating proxies (like IPRoyal), the actual exit location varies per request
*/
getProxyLocation(): { city?: string; state?: string; country?: string; timezone?: string; isRotating: boolean } | null {
const current = this.proxy.getCurrent();
if (!current) return null;
// Check if this is a rotating proxy (max_connections > 1 usually indicates rotating)
const isRotating = current.maxConnections > 1;
return { return {
@@ -777,127 +439,6 @@ export class CrawlRotator {
isRotating
};
}
/**
* Get timezone from current proxy
* Per workflow-12102025.md: used for Accept-Language header
*/
getProxyTimezone(): string | undefined {
const current = this.proxy.getCurrent();
return current?.timezone;
}
/**
* Preflight check - verifies proxy and anti-detect are working
* MUST be called before any task execution to ensure anonymity.
*
* Tests:
* 1. Proxy available - a proxy must be loaded and active
* 2. Proxy connectivity - makes HTTP request through proxy to verify connection
* 3. Anti-detect headers - verifies fingerprint is set with required headers
*
* @returns Promise<PreflightResult> with pass/fail status and details
*/
async preflight(): Promise<PreflightResult> {
const result: PreflightResult = {
passed: false,
proxyAvailable: false,
proxyConnected: false,
antidetectReady: false,
proxyIp: null,
fingerprint: null,
error: null,
responseTimeMs: null,
};
// Step 1: Check proxy is available
const currentProxy = this.proxy.getCurrent();
if (!currentProxy) {
result.error = 'No proxy available';
console.log('[Preflight] FAILED - No proxy available');
return result;
}
result.proxyAvailable = true;
result.proxyIp = currentProxy.host;
// Step 2: Check fingerprint/anti-detect is ready
const fingerprint = this.userAgent.getCurrent();
if (!fingerprint || !fingerprint.userAgent) {
result.error = 'Anti-detect fingerprint not initialized';
console.log('[Preflight] FAILED - No fingerprint');
return result;
}
result.antidetectReady = true;
result.fingerprint = {
userAgent: fingerprint.userAgent,
browserName: fingerprint.browserName,
deviceCategory: fingerprint.deviceCategory,
};
// Step 3: Test proxy connectivity with an actual HTTP request
// Use httpbin.org/ip to verify request goes through proxy
const proxyUrl = this.proxy.getProxyUrl(currentProxy);
const testUrl = 'https://httpbin.org/ip';
try {
const { default: axios } = await import('axios');
const { HttpsProxyAgent } = await import('https-proxy-agent');
const agent = new HttpsProxyAgent(proxyUrl);
const startTime = Date.now();
const response = await axios.get(testUrl, {
httpsAgent: agent,
timeout: 15000, // 15 second timeout
headers: {
'User-Agent': fingerprint.userAgent,
'Accept-Language': fingerprint.acceptLanguage,
...(fingerprint.secChUa && { 'sec-ch-ua': fingerprint.secChUa }),
...(fingerprint.secChUaPlatform && { 'sec-ch-ua-platform': fingerprint.secChUaPlatform }),
...(fingerprint.secChUaMobile && { 'sec-ch-ua-mobile': fingerprint.secChUaMobile }),
},
});
result.responseTimeMs = Date.now() - startTime;
result.proxyConnected = true;
result.passed = true;
// Mark success on proxy stats
await this.proxy.markSuccess(currentProxy.id, result.responseTimeMs);
console.log(`[Preflight] PASSED - Proxy ${currentProxy.host} connected (${result.responseTimeMs}ms), UA: ${fingerprint.browserName}/${fingerprint.deviceCategory}`);
} catch (err: any) {
result.error = `Proxy connection failed: ${err.message || 'Unknown error'}`;
console.log(`[Preflight] FAILED - Proxy connection error: ${err.message}`);
// Mark failure on proxy stats
await this.proxy.markFailed(currentProxy.id, err.message);
}
return result;
}
}
/**
* Result from preflight check
*/
export interface PreflightResult {
/** Overall pass/fail */
passed: boolean;
/** Step 1: Is a proxy loaded? */
proxyAvailable: boolean;
/** Step 3: Did HTTP request through proxy succeed? */
proxyConnected: boolean;
/** Step 2: Is fingerprint/anti-detect ready? */
antidetectReady: boolean;
/** Current proxy IP */
proxyIp: string | null;
/** Fingerprint summary */
fingerprint: { userAgent: string; browserName: string; deviceCategory: string } | null;
/** Error message if failed */
error: string | null;
/** Proxy response time in ms */
responseTimeMs: number | null;
}
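// Illustrative preflight gate (hypothetical wiring; not part of the original file):
//
//   const check = await rotator.preflight();
//   if (!check.passed) throw new Error(`Preflight failed: ${check.error}`);
//   console.log(`Exit IP ${check.proxyIp} verified in ${check.responseTimeMs}ms`);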
// ============================================================

View File

@@ -1,100 +0,0 @@
/**
* Curl Preflight - Verify curl/axios transport works through proxy
*
* Tests:
* 1. Proxy is available and active
* 2. HTTP request through proxy succeeds
* 3. Anti-detect headers are properly set
*
* Use case: Fast, simple API requests that don't need browser fingerprint
*/
import axios from 'axios';
import { HttpsProxyAgent } from 'https-proxy-agent';
import { CrawlRotator, PreflightResult } from './crawl-rotator';
export interface CurlPreflightResult extends PreflightResult {
method: 'curl';
}
/**
* Run curl preflight check
* Tests proxy connectivity using axios/curl through the proxy
*/
export async function runCurlPreflight(
crawlRotator: CrawlRotator
): Promise<CurlPreflightResult> {
const result: CurlPreflightResult = {
method: 'curl',
passed: false,
proxyAvailable: false,
proxyConnected: false,
antidetectReady: false,
proxyIp: null,
fingerprint: null,
error: null,
responseTimeMs: null,
};
// Step 1: Check proxy is available
const currentProxy = crawlRotator.proxy.getCurrent();
if (!currentProxy) {
result.error = 'No proxy available';
console.log('[CurlPreflight] FAILED - No proxy available');
return result;
}
result.proxyAvailable = true;
result.proxyIp = currentProxy.host;
// Step 2: Check fingerprint/anti-detect is ready
const fingerprint = crawlRotator.userAgent.getCurrent();
if (!fingerprint || !fingerprint.userAgent) {
result.error = 'Anti-detect fingerprint not initialized';
console.log('[CurlPreflight] FAILED - No fingerprint');
return result;
}
result.antidetectReady = true;
result.fingerprint = {
userAgent: fingerprint.userAgent,
browserName: fingerprint.browserName,
deviceCategory: fingerprint.deviceCategory,
};
// Step 3: Test proxy connectivity with an actual HTTP request
const proxyUrl = crawlRotator.proxy.getProxyUrl(currentProxy);
const testUrl = 'https://httpbin.org/ip';
try {
const agent = new HttpsProxyAgent(proxyUrl);
const startTime = Date.now();
const response = await axios.get(testUrl, {
httpsAgent: agent,
timeout: 15000, // 15 second timeout
headers: {
'User-Agent': fingerprint.userAgent,
'Accept-Language': fingerprint.acceptLanguage,
...(fingerprint.secChUa && { 'sec-ch-ua': fingerprint.secChUa }),
...(fingerprint.secChUaPlatform && { 'sec-ch-ua-platform': fingerprint.secChUaPlatform }),
...(fingerprint.secChUaMobile && { 'sec-ch-ua-mobile': fingerprint.secChUaMobile }),
},
});
result.responseTimeMs = Date.now() - startTime;
result.proxyConnected = true;
result.passed = true;
// Mark success on proxy stats
await crawlRotator.proxy.markSuccess(currentProxy.id, result.responseTimeMs);
console.log(`[CurlPreflight] PASSED - Proxy ${currentProxy.host} connected (${result.responseTimeMs}ms), UA: ${fingerprint.browserName}/${fingerprint.deviceCategory}`);
} catch (err: any) {
result.error = `Proxy connection failed: ${err.message || 'Unknown error'}`;
console.log(`[CurlPreflight] FAILED - Proxy connection error: ${err.message}`);
// Mark failure on proxy stats
await crawlRotator.proxy.markFailed(currentProxy.id, err.message);
}
return result;
}
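// Illustrative usage (hypothetical wiring; not part of the original file):
//
//   const check = await runCurlPreflight(rotator);
//   if (!check.passed) {
//     // skip the curl transport and fall back to the browser-based one
//   }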

View File

@@ -1,315 +0,0 @@
/**
* HTTP Fingerprinting Service
*
* Per workflow-12102025.md - HTTP Fingerprinting section:
* - Full header set per browser type
* - Browser-specific header ordering
* - Natural randomization (DNT, Accept quality)
* - Dynamic Referer per dispensary
*
* Canonical location: src/services/http-fingerprint.ts
*/
// ============================================================
// TYPES
// ============================================================
export type BrowserType = 'Chrome' | 'Firefox' | 'Safari' | 'Edge';
/**
* Per workflow-12102025.md: Full HTTP fingerprint for a session
*/
export interface HTTPFingerprint {
browserType: BrowserType;
headers: Record<string, string>;
headerOrder: string[];
curlImpersonateBinary: string;
hasDNT: boolean;
}
/**
* Per workflow-12102025.md: Context for building headers
*/
export interface HeaderContext {
userAgent: string;
secChUa?: string;
secChUaPlatform?: string;
secChUaMobile?: string;
referer: string;
isPost: boolean;
contentLength?: number;
}
// ============================================================
// CONSTANTS (per workflow-12102025.md)
// ============================================================
/**
* Per workflow-12102025.md: DNT header distribution (~30% of users)
*/
const DNT_PROBABILITY = 0.30;
/**
* Per workflow-12102025.md: Accept header variations for natural traffic
*/
const ACCEPT_VARIATIONS = [
'application/json, text/plain, */*',
'application/json,text/plain,*/*',
'*/*',
];
/**
* Per workflow-12102025.md: Accept-Language variations
*/
const ACCEPT_LANGUAGE_VARIATIONS = [
'en-US,en;q=0.9',
'en-US,en;q=0.8',
'en-US;q=0.9,en;q=0.8',
];
/**
* Per workflow-12102025.md: curl-impersonate binaries per browser
*/
const CURL_IMPERSONATE_BINARIES: Record<BrowserType, string> = {
Chrome: 'curl_chrome131',
Edge: 'curl_chrome131', // Edge uses Chromium
Firefox: 'curl_ff133',
Safari: 'curl_safari17',
};
// ============================================================
// HEADER ORDERING (per workflow-12102025.md)
// ============================================================
/**
* Per workflow-12102025.md: Chrome header order for GraphQL requests
*/
const CHROME_HEADER_ORDER = [
'Host',
'Connection',
'Content-Length',
'sec-ch-ua',
'DNT',
'sec-ch-ua-mobile',
'User-Agent',
'sec-ch-ua-platform',
'Content-Type',
'Accept',
'Origin',
'sec-fetch-site',
'sec-fetch-mode',
'sec-fetch-dest',
'Referer',
'Accept-Encoding',
'Accept-Language',
];
/**
* Per workflow-12102025.md: Firefox header order for GraphQL requests
*/
const FIREFOX_HEADER_ORDER = [
'Host',
'User-Agent',
'Accept',
'Accept-Language',
'Accept-Encoding',
'Content-Type',
'Content-Length',
'Origin',
'DNT',
'Connection',
'Referer',
'sec-fetch-dest',
'sec-fetch-mode',
'sec-fetch-site',
];
/**
* Per workflow-12102025.md: Safari header order for GraphQL requests
*/
const SAFARI_HEADER_ORDER = [
'Host',
'Connection',
'Content-Length',
'Accept',
'User-Agent',
'Content-Type',
'Origin',
'Referer',
'Accept-Encoding',
'Accept-Language',
];
/**
* Per workflow-12102025.md: Edge uses Chrome order (Chromium-based)
*/
const HEADER_ORDERS: Record<BrowserType, string[]> = {
Chrome: CHROME_HEADER_ORDER,
Edge: CHROME_HEADER_ORDER,
Firefox: FIREFOX_HEADER_ORDER,
Safari: SAFARI_HEADER_ORDER,
};
// ============================================================
// FINGERPRINT GENERATION
// ============================================================
/**
* Per workflow-12102025.md: Generate HTTP fingerprint for a session
* Randomization is done once per session for consistency
*/
export function generateHTTPFingerprint(browserType: BrowserType): HTTPFingerprint {
// Per workflow-12102025.md: DNT randomized per session (~30%)
const hasDNT = Math.random() < DNT_PROBABILITY;
return {
browserType,
headers: {}, // Built dynamically per request
headerOrder: HEADER_ORDERS[browserType],
curlImpersonateBinary: CURL_IMPERSONATE_BINARIES[browserType],
hasDNT,
};
}
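// Illustrative usage (assumes the exports above):
//
//   const fp = generateHTTPFingerprint('Chrome');
//   // fp.headerOrder is CHROME_HEADER_ORDER, fp.curlImpersonateBinary is
//   // 'curl_chrome131', and fp.hasDNT is true for roughly 30% of sessions.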
/**
* Per workflow-12102025.md: Build complete headers for a request
* Returns headers in browser-specific order
*/
export function buildOrderedHeaders(
fingerprint: HTTPFingerprint,
context: HeaderContext
): { headers: Record<string, string>; orderedHeaders: string[] } {
const { browserType, hasDNT, headerOrder } = fingerprint;
const { userAgent, secChUa, secChUaPlatform, secChUaMobile, referer, isPost, contentLength } = context;
// Per workflow-12102025.md: Natural randomization for Accept
const accept = ACCEPT_VARIATIONS[Math.floor(Math.random() * ACCEPT_VARIATIONS.length)];
const acceptLanguage = ACCEPT_LANGUAGE_VARIATIONS[Math.floor(Math.random() * ACCEPT_LANGUAGE_VARIATIONS.length)];
// Build all possible headers
const allHeaders: Record<string, string> = {
'Connection': 'keep-alive',
'User-Agent': userAgent,
'Accept': accept,
'Accept-Language': acceptLanguage,
'Accept-Encoding': 'gzip, deflate, br',
};
// Per workflow-12102025.md: POST-only headers
if (isPost) {
allHeaders['Content-Type'] = 'application/json';
allHeaders['Origin'] = 'https://dutchie.com';
if (contentLength !== undefined) {
allHeaders['Content-Length'] = String(contentLength);
}
}
// Per workflow-12102025.md: Dynamic Referer per dispensary
allHeaders['Referer'] = referer;
// Per workflow-12102025.md: DNT randomized per session
if (hasDNT) {
allHeaders['DNT'] = '1';
}
// Per workflow-12102025.md: Chromium-only headers (Chrome, Edge)
if (browserType === 'Chrome' || browserType === 'Edge') {
if (secChUa) allHeaders['sec-ch-ua'] = secChUa;
if (secChUaMobile) allHeaders['sec-ch-ua-mobile'] = secChUaMobile;
if (secChUaPlatform) allHeaders['sec-ch-ua-platform'] = secChUaPlatform;
allHeaders['sec-fetch-site'] = 'same-origin';
allHeaders['sec-fetch-mode'] = 'cors';
allHeaders['sec-fetch-dest'] = 'empty';
}
// Per workflow-12102025.md: Firefox has sec-fetch but no sec-ch
if (browserType === 'Firefox') {
allHeaders['sec-fetch-site'] = 'same-origin';
allHeaders['sec-fetch-mode'] = 'cors';
allHeaders['sec-fetch-dest'] = 'empty';
}
// Per workflow-12102025.md: Safari has no sec-* headers
// Filter to only headers that exist and order them
const orderedHeaders: string[] = [];
const headers: Record<string, string> = {};
for (const headerName of headerOrder) {
if (allHeaders[headerName]) {
orderedHeaders.push(headerName);
headers[headerName] = allHeaders[headerName];
}
}
return { headers, orderedHeaders };
}
/**
* Per workflow-12102025.md: Build curl command arguments for headers
* Headers are added in browser-specific order
*/
export function buildCurlHeaderArgs(
fingerprint: HTTPFingerprint,
context: HeaderContext
): string[] {
const { headers, orderedHeaders } = buildOrderedHeaders(fingerprint, context);
const args: string[] = [];
for (const headerName of orderedHeaders) {
// Skip Host and Content-Length - curl handles these
if (headerName === 'Host' || headerName === 'Content-Length') continue;
args.push('-H', `${headerName}: ${headers[headerName]}`);
}
return args;
}
/**
* Per workflow-12102025.md: Extract Referer from dispensary menu_url
*/
export function buildRefererFromMenuUrl(menuUrl: string | null | undefined): string {
if (!menuUrl) {
return 'https://dutchie.com/';
}
// Extract slug from menu_url
// Formats: /embedded-menu/<slug> or /dispensary/<slug> or full URL
let slug: string | null = null;
const embeddedMatch = menuUrl.match(/\/embedded-menu\/([^/?]+)/);
const dispensaryMatch = menuUrl.match(/\/dispensary\/([^/?]+)/);
if (embeddedMatch) {
slug = embeddedMatch[1];
} else if (dispensaryMatch) {
slug = dispensaryMatch[1];
}
if (slug) {
return `https://dutchie.com/dispensary/${slug}`;
}
return 'https://dutchie.com/';
}
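// Worked examples (illustrative slugs):
//   buildRefererFromMenuUrl('/embedded-menu/green-leaf')       -> 'https://dutchie.com/dispensary/green-leaf'
//   buildRefererFromMenuUrl('https://x.io/dispensary/abc?tab') -> 'https://dutchie.com/dispensary/abc'
//   buildRefererFromMenuUrl(null)                              -> 'https://dutchie.com/'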
/**
* Per workflow-12102025.md: Get curl-impersonate binary for browser
*/
export function getCurlBinary(browserType: BrowserType): string {
return CURL_IMPERSONATE_BINARIES[browserType];
}
/**
* Per workflow-12102025.md: Check if curl-impersonate is available
*/
export function isCurlImpersonateAvailable(browserType: BrowserType): boolean {
const binary = CURL_IMPERSONATE_BINARIES[browserType];
try {
const { execSync } = require('child_process');
execSync(`which ${binary}`, { stdio: 'ignore' });
return true;
} catch {
return false;
}
}

View File

@@ -1,290 +0,0 @@
/**
* Puppeteer Preflight - Verify browser-based transport works with anti-detect
*
* Uses Puppeteer + StealthPlugin to:
* 1. Launch headless browser with stealth mode + PROXY
* 2. Visit fingerprint.com demo to verify anti-detect and confirm proxy IP
* 3. Establish session by visiting Dutchie embedded menu
* 4. Make GraphQL request from browser context
* 5. Verify we get a valid response (not blocked)
*
* Use case: Anti-detect scraping that needs real browser fingerprint through proxy
*
* Based on test-intercept.js which successfully captures 1000+ products
*/
import { PreflightResult, CrawlRotator } from './crawl-rotator';
// GraphQL hash for FilteredProducts query - MUST match CLAUDE.md
const FILTERED_PRODUCTS_HASH = 'ee29c060826dc41c527e470e9ae502c9b2c169720faa0a9f5d25e1b9a530a4a0';
// Test dispensary - AZ-Deeply-Rooted (known working)
const TEST_CNAME = 'AZ-Deeply-Rooted';
const TEST_PLATFORM_ID = '6405ef617056e8014d79101b';
// Anti-detect verification sites (primary + fallback)
const FINGERPRINT_DEMO_URL = 'https://demo.fingerprint.com/';
const AMIUNIQUE_URL = 'https://amiunique.org/fingerprint';
// IP geolocation API for timezone lookup (free, no key required)
const IP_API_URL = 'http://ip-api.com/json';
/**
* Look up timezone from IP address using ip-api.com
* Returns IANA timezone (e.g., 'America/New_York') or null on failure
*/
async function getTimezoneFromIp(ip: string): Promise<{ timezone: string; city?: string; region?: string } | null> {
try {
const axios = require('axios');
const response = await axios.get(`${IP_API_URL}/${ip}?fields=status,timezone,city,regionName`, {
timeout: 5000,
});
if (response.data?.status === 'success' && response.data?.timezone) {
return {
timezone: response.data.timezone,
city: response.data.city,
region: response.data.regionName,
};
}
return null;
} catch (err: any) {
console.log(`[PuppeteerPreflight] IP geolocation lookup failed: ${err.message}`);
return null;
}
}
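// Illustrative call (documentation IP; response values are hypothetical):
//   const geo = await getTimezoneFromIp('203.0.113.7');
//   // -> { timezone: 'America/Phoenix', city: 'Phoenix', region: 'Arizona' } or null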
export interface PuppeteerPreflightResult extends PreflightResult {
method: 'http';
/** Number of products returned (proves API access) */
productsReturned?: number;
/** Browser user agent used */
browserUserAgent?: string;
/** Bot detection result from fingerprint.com */
botDetection?: {
detected: boolean;
probability?: number;
type?: string;
};
/** Expected proxy IP (from pool) */
expectedProxyIp?: string;
/** Whether IP verification passed (detected IP matches proxy) */
ipVerified?: boolean;
/** Detected timezone from IP geolocation */
detectedTimezone?: string;
/** Detected location from IP geolocation */
detectedLocation?: {
city?: string;
region?: string;
};
}
/**
* Run Puppeteer preflight check with proxy
* Tests browser-based access with anti-detect verification via fingerprint.com
*
* @param crawlRotator - CrawlRotator instance to get proxy from pool
*/
export async function runPuppeteerPreflight(
crawlRotator?: CrawlRotator
): Promise<PuppeteerPreflightResult> {
const result: PuppeteerPreflightResult = {
method: 'http',
passed: false,
proxyAvailable: false,
proxyConnected: false,
antidetectReady: false,
proxyIp: null,
fingerprint: null,
error: null,
responseTimeMs: null,
productsReturned: 0,
ipVerified: false,
};
let browser: any = null;
try {
// Step 0: Get a proxy from the pool
let proxyUrl: string | null = null;
let expectedProxyHost: string | null = null;
if (crawlRotator) {
const currentProxy = crawlRotator.proxy.getCurrent();
if (currentProxy) {
result.proxyAvailable = true;
proxyUrl = crawlRotator.proxy.getProxyUrl(currentProxy);
expectedProxyHost = currentProxy.host;
result.expectedProxyIp = expectedProxyHost;
console.log(`[PuppeteerPreflight] Using proxy: ${currentProxy.host}:${currentProxy.port}`);
} else {
result.error = 'No proxy available from pool';
console.log(`[PuppeteerPreflight] FAILED - No proxy available`);
return result;
}
} else {
console.log(`[PuppeteerPreflight] WARNING: No CrawlRotator provided - using direct connection`);
result.proxyAvailable = true; // No proxy needed for direct
}
// Dynamic imports to avoid loading Puppeteer unless needed
const puppeteer = require('puppeteer-extra');
const StealthPlugin = require('puppeteer-extra-plugin-stealth');
puppeteer.use(StealthPlugin());
const startTime = Date.now();
// Build browser args
const browserArgs = ['--no-sandbox', '--disable-setuid-sandbox'];
if (proxyUrl) {
// Extract host:port for Puppeteer (it handles auth separately)
const proxyUrlParsed = new URL(proxyUrl);
browserArgs.push(`--proxy-server=${proxyUrlParsed.host}`);
}
// Launch browser with stealth + proxy
browser = await puppeteer.launch({
headless: 'new',
args: browserArgs,
});
const page = await browser.newPage();
// If proxy has auth, set it up
if (proxyUrl) {
const proxyUrlParsed = new URL(proxyUrl);
if (proxyUrlParsed.username && proxyUrlParsed.password) {
await page.authenticate({
username: decodeURIComponent(proxyUrlParsed.username),
password: decodeURIComponent(proxyUrlParsed.password),
});
}
}
// Get browser user agent
const userAgent = await page.evaluate(() => navigator.userAgent);
result.browserUserAgent = userAgent;
result.fingerprint = {
userAgent,
browserName: 'Chrome (Puppeteer)',
deviceCategory: 'desktop',
};
// =========================================================================
// STEP 1a: Get IP address directly via simple API (more reliable than scraping)
// =========================================================================
console.log(`[PuppeteerPreflight] Getting proxy IP address...`);
try {
const ipApiResponse = await page.evaluate(async () => {
try {
const response = await fetch('https://api.ipify.org?format=json');
const data = await response.json();
return { ip: data.ip, error: null };
} catch (err: any) {
return { ip: null, error: err.message };
}
});
if (ipApiResponse.ip) {
result.proxyIp = ipApiResponse.ip;
result.proxyConnected = true;
console.log(`[PuppeteerPreflight] Detected proxy IP: ${ipApiResponse.ip}`);
// Look up timezone from IP
const geoData = await getTimezoneFromIp(ipApiResponse.ip);
if (geoData) {
result.detectedTimezone = geoData.timezone;
result.detectedLocation = { city: geoData.city, region: geoData.region };
console.log(`[PuppeteerPreflight] IP Geolocation: ${geoData.city}, ${geoData.region} (${geoData.timezone})`);
// Set browser timezone to match proxy location via CDP
try {
const client = await page.target().createCDPSession();
await client.send('Emulation.setTimezoneOverride', { timezoneId: geoData.timezone });
console.log(`[PuppeteerPreflight] Browser timezone set to: ${geoData.timezone}`);
} catch (tzErr: any) {
console.log(`[PuppeteerPreflight] Failed to set browser timezone: ${tzErr.message}`);
}
} else {
console.log(`[PuppeteerPreflight] WARNING: Could not determine timezone from IP - timezone mismatch possible`);
}
} else {
console.log(`[PuppeteerPreflight] IP lookup failed: ${ipApiResponse.error || 'unknown error'}`);
}
} catch (ipErr: any) {
console.log(`[PuppeteerPreflight] IP API error: ${ipErr.message}`);
}
// =========================================================================
// STEP 2: Preflight complete - proxy verified via ipify.org
// We skip heavy fingerprint.com/amiunique.org tests - just verify proxy works
// The actual Dutchie test happens at task time.
// =========================================================================
// If we got an IP from ipify.org, proxy is working
if (result.proxyIp) {
result.proxyConnected = true;
result.antidetectReady = true; // Assume stealth plugin is working
}
result.responseTimeMs = Date.now() - startTime;
// If we got here with proxyConnected=true and antidetectReady=true, we're good
if (result.proxyConnected && result.antidetectReady) {
result.passed = true;
console.log(
`[PuppeteerPreflight] PASSED - Proxy connected, anti-detect ready (${result.responseTimeMs}ms)`
);
if (result.proxyIp) {
console.log(`[PuppeteerPreflight] Browser IP via proxy: ${result.proxyIp}`);
}
} else if (result.proxyConnected) {
// Proxy works but anti-detect check failed - still pass (anti-detect is best-effort)
result.passed = true;
result.antidetectReady = true; // Assume ready since proxy works
console.log(
`[PuppeteerPreflight] PASSED - Proxy connected (anti-detect check skipped, ${result.responseTimeMs}ms)`
);
} else {
result.error = result.error || 'Proxy connection failed';
console.log(`[PuppeteerPreflight] FAILED - ${result.error}`);
}
} catch (err: any) {
result.error = `Browser error: ${err.message || 'Unknown error'}`;
console.log(`[PuppeteerPreflight] FAILED - ${result.error}`);
} finally {
if (browser) {
await browser.close().catch(() => {});
}
}
return result;
}
/**
* Run Puppeteer preflight with retry
* Retries once on failure to handle transient issues
*
* @param crawlRotator - CrawlRotator instance to get proxy from pool
* @param maxRetries - Number of retry attempts (default 1)
*/
export async function runPuppeteerPreflightWithRetry(
crawlRotator?: CrawlRotator,
maxRetries: number = 1
): Promise<PuppeteerPreflightResult> {
let lastResult: PuppeteerPreflightResult | null = null;
for (let attempt = 0; attempt <= maxRetries; attempt++) {
if (attempt > 0) {
console.log(`[PuppeteerPreflight] Retry attempt ${attempt}/${maxRetries}...`);
await new Promise((r) => setTimeout(r, 5000)); // Wait 5s between retries
}
lastResult = await runPuppeteerPreflight(crawlRotator);
if (lastResult.passed) {
return lastResult;
}
}
return lastResult!;
}
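// Illustrative gate around a task run (hypothetical wiring; not part of the original file):
//
//   const result = await runPuppeteerPreflightWithRetry(rotator, 1);
//   if (!result.passed) throw new Error(result.error ?? 'preflight failed');
//   // result.proxyIp / result.detectedTimezone describe the verified exit node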

View File

@@ -1,38 +1,116 @@
import cron from 'node-cron';
import { pool } from '../db/pool';
import { scrapeStore, scrapeCategory } from '../scraper-v2';
let scheduledJobs: cron.ScheduledTask[] = [];
async function getSettings(): Promise<{
scrapeIntervalHours: number;
scrapeSpecialsTime: string;
}> {
const result = await pool.query(`
SELECT key, value FROM settings
WHERE key IN ('scrape_interval_hours', 'scrape_specials_time')
`);
const settings: Record<string, string> = {};
result.rows.forEach((row: { key: string; value: string }) => {
settings[row.key] = row.value;
});
return {
scrapeIntervalHours: parseInt(settings.scrape_interval_hours || '4'),
scrapeSpecialsTime: settings.scrape_specials_time || '00:01'
};
}
async function scrapeAllStores(): Promise<void> {
console.log('🔄 Starting scheduled scrape for all stores...');
const result = await pool.query(`
SELECT id, name FROM stores WHERE active = true AND scrape_enabled = true
`);
for (const store of result.rows) {
try {
console.log(`Scraping store: ${store.name}`);
await scrapeStore(store.id);
} catch (error) {
console.error(`Failed to scrape store ${store.name}:`, error);
}
}
console.log('✅ Scheduled scrape completed');
}
async function scrapeSpecials(): Promise<void> {
console.log('🌟 Starting scheduled specials scrape...');
const result = await pool.query(`
SELECT s.id, s.name, c.id as category_id
FROM stores s
JOIN categories c ON c.store_id = s.id
WHERE s.active = true AND s.scrape_enabled = true
AND c.slug = 'specials' AND c.scrape_enabled = true
`);
for (const row of result.rows) {
try {
console.log(`Scraping specials for: ${row.name}`);
await scrapeCategory(row.id, row.category_id);
} catch (error) {
console.error(`Failed to scrape specials for ${row.name}:`, error);
}
}
console.log('✅ Specials scrape completed');
}
export async function startScheduler(): Promise<void> {
// Stop any existing jobs
stopScheduler();
const settings = await getSettings();
// Schedule regular store scrapes (every N hours)
const scrapeIntervalCron = `0 */${settings.scrapeIntervalHours} * * *`;
const storeJob = cron.schedule(scrapeIntervalCron, scrapeAllStores);
scheduledJobs.push(storeJob);
console.log(`📅 Scheduled store scraping: every ${settings.scrapeIntervalHours} hours`);
// Schedule specials scraping (daily at specified time)
const [hours, minutes] = settings.scrapeSpecialsTime.split(':');
const specialsCron = `${minutes} ${hours} * * *`;
const specialsJob = cron.schedule(specialsCron, scrapeSpecials);
scheduledJobs.push(specialsJob);
console.log(`📅 Scheduled specials scraping: daily at ${settings.scrapeSpecialsTime}`);
// Initial scrape on startup (after 10 seconds)
setTimeout(() => {
console.log('🚀 Running initial scrape...');
scrapeAllStores().catch(console.error);
}, 10000);
}
export function stopScheduler(): void {
scheduledJobs.forEach(job => job.stop());
scheduledJobs = [];
console.log('🛑 Scheduler stopped');
}
export async function restartScheduler(): Promise<void> {
console.log('🔄 Restarting scheduler...');
stopScheduler();
await startScheduler();
}
// Manual trigger functions for admin
export async function triggerStoreScrape(storeId: number): Promise<void> {
console.log(`🔧 Manual scrape triggered for store ID: ${storeId}`);
await scrapeStore(storeId);
}
export async function triggerAllStoresScrape(): Promise<void> {
console.log('🔧 Manual scrape triggered for all stores');
await scrapeAllStores();
}
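// Worked examples of the cron strings built above (default settings assumed):
//   scrape_interval_hours = 4       -> '0 */4 * * *' (minute 0 of every 4th hour)
//   scrape_specials_time  = '00:01' -> '1 0 * * *'   (daily at 00:01)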

View File

@@ -1,526 +0,0 @@
/**
* Database-Driven Task Scheduler
*
* Per TASK_WORKFLOW_2024-12-10.md:
* - Schedules stored in DB (survives restarts)
* - Uses SELECT FOR UPDATE to prevent duplicate execution across replicas
* - Polls every 60s to check if schedules are due
* - Generates tasks into worker_tasks table for task-worker.ts to process
*
* 2024-12-10: Created to replace legacy node-cron scheduler
*/
import { pool } from '../db/pool';
import { taskService, TaskRole } from '../tasks/task-service';
// Per TASK_WORKFLOW_2024-12-10.md: Poll interval for checking schedules
const POLL_INTERVAL_MS = 60_000; // 60 seconds
interface TaskSchedule {
id: number;
name: string;
role: TaskRole;
enabled: boolean;
interval_hours: number;
last_run_at: Date | null;
next_run_at: Date | null;
state_code: string | null;
priority: number;
method: 'curl' | 'http' | null;
is_immutable: boolean;
description: string | null;
platform: string | null;
last_task_count: number | null;
last_error: string | null;
}
class TaskScheduler {
private pollTimer: NodeJS.Timeout | null = null;
private isRunning = false;
/**
* Start the scheduler
* Per TASK_WORKFLOW_2024-12-10.md: Called on API server startup
*/
async start(): Promise<void> {
if (this.isRunning) {
console.log('[TaskScheduler] Already running');
return;
}
console.log('[TaskScheduler] Starting database-driven scheduler...');
this.isRunning = true;
// Per TASK_WORKFLOW_2024-12-10.md: On startup, recover stale tasks
try {
const recovered = await taskService.recoverStaleTasks(10);
if (recovered > 0) {
console.log(`[TaskScheduler] Recovered ${recovered} stale tasks from dead workers`);
}
} catch (err: any) {
console.error('[TaskScheduler] Failed to recover stale tasks:', err.message);
}
// Per TASK_WORKFLOW_2024-12-10.md: Ensure default schedules exist
await this.ensureDefaultSchedules();
// Per TASK_WORKFLOW_2024-12-10.md: Check immediately on startup
await this.checkAndRunDueSchedules();
// Per TASK_WORKFLOW_2024-12-10.md: Then poll every 60 seconds
this.pollTimer = setInterval(async () => {
await this.checkAndRunDueSchedules();
}, POLL_INTERVAL_MS);
console.log('[TaskScheduler] Started - polling every 60s');
}
/**
* Stop the scheduler
*/
stop(): void {
if (this.pollTimer) {
clearInterval(this.pollTimer);
this.pollTimer = null;
}
this.isRunning = false;
console.log('[TaskScheduler] Stopped');
}
/**
* Ensure default schedules exist in the database
* Per TASK_WORKFLOW_2024-12-10.md: Creates schedules if they don't exist
*
* NOTE: Per-state product_discovery schedules are created by migration 089.
* This only creates core immutable schedules that should exist regardless.
*/
private async ensureDefaultSchedules(): Promise<void> {
// Core schedules - all use HTTP transport for browser-based scraping
const defaults = [
{
name: 'store_discovery_dutchie',
role: 'store_discovery' as TaskRole,
interval_hours: 168, // Weekly
priority: 5,
description: 'Discover new Dutchie stores weekly (HTTP transport)',
method: 'http',
is_immutable: true,
platform: 'dutchie',
},
{
name: 'analytics_refresh',
role: 'analytics_refresh' as TaskRole,
interval_hours: 6,
priority: 0,
description: 'Refresh analytics materialized views every 6 hours',
method: 'http',
is_immutable: true,
platform: null,
},
];
for (const sched of defaults) {
try {
await pool.query(`
INSERT INTO task_schedules (name, role, interval_hours, priority, description, method, is_immutable, platform, enabled, next_run_at)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, true, NOW())
ON CONFLICT (name) DO UPDATE SET
method = EXCLUDED.method,
is_immutable = EXCLUDED.is_immutable
`, [sched.name, sched.role, sched.interval_hours, sched.priority, sched.description, sched.method, sched.is_immutable, sched.platform]);
} catch (err: any) {
// Table may not exist yet - will be created by migration
if (!err.message.includes('does not exist')) {
console.error(`[TaskScheduler] Failed to create default schedule ${sched.name}:`, err.message);
}
}
}
}
/**
* Check for and run any due schedules
* Per TASK_WORKFLOW_2024-12-10.md: Uses SELECT FOR UPDATE SKIP LOCKED to prevent duplicates
*/
private async checkAndRunDueSchedules(): Promise<void> {
const client = await pool.connect();
try {
await client.query('BEGIN');
// Per TASK_WORKFLOW_2024-12-10.md: Atomic claim of due schedules
const result = await client.query<TaskSchedule>(`
SELECT *
FROM task_schedules
WHERE enabled = true
AND (next_run_at IS NULL OR next_run_at <= NOW())
FOR UPDATE SKIP LOCKED
`);
for (const schedule of result.rows) {
console.log(`[TaskScheduler] Running schedule: ${schedule.name} (${schedule.role})`);
try {
const tasksCreated = await this.executeSchedule(schedule);
console.log(`[TaskScheduler] Schedule ${schedule.name} created ${tasksCreated} tasks`);
// Per TASK_WORKFLOW_2024-12-10.md: Update last_run_at and calculate next_run_at
await client.query(`
UPDATE task_schedules
SET
last_run_at = NOW(),
next_run_at = NOW() + ($1 || ' hours')::interval,
last_task_count = $2,
updated_at = NOW()
WHERE id = $3
`, [schedule.interval_hours, tasksCreated, schedule.id]);
} catch (err: any) {
console.error(`[TaskScheduler] Schedule ${schedule.name} failed:`, err.message);
// Still update next_run_at to prevent infinite retry loop
await client.query(`
UPDATE task_schedules
SET
next_run_at = NOW() + ($1 || ' hours')::interval,
last_error = $2,
updated_at = NOW()
WHERE id = $3
`, [schedule.interval_hours, err.message, schedule.id]);
}
}
await client.query('COMMIT');
} catch (err: any) {
await client.query('ROLLBACK');
console.error('[TaskScheduler] Failed to check schedules:', err.message);
} finally {
client.release();
}
}
/**
* Execute a schedule and create tasks
* Per TASK_WORKFLOW_2024-12-10.md: Different logic per role
*
* TRANSPORT MODES:
* - All schedules now use HTTP transport (Puppeteer/browser)
* - Per-state product_discovery schedules process one state at a time
* - Workers must pass HTTP preflight to claim HTTP tasks
*/
private async executeSchedule(schedule: TaskSchedule): Promise<number> {
switch (schedule.role) {
case 'product_discovery':
// Per-state product discovery using HTTP transport
return this.generateProductDiscoveryTasks(schedule);
case 'payload_fetch':
// DEPRECATED: Legacy payload_fetch redirects to product_discovery
console.log(`[TaskScheduler] payload_fetch is deprecated, using product_discovery instead`);
return this.generateProductDiscoveryTasks(schedule);
case 'product_refresh':
// DEPRECATED: Legacy product_refresh redirects to product_discovery
console.log(`[TaskScheduler] product_refresh is deprecated, using product_discovery instead`);
return this.generateProductDiscoveryTasks(schedule);
case 'store_discovery':
return this.generateStoreDiscoveryTasks(schedule);
case 'analytics_refresh':
return this.generateAnalyticsRefreshTasks(schedule);
default:
console.warn(`[TaskScheduler] Unknown role: ${schedule.role}`);
return 0;
}
}
/**
* Generate product_discovery tasks for stores in a specific state
* Uses HTTP transport (Puppeteer/browser) for all tasks
*
* Per-state scheduling allows:
* - Different crawl frequencies per state (e.g., AZ=4h, MI=6h)
* - Better rate limit management (one state at a time)
* - Easier debugging and monitoring per state
*/
private async generateProductDiscoveryTasks(schedule: TaskSchedule): Promise<number> {
// state_code is required for per-state schedules
if (!schedule.state_code) {
console.warn(`[TaskScheduler] Schedule ${schedule.name} has no state_code, skipping`);
return 0;
}
// Find stores in this state needing refresh
const result = await pool.query(`
SELECT d.id
FROM dispensaries d
JOIN states s ON d.state_id = s.id
WHERE d.crawl_enabled = true
AND d.platform_dispensary_id IS NOT NULL
AND s.code = $1
-- No pending/running product_discovery task already
AND NOT EXISTS (
SELECT 1 FROM worker_tasks t
WHERE t.dispensary_id = d.id
AND t.role = 'product_discovery'
AND t.status IN ('pending', 'claimed', 'running')
)
-- Never fetched OR last fetch > interval ago
AND (
d.last_fetch_at IS NULL
OR d.last_fetch_at < NOW() - ($2 || ' hours')::interval
)
ORDER BY d.last_fetch_at NULLS FIRST, d.id
`, [schedule.state_code, schedule.interval_hours]);
const dispensaryIds = result.rows.map((r: { id: number }) => r.id);
if (dispensaryIds.length === 0) {
console.log(`[TaskScheduler] No stores in ${schedule.state_code} need refresh`);
return 0;
}
console.log(`[TaskScheduler] Creating ${dispensaryIds.length} product_discovery tasks for ${schedule.state_code}`);
// Create product_discovery tasks with HTTP transport
// Stagger by 15 seconds to prevent overwhelming proxies
const { created } = await taskService.createStaggeredTasks(
dispensaryIds,
'product_discovery',
15, // 15 seconds apart
schedule.platform || 'dutchie',
'http' // Force HTTP transport
);
return created;
}
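// Illustrative effect of the 15s stagger (assuming createStaggeredTasks spaces
// eligibility by the given step): 40 stores -> the last task becomes claimable
// roughly 10 minutes after the first, smoothing load on the proxy pool.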
/**
* Generate store_discovery tasks
* Uses HTTP transport (Puppeteer/browser) for browser-based discovery
*/
private async generateStoreDiscoveryTasks(schedule: TaskSchedule): Promise<number> {
// Check if discovery task already pending
const existing = await taskService.listTasks({
role: 'store_discovery',
status: ['pending', 'claimed', 'running'],
limit: 1,
});
if (existing.length > 0) {
console.log('[TaskScheduler] Store discovery task already pending, skipping');
return 0;
}
await taskService.createTask({
role: 'store_discovery',
platform: schedule.platform || 'dutchie',
priority: schedule.priority,
method: 'http', // Force HTTP transport for browser-based discovery
});
return 1;
}
/**
* Generate analytics_refresh tasks
* Per TASK_WORKFLOW_2024-12-10.md: Single task to refresh all MVs
*/
private async generateAnalyticsRefreshTasks(schedule: TaskSchedule): Promise<number> {
// Check if analytics task already pending
const existing = await taskService.listTasks({
role: 'analytics_refresh',
status: ['pending', 'claimed', 'running'],
limit: 1,
});
if (existing.length > 0) {
console.log('[TaskScheduler] Analytics refresh task already pending, skipping');
return 0;
}
await taskService.createTask({
role: 'analytics_refresh',
priority: schedule.priority,
});
return 1;
}
/**
* Get all schedules for dashboard display
* Returns schedules with full metadata including immutability flag
*/
async getSchedules(): Promise<TaskSchedule[]> {
try {
const result = await pool.query(`
SELECT
id,
name,
role,
enabled,
interval_hours,
last_run_at,
next_run_at,
state_code,
priority,
method,
COALESCE(is_immutable, false) as is_immutable,
description,
platform,
last_task_count,
last_error,
created_at,
updated_at
FROM task_schedules
ORDER BY
CASE role
WHEN 'store_discovery' THEN 1
WHEN 'product_discovery' THEN 2
WHEN 'analytics_refresh' THEN 3
ELSE 4
END,
state_code NULLS FIRST,
name
`);
return result.rows as TaskSchedule[];
} catch {
return [];
}
}
/**
* Get a single schedule by ID
*/
async getSchedule(id: number): Promise<TaskSchedule | null> {
try {
const result = await pool.query(`
SELECT * FROM task_schedules WHERE id = $1
`, [id]);
return result.rows[0] as TaskSchedule || null;
} catch {
return null;
}
}
/**
* Update a schedule
* Allows updating: enabled, interval_hours, priority
* Does NOT allow updating: name, role, state_code, is_immutable
*/
async updateSchedule(id: number, updates: Partial<TaskSchedule>): Promise<void> {
const setClauses: string[] = [];
const values: any[] = [];
let paramIndex = 1;
if (updates.enabled !== undefined) {
setClauses.push(`enabled = $${paramIndex++}`);
values.push(updates.enabled);
}
if (updates.interval_hours !== undefined) {
setClauses.push(`interval_hours = $${paramIndex++}`);
values.push(updates.interval_hours);
}
if (updates.priority !== undefined) {
setClauses.push(`priority = $${paramIndex++}`);
values.push(updates.priority);
}
if (setClauses.length === 0) return;
setClauses.push('updated_at = NOW()');
values.push(id);
await pool.query(`
UPDATE task_schedules
SET ${setClauses.join(', ')}
WHERE id = $${paramIndex}
`, values);
}
/**
* Delete a schedule (only if not immutable)
* Returns true if deleted, false if immutable
*/
async deleteSchedule(id: number): Promise<{ deleted: boolean; reason?: string }> {
// Check if schedule is immutable
const result = await pool.query(`
SELECT name, is_immutable FROM task_schedules WHERE id = $1
`, [id]);
if (result.rows.length === 0) {
return { deleted: false, reason: 'Schedule not found' };
}
const schedule = result.rows[0];
if (schedule.is_immutable) {
return {
deleted: false,
reason: `Schedule "${schedule.name}" is immutable and cannot be deleted. You can disable it instead.`
};
}
await pool.query(`DELETE FROM task_schedules WHERE id = $1`, [id]);
return { deleted: true };
}
/**
* Trigger a schedule to run immediately
*/
async triggerSchedule(id: number): Promise<number> {
const result = await pool.query(`
SELECT * FROM task_schedules WHERE id = $1
`, [id]);
if (result.rows.length === 0) {
throw new Error(`Schedule ${id} not found`);
}
return this.executeSchedule(result.rows[0] as TaskSchedule);
}
/**
* Get schedule statistics for dashboard
*/
async getScheduleStats(): Promise<{
total: number;
enabled: number;
byRole: Record<string, number>;
byState: Record<string, number>;
}> {
try {
const result = await pool.query(`
SELECT
COUNT(*)::int as total,
SUM(CASE WHEN enabled THEN 1 ELSE 0 END)::int as enabled_count,
role,
state_code
FROM task_schedules
GROUP BY role, state_code
`);
let total = 0;
let enabled = 0;
const byRole: Record<string, number> = {};
const byState: Record<string, number> = {};
for (const row of result.rows) {
total += row.total;
enabled += row.enabled_count;
byRole[row.role] = (byRole[row.role] || 0) + row.total;
if (row.state_code) {
byState[row.state_code] = (byState[row.state_code] || 0) + row.total;
}
}
return { total, enabled, byRole, byState };
} catch {
return { total: 0, enabled: 0, byRole: {}, byState: {} };
}
}
}
// Per TASK_WORKFLOW_2024-12-10.md: Singleton instance
export const taskScheduler = new TaskScheduler();
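// Illustrative startup wiring (hypothetical server entry point):
//
//   import { taskScheduler } from './services/task-scheduler';
//   await taskScheduler.start(); // recovers stale tasks, seeds defaults, polls every 60s
//   process.on('SIGTERM', () => taskScheduler.stop());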

View File

@@ -1,30 +1,566 @@
/**
 * System API Routes (Stub)
 *
 * The full system routes depend on SyncOrchestrator which was moved to _deprecated.
 * This stub provides empty routers to maintain backward compatibility.
 *
 * Full implementation available at: src/_deprecated/system/routes/index.ts
 */
/**
 * System API Routes
 *
 * Provides REST API endpoints for system monitoring and control:
 * - /api/system/sync/* - Sync orchestrator
 * - /api/system/dlq/* - Dead-letter queue
 * - /api/system/integrity/* - Integrity checks
 * - /api/system/fix/* - Auto-fix routines
 * - /api/system/alerts/* - System alerts
 * - /metrics - Prometheus metrics
 *
 * Phase 5: Full Production Sync + Monitoring
 */
import { Router, Request, Response } from 'express';
import { Pool } from 'pg';
import { MetricsService } from '../services';
import {
  SyncOrchestrator,
  MetricsService,
  DLQService,
  AlertService,
  IntegrityService,
  AutoFixService,
} from '../services';
export function createSystemRouter(_pool: Pool): Router {
export function createSystemRouter(pool: Pool): Router {
  const router = Router();
  // Stub - full sync/dlq/integrity/fix/alerts routes moved to _deprecated
  router.get('/status', (_req: Request, res: Response) => {
    res.json({
      message: 'System routes temporarily disabled - see _deprecated/system/routes',
      status: 'stub',
    });
  });
  // Initialize services
  const metrics = new MetricsService(pool);
  const dlq = new DLQService(pool);
  const alerts = new AlertService(pool);
  const integrity = new IntegrityService(pool, alerts);
  const autoFix = new AutoFixService(pool, alerts);
  const orchestrator = new SyncOrchestrator(pool, metrics, dlq, alerts);
// ============================================================
// SYNC ORCHESTRATOR ENDPOINTS
// ============================================================
/**
* GET /api/system/sync/status
* Get current sync status
*/
router.get('/sync/status', async (_req: Request, res: Response) => {
try {
const status = await orchestrator.getStatus();
res.json(status);
} catch (error) {
console.error('[System] Sync status error:', error);
res.status(500).json({ error: 'Failed to get sync status' });
}
  });
/**
* POST /api/system/sync/run
* Trigger a sync run
*/
router.post('/sync/run', async (req: Request, res: Response) => {
try {
const triggeredBy = req.body.triggeredBy || 'api';
const result = await orchestrator.runSync();
res.json({
success: true,
triggeredBy,
metrics: result,
});
} catch (error) {
console.error('[System] Sync run error:', error);
res.status(500).json({
success: false,
error: error instanceof Error ? error.message : 'Sync run failed',
});
}
});
/**
* GET /api/system/sync/queue-depth
* Get queue depth information
*/
router.get('/sync/queue-depth', async (_req: Request, res: Response) => {
try {
const depth = await orchestrator.getQueueDepth();
res.json(depth);
} catch (error) {
console.error('[System] Queue depth error:', error);
res.status(500).json({ error: 'Failed to get queue depth' });
}
});
/**
* GET /api/system/sync/health
* Get sync health status
*/
router.get('/sync/health', async (_req: Request, res: Response) => {
try {
const health = await orchestrator.getHealth();
res.status(health.healthy ? 200 : 503).json(health);
} catch (error) {
console.error('[System] Health check error:', error);
res.status(500).json({ healthy: false, error: 'Health check failed' });
}
});
/**
* POST /api/system/sync/pause
* Pause the orchestrator
*/
router.post('/sync/pause', async (req: Request, res: Response) => {
try {
const reason = req.body.reason || 'Manual pause';
await orchestrator.pause(reason);
res.json({ success: true, message: 'Orchestrator paused' });
} catch (error) {
console.error('[System] Pause error:', error);
res.status(500).json({ error: 'Failed to pause orchestrator' });
}
});
/**
* POST /api/system/sync/resume
* Resume the orchestrator
*/
router.post('/sync/resume', async (_req: Request, res: Response) => {
try {
await orchestrator.resume();
res.json({ success: true, message: 'Orchestrator resumed' });
} catch (error) {
console.error('[System] Resume error:', error);
res.status(500).json({ error: 'Failed to resume orchestrator' });
}
});
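  // Illustrative client for the sync endpoints above (not part of the original
  // file; the base URL is an assumption):
  //
  //   const base = 'http://localhost:3000/api/system';
  //   await fetch(`${base}/sync/pause`, {
  //     method: 'POST',
  //     headers: { 'Content-Type': 'application/json' },
  //     body: JSON.stringify({ reason: 'maintenance window' }),
  //   });
  //   const health = await fetch(`${base}/sync/health`); // 200 when healthy, 503 otherwise
  //   await fetch(`${base}/sync/resume`, { method: 'POST' });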
// ============================================================
// DLQ ENDPOINTS
// ============================================================
/**
* GET /api/system/dlq
* List DLQ payloads
*/
router.get('/dlq', async (req: Request, res: Response) => {
try {
const options = {
status: req.query.status as string,
errorType: req.query.errorType as string,
dispensaryId: req.query.dispensaryId ? parseInt(req.query.dispensaryId as string) : undefined,
limit: req.query.limit ? parseInt(req.query.limit as string) : 50,
offset: req.query.offset ? parseInt(req.query.offset as string) : 0,
};
const result = await dlq.listPayloads(options);
res.json(result);
} catch (error) {
console.error('[System] DLQ list error:', error);
res.status(500).json({ error: 'Failed to list DLQ payloads' });
}
});
/**
* GET /api/system/dlq/stats
* Get DLQ statistics
*/
router.get('/dlq/stats', async (_req: Request, res: Response) => {
try {
const stats = await dlq.getStats();
res.json(stats);
} catch (error) {
console.error('[System] DLQ stats error:', error);
res.status(500).json({ error: 'Failed to get DLQ stats' });
}
});
/**
* GET /api/system/dlq/summary
* Get DLQ summary by error type
*/
router.get('/dlq/summary', async (_req: Request, res: Response) => {
try {
const summary = await dlq.getSummary();
res.json(summary);
} catch (error) {
console.error('[System] DLQ summary error:', error);
res.status(500).json({ error: 'Failed to get DLQ summary' });
}
});
/**
* GET /api/system/dlq/:id
* Get a specific DLQ payload
*/
router.get('/dlq/:id', async (req: Request, res: Response) => {
try {
const payload = await dlq.getPayload(req.params.id);
if (!payload) {
return res.status(404).json({ error: 'Payload not found' });
}
res.json(payload);
} catch (error) {
console.error('[System] DLQ get error:', error);
res.status(500).json({ error: 'Failed to get DLQ payload' });
}
});
/**
* POST /api/system/dlq/:id/retry
* Retry a DLQ payload
*/
router.post('/dlq/:id/retry', async (req: Request, res: Response) => {
try {
const result = await dlq.retryPayload(req.params.id);
if (result.success) {
res.json(result);
} else {
res.status(400).json(result);
}
} catch (error) {
console.error('[System] DLQ retry error:', error);
res.status(500).json({ error: 'Failed to retry payload' });
}
});
/**
* POST /api/system/dlq/:id/abandon
* Abandon a DLQ payload
*/
router.post('/dlq/:id/abandon', async (req: Request, res: Response) => {
try {
const reason = req.body.reason || 'Manually abandoned';
const abandonedBy = req.body.abandonedBy || 'api';
const success = await dlq.abandonPayload(req.params.id, reason, abandonedBy);
res.json({ success });
} catch (error) {
console.error('[System] DLQ abandon error:', error);
res.status(500).json({ error: 'Failed to abandon payload' });
}
});
/**
* POST /api/system/dlq/bulk-retry
* Bulk retry payloads by error type
*/
router.post('/dlq/bulk-retry', async (req: Request, res: Response) => {
try {
const { errorType } = req.body;
if (!errorType) {
return res.status(400).json({ error: 'errorType is required' });
}
const result = await dlq.bulkRetryByErrorType(errorType);
res.json(result);
} catch (error) {
console.error('[System] DLQ bulk retry error:', error);
res.status(500).json({ error: 'Failed to bulk retry' });
}
});
// ============================================================
// INTEGRITY CHECK ENDPOINTS
// ============================================================
/**
* POST /api/system/integrity/run
* Run all integrity checks
*/
router.post('/integrity/run', async (req: Request, res: Response) => {
try {
const triggeredBy = req.body.triggeredBy || 'api';
const result = await integrity.runAllChecks(triggeredBy);
res.json(result);
} catch (error) {
console.error('[System] Integrity run error:', error);
res.status(500).json({ error: 'Failed to run integrity checks' });
}
});
/**
* GET /api/system/integrity/runs
* Get recent integrity check runs
*/
router.get('/integrity/runs', async (req: Request, res: Response) => {
try {
const limit = req.query.limit ? parseInt(req.query.limit as string) : 10;
const runs = await integrity.getRecentRuns(limit);
res.json(runs);
} catch (error) {
console.error('[System] Integrity runs error:', error);
res.status(500).json({ error: 'Failed to get integrity runs' });
}
});
/**
* GET /api/system/integrity/runs/:runId
* Get results for a specific integrity run
*/
router.get('/integrity/runs/:runId', async (req: Request, res: Response) => {
try {
const results = await integrity.getRunResults(req.params.runId);
res.json(results);
} catch (error) {
console.error('[System] Integrity run results error:', error);
res.status(500).json({ error: 'Failed to get run results' });
}
});
// ============================================================
// AUTO-FIX ENDPOINTS
// ============================================================
/**
* GET /api/system/fix/routines
* Get available fix routines
*/
router.get('/fix/routines', (_req: Request, res: Response) => {
try {
const routines = autoFix.getAvailableRoutines();
res.json(routines);
} catch (error) {
console.error('[System] Get routines error:', error);
res.status(500).json({ error: 'Failed to get routines' });
}
});
/**
* POST /api/system/fix/:routine
* Run a fix routine
*/
router.post('/fix/:routine', async (req: Request, res: Response) => {
try {
const routineName = req.params.routine;
const dryRun = req.body.dryRun === true;
const triggeredBy = req.body.triggeredBy || 'api';
const result = await autoFix.runRoutine(routineName as any, triggeredBy, { dryRun });
res.json(result);
} catch (error) {
console.error('[System] Fix routine error:', error);
res.status(500).json({ error: 'Failed to run fix routine' });
}
});
/**
* GET /api/system/fix/runs
* Get recent fix runs
*/
router.get('/fix/runs', async (req: Request, res: Response) => {
try {
const limit = req.query.limit ? parseInt(req.query.limit as string) : 20;
const runs = await autoFix.getRecentRuns(limit);
res.json(runs);
} catch (error) {
console.error('[System] Fix runs error:', error);
res.status(500).json({ error: 'Failed to get fix runs' });
}
});
// ============================================================
// ALERTS ENDPOINTS
// ============================================================
/**
* GET /api/system/alerts
* List alerts
*/
router.get('/alerts', async (req: Request, res: Response) => {
try {
const options = {
status: req.query.status as any,
severity: req.query.severity as any,
type: req.query.type as string,
limit: req.query.limit ? parseInt(req.query.limit as string) : 50,
offset: req.query.offset ? parseInt(req.query.offset as string) : 0,
};
const result = await alerts.listAlerts(options);
res.json(result);
} catch (error) {
console.error('[System] Alerts list error:', error);
res.status(500).json({ error: 'Failed to list alerts' });
}
});
/**
* GET /api/system/alerts/active
* Get active alerts
*/
router.get('/alerts/active', async (_req: Request, res: Response) => {
try {
const activeAlerts = await alerts.getActiveAlerts();
res.json(activeAlerts);
} catch (error) {
console.error('[System] Active alerts error:', error);
res.status(500).json({ error: 'Failed to get active alerts' });
}
});
/**
* GET /api/system/alerts/summary
* Get alert summary
*/
router.get('/alerts/summary', async (_req: Request, res: Response) => {
try {
const summary = await alerts.getSummary();
res.json(summary);
} catch (error) {
console.error('[System] Alerts summary error:', error);
res.status(500).json({ error: 'Failed to get alerts summary' });
}
});
/**
* POST /api/system/alerts/:id/acknowledge
* Acknowledge an alert
*/
router.post('/alerts/:id/acknowledge', async (req: Request, res: Response) => {
try {
const alertId = parseInt(req.params.id);
const acknowledgedBy = req.body.acknowledgedBy || 'api';
const success = await alerts.acknowledgeAlert(alertId, acknowledgedBy);
res.json({ success });
} catch (error) {
console.error('[System] Acknowledge alert error:', error);
res.status(500).json({ error: 'Failed to acknowledge alert' });
}
});
/**
* POST /api/system/alerts/:id/resolve
* Resolve an alert
*/
router.post('/alerts/:id/resolve', async (req: Request, res: Response) => {
try {
const alertId = parseInt(req.params.id);
const resolvedBy = req.body.resolvedBy || 'api';
const success = await alerts.resolveAlert(alertId, resolvedBy);
res.json({ success });
} catch (error) {
console.error('[System] Resolve alert error:', error);
res.status(500).json({ error: 'Failed to resolve alert' });
}
});
/**
* POST /api/system/alerts/bulk-acknowledge
* Bulk acknowledge alerts
*/
router.post('/alerts/bulk-acknowledge', async (req: Request, res: Response) => {
try {
const { ids, acknowledgedBy } = req.body;
if (!ids || !Array.isArray(ids)) {
return res.status(400).json({ error: 'ids array is required' });
}
const count = await alerts.bulkAcknowledge(ids, acknowledgedBy || 'api');
res.json({ acknowledged: count });
} catch (error) {
console.error('[System] Bulk acknowledge error:', error);
res.status(500).json({ error: 'Failed to bulk acknowledge' });
}
});
// ============================================================
// METRICS ENDPOINTS
// ============================================================
/**
* GET /api/system/metrics
* Get all current metrics
*/
router.get('/metrics', async (_req: Request, res: Response) => {
try {
const allMetrics = await metrics.getAllMetrics();
res.json(allMetrics);
} catch (error) {
console.error('[System] Metrics error:', error);
res.status(500).json({ error: 'Failed to get metrics' });
}
});
/**
* GET /api/system/metrics/:name
* Get a specific metric
*/
router.get('/metrics/:name', async (req: Request, res: Response) => {
try {
const metric = await metrics.getMetric(req.params.name);
if (!metric) {
return res.status(404).json({ error: 'Metric not found' });
}
res.json(metric);
} catch (error) {
console.error('[System] Metric error:', error);
res.status(500).json({ error: 'Failed to get metric' });
}
});
/**
* GET /api/system/metrics/:name/history
* Get metric time series
*/
router.get('/metrics/:name/history', async (req: Request, res: Response) => {
try {
const hours = req.query.hours ? parseInt(req.query.hours as string) : 24;
const history = await metrics.getMetricHistory(req.params.name, hours);
res.json(history);
} catch (error) {
console.error('[System] Metric history error:', error);
res.status(500).json({ error: 'Failed to get metric history' });
}
});
/**
* GET /api/system/errors
* Get error summary
*/
router.get('/errors', async (_req: Request, res: Response) => {
try {
const summary = await metrics.getErrorSummary();
res.json(summary);
} catch (error) {
console.error('[System] Error summary error:', error);
res.status(500).json({ error: 'Failed to get error summary' });
}
});
/**
* GET /api/system/errors/recent
* Get recent errors
*/
router.get('/errors/recent', async (req: Request, res: Response) => {
try {
const limit = req.query.limit ? parseInt(req.query.limit as string) : 50;
const errorType = req.query.type as string;
const errors = await metrics.getRecentErrors(limit, errorType);
res.json(errors);
} catch (error) {
console.error('[System] Recent errors error:', error);
res.status(500).json({ error: 'Failed to get recent errors' });
}
});
/**
* POST /api/system/errors/acknowledge
* Acknowledge errors
*/
router.post('/errors/acknowledge', async (req: Request, res: Response) => {
try {
const { ids, acknowledgedBy } = req.body;
if (!ids || !Array.isArray(ids)) {
return res.status(400).json({ error: 'ids array is required' });
}
const count = await metrics.acknowledgeErrors(ids, acknowledgedBy || 'api');
res.json({ acknowledged: count });
} catch (error) {
console.error('[System] Acknowledge errors error:', error);
res.status(500).json({ error: 'Failed to acknowledge errors' });
}
  });
  return router;
}
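// Illustrative wiring in an Express app (not from this diff; app setup and the
// mount path are assumptions):
//   import express from 'express';
//   const app = express();
//   app.use(express.json());
//   app.use('/api/system', createSystemRouter(pool));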
/**
* Create Prometheus metrics endpoint (standalone)
*/
export function createPrometheusRouter(pool: Pool): Router {
  const router = Router();
  const metrics = new MetricsService(pool);

View File

@@ -4,7 +4,7 @@
 * Phase 5: Full Production Sync + Monitoring
 */
// SyncOrchestrator moved to _deprecated (depends on hydration module)
export { SyncOrchestrator, type SyncStatus, type QueueDepth, type SyncRunMetrics, type OrchestratorStatus } from './sync-orchestrator';
export { MetricsService, ERROR_TYPES, type Metric, type MetricTimeSeries, type ErrorBucket, type ErrorType } from './metrics';
export { DLQService, type DLQPayload, type DLQStats } from './dlq';
export { AlertService, type SystemAlert, type AlertSummary, type AlertSeverity, type AlertStatus } from './alerts';

View File

@@ -94,8 +94,7 @@ export async function handleEntryPointDiscovery(ctx: TaskContext): Promise<TaskR
    // ============================================================
    // STEP 3: Start stealth session
    // ============================================================
    // Per workflow-12102025.md: session identity comes from proxy location, not task params
    const session = startSession();
    const session = startSession(dispensary.state || 'AZ', 'America/Phoenix');
    console.log(`[EntryPointDiscovery] Session started: ${session.sessionId}`);
    try {

View File

@@ -2,18 +2,10 @@
 * Task Handlers Index
 *
 * Exports all task handlers for the task worker.
 *
 * Product Discovery:
 * - handleProductDiscoveryCurl: curl/axios based (for curl transport)
 * - handleProductDiscoveryHttp: Puppeteer browser-based (for http transport)
 */
export { handleProductDiscovery as handleProductDiscoveryCurl } from './product-discovery-curl';
export { handleProductDiscoveryHttp } from './product-discovery-http';
export { handlePayloadFetch as handlePayloadFetchCurl } from './payload-fetch-curl';
export { handleProductRefresh } from './product-refresh';
export { handleProductDiscovery } from './product-discovery';
export { handleStoreDiscovery } from './store-discovery';
export { handleStoreDiscoveryHttp } from './store-discovery-http';
export { handleEntryPointDiscovery } from './entry-point-discovery';
export { handleAnalyticsRefresh } from './analytics-refresh';
export { handleWhoami } from './whoami';

View File

@@ -1,221 +0,0 @@
/**
* Payload Fetch Handler
*
* Per TASK_WORKFLOW_2024-12-10.md: Separates API fetch from data processing.
*
* This handler ONLY:
* 1. Hits Dutchie GraphQL API
* 2. Saves raw payload to filesystem (gzipped)
* 3. Records metadata in raw_crawl_payloads table
* 4. Queues a product_refresh task to process the payload
*
* Benefits of separation:
* - Retry-friendly: If normalize fails, re-run refresh without re-crawling
* - Faster refreshes: Local file read vs network call
* - Replay-able: Run refresh against any historical payload
* - Less API pressure: Only this role hits Dutchie
*/
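// Illustrative chain (summarizing the docblock above; not part of the original file):
//   payload_fetch -> saveRawPayload()                            (API -> gzipped file + metadata row)
//                 -> createTask('product_refresh', { payload_id }) (file -> DB upsert)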
import { TaskContext, TaskResult } from '../task-worker';
import {
executeGraphQL,
startSession,
endSession,
GRAPHQL_HASHES,
DUTCHIE_CONFIG,
} from '../../platforms/dutchie';
import { saveRawPayload } from '../../utils/payload-storage';
import { taskService } from '../task-service';
export async function handlePayloadFetch(ctx: TaskContext): Promise<TaskResult> {
const { pool, task } = ctx;
const dispensaryId = task.dispensary_id;
if (!dispensaryId) {
return { success: false, error: 'No dispensary_id specified for payload_fetch task' };
}
try {
// ============================================================
// STEP 1: Load dispensary info
// ============================================================
const dispResult = await pool.query(`
SELECT
id, name, platform_dispensary_id, menu_url, menu_type, city, state
FROM dispensaries
WHERE id = $1 AND crawl_enabled = true
`, [dispensaryId]);
if (dispResult.rows.length === 0) {
return { success: false, error: `Dispensary ${dispensaryId} not found or not crawl_enabled` };
}
const dispensary = dispResult.rows[0];
const platformId = dispensary.platform_dispensary_id;
if (!platformId) {
return { success: false, error: `Dispensary ${dispensaryId} has no platform_dispensary_id` };
}
// Extract cName from menu_url
const cNameMatch = dispensary.menu_url?.match(/\/(?:embedded-menu|dispensary)\/([^/?]+)/);
const cName = cNameMatch ? cNameMatch[1] : 'dispensary';
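    // e.g. a menu_url like https://dutchie.com/embedded-menu/green-leaf-phx
    // yields cName "green-leaf-phx" (illustrative URL, not from the diff)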
console.log(`[PayloadFetch] Starting fetch for ${dispensary.name} (ID: ${dispensaryId})`);
console.log(`[PayloadFetch] Platform ID: ${platformId}, cName: ${cName}`);
// ============================================================
// STEP 2: Start stealth session
// ============================================================
const session = startSession();
console.log(`[PayloadFetch] Session started: ${session.sessionId}`);
await ctx.heartbeat();
// ============================================================
// STEP 3: Fetch products via GraphQL (Status: 'All')
// ============================================================
const allProducts: any[] = [];
let page = 0;
let totalCount = 0;
const perPage = DUTCHIE_CONFIG.perPage;
const maxPages = DUTCHIE_CONFIG.maxPages;
try {
while (page < maxPages) {
const variables = {
includeEnterpriseSpecials: false,
productsFilter: {
dispensaryId: platformId,
pricingType: 'rec',
Status: 'All',
types: [],
useCache: false,
isDefaultSort: true,
sortBy: 'popularSortIdx',
sortDirection: 1,
bypassOnlineThresholds: true,
isKioskMenu: false,
removeProductsBelowOptionThresholds: false,
},
page,
perPage,
};
console.log(`[PayloadFetch] Fetching page ${page + 1}...`);
const result = await executeGraphQL(
'FilteredProducts',
variables,
GRAPHQL_HASHES.FilteredProducts,
{ cName, maxRetries: 3 }
);
const data = result?.data?.filteredProducts;
if (!data || !data.products) {
if (page === 0) {
throw new Error('No product data returned from GraphQL');
}
break;
}
const products = data.products;
allProducts.push(...products);
if (page === 0) {
totalCount = data.queryInfo?.totalCount || products.length;
console.log(`[PayloadFetch] Total products reported: ${totalCount}`);
}
if (allProducts.length >= totalCount || products.length < perPage) {
break;
}
page++;
if (page < maxPages) {
await new Promise(r => setTimeout(r, DUTCHIE_CONFIG.pageDelayMs));
}
if (page % 5 === 0) {
await ctx.heartbeat();
}
}
console.log(`[PayloadFetch] Fetched ${allProducts.length} products in ${page + 1} pages`);
} finally {
endSession();
}
if (allProducts.length === 0) {
return {
success: false,
error: 'No products returned from GraphQL',
productsProcessed: 0,
};
}
await ctx.heartbeat();
// ============================================================
// STEP 4: Save raw payload to filesystem
// Per TASK_WORKFLOW_2024-12-10.md: Metadata/Payload separation
// ============================================================
const rawPayload = {
dispensaryId,
platformId,
cName,
fetchedAt: new Date().toISOString(),
productCount: allProducts.length,
products: allProducts,
};
const payloadResult = await saveRawPayload(
pool,
dispensaryId,
rawPayload,
null, // crawl_run_id - not using crawl_runs in new system
allProducts.length
);
console.log(`[PayloadFetch] Saved payload #${payloadResult.id} (${(payloadResult.sizeBytes / 1024).toFixed(1)}KB)`);
// ============================================================
// STEP 5: Update dispensary last_fetch_at
// ============================================================
await pool.query(`
UPDATE dispensaries
SET last_fetch_at = NOW()
WHERE id = $1
`, [dispensaryId]);
// ============================================================
// STEP 6: Queue product_refresh task to process the payload
// Per TASK_WORKFLOW_2024-12-10.md: Task chaining
// ============================================================
await taskService.createTask({
role: 'product_refresh',
dispensary_id: dispensaryId,
priority: task.priority || 0,
payload: { payload_id: payloadResult.id },
});
console.log(`[PayloadFetch] Queued product_refresh task for payload #${payloadResult.id}`);
return {
success: true,
payloadId: payloadResult.id,
productCount: allProducts.length,
sizeBytes: payloadResult.sizeBytes,
};
} catch (error: unknown) {
const errorMessage = error instanceof Error ? error.message : 'Unknown error';
console.error(`[PayloadFetch] Error for dispensary ${dispensaryId}:`, errorMessage);
return {
success: false,
error: errorMessage,
};
}
}

View File

@@ -1,31 +0,0 @@
/**
* Product Discovery Handler
*
* Per TASK_WORKFLOW_2024-12-10.md: Initial product fetch for newly discovered stores.
*
* Flow:
* 1. Triggered after store_discovery promotes a new dispensary
* 2. Chains to payload_fetch to get initial product data
* 3. payload_fetch chains to product_refresh for DB upsert
*
* Chaining:
* store_discovery → (newStoreIds) → product_discovery → payload_fetch → product_refresh
*/
import { TaskContext, TaskResult } from '../task-worker';
import { handlePayloadFetch } from './payload-fetch-curl';
export async function handleProductDiscovery(ctx: TaskContext): Promise<TaskResult> {
const { task } = ctx;
const dispensaryId = task.dispensary_id;
if (!dispensaryId) {
return { success: false, error: 'No dispensary_id provided' };
}
console.log(`[ProductDiscovery] Starting initial product discovery for dispensary ${dispensaryId}`);
// Per TASK_WORKFLOW_2024-12-10.md: Chain to payload_fetch for API → disk
// payload_fetch will then chain to product_refresh for disk → DB
return handlePayloadFetch(ctx);
}

Some files were not shown because too many files have changed in this diff.