fix(preflight): Correct parameter order and add IP/fingerprint reporting

- Fix update_worker_preflight call to use correct parameter order: (worker_id, transport, status, ip, response_ms, error, fingerprint)
- Add proxyIp to both curl and http preflight reports
- Add fingerprint JSONB with timezone, location, and bot detection data
- Log HTTP IP and timezone after preflight completes

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
Merge pull request 'feat(workers): Preflight phase 1 - Schema, StatefulSet, and timezone matching' (#55) from feat/preflight-phase1-schema into master
2025-12-12 00:32:45 -07:00 · 2025-12-12 07:30:53 +00:00 · 2025-12-12 00:25:39 -07:00 · 2025-12-12 07:14:40 +00:00 · 2025-12-11 23:45:04 -07:00 · 2025-12-12 06:22:06 +00:00
280 changed files with 45883 additions and 5292 deletions
--- a/.woodpecker/.ci.yml
+++ b/.woodpecker/.ci.yml
@@ -1,38 +1,76 @@
-when:
-  - event: [push, pull_request]
-
 steps:
-  # Build checks
+  # ===========================================
+  # PR VALIDATION: Parallel type checks (PRs only)
+  # ===========================================
  typecheck-backend:
-    image: node:20
+    image: code.cannabrands.app/creationshop/node:20
    commands:
      - cd backend
-      - npm ci
-      - npx tsc --noEmit || true
+      - npm ci --prefer-offline
+      - npx tsc --noEmit
+    depends_on: []
+    when:
+      event: pull_request

-  build-cannaiq:
-    image: node:20
+  typecheck-cannaiq:
+    image: code.cannabrands.app/creationshop/node:20
    commands:
      - cd cannaiq
-      - npm ci
+      - npm ci --prefer-offline
      - npx tsc --noEmit
-      - npm run build
+    depends_on: []
+    when:
+      event: pull_request

-  build-findadispo:
-    image: node:20
+  typecheck-findadispo:
+    image: code.cannabrands.app/creationshop/node:20
    commands:
      - cd findadispo/frontend
-      - npm ci
-      - npm run build
+      - npm ci --prefer-offline
+      - npx tsc --noEmit 2>/dev/null || true
+    depends_on: []
+    when:
+      event: pull_request

-  build-findagram:
-    image: node:20
+  typecheck-findagram:
+    image: code.cannabrands.app/creationshop/node:20
    commands:
      - cd findagram/frontend
-      - npm ci
-      - npm run build
+      - npm ci --prefer-offline
+      - npx tsc --noEmit 2>/dev/null || true
+    depends_on: []
+    when:
+      event: pull_request

-  # Docker builds - only on master
+  # ===========================================
+  # AUTO-MERGE: Merge PR after all checks pass
+  # ===========================================
+  auto-merge:
+    # Merges the pull request through the Gitea API once every typecheck
+    # step listed under depends_on has finished.
+    image: alpine:latest
+    environment:
+      GITEA_TOKEN:
+        from_secret: gitea_token
+    commands:
+      - apk add --no-cache curl
+      # --fail-with-body makes curl exit non-zero on an HTTP error (for
+      # example a rejected merge) while still printing the API response, so
+      # the step no longer reports success when nothing was merged.
+      # -S keeps curl's own error message visible despite -s.
+      - |
+        echo "Merging PR #${CI_COMMIT_PULL_REQUEST}..."
+        curl -sS --fail-with-body -X POST \
+          -H "Authorization: token $GITEA_TOKEN" \
+          -H "Content-Type: application/json" \
+          -d '{"Do":"merge"}' \
+          "https://code.cannabrands.app/api/v1/repos/Creationshop/dispensary-scraper/pulls/${CI_COMMIT_PULL_REQUEST}/merge"
+    depends_on:
+      - typecheck-backend
+      - typecheck-cannaiq
+      - typecheck-findadispo
+      - typecheck-findagram
+    when:
+      event: pull_request
+
+  # ===========================================
+  # MASTER DEPLOY: Parallel Docker builds
+  # NOTE: cache_from/cache_to removed due to plugin bug splitting on commas
+  # ===========================================
  docker-backend:
    image: woodpeckerci/plugin-docker-buildx
    settings:
@@ -49,6 +87,12 @@ steps:
        from_secret: registry_password
      platforms: linux/amd64
      provenance: false
+      build_args:
+        APP_BUILD_VERSION: ${CI_COMMIT_SHA:0:8}
+        APP_GIT_SHA: ${CI_COMMIT_SHA}
+        APP_BUILD_TIME: ${CI_PIPELINE_CREATED}
+        CONTAINER_IMAGE_TAG: ${CI_COMMIT_SHA:0:8}
+    depends_on: []
    when:
      branch: master
      event: push
@@ -69,6 +113,7 @@ steps:
        from_secret: registry_password
      platforms: linux/amd64
      provenance: false
+    depends_on: []
    when:
      branch: master
      event: push
@@ -89,6 +134,7 @@ steps:
        from_secret: registry_password
      platforms: linux/amd64
      provenance: false
+    depends_on: []
    when:
      branch: master
      event: push
@@ -109,32 +155,40 @@ steps:
        from_secret: registry_password
      platforms: linux/amd64
      provenance: false
+    depends_on: []
    when:
      branch: master
      event: push

-  # Deploy to Kubernetes
+  # ===========================================
+  # STAGE 3: Deploy and Run Migrations
+  # ===========================================
  deploy:
    image: bitnami/kubectl:latest
    environment:
      KUBECONFIG_CONTENT:
        from_secret: kubeconfig_data
    commands:
-      - echo "Deploying to Kubernetes..."
      - mkdir -p ~/.kube
      - echo "$KUBECONFIG_CONTENT" | tr -d '[:space:]' | base64 -d > ~/.kube/config
      - chmod 600 ~/.kube/config
+      # Deploy backend first
      - kubectl set image deployment/scraper scraper=code.cannabrands.app/creationshop/dispensary-scraper:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
-      - kubectl set image deployment/scraper-worker scraper-worker=code.cannabrands.app/creationshop/dispensary-scraper:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
+      - kubectl rollout status deployment/scraper -n dispensary-scraper --timeout=300s
+      # Note: Migrations run automatically at startup via auto-migrate
+      # Deploy remaining services
+      # Resilience: ensure workers are scaled up if at 0
+      - REPLICAS=$(kubectl get deployment scraper-worker -n dispensary-scraper -o jsonpath='{.spec.replicas}'); if [ "$REPLICAS" = "0" ]; then echo "Scaling workers from 0 to 5"; kubectl scale deployment/scraper-worker --replicas=5 -n dispensary-scraper; fi
+      - kubectl set image deployment/scraper-worker worker=code.cannabrands.app/creationshop/dispensary-scraper:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
      - kubectl set image deployment/cannaiq-frontend cannaiq-frontend=code.cannabrands.app/creationshop/cannaiq-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
      - kubectl set image deployment/findadispo-frontend findadispo-frontend=code.cannabrands.app/creationshop/findadispo-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
      - kubectl set image deployment/findagram-frontend findagram-frontend=code.cannabrands.app/creationshop/findagram-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
-      - kubectl rollout status deployment/scraper -n dispensary-scraper --timeout=300s
-      - kubectl rollout status deployment/scraper-worker -n dispensary-scraper --timeout=300s
      - kubectl rollout status deployment/cannaiq-frontend -n dispensary-scraper --timeout=120s
-      - kubectl rollout status deployment/findadispo-frontend -n dispensary-scraper --timeout=120s
-      - kubectl rollout status deployment/findagram-frontend -n dispensary-scraper --timeout=120s
-      - echo "All deployments complete!"
+    depends_on:
+      - docker-backend
+      - docker-cannaiq
+      - docker-findadispo
+      - docker-findagram
    when:
      branch: master
      event: push
--- a/.woodpecker/ci.yml
+++ b/.woodpecker/ci.yml
@@ -0,0 +1,191 @@
+steps:
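+  # ===========================================
+  # PR VALIDATION: Only typecheck changed projects
+  # NOTE(review): .woodpecker/.ci.yml defines steps with the same names
+  # (auto-merge, docker-*, deploy). If both files are picked up as
+  # workflows, merges and deploys would run twice - confirm only one is active.
+  # ===========================================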
+  typecheck-backend:
+    # Type-checks backend/ with tsc on pull requests that touch backend/**.
+    image: code.cannabrands.app/creationshop/node:20
+    # No step dependencies
+    depends_on: []
+    when:
+      event: pull_request
+      path:
+        include:
+          - 'backend/**'
+    volumes:
+      - npm-cache:/npm-cache
+    commands:
+      # Point npm at a per-project cache directory on the npm-cache volume
+      - npm config set cache /npm-cache/backend --global
+      - cd backend
+      - npm ci --prefer-offline
+      - npx tsc --noEmit
+
+  typecheck-cannaiq:
+    # Type-checks cannaiq/ with tsc on pull requests that touch cannaiq/**.
+    image: code.cannabrands.app/creationshop/node:20
+    # No step dependencies
+    depends_on: []
+    when:
+      event: pull_request
+      path:
+        include:
+          - 'cannaiq/**'
+    volumes:
+      - npm-cache:/npm-cache
+    commands:
+      # Point npm at a per-project cache directory on the npm-cache volume
+      - npm config set cache /npm-cache/cannaiq --global
+      - cd cannaiq
+      - npm ci --prefer-offline
+      - npx tsc --noEmit
+
+  # findadispo/findagram typechecks are skipped here: in .woodpecker/.ci.yml
+  # they run with "|| true", so they could never fail the pipeline anyway.
+
+  # ===========================================
+  # AUTO-MERGE: Merge PR after all checks pass
+  # ===========================================
+  auto-merge:
+    # Merges the pull request through the Gitea API once the typecheck
+    # steps listed under depends_on have finished.
+    image: alpine:latest
+    environment:
+      GITEA_TOKEN:
+        from_secret: gitea_token
+    commands:
+      - apk add --no-cache curl
+      # --fail-with-body makes curl exit non-zero on an HTTP error (for
+      # example a rejected merge) while still printing the API response, so
+      # the step no longer reports success when nothing was merged.
+      # -S keeps curl's own error message visible despite -s.
+      - |
+        echo "Merging PR #${CI_COMMIT_PULL_REQUEST}..."
+        curl -sS --fail-with-body -X POST \
+          -H "Authorization: token $GITEA_TOKEN" \
+          -H "Content-Type: application/json" \
+          -d '{"Do":"merge"}' \
+          "https://code.cannabrands.app/api/v1/repos/Creationshop/dispensary-scraper/pulls/${CI_COMMIT_PULL_REQUEST}/merge"
+    depends_on:
+      - typecheck-backend
+      - typecheck-cannaiq
+    when:
+      event: pull_request
+
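+  # ===========================================
+  # MASTER DEPLOY: Parallel Docker builds
+  # NOTE(review): .woodpecker/.ci.yml dropped cache_from/cache_to because of
+  # a plugin bug that splits values on commas; the cache settings below still
+  # contain commas - confirm they reach buildx intact.
+  # ===========================================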
+  docker-backend:
+    # Builds backend/Dockerfile and pushes it to the dispensary-scraper repo,
+    # tagged `latest` and with the first 8 characters of the commit SHA.
+    # Runs on pushes to master only.
+    image: woodpeckerci/plugin-docker-buildx
+    settings:
+      registry: code.cannabrands.app
+      repo: code.cannabrands.app/creationshop/dispensary-scraper
+      tags:
+        - latest
+        # Quoted so the substituted value is always a string: an all-digit
+        # SHA prefix (or one shaped like 1234e567) would otherwise be read
+        # as a number and could yield a different tag than deploy expects.
+        - "${CI_COMMIT_SHA:0:8}"
+      dockerfile: backend/Dockerfile
+      context: backend
+      username:
+        from_secret: registry_username
+      password:
+        from_secret: registry_password
+      platforms: linux/amd64
+      provenance: false
+      cache_from: type=registry,ref=code.cannabrands.app/creationshop/dispensary-scraper:cache
+      cache_to: type=registry,ref=code.cannabrands.app/creationshop/dispensary-scraper:cache,mode=max
+      # Version metadata handed to the Dockerfile as build arguments;
+      # quoted for the same reason as the SHA tag above.
+      build_args:
+        APP_BUILD_VERSION: "${CI_COMMIT_SHA:0:8}"
+        APP_GIT_SHA: "${CI_COMMIT_SHA}"
+        APP_BUILD_TIME: "${CI_PIPELINE_CREATED}"
+        CONTAINER_IMAGE_TAG: "${CI_COMMIT_SHA:0:8}"
+    depends_on: []
+    when:
+      branch: master
+      event: push
+
+  docker-cannaiq:
+    # Builds cannaiq/Dockerfile and pushes it to the cannaiq-frontend repo,
+    # tagged `latest` and with the first 8 characters of the commit SHA.
+    # Runs on pushes to master only.
+    image: woodpeckerci/plugin-docker-buildx
+    settings:
+      registry: code.cannabrands.app
+      repo: code.cannabrands.app/creationshop/cannaiq-frontend
+      tags:
+        - latest
+        # Quoted so the substituted value is always a string: an all-digit
+        # SHA prefix (or one shaped like 1234e567) would otherwise be read
+        # as a number and could yield a different tag than deploy expects.
+        - "${CI_COMMIT_SHA:0:8}"
+      dockerfile: cannaiq/Dockerfile
+      context: cannaiq
+      username:
+        from_secret: registry_username
+      password:
+        from_secret: registry_password
+      platforms: linux/amd64
+      provenance: false
+      cache_from: type=registry,ref=code.cannabrands.app/creationshop/cannaiq-frontend:cache
+      cache_to: type=registry,ref=code.cannabrands.app/creationshop/cannaiq-frontend:cache,mode=max
+    depends_on: []
+    when:
+      branch: master
+      event: push
+
+  docker-findadispo:
+    # Builds findadispo/frontend/Dockerfile and pushes it to the
+    # findadispo-frontend repo, tagged `latest` and with the first 8
+    # characters of the commit SHA. Runs on pushes to master only.
+    image: woodpeckerci/plugin-docker-buildx
+    settings:
+      registry: code.cannabrands.app
+      repo: code.cannabrands.app/creationshop/findadispo-frontend
+      tags:
+        - latest
+        # Quoted so the substituted value is always a string: an all-digit
+        # SHA prefix (or one shaped like 1234e567) would otherwise be read
+        # as a number and could yield a different tag than deploy expects.
+        - "${CI_COMMIT_SHA:0:8}"
+      dockerfile: findadispo/frontend/Dockerfile
+      context: findadispo/frontend
+      username:
+        from_secret: registry_username
+      password:
+        from_secret: registry_password
+      platforms: linux/amd64
+      provenance: false
+      cache_from: type=registry,ref=code.cannabrands.app/creationshop/findadispo-frontend:cache
+      cache_to: type=registry,ref=code.cannabrands.app/creationshop/findadispo-frontend:cache,mode=max
+    depends_on: []
+    when:
+      branch: master
+      event: push
+
+  docker-findagram:
+    # Builds findagram/frontend/Dockerfile and pushes it to the
+    # findagram-frontend repo, tagged `latest` and with the first 8
+    # characters of the commit SHA. Runs on pushes to master only.
+    image: woodpeckerci/plugin-docker-buildx
+    settings:
+      registry: code.cannabrands.app
+      repo: code.cannabrands.app/creationshop/findagram-frontend
+      tags:
+        - latest
+        # Quoted so the substituted value is always a string: an all-digit
+        # SHA prefix (or one shaped like 1234e567) would otherwise be read
+        # as a number and could yield a different tag than deploy expects.
+        - "${CI_COMMIT_SHA:0:8}"
+      dockerfile: findagram/frontend/Dockerfile
+      context: findagram/frontend
+      username:
+        from_secret: registry_username
+      password:
+        from_secret: registry_password
+      platforms: linux/amd64
+      provenance: false
+      cache_from: type=registry,ref=code.cannabrands.app/creationshop/findagram-frontend:cache
+      cache_to: type=registry,ref=code.cannabrands.app/creationshop/findagram-frontend:cache,mode=max
+    depends_on: []
+    when:
+      branch: master
+      event: push
+
+  # ===========================================
+  # STAGE 3: Deploy and Run Migrations
+  # ===========================================
+  deploy:
+    # Points the Kubernetes deployments in the dispensary-scraper namespace
+    # at the images tagged with this commit's 8-character SHA prefix.
+    # Runs on pushes to master, after all four docker-* steps.
+    image: bitnami/kubectl:latest
+    depends_on:
+      - docker-backend
+      - docker-cannaiq
+      - docker-findadispo
+      - docker-findagram
+    when:
+      branch: master
+      event: push
+    environment:
+      KUBECONFIG_CONTENT:
+        from_secret: kubeconfig_data
+    commands:
+      # Recreate ~/.kube/config from the secret: strip all whitespace,
+      # base64-decode, then restrict the file to the current user.
+      - mkdir -p ~/.kube
+      - echo "$KUBECONFIG_CONTENT" | tr -d '[:space:]' | base64 -d > ~/.kube/config
+      - chmod 600 ~/.kube/config
+      # Backend goes first and must finish rolling out (up to 300s)
+      - kubectl set image deployment/scraper scraper=code.cannabrands.app/creationshop/dispensary-scraper:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
+      - kubectl rollout status deployment/scraper -n dispensary-scraper --timeout=300s
+      # Note: Migrations run automatically at startup via auto-migrate
+      # Resilience: if the worker deployment sits at 0 replicas, scale it to 5
+      - |
+        REPLICAS=$(kubectl get deployment scraper-worker -n dispensary-scraper -o jsonpath='{.spec.replicas}')
+        if [ "$REPLICAS" = "0" ]; then
+          echo "Scaling workers from 0 to 5"
+          kubectl scale deployment/scraper-worker --replicas=5 -n dispensary-scraper
+        fi
+      # Remaining services; only the cannaiq-frontend rollout is awaited
+      - kubectl set image deployment/scraper-worker worker=code.cannabrands.app/creationshop/dispensary-scraper:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
+      - kubectl set image deployment/cannaiq-frontend cannaiq-frontend=code.cannabrands.app/creationshop/cannaiq-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
+      - kubectl set image deployment/findadispo-frontend findadispo-frontend=code.cannabrands.app/creationshop/findadispo-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
+      - kubectl set image deployment/findagram-frontend findagram-frontend=code.cannabrands.app/creationshop/findagram-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
+      - kubectl rollout status deployment/cannaiq-frontend -n dispensary-scraper --timeout=120s
--- a/CLAUDE.md
+++ b/CLAUDE.md
--- a/backend/.gitignore
+++ b/backend/.gitignore
@@ -0,0 +1,3 @@
+
+# IP2Location database (downloaded separately)
+data/ip2location/
--- a/backend/Dockerfile
+++ b/backend/Dockerfile
@@ -1,17 +1,17 @@
 # Build stage
 # Image: code.cannabrands.app/creationshop/dispensary-scraper
-FROM node:20-slim AS builder
+FROM code.cannabrands.app/creationshop/node:20-slim AS builder

 WORKDIR /app

 COPY package*.json ./
-RUN npm ci
+RUN npm install

 COPY . .
 RUN npm run build

 # Production stage
-FROM node:20-slim
+FROM code.cannabrands.app/creationshop/node:20-slim

 # Build arguments for version info
 ARG APP_BUILD_VERSION=dev
@@ -25,8 +25,9 @@ ENV APP_GIT_SHA=${APP_GIT_SHA}
 ENV APP_BUILD_TIME=${APP_BUILD_TIME}
 ENV CONTAINER_IMAGE_TAG=${CONTAINER_IMAGE_TAG}

-# Install Chromium dependencies
+# Install Chromium dependencies and curl for HTTP requests
 RUN apt-get update && apt-get install -y \
+    curl \
    chromium \
    fonts-liberation \
    libnss3 \
@@ -43,10 +44,13 @@ ENV PUPPETEER_EXECUTABLE_PATH=/usr/bin/chromium
 WORKDIR /app

 COPY package*.json ./
-RUN npm ci --omit=dev
+RUN npm install --omit=dev

 COPY --from=builder /app/dist ./dist

+# Copy migrations for auto-migrate on startup
+COPY migrations ./migrations
+
 # Create local images directory for when MinIO is not configured
 RUN mkdir -p /app/public/images/products

--- a/backend/docs/CODEBASE_MAP.md
+++ b/backend/docs/CODEBASE_MAP.md
@@ -0,0 +1,218 @@
+# CannaiQ Backend Codebase Map
+
+**Last Updated:** 2025-12-12
+**Purpose:** Help Claude and developers understand which code is current vs deprecated
+
+---
+
+## Quick Reference: What to Use
+
+### For Crawling/Scraping
+| Task | Use This | NOT This |
+|------|----------|----------|
+| Fetch products | `src/tasks/handlers/payload-fetch.ts` | `src/hydration/*` |
+| Process products | `src/tasks/handlers/product-refresh.ts` | `src/scraper-v2/*` |
+| GraphQL client | `src/platforms/dutchie/client.ts` | `src/dutchie-az/services/graphql-client.ts` |
+| Worker system | `src/tasks/task-worker.ts` | `src/dutchie-az/services/worker.ts` |
+
+### For Database
+| Task | Use This | NOT This |
+|------|----------|----------|
+| Get DB pool | `src/db/pool.ts` | `src/dutchie-az/db/connection.ts` |
+| Run migrations | `src/db/migrate.ts` (CLI only) | Never import at runtime |
+| Query products | `store_products` table | `products`, `dutchie_products` |
+| Query stores | `dispensaries` table | `stores` table |
+
+### For Discovery
+| Task | Use This |
+|------|----------|
+| Discover stores | `src/discovery/*.ts` |
+| Run discovery | `npx tsx src/scripts/run-discovery.ts` |
+
+---
+
+## Directory Status
+
+### ACTIVE DIRECTORIES (Use These)
+
+```
+src/
+├── auth/               # JWT/session auth, middleware
+├── db/                 # Database pool, migrations
+├── discovery/          # Dutchie store discovery pipeline
+├── middleware/         # Express middleware
+├── multi-state/        # Multi-state query support
+├── platforms/          # Platform-specific clients (Dutchie, Jane, etc)
+│   └── dutchie/        # THE Dutchie client - use this one
+├── routes/             # Express API routes
+├── services/           # Core services (logger, scheduler, etc)
+├── tasks/              # Task system (workers, handlers, scheduler)
+│   └── handlers/       # Task handlers (payload_fetch, product_refresh, etc)
+├── types/              # TypeScript types
+└── utils/              # Utilities (storage, image processing)
+```
+
+### DEPRECATED DIRECTORIES (DO NOT USE)
+
+```
+src/
+├── hydration/          # DEPRECATED - Old pipeline approach
+├── scraper-v2/         # DEPRECATED - Old scraper engine
+├── canonical-hydration/# DEPRECATED - Merged into tasks/handlers
+├── dutchie-az/         # PARTIAL - Some parts deprecated, some active
+│   ├── db/             # DEPRECATED - Use src/db/pool.ts
+│   └── services/       # PARTIAL - worker.ts still runs, graphql-client.ts deprecated
+├── portals/            # FUTURE - Not yet implemented
+├── seo/                # PARTIAL - Settings work, templates WIP
+└── system/             # DEPRECATED - Old orchestration system
+```
+
+### DEPRECATED FILES (DO NOT USE)
+
+```
+src/dutchie-az/db/connection.ts      # Use src/db/pool.ts instead
+src/dutchie-az/services/graphql-client.ts  # Use src/platforms/dutchie/client.ts
+src/hydration/*.ts                   # Entire directory deprecated
+src/scraper-v2/*.ts                  # Entire directory deprecated
+```
+
+---
+
+## Key Files Reference
+
+### Entry Points
+| File | Purpose | Status |
+|------|---------|--------|
+| `src/index.ts` | Main Express server | ACTIVE |
+| `src/dutchie-az/services/worker.ts` | Worker process entry | ACTIVE |
+| `src/tasks/task-worker.ts` | Task worker (new system) | ACTIVE |
+
+### Dutchie Integration
+| File | Purpose | Status |
+|------|---------|--------|
+| `src/platforms/dutchie/client.ts` | GraphQL client, hashes, curl | **PRIMARY** |
+| `src/platforms/dutchie/queries.ts` | High-level query functions | ACTIVE |
+| `src/platforms/dutchie/index.ts` | Re-exports | ACTIVE |
+
+### Task Handlers
+| File | Purpose | Status |
+|------|---------|--------|
+| `src/tasks/handlers/payload-fetch.ts` | Fetch products from Dutchie | **PRIMARY** |
+| `src/tasks/handlers/product-refresh.ts` | Process payload into DB | **PRIMARY** |
+| `src/tasks/handlers/menu-detection.ts` | Detect menu type | ACTIVE |
+| `src/tasks/handlers/id-resolution.ts` | Resolve platform IDs | ACTIVE |
+| `src/tasks/handlers/image-download.ts` | Download product images | ACTIVE |
+
+### Database
+| File | Purpose | Status |
+|------|---------|--------|
+| `src/db/pool.ts` | Canonical DB pool | **PRIMARY** |
+| `src/db/migrate.ts` | Migration runner (CLI only) | CLI ONLY |
+| `src/db/auto-migrate.ts` | Auto-run migrations on startup | ACTIVE |
+
+### Configuration
+| File | Purpose | Status |
+|------|---------|--------|
+| `.env` | Environment variables | ACTIVE |
+| `package.json` | Dependencies | ACTIVE |
+| `tsconfig.json` | TypeScript config | ACTIVE |
+
+---
+
+## GraphQL Hashes (CRITICAL)
+
+The correct hashes are in `src/platforms/dutchie/client.ts`:
+
+```typescript
+export const GRAPHQL_HASHES = {
+  FilteredProducts: 'ee29c060826dc41c527e470e9ae502c9b2c169720faa0a9f5d25e1b9a530a4a0',
+  GetAddressBasedDispensaryData: '13461f73abf7268770dfd05fe7e10c523084b2bb916a929c08efe3d87531977b',
+  ConsumerDispensaries: '0a5bfa6ca1d64ae47bcccb7c8077c87147cbc4e6982c17ceec97a2a4948b311b',
+  GetAllCitiesByState: 'ae547a0466ace5a48f91e55bf6699eacd87e3a42841560f0c0eabed5a0a920e6',
+};
+```
+
+**ALWAYS** use `Status: 'Active'` for FilteredProducts (not `null` or `'All'`).
+
+---
+
+## Scripts Reference
+
+### Useful Scripts (in `src/scripts/`)
+| Script | Purpose |
+|--------|---------|
+| `run-discovery.ts` | Run Dutchie discovery |
+| `crawl-single-store.ts` | Test crawl a single store |
+| `test-dutchie-graphql.ts` | Test GraphQL queries |
+
+### One-Off Scripts (you probably won't need these)
+| Script | Purpose |
+|--------|---------|
+| `harmonize-az-dispensaries.ts` | One-time data cleanup |
+| `bootstrap-stores-for-dispensaries.ts` | One-time migration |
+| `backfill-*.ts` | Historical backfill scripts |
+
+---
+
+## API Routes
+
+### Active Routes (in `src/routes/`)
+| Route File | Mount Point | Purpose |
+|------------|-------------|---------|
+| `auth.ts` | `/api/auth` | Login/logout/session |
+| `stores.ts` | `/api/stores` | Store CRUD |
+| `dashboard.ts` | `/api/dashboard` | Dashboard stats |
+| `workers.ts` | `/api/workers` | Worker monitoring |
+| `pipeline.ts` | `/api/pipeline` | Crawl triggers |
+| `discovery.ts` | `/api/discovery` | Discovery management |
+| `analytics.ts` | `/api/analytics` | Analytics queries |
+| `wordpress.ts` | `/api/v1/wordpress` | WordPress plugin API |
+
+---
+
+## Documentation Files
+
+### Current Docs (in `backend/docs/`)
+| Doc | Purpose | Currency |
+|-----|---------|----------|
+| `TASK_WORKFLOW_2024-12-10.md` | Task system architecture | CURRENT |
+| `WORKER_TASK_ARCHITECTURE.md` | Worker/task design | CURRENT |
+| `CRAWL_PIPELINE.md` | Crawl pipeline overview | CURRENT |
+| `ORGANIC_SCRAPING_GUIDE.md` | Browser-based scraping | CURRENT |
+| `CODEBASE_MAP.md` | This file | CURRENT |
+| `ANALYTICS_V2_EXAMPLES.md` | Analytics API examples | CURRENT |
+| `BRAND_INTELLIGENCE_API.md` | Brand API docs | CURRENT |
+
+### Root Docs
+| Doc | Purpose | Currency |
+|-----|---------|----------|
+| `CLAUDE.md` | Claude instructions | **PRIMARY** |
+| `README.md` | Project overview | NEEDS UPDATE |
+
+---
+
+## Common Mistakes to Avoid
+
+1. **Don't use `src/hydration/`** - It's an old approach that was superseded by the task system
+
+2. **Don't use `src/dutchie-az/db/connection.ts`** - Use `src/db/pool.ts` instead
+
+3. **Don't import `src/db/migrate.ts` at runtime** - It will crash. Only use for CLI migrations.
+
+4. **Don't query `stores` table** - It's empty. Use `dispensaries`.
+
+5. **Don't query `products` table** - It's empty. Use `store_products`.
+
+6. **Don't use wrong GraphQL hash** - Always get hash from `GRAPHQL_HASHES` in client.ts
+
+7. **Don't use `Status: null`** - It returns 0 products. Use `Status: 'Active'`.
+
+---
+
+## When in Doubt
+
+1. Check if the file is imported in `src/index.ts` - if not, it may be deprecated
+2. Check the last modified date - older files may be stale
+3. Look for `DEPRECATED` comments in the code
+4. Ask: "Is there a newer version of this in `src/tasks/` or `src/platforms/`?"
+5. Read the relevant doc in `docs/` before modifying code
--- a/backend/docs/_archive/ANALYTICS_RUNBOOK.md
+++ b/backend/docs/_archive/ANALYTICS_RUNBOOK.md
--- a/backend/docs/_archive/ANALYTICS_V2_EXAMPLES.md
+++ b/backend/docs/_archive/ANALYTICS_V2_EXAMPLES.md
--- a/backend/docs/_archive/BRAND_INTELLIGENCE_API.md
+++ b/backend/docs/_archive/BRAND_INTELLIGENCE_API.md
@@ -0,0 +1,394 @@
+# Brand Intelligence API
+
+## Endpoint
+
+```
+GET /api/analytics/v2/brand/:name/intelligence
+```
+
+## Query Parameters
+
+| Param | Type | Default | Description |
+|-------|------|---------|-------------|
+| `window` | `7d\|30d\|90d` | `30d` | Time window for trend calculations |
+| `state` | string | - | Filter by state code (e.g., `AZ`) |
+| `category` | string | - | Filter by category (e.g., `Flower`) |
+
+## Response Payload Schema
+
+```typescript
+interface BrandIntelligenceResult {
+  brand_name: string;
+  window: '7d' | '30d' | '90d';
+  generated_at: string;  // ISO timestamp when data was computed
+
+  performance_snapshot: PerformanceSnapshot;
+  alerts: Alerts;
+  sku_performance: SkuPerformance[];
+  retail_footprint: RetailFootprint;
+  competitive_landscape: CompetitiveLandscape;
+  inventory_health: InventoryHealth;
+  promo_performance: PromoPerformance;
+}
+```
+
+---
+
+## Section 1: Performance Snapshot
+
+Summary cards with key brand metrics.
+
+```typescript
+interface PerformanceSnapshot {
+  active_skus: number;              // Total products in catalog
+  total_revenue_30d: number | null; // Estimated from qty × price
+  total_stores: number;             // Active retail partners
+  new_stores_30d: number;           // New distribution in window
+  market_share: number | null;      // % of category SKUs
+  avg_wholesale_price: number | null;
+  price_position: 'premium' | 'value' | 'competitive';
+}
+```
+
+**UI Label Mapping:**
+| Field | User-Facing Label | Helper Text |
+|-------|-------------------|-------------|
+| `active_skus` | Active Products | X total in catalog |
+| `total_revenue_30d` | Monthly Revenue | Estimated from sales |
+| `total_stores` | Retail Distribution | Active retail partners |
+| `new_stores_30d` | New Opportunities | X new in last 30 days |
+| `market_share` | Category Position | % of category |
+| `avg_wholesale_price` | Avg Wholesale | Per unit |
+| `price_position` | Pricing Tier | Premium/Value/Market Rate |
+
+---
+
+## Section 2: Alerts
+
+Issues requiring attention.
+
+```typescript
+interface Alerts {
+  lost_stores_30d_count: number;
+  lost_skus_30d_count: number;
+  competitor_takeover_count: number;
+  avg_oos_duration_days: number | null;
+  avg_reorder_lag_days: number | null;
+  items: AlertItem[];
+}
+
+interface AlertItem {
+  type: 'lost_store' | 'delisted_sku' | 'shelf_loss' | 'extended_oos';
+  severity: 'critical' | 'warning';
+  store_name?: string;
+  product_name?: string;
+  competitor_brand?: string;
+  days_since?: number;
+  state_code?: string;
+}
+```
+
+**UI Label Mapping:**
+| Field | User-Facing Label |
+|-------|-------------------|
+| `lost_stores_30d_count` | Accounts at Risk |
+| `lost_skus_30d_count` | Delisted SKUs |
+| `competitor_takeover_count` | Shelf Losses |
+| `avg_oos_duration_days` | Avg Stockout Length |
+| `avg_reorder_lag_days` | Avg Restock Time |
+| `severity: critical` | Urgent |
+| `severity: warning` | Watch |
+
+---
+
+## Section 3: SKU Performance (Product Velocity)
+
+How fast each SKU sells.
+
+```typescript
+interface SkuPerformance {
+  store_product_id: number;
+  product_name: string;
+  category: string | null;
+  daily_velocity: number;        // Units/day estimate
+  velocity_status: 'hot' | 'steady' | 'slow' | 'stale';
+  retail_price: number | null;
+  on_sale: boolean;
+  stores_carrying: number;
+  stock_status: 'in_stock' | 'low_stock' | 'out_of_stock';
+}
+```
+
+**UI Label Mapping:**
+| Field | User-Facing Label |
+|-------|-------------------|
+| `daily_velocity` | Daily Rate |
+| `velocity_status` | Momentum |
+| `velocity_status: hot` | Hot |
+| `velocity_status: steady` | Steady |
+| `velocity_status: slow` | Slow |
+| `velocity_status: stale` | Stale |
+| `retail_price` | Retail Price |
+| `on_sale` | Promo (badge) |
+
+**Velocity Thresholds:**
+- `hot`: >= 5 units/day
+- `steady`: >= 1 unit/day
+- `slow`: >= 0.1 units/day
+- `stale`: < 0.1 units/day
+
+---
+
+## Section 4: Retail Footprint
+
+Store placement and coverage.
+
+```typescript
+interface RetailFootprint {
+  total_stores: number;
+  in_stock_count: number;
+  out_of_stock_count: number;
+  penetration_by_region: RegionPenetration[];
+  whitespace_stores: WhitespaceStore[];
+}
+
+interface RegionPenetration {
+  state_code: string;
+  store_count: number;
+  percent_reached: number;    // % of state's dispensaries
+  in_stock: number;
+  out_of_stock: number;
+}
+
+interface WhitespaceStore {
+  store_id: number;
+  store_name: string;
+  state_code: string;
+  city: string | null;
+  category_fit: number;       // How many competing brands they carry
+  competitor_brands: string[];
+}
+```
+
+**UI Label Mapping:**
+| Field | User-Facing Label |
+|-------|-------------------|
+| `penetration_by_region` | Market Coverage by Region |
+| `percent_reached` | X% reached |
+| `in_stock` | X stocked |
+| `out_of_stock` | X out |
+| `whitespace_stores` | Expansion Opportunities |
+| `category_fit` | X fit |
+
+---
+
+## Section 5: Competitive Landscape
+
+Market positioning vs competitors.
+
+```typescript
+interface CompetitiveLandscape {
+  brand_price_position: 'premium' | 'value' | 'competitive';
+  market_share_trend: MarketSharePoint[];
+  competitors: Competitor[];
+  head_to_head_skus: HeadToHead[];
+}
+
+interface MarketSharePoint {
+  date: string;
+  share_percent: number;
+}
+
+interface Competitor {
+  brand_name: string;
+  store_overlap_percent: number;
+  price_position: 'premium' | 'value' | 'competitive';
+  avg_price: number | null;
+  sku_count: number;
+}
+
+interface HeadToHead {
+  product_name: string;
+  brand_price: number;
+  competitor_brand: string;
+  competitor_price: number;
+  price_diff_percent: number;
+}
+```
+
+**UI Label Mapping:**
+| Field | User-Facing Label |
+|-------|-------------------|
+| `price_position: premium` | Premium Tier |
+| `price_position: value` | Value Leader |
+| `price_position: competitive` | Market Rate |
+| `market_share_trend` | Share of Shelf Trend |
+| `head_to_head_skus` | Price Comparison |
+| `store_overlap_percent` | X% store overlap |
+
+---
+
+## Section 6: Inventory Health
+
+Stock projections and risk levels.
+
+```typescript
+interface InventoryHealth {
+  critical_count: number;      // <7 days stock
+  warning_count: number;       // 7-14 days stock
+  healthy_count: number;       // 14-90 days stock
+  overstocked_count: number;   // >90 days stock
+  skus: InventorySku[];
+  overstock_alert: OverstockItem[];
+}
+
+interface InventorySku {
+  store_product_id: number;
+  product_name: string;
+  store_name: string;
+  days_of_stock: number | null;
+  risk_level: 'critical' | 'elevated' | 'moderate' | 'healthy';
+  current_quantity: number | null;
+  daily_sell_rate: number | null;
+}
+
+interface OverstockItem {
+  product_name: string;
+  store_name: string;
+  excess_units: number;
+  days_of_stock: number;
+}
+```
+
+**UI Label Mapping:**
+| Field | User-Facing Label |
+|-------|-------------------|
+| `risk_level: critical` | Reorder Now |
+| `risk_level: elevated` | Low Stock |
+| `risk_level: moderate` | Monitor |
+| `risk_level: healthy` | Healthy |
+| `critical_count` | Urgent (<7 days) |
+| `warning_count` | Low (7-14 days) |
+| `overstocked_count` | Excess (>90 days) |
+| `days_of_stock` | X days remaining |
+| `overstock_alert` | Overstock Alert |
+| `excess_units` | X excess units |
+
+---
+
+## Section 7: Promotion Effectiveness
+
+How promotions impact sales.
+
+```typescript
+interface PromoPerformance {
+  avg_baseline_velocity: number | null;
+  avg_promo_velocity: number | null;
+  avg_velocity_lift: number | null;     // % increase during promo
+  avg_efficiency_score: number | null;  // ROI proxy
+  promotions: Promotion[];
+}
+
+interface Promotion {
+  product_name: string;
+  store_name: string;
+  status: 'active' | 'scheduled' | 'ended';
+  start_date: string;
+  end_date: string | null;
+  regular_price: number;
+  promo_price: number;
+  discount_percent: number;
+  baseline_velocity: number | null;
+  promo_velocity: number | null;
+  velocity_lift: number | null;
+  efficiency_score: number | null;
+}
+```
+
+**UI Label Mapping:**
+| Field | User-Facing Label |
+|-------|-------------------|
+| `avg_baseline_velocity` | Normal Rate |
+| `avg_promo_velocity` | During Promos |
+| `avg_velocity_lift` | Avg Sales Lift |
+| `avg_efficiency_score` | ROI Score |
+| `velocity_lift` | Sales Lift |
+| `efficiency_score` | ROI Score |
+| `status: active` | Live |
+| `status: scheduled` | Scheduled |
+| `status: ended` | Ended |
+
+---
+
+## Example Queries
+
+### Get full payload
+```javascript
+const response = await fetch('/api/analytics/v2/brand/Wyld/intelligence?window=30d');
+const data = await response.json();
+```
+
+### Extract summary cards (flattened)
+```javascript
+const { performance_snapshot: ps, alerts } = data;
+
+const summaryCards = {
+  activeProducts: ps.active_skus,
+  monthlyRevenue: ps.total_revenue_30d,
+  retailDistribution: ps.total_stores,
+  newOpportunities: ps.new_stores_30d,
+  categoryPosition: ps.market_share,
+  avgWholesale: ps.avg_wholesale_price,
+  pricingTier: ps.price_position,
+  accountsAtRisk: alerts.lost_stores_30d_count,
+  delistedSkus: alerts.lost_skus_30d_count,
+  shelfLosses: alerts.competitor_takeover_count,
+};
+```
+
+### Get top 10 fastest selling SKUs
+```javascript
+const topSkus = data.sku_performance
+  .filter(sku => sku.velocity_status === 'hot' || sku.velocity_status === 'steady')
+  .sort((a, b) => b.daily_velocity - a.daily_velocity)
+  .slice(0, 10);
+```
+
+### Get critical inventory alerts only
+```javascript
+const criticalInventory = data.inventory_health.skus
+  .filter(sku => sku.risk_level === 'critical');
+```
+
+### Get states with <50% penetration
+```javascript
+const underPenetrated = data.retail_footprint.penetration_by_region
+  .filter(region => region.percent_reached < 50)
+  .sort((a, b) => a.percent_reached - b.percent_reached);
+```
+
+### Get active promotions with positive lift
+```javascript
+const effectivePromos = data.promo_performance.promotions
+  .filter(p => p.status === 'active' && p.velocity_lift > 0)
+  .sort((a, b) => b.velocity_lift - a.velocity_lift);
+```
+
+### Build chart data for market share trend
+```javascript
+const chartData = data.competitive_landscape.market_share_trend.map(point => ({
+  x: new Date(point.date),
+  y: point.share_percent,
+}));
+```
+
+---
+
+## Notes for Frontend Implementation
+
+1. **All fields are snake_case** - transform to camelCase if needed
+2. **Null values are possible** - handle gracefully in UI
+3. **Arrays may be empty** - show appropriate empty states
+4. **Timestamps are ISO format** - parse with `new Date()`
+5. **Percentages are already computed** - no need to multiply by 100
+6. **The `window` parameter affects trend calculations** - 7d/30d/90d
--- a/backend/docs/_archive/CRAWL_PIPELINE.md
+++ b/backend/docs/_archive/CRAWL_PIPELINE.md
@@ -0,0 +1,539 @@
+# Crawl Pipeline Documentation
+
+## Overview
+
+The crawl pipeline fetches product data from Dutchie dispensary menus and stores it in the canonical database. This document covers the complete flow from task scheduling to data storage.
+
+---
+
+## Pipeline Stages
+
+```
+┌─────────────────────┐
+│  store_discovery    │  Find new dispensaries
+└─────────┬───────────┘
+          │
+          ▼
+┌─────────────────────┐
+│entry_point_discovery│  Resolve slug → platform_dispensary_id
+└─────────┬───────────┘
+          │
+          ▼
+┌─────────────────────┐
+│  product_discovery  │  Initial product crawl
+└─────────┬───────────┘
+          │
+          ▼
+┌─────────────────────┐
+│   product_resync    │  Recurring crawl (every 4 hours)
+└─────────────────────┘
+```
+
+---
+
+## Stage Details
+
+### 1. Store Discovery
+**Purpose:** Find new dispensaries to crawl
+
+**Handler:** `src/tasks/handlers/store-discovery.ts`
+
+**Flow:**
+1. Query Dutchie `ConsumerDispensaries` GraphQL for cities/states
+2. Extract dispensary info (name, address, menu_url)
+3. Insert into `dutchie_discovery_locations`
+4. Queue `entry_point_discovery` for each new location
+
+---
+
+### 2. Entry Point Discovery
+**Purpose:** Resolve menu URL slug to platform_dispensary_id (MongoDB ObjectId)
+
+**Handler:** `src/tasks/handlers/entry-point-discovery.ts`
+
+**Flow:**
+1. Load dispensary from database
+2. Extract slug from `menu_url`:
+   - `/embedded-menu/<slug>` or `/dispensary/<slug>`
+3. Start stealth session (fingerprint + proxy)
+4. Query `resolveDispensaryIdWithDetails(slug)` via GraphQL
+5. Update dispensary with `platform_dispensary_id`
+6. Queue `product_discovery` task
+
+**Example:**
+```
+menu_url: https://dutchie.com/embedded-menu/deeply-rooted
+slug: deeply-rooted
+platform_dispensary_id: 6405ef617056e8014d79101b
+```
+
+---
+
+### 3. Product Discovery
+**Purpose:** Initial crawl of a new dispensary
+
+**Handler:** `src/tasks/handlers/product-discovery.ts`
+
+Follows the same flow as `product_resync` (described below), but runs once as the first crawl of a newly discovered dispensary.
+
+---
+
+### 4. Product Resync
+**Purpose:** Recurring crawl to capture price/stock changes
+
+**Handler:** `src/tasks/handlers/product-resync.ts`
+
+**Flow:**
+
+#### Step 1: Load Dispensary Info
+```sql
+SELECT id, name, platform_dispensary_id, menu_url, state
+FROM dispensaries
+WHERE id = $1 AND crawl_enabled = true
+```
+
+#### Step 2: Start Stealth Session
+- Generate random browser fingerprint
+- Set locale/timezone matching state
+- Optional proxy rotation
+
+#### Step 3: Fetch Products via GraphQL
+**Endpoint:** `https://dutchie.com/api-3/graphql`
+
+**Variables:**
+```javascript
+{
+  includeEnterpriseSpecials: false,
+  productsFilter: {
+    dispensaryId: "<platform_dispensary_id>",
+    pricingType: "rec",
+    Status: "All",
+    types: [],
+    useCache: false,
+    isDefaultSort: true,
+    sortBy: "popularSortIdx",
+    sortDirection: 1,
+    bypassOnlineThresholds: true,
+    isKioskMenu: false,
+    removeProductsBelowOptionThresholds: false
+  },
+  page: 0,
+  perPage: 100
+}
+```
+
+**Key Notes:**
+- `Status: "All"` returns all products (Active returns same count)
+- `Status: null` returns 0 products (broken)
+- `pricingType: "rec"` returns BOTH rec and med prices
+- Paginate until `products.length < perPage` or `allProducts.length >= totalCount`
+
+#### Step 4: Normalize Data
+Transform raw Dutchie payload to canonical format via `DutchieNormalizer`.
+
+#### Step 5: Upsert Products
+Insert/update `store_products` table with normalized data.
+
+#### Step 6: Create Snapshots
+Insert point-in-time record to `store_product_snapshots`.
+
+#### Step 7: Track Missing Products (OOS Detection)
+```sql
+-- Reset consecutive_misses for products IN the feed
+UPDATE store_products
+SET consecutive_misses = 0, last_seen_at = NOW()
+WHERE dispensary_id = $1
+  AND provider = 'dutchie'
+  AND provider_product_id = ANY($2)
+
+-- Increment for products NOT in feed
+UPDATE store_products
+SET consecutive_misses = consecutive_misses + 1
+WHERE dispensary_id = $1
+  AND provider = 'dutchie'
+  AND provider_product_id NOT IN (...)
+  AND consecutive_misses < 3
+
+-- Mark OOS at 3 consecutive misses
+UPDATE store_products
+SET stock_status = 'oos', is_in_stock = false
+WHERE dispensary_id = $1
+  AND consecutive_misses >= 3
+  AND stock_status != 'oos'
+```
+
+#### Step 8: Download Images
+For new products, download and store images locally.
+
+#### Step 9: Update Dispensary
+```sql
+UPDATE dispensaries SET last_crawl_at = NOW() WHERE id = $1
+```
+
+---
+
+## GraphQL Payload Structure
+
+### Product Fields (from filteredProducts.products[])
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `_id` / `id` | string | MongoDB ObjectId (24 hex chars) |
+| `Name` | string | Product display name |
+| `brandName` | string | Brand name |
+| `brand.name` | string | Brand name (nested) |
+| `brand.description` | string | Brand description |
+| `type` | string | Category (Flower, Edible, Concentrate, etc.) |
+| `subcategory` | string | Subcategory |
+| `strainType` | string | Hybrid, Indica, Sativa, N/A |
+| `Status` | string | Always "Active" in feed |
+| `Image` | string | Primary image URL |
+| `images[]` | array | All product images |
+
+### Pricing Fields
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `Prices[]` | number[] | Rec prices per option |
+| `recPrices[]` | number[] | Rec prices |
+| `medicalPrices[]` | number[] | Medical prices |
+| `recSpecialPrices[]` | number[] | Rec sale prices |
+| `medicalSpecialPrices[]` | number[] | Medical sale prices |
+| `Options[]` | string[] | Size options ("1/8oz", "1g", etc.) |
+| `rawOptions[]` | string[] | Raw weight options ("3.5g") |
+
+### Inventory Fields (POSMetaData.children[])
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `quantity` | number | Total inventory count |
+| `quantityAvailable` | number | Available for online orders |
+| `kioskQuantityAvailable` | number | Available for kiosk orders |
+| `option` | string | Which size option this is for |
+
+### Potency Fields
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `THCContent.range[]` | number[] | THC percentage |
+| `CBDContent.range[]` | number[] | CBD percentage |
+| `cannabinoidsV2[]` | array | Detailed cannabinoid breakdown |
+
+### Specials (specialData.bogoSpecials[])
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `specialName` | string | Deal name |
+| `specialType` | string | "bogo", "sale", etc. |
+| `itemsForAPrice.value` | string | Bundle price |
+| `bogoRewards[].totalQuantity.quantity` | number | Required quantity |
+
+---
+
+## OOS Detection Logic
+
+Products disappear from the Dutchie feed when they go out of stock. We track this via `consecutive_misses`:
+
+| Scenario | Action |
+|----------|--------|
+| Product in feed | `consecutive_misses = 0` |
+| Product missing 1st time | `consecutive_misses = 1` |
+| Product missing 2nd time | `consecutive_misses = 2` |
+| Product missing 3rd time | `consecutive_misses = 3`, mark `stock_status = 'oos'` |
+| Product returns to feed | `consecutive_misses = 0`, update stock_status |
+
+**Why 3 misses?**
+- Protects against false positives from crawl failures
+- Single bad crawl doesn't trigger mass OOS alerts
+- Balances detection speed vs accuracy
+
+---
+
+## Database Tables
+
+### store_products
+Current state of each product:
+- `provider_product_id` - Dutchie's MongoDB ObjectId
+- `name_raw`, `brand_name_raw` - Raw values from feed
+- `price_rec`, `price_med` - Current prices
+- `is_in_stock`, `stock_status` - Availability
+- `consecutive_misses` - OOS detection counter
+- `last_seen_at` - Last time product was in feed
+
+### store_product_snapshots
+Point-in-time records for historical analysis:
+- One row per product per crawl
+- Captures price, stock, potency at that moment
+- Used for price history, analytics
+
+### dispensaries
+Store metadata:
+- `platform_dispensary_id` - MongoDB ObjectId for GraphQL
+- `menu_url` - Source URL
+- `last_crawl_at` - Last successful crawl
+- `crawl_enabled` - Whether to crawl
+
+---
+
+## Worker Roles
+
+Workers pull tasks from the `worker_tasks` queue based on their assigned role.
+
+| Role | Name | Description | Handler |
+|------|------|-------------|---------|
+| `product_resync` | Product Resync | Re-crawl dispensary products for price/stock changes | `handleProductResync` |
+| `product_discovery` | Product Discovery | Initial product discovery for new dispensaries | `handleProductDiscovery` |
+| `store_discovery` | Store Discovery | Discover new dispensary locations | `handleStoreDiscovery` |
+| `entry_point_discovery` | Entry Point Discovery | Resolve platform IDs from menu URLs | `handleEntryPointDiscovery` |
+| `analytics_refresh` | Analytics Refresh | Refresh materialized views and analytics | `handleAnalyticsRefresh` |
+
+**API Endpoint:** `GET /api/worker-registry/roles`
+
+---
+
+## Scheduling
+
+Crawls are scheduled via `worker_tasks` table:
+
+| Role | Frequency | Description |
+|------|-----------|-------------|
+| `product_resync` | Every 4 hours | Regular product refresh |
+| `product_discovery` | On-demand | First crawl for new stores |
+| `entry_point_discovery` | On-demand | New store setup |
+| `store_discovery` | Daily | Find new stores |
+| `analytics_refresh` | Daily | Refresh analytics materialized views |
+
+---
+
+## Priority & On-Demand Tasks
+
+Tasks are claimed by workers in order of **priority DESC, created_at ASC**.
+
+### Priority Levels
+
+| Priority | Use Case | Example |
+|----------|----------|---------|
+| 0 | Scheduled/batch tasks | Scheduled product_resync generation (every 4 hours) |
+| 10 | On-demand/chained tasks | entry_point → product_discovery |
+| Higher | Urgent/manual triggers | Admin-triggered immediate crawl |
+
+### Task Chaining
+
+When a task completes, the system automatically creates follow-up tasks:
+
+```
+store_discovery (completed)
+    └─► entry_point_discovery (priority: 10) for each new store
+
+entry_point_discovery (completed, success)
+    └─► product_discovery (priority: 10) for that store
+
+product_discovery (completed)
+    └─► [no chain] Store enters regular resync schedule
+```
+
+### On-Demand Task Creation
+
+Use the task service to create high-priority tasks:
+
+```typescript
+// Create immediate product resync for a store
+await taskService.createTask({
+  role: 'product_resync',
+  dispensary_id: 123,
+  platform: 'dutchie',
+  priority: 20, // Higher than batch tasks
+});
+
+// Convenience methods with default high priority (10)
+await taskService.createEntryPointTask(dispensaryId, 'dutchie');
+await taskService.createProductDiscoveryTask(dispensaryId, 'dutchie');
+await taskService.createStoreDiscoveryTask('dutchie', 'AZ');
+```
+
+### Claim Function
+
+The `claim_task()` SQL function atomically claims tasks:
+- Respects priority ordering (higher = first)
+- Uses `FOR UPDATE SKIP LOCKED` for concurrency
+- Prevents multiple active tasks per store
+
+---
+
+## Image Storage
+
+Images are downloaded from Dutchie's AWS S3 and stored locally with on-demand resizing.
+
+### Storage Path
+```
+/storage/images/products/<state>/<store>/<brand>/<product_id>/image-<hash>.webp
+/storage/images/brands/<brand>/logo-<hash>.webp
+```
+
+**Example:**
+```
+/storage/images/products/az/az-deeply-rooted/bud-bros/6913e3cd444eac3935e928b9/image-ae38b1f9.webp
+```
+
+### Image Proxy API
+Served via `/img/*` with on-demand resizing using **sharp**:
+
+```
+GET /img/products/az/az-deeply-rooted/bud-bros/6913e3cd444eac3935e928b9/image-ae38b1f9.webp?w=200
+```
+
+| Param | Description |
+|-------|-------------|
+| `w` | Width in pixels (max 4000) |
+| `h` | Height in pixels (max 4000) |
+| `q` | Quality 1-100 (default 80) |
+| `fit` | cover, contain, fill, inside, outside |
+| `blur` | Blur sigma (0.3-1000) |
+| `gray` | Grayscale (1 = enabled) |
+| `format` | webp, jpeg, png, avif (default webp) |
+
+### Key Files
+| File | Purpose |
+|------|---------|
+| `src/utils/image-storage.ts` | Download & save images to local filesystem |
+| `src/routes/image-proxy.ts` | On-demand resize/transform at `/img/*` |
+
+### Download Rules
+
+| Scenario | Image Action |
+|----------|--------------|
+| **New product (first crawl)** | Download if `primaryImageUrl` exists |
+| **Existing product (refresh)** | Download only if `local_image_path` is NULL (backfill) |
+| **Product already has local image** | Skip download entirely |
+
+**Logic:**
+- Images are downloaded **once** and never re-downloaded on subsequent crawls
+- `skipIfExists: true` - filesystem check prevents re-download even if queued
+- First crawl: all products get images
+- Refresh crawl: only new products or products missing local images
+
+### Storage Rules
+- **NO MinIO** - local filesystem only (`STORAGE_DRIVER=local`)
+- Store full resolution, resize on-demand via `/img` proxy
+- Convert to webp for consistency using **sharp**
+- Preserve original Dutchie URL as fallback in `image_url` column
+- Local path stored in `local_image_path` column
+
+---
+
+## Stealth & Anti-Detection
+
+**PROXIES ARE REQUIRED** - Workers will fail to start if no active proxies are available in the database. All HTTP requests to Dutchie go through a proxy.
+
+Workers automatically initialize anti-detection systems on startup.
+
+### Components
+
+| Component | Purpose | Source |
+|-----------|---------|--------|
+| **CrawlRotator** | Coordinates proxy + UA rotation | `src/services/crawl-rotator.ts` |
+| **ProxyRotator** | Round-robin proxy selection, health tracking | `src/services/crawl-rotator.ts` |
+| **UserAgentRotator** | Cycles through realistic browser fingerprints | `src/services/crawl-rotator.ts` |
+| **Dutchie Client** | Curl-based HTTP with auto-retry on 403 | `src/platforms/dutchie/client.ts` |
+
+### Initialization Flow
+
+```
+Worker Start
+    │
+    ├─► initializeStealth()
+    │       │
+    │       ├─► CrawlRotator.initialize()
+    │       │       └─► Load proxies from `proxies` table
+    │       │
+    │       └─► setCrawlRotator(rotator)
+    │               └─► Wire to Dutchie client
+    │
+    └─► Process tasks...
+```
+
+### Stealth Session (per task)
+
+Each crawl task starts a stealth session:
+
+```typescript
+// In product-resync.ts, entry-point-discovery.ts
+const session = startSession(dispensary.state || 'AZ', 'America/Phoenix');
+```
+
+This creates a new identity with:
+- **Random fingerprint:** Chrome/Firefox/Safari/Edge on Win/Mac/Linux
+- **Accept-Language:** Matches timezone (e.g., `America/Phoenix` → `en-US,en;q=0.9`)
+- **sec-ch-ua headers:** Proper Client Hints for the browser profile
+
+### On 403 Block
+
+When Dutchie returns 403, the client automatically:
+
+1. Records failure on current proxy (increments `failure_count`)
+2. If proxy has 5+ failures, deactivates it
+3. Rotates to next healthy proxy
+4. Rotates fingerprint
+5. Retries the request
+
+### Proxy Table Schema
+
+```sql
+CREATE TABLE proxies (
+  id SERIAL PRIMARY KEY,
+  host VARCHAR(255) NOT NULL,
+  port INTEGER NOT NULL,
+  username VARCHAR(100),
+  password VARCHAR(100),
+  protocol VARCHAR(10) DEFAULT 'http',  -- http, https, socks5
+  is_active BOOLEAN DEFAULT true,
+  last_used_at TIMESTAMPTZ,
+  failure_count INTEGER DEFAULT 0,
+  success_count INTEGER DEFAULT 0,
+  avg_response_time_ms INTEGER,
+  last_failure_at TIMESTAMPTZ,
+  last_error TEXT
+);
+```
+
+### Configuration
+
+Proxies are mandatory. There is no environment variable to disable them. Workers will refuse to start without active proxies in the database.
+
+### User-Agent Generation
+
+See `workflow-12102025.md` for full specification.
+
+**Summary:**
+- Uses `intoli/user-agents` library (daily-updated market share data)
+- Device distribution: Mobile 62%, Desktop 36%, Tablet 2%
+- Browser whitelist: Chrome, Safari, Edge, Firefox only
+- UA sticks until IP rotates (403 or manual rotation)
+- Failure = alert admin + stop crawl (no fallback)
+
+Each fingerprint includes proper `sec-ch-ua`, `sec-ch-ua-platform`, and `sec-ch-ua-mobile` headers.
+
+---
+
+## Error Handling
+
+- **GraphQL errors:** Logged, task marked failed, retried later
+- **Normalization errors:** Logged as warnings, continue with valid products
+- **Image download errors:** Non-fatal, logged, continue
+- **Database errors:** Task fails, will be retried
+- **403 blocks:** Auto-rotate proxy + fingerprint, retry (up to 3 retries)
+
+---
+
+## Files
+
+| File | Purpose |
+|------|---------|
+| `src/tasks/handlers/product-resync.ts` | Main crawl handler |
+| `src/tasks/handlers/entry-point-discovery.ts` | Slug → ID resolution |
+| `src/platforms/dutchie/index.ts` | GraphQL client, session management |
+| `src/hydration/normalizers/dutchie.ts` | Payload normalization |
+| `src/hydration/canonical-upsert.ts` | Database upsert logic |
+| `src/utils/image-storage.ts` | Image download and local storage |
+| `src/routes/image-proxy.ts` | On-demand image resizing |
+| `migrations/075_consecutive_misses.sql` | OOS tracking column |
--- a/backend/docs/_archive/ORGANIC_SCRAPING_GUIDE.md
+++ b/backend/docs/_archive/ORGANIC_SCRAPING_GUIDE.md
@@ -0,0 +1,297 @@
+# Organic Browser-Based Scraping Guide
+
+**Last Updated:** 2025-12-12
+**Status:** Production-ready proof of concept
+
+---
+
+## Overview
+
+This document describes the "organic" browser-based approach to scraping Dutchie dispensary menus. Unlike direct curl/axios requests, this method uses a real browser session to make API calls, making requests appear natural and reducing detection risk.
+
+---
+
+## Why Organic Scraping?
+
+| Approach | Detection Risk | Speed | Complexity |
+|----------|---------------|-------|------------|
+| Direct curl | Higher | Fast | Low |
+| curl-impersonate | Medium | Fast | Medium |
+| **Browser-based (organic)** | **Lowest** | Slower | Higher |
+
+Direct curl requests can be fingerprinted via:
+- TLS fingerprint (cipher suites, extensions)
+- Header order and values
+- Missing cookies/session data
+- Request patterns
+
+Browser-based requests inherit:
+- Real Chrome TLS fingerprint
+- Session cookies from page visit
+- Natural header order
+- JavaScript execution environment
+
+---
+
+## Implementation
+
+### Dependencies
+
+```bash
+npm install puppeteer puppeteer-extra puppeteer-extra-plugin-stealth
+```
+
+### Core Script: `test-intercept.js`
+
+Located at: `backend/test-intercept.js`
+
+```javascript
+const puppeteer = require('puppeteer-extra');
+const StealthPlugin = require('puppeteer-extra-plugin-stealth');
+const fs = require('fs');
+
+puppeteer.use(StealthPlugin());
+
+async function capturePayload(config) {
+  const { dispensaryId, platformId, cName, outputPath } = config;
+
+  const browser = await puppeteer.launch({
+    headless: 'new',
+    args: ['--no-sandbox', '--disable-setuid-sandbox']
+  });
+
+  const page = await browser.newPage();
+
+  // STEP 1: Establish session by visiting the menu
+  const embedUrl = `https://dutchie.com/embedded-menu/${cName}?menuType=rec`;
+  await page.goto(embedUrl, { waitUntil: 'networkidle2', timeout: 60000 });
+
+  // STEP 2: Fetch ALL products using GraphQL from browser context
+  const result = await page.evaluate(async (platformId) => {
+    const allProducts = [];
+    let pageNum = 0;
+    const perPage = 100;
+    let totalCount = 0;
+    const sessionId = 'browser-session-' + Date.now();
+
+    while (pageNum < 30) {
+      const variables = {
+        includeEnterpriseSpecials: false,
+        productsFilter: {
+          dispensaryId: platformId,
+          pricingType: 'rec',
+          Status: 'Active',  // CRITICAL: Must be 'Active', not null
+          types: [],
+          useCache: true,
+          isDefaultSort: true,
+          sortBy: 'popularSortIdx',
+          sortDirection: 1,
+          bypassOnlineThresholds: true,
+          isKioskMenu: false,
+          removeProductsBelowOptionThresholds: false,
+        },
+        page: pageNum,
+        perPage: perPage,
+      };
+
+      const extensions = {
+        persistedQuery: {
+          version: 1,
+          sha256Hash: 'ee29c060826dc41c527e470e9ae502c9b2c169720faa0a9f5d25e1b9a530a4a0'
+        }
+      };
+
+      const qs = new URLSearchParams({
+        operationName: 'FilteredProducts',
+        variables: JSON.stringify(variables),
+        extensions: JSON.stringify(extensions)
+      });
+
+      const response = await fetch(`https://dutchie.com/api-3/graphql?${qs}`, {
+        method: 'GET',
+        headers: {
+          'Accept': 'application/json',
+          'content-type': 'application/json',
+          'x-dutchie-session': sessionId,
+          'apollographql-client-name': 'Marketplace (production)',
+        },
+        credentials: 'include'
+      });
+
+      const json = await response.json();
+      const data = json?.data?.filteredProducts;
+      if (!data?.products) break;
+
+      allProducts.push(...data.products);
+      if (pageNum === 0) totalCount = data.queryInfo?.totalCount || 0;
+      if (allProducts.length >= totalCount) break;
+
+      pageNum++;
+      await new Promise(r => setTimeout(r, 200)); // Polite delay
+    }
+
+    return { products: allProducts, totalCount };
+  }, platformId);
+
+  await browser.close();
+
+  // STEP 3: Save payload
+  const payload = {
+    dispensaryId,
+    platformId,
+    cName,
+    fetchedAt: new Date().toISOString(),
+    productCount: result.products.length,
+    products: result.products,
+  };
+
+  fs.writeFileSync(outputPath, JSON.stringify(payload, null, 2));
+  return payload;
+}
+```
+
+---
+
+## Critical Parameters
+
+### GraphQL Hash (FilteredProducts)
+
+```
+ee29c060826dc41c527e470e9ae502c9b2c169720faa0a9f5d25e1b9a530a4a0
+```
+
+**WARNING:** Using the wrong hash returns HTTP 400.
+
+### Status Parameter
+
+| Value | Result |
+|-------|--------|
+| `'Active'` | Returns in-stock products (1019 in test) |
+| `null` | Returns 0 products |
+| `'All'` | Returns HTTP 400 |
+
+**ALWAYS use `Status: 'Active'`**
+
+### Required Headers
+
+```javascript
+{
+  'Accept': 'application/json',
+  'content-type': 'application/json',
+  'x-dutchie-session': 'unique-session-id',
+  'apollographql-client-name': 'Marketplace (production)',
+}
+```
+
+### Endpoint
+
+```
+https://dutchie.com/api-3/graphql
+```
+
+---
+
+## Performance Benchmarks
+
+Test store: AZ-Deeply-Rooted (1019 products)
+
+| Metric | Value |
+|--------|-------|
+| Total products | 1019 |
+| Time | 18.5 seconds |
+| Payload size | 11.8 MB |
+| Pages fetched | 11 (100 per page) |
+| Success rate | 100% |
+
+---
+
+## Payload Format
+
+The output matches the existing `payload-fetch.ts` handler format:
+
+```json
+{
+  "dispensaryId": 123,
+  "platformId": "6405ef617056e8014d79101b",
+  "cName": "AZ-Deeply-Rooted",
+  "fetchedAt": "2025-12-12T05:05:19.837Z",
+  "productCount": 1019,
+  "products": [
+    {
+      "id": "6927508db4851262f629a869",
+      "Name": "Product Name",
+      "brand": { "name": "Brand Name", ... },
+      "type": "Flower",
+      "THC": "25%",
+      "Prices": [...],
+      "Options": [...],
+      ...
+    }
+  ]
+}
+```
+
+---
+
+## Integration Points
+
+### As a Task Handler
+
+The organic approach can be integrated as an alternative to curl-based fetching:
+
+```typescript
+// In src/tasks/handlers/organic-payload-fetch.ts
+export async function handleOrganicPayloadFetch(ctx: TaskContext): Promise<TaskResult> {
+  // Use puppeteer-based capture
+  // Save to same payload storage
+  // Queue product_refresh task
+}
+```
+
+### Worker Configuration
+
+Add to job_schedules:
+```sql
+INSERT INTO job_schedules (name, role, cron_expression)
+VALUES ('organic_product_crawl', 'organic_payload_fetch', '0 */6 * * *');
+```
+
+---
+
+## Troubleshooting
+
+### HTTP 400 Bad Request
+- Check hash is correct: `ee29c060...`
+- Verify Status is `'Active'` (`'All'` returns HTTP 400)
+
+### 0 Products Returned
+- Status was likely `null` - use `'Active'`
+- Check platformId is valid MongoDB ObjectId
+
+### Session Not Established
+- Increase timeout on initial page.goto()
+- Check cName is valid (matches embedded-menu URL)
+
+### Detection/Blocking
+- StealthPlugin should handle most cases
+- Add random delays between pages
+- Use headless: 'new' (not true/false)
+
+---
+
+## Files Reference
+
+| File | Purpose |
+|------|---------|
+| `backend/test-intercept.js` | Proof of concept script |
+| `backend/src/platforms/dutchie/client.ts` | GraphQL hashes, curl implementation |
+| `backend/src/tasks/handlers/payload-fetch.ts` | Current curl-based handler |
+| `backend/src/utils/payload-storage.ts` | Payload save/load utilities |
+
+---
+
+## See Also
+
+- `DUTCHIE_CRAWL_WORKFLOW.md` - Full crawl pipeline documentation
+- `TASK_WORKFLOW_2024-12-10.md` - Task system architecture
+- `CLAUDE.md` - Project rules and constraints
--- a/backend/docs/_archive/README.md
+++ b/backend/docs/_archive/README.md
@@ -0,0 +1,25 @@
+# ARCHIVED DOCUMENTATION
+
+**WARNING: These docs may be outdated or inaccurate.**
+
+The code has evolved significantly. These docs are kept for historical reference only.
+
+## What to Use Instead
+
+**The single source of truth is:**
+- `CLAUDE.md` (root) - Essential rules and quick reference
+- `docs/CODEBASE_MAP.md` - Current file/directory reference
+
+## Why Archive?
+
+These docs were written during development iterations and may reference:
+- Old file paths that no longer exist
+- Deprecated approaches (hydration, scraper-v2)
+- APIs that have changed
+- Database schemas that evolved
+
+## If You Need Details
+
+1. First check CODEBASE_MAP.md for current file locations
+2. Then read the actual source code
+3. Only use archive docs as a last resort for historical context
--- a/backend/docs/_archive/TASK_WORKFLOW_2024-12-10.md
+++ b/backend/docs/_archive/TASK_WORKFLOW_2024-12-10.md
@@ -0,0 +1,584 @@
+# Task Workflow Documentation
+**Date: 2024-12-10**
+
+This document describes the complete task/job processing architecture after the 2024-12-10 rewrite.
+
+---
+
+## Complete Architecture
+
+```
+┌─────────────────────────────────────────────────────────────────────────────────┐
+│                              KUBERNETES CLUSTER                                  │
+├─────────────────────────────────────────────────────────────────────────────────┤
+│                                                                                  │
+│  ┌─────────────────────────────────────────────────────────────────────────┐    │
+│  │                         API SERVER POD (scraper)                         │    │
+│  │                                                                          │    │
+│  │   ┌──────────────────┐     ┌────────────────────────────────────────┐   │    │
+│  │   │   Express API    │     │         TaskScheduler                   │   │    │
+│  │   │                  │     │   (src/services/task-scheduler.ts)      │   │    │
+│  │   │  /api/job-queue  │     │                                         │   │    │
+│  │   │  /api/tasks      │     │   • Polls every 60s                     │   │    │
+│  │   │  /api/schedules  │     │   • Checks task_schedules table         │   │    │
+│  │   └────────┬─────────┘     │   • SELECT FOR UPDATE SKIP LOCKED       │   │    │
+│  │            │               │   • Generates tasks when due            │   │    │
+│  │            │               └──────────────────┬─────────────────────┘   │    │
+│  │            │                                  │                          │    │
+│  └────────────┼──────────────────────────────────┼──────────────────────────┘    │
+│               │                                  │                               │
+│               │         ┌────────────────────────┘                               │
+│               │         │                                                        │
+│               ▼         ▼                                                        │
+│  ┌─────────────────────────────────────────────────────────────────────────┐    │
+│  │                          POSTGRESQL DATABASE                             │    │
+│  │                                                                          │    │
+│  │   ┌─────────────────────┐        ┌─────────────────────┐                │    │
+│  │   │   task_schedules    │        │    worker_tasks     │                │    │
+│  │   │                     │        │                     │                │    │
+│  │   │ • product_refresh   │───────►│ • pending tasks     │                │    │
+│  │   │ • store_discovery   │ create │ • claimed tasks     │                │    │
+│  │   │ • analytics_refresh │ tasks  │ • running tasks     │                │    │
+│  │   │                     │        │ • completed tasks   │                │    │
+│  │   │ next_run_at         │        │                     │                │    │
+│  │   │ last_run_at         │        │ role, dispensary_id │                │    │
+│  │   │ interval_hours      │        │ priority, status    │                │    │
+│  │   └─────────────────────┘        └──────────┬──────────┘                │    │
+│  │                                             │                            │    │
+│  └─────────────────────────────────────────────┼────────────────────────────┘    │
+│                                                │                                  │
+│                         ┌──────────────────────┘                                  │
+│                         │ Workers poll for tasks                                  │
+│                         │ (SELECT FOR UPDATE SKIP LOCKED)                         │
+│                         ▼                                                         │
+│  ┌─────────────────────────────────────────────────────────────────────────┐    │
+│  │                    WORKER PODS (StatefulSet: scraper-worker)             │    │
+│  │                                                                          │    │
+│  │   ┌─────────────┐  ┌─────────────┐  ┌─────────────┐  ┌─────────────┐    │    │
+│  │   │  Worker 0   │  │  Worker 1   │  │  Worker 2   │  │  Worker N   │    │    │
+│  │   │             │  │             │  │             │  │             │    │    │
+│  │   │ task-worker │  │ task-worker │  │ task-worker │  │ task-worker │    │    │
+│  │   │     .ts     │  │     .ts     │  │     .ts     │  │     .ts     │    │    │
+│  │   └─────────────┘  └─────────────┘  └─────────────┘  └─────────────┘    │    │
+│  │                                                                          │    │
+│  └──────────────────────────────────────────────────────────────────────────┘    │
+│                                                                                  │
+└──────────────────────────────────────────────────────────────────────────────────┘
+```
+
+---
+
+## Startup Sequence
+
+```
+┌─────────────────────────────────────────────────────────────────────────────┐
+│                        API SERVER STARTUP                                    │
+├─────────────────────────────────────────────────────────────────────────────┤
+│                                                                              │
+│   1. Express app initializes                                                 │
+│                    │                                                         │
+│                    ▼                                                         │
+│   2. runAutoMigrations()                                                     │
+│      • Runs pending migrations (including 079_task_schedules.sql)           │
+│                    │                                                         │
+│                    ▼                                                         │
+│   3. initializeMinio() / initializeImageStorage()                           │
+│                    │                                                         │
+│                    ▼                                                         │
+│   4. cleanupOrphanedJobs()                                                   │
+│                    │                                                         │
+│                    ▼                                                         │
+│   5. taskScheduler.start()  ◄─── NEW (per TASK_WORKFLOW_2024-12-10.md)      │
+│      │                                                                       │
+│      ├── Recover stale tasks (workers that died)                            │
+│      ├── Ensure default schedules exist in task_schedules                   │
+│      ├── Check and run any due schedules immediately                        │
+│      └── Start 60-second poll interval                                      │
+│                    │                                                         │
+│                    ▼                                                         │
+│   6. app.listen(PORT)                                                        │
+│                                                                              │
+└─────────────────────────────────────────────────────────────────────────────┘
+
+┌─────────────────────────────────────────────────────────────────────────────┐
+│                        WORKER POD STARTUP                                    │
+├─────────────────────────────────────────────────────────────────────────────┤
+│                                                                              │
+│   1. K8s starts pod from StatefulSet                                        │
+│                    │                                                         │
+│                    ▼                                                         │
+│   2. TaskWorker.constructor()                                               │
+│      • Create DB pool                                                        │
+│      • Create CrawlRotator                                                   │
+│                    │                                                         │
+│                    ▼                                                         │
+│   3. initializeStealth()                                                    │
+│      • Load proxies from DB (REQUIRED - fails if none)                      │
+│      • Wire rotator to Dutchie client                                       │
+│                    │                                                         │
+│                    ▼                                                         │
+│   4. register() with API                                                    │
+│      • Optional - continues if fails                                         │
+│                    │                                                         │
+│                    ▼                                                         │
+│   5. startRegistryHeartbeat() every 30s                                     │
+│                    │                                                         │
+│                    ▼                                                         │
+│   6. processNextTask() loop                                                 │
+│      │                                                                       │
+│      ├── Poll for pending task (FOR UPDATE SKIP LOCKED)                     │
+│      ├── Claim task atomically                                              │
+│      ├── Execute handler (product_refresh, store_discovery, etc.)           │
+│      ├── Mark complete/failed                                               │
+│      ├── Chain next task if applicable                                      │
+│      └── Loop                                                               │
+│                                                                              │
+└─────────────────────────────────────────────────────────────────────────────┘
+```
+
+---
+
+## Schedule Flow
+
+```
+┌─────────────────────────────────────────────────────────────────────────────┐
+│                     SCHEDULER POLL (every 60 seconds)                        │
+├─────────────────────────────────────────────────────────────────────────────┤
+│                                                                              │
+│   BEGIN TRANSACTION                                                          │
+│         │                                                                    │
+│         ▼                                                                    │
+│   SELECT * FROM task_schedules                                              │
+│   WHERE enabled = true AND next_run_at <= NOW()                             │
+│   FOR UPDATE SKIP LOCKED  ◄─── Prevents duplicate execution across replicas │
+│         │                                                                    │
+│         ▼                                                                    │
+│   For each due schedule:                                                     │
+│         │                                                                    │
+│         ├── payload_fetch_all                                               │
+│         │   └─► Query dispensaries needing crawl                            │
+│         │   └─► Create payload_fetch tasks in worker_tasks                  │
+│         │                                                                    │
+│         ├── store_discovery_dutchie                                         │
+│         │   └─► Create single store_discovery task                          │
+│         │                                                                    │
+│         └── analytics_refresh                                                │
+│             └─► Create single analytics_refresh task                        │
+│         │                                                                    │
+│         ▼                                                                    │
+│   UPDATE task_schedules SET                                                  │
+│     last_run_at = NOW(),                                                     │
+│     next_run_at = NOW() + interval_hours                                    │
+│         │                                                                    │
+│         ▼                                                                    │
+│   COMMIT                                                                     │
+│                                                                              │
+└─────────────────────────────────────────────────────────────────────────────┘
+```
+
+---
+
+## Task Lifecycle
+
+```
+                                    ┌──────────┐
+                                    │ SCHEDULE │
+                                    │   DUE    │
+                                    └────┬─────┘
+                                         │
+                                         ▼
+┌──────────────┐    claim    ┌──────────────┐    start    ┌──────────────┐
+│   PENDING    │────────────►│   CLAIMED    │────────────►│   RUNNING    │
+└──────────────┘             └──────────────┘             └──────┬───────┘
+       ▲                                                        │
+       │                                         ┌──────────────┼──────────────┐
+       │ retry                                   │              │              │
+       │ (if retries < max)                      ▼              ▼              ▼
+       │                                  ┌──────────┐   ┌──────────┐   ┌──────────┐
+       └──────────────────────────────────│  FAILED  │   │ COMPLETED│   │  STALE   │
+                                          └──────────┘   └──────────┘   └────┬─────┘
+                                                                              │
+                                                              recover_stale_tasks()
+                                                                              │
+                                                                              ▼
+                                                                        ┌──────────┐
+                                                                        │ PENDING  │
+                                                                        └──────────┘
+```
+
+---
+
+## Database Tables
+
+### task_schedules (NEW - migration 079)
+
+Stores schedule definitions. Survives restarts.
+
+```sql
+CREATE TABLE task_schedules (
+  id SERIAL PRIMARY KEY,
+  name VARCHAR(100) NOT NULL UNIQUE,
+  role VARCHAR(50) NOT NULL,        -- product_refresh, store_discovery, etc.
+  enabled BOOLEAN DEFAULT TRUE,
+  interval_hours INTEGER NOT NULL,  -- How often to run
+  priority INTEGER DEFAULT 0,       -- Task priority when created
+  state_code VARCHAR(2),            -- Optional filter
+  last_run_at TIMESTAMPTZ,          -- When it last ran
+  next_run_at TIMESTAMPTZ,          -- When it's due next
+  last_task_count INTEGER,          -- Tasks created last run
+  last_error TEXT                   -- Error message if failed
+);
+```
+
+### worker_tasks (migration 074)
+
+The task queue. Workers pull from here.
+
+```sql
+CREATE TABLE worker_tasks (
+  id SERIAL PRIMARY KEY,
+  role task_role NOT NULL,          -- What type of work
+  dispensary_id INTEGER,            -- Which store (if applicable)
+  platform VARCHAR(50),             -- Which platform
+  status task_status DEFAULT 'pending',
+  priority INTEGER DEFAULT 0,       -- Higher = process first
+  scheduled_for TIMESTAMP,          -- Don't process before this time
+  worker_id VARCHAR(100),           -- Which worker claimed it
+  claimed_at TIMESTAMP,
+  started_at TIMESTAMP,
+  completed_at TIMESTAMP,
+  last_heartbeat_at TIMESTAMP,      -- For stale detection
+  result JSONB,
+  error_message TEXT,
+  retry_count INTEGER DEFAULT 0,
+  max_retries INTEGER DEFAULT 3
+);
+```
+
+---
+
+## Default Schedules
+
+| Name | Role | Interval | Priority | Description |
+|------|------|----------|----------|-------------|
+| `payload_fetch_all` | payload_fetch | 4 hours | 0 | Fetch payloads from Dutchie API (chains to product_refresh) |
+| `store_discovery_dutchie` | store_discovery | 24 hours | 5 | Find new Dutchie stores |
+| `analytics_refresh` | analytics_refresh | 6 hours | 0 | Refresh materialized views (MVs) |
+
+---
+
+## Task Roles
+
+| Role | Description | Creates Tasks For |
+|------|-------------|-------------------|
+| `payload_fetch` | **NEW** - Fetch from Dutchie API, save to disk | Each dispensary needing crawl |
+| `product_refresh` | **CHANGED** - Read local payload, normalize, upsert to DB | Chained from payload_fetch |
+| `store_discovery` | Find new dispensaries, returns newStoreIds[] | Single task per platform |
+| `entry_point_discovery` | **DEPRECATED** - Resolve platform IDs | No longer used |
+| `product_discovery` | Initial product fetch for new stores | Chained from store_discovery |
+| `analytics_refresh` | Refresh materialized views (MVs) | Single global task |
+
+### Payload/Refresh Separation (2024-12-10)
+
+The crawl workflow is now split into two phases:
+
+```
+payload_fetch (scheduled every 4h)
+  └─► Hit Dutchie GraphQL API
+  └─► Save raw JSON to /storage/payloads/{year}/{month}/{day}/store_{id}_{ts}.json.gz
+  └─► Record metadata in raw_crawl_payloads table
+  └─► Queue product_refresh task with payload_id
+
+product_refresh (chained from payload_fetch)
+  └─► Load payload from filesystem (NOT from API)
+  └─► Normalize via DutchieNormalizer
+  └─► Upsert to store_products
+  └─► Create snapshots
+  └─► Track missing products
+  └─► Download images
+```
+
+**Benefits:**
+- **Retry-friendly**: If normalize fails, re-run product_refresh without re-crawling
+- **Replay-able**: Run product_refresh against any historical payload
+- **Faster refreshes**: Local file read vs network call
+- **Historical diffs**: Compare payloads to see what changed between crawls
+- **Less API pressure**: Only payload_fetch hits Dutchie
+
+---
+
+## Task Chaining
+
+Tasks automatically queue follow-up tasks upon successful completion. This creates two main flows:
+
+### Discovery Flow (New Stores)
+
+When `store_discovery` finds new dispensaries, they automatically get their initial product data:
+
+```
+store_discovery
+  └─► Discovers new locations via Dutchie GraphQL
+  └─► Auto-promotes valid locations to dispensaries table
+  └─► Collects newDispensaryIds[] from promotions
+  └─► Returns { newStoreIds: [...] } in result
+
+chainNextTask() detects newStoreIds
+  └─► Creates product_discovery task for each new store
+
+product_discovery
+  └─► Calls handlePayloadFetch() internally
+  └─► payload_fetch hits Dutchie API
+  └─► Saves raw JSON to /storage/payloads/
+  └─► Queues product_refresh task with payload_id
+
+product_refresh
+  └─► Loads payload from filesystem
+  └─► Normalizes and upserts to store_products
+  └─► Creates snapshots, downloads images
+```
+
+**Complete Discovery Chain:**
+```
+store_discovery → product_discovery → payload_fetch → product_refresh
+                        (internal call)    (queues next)
+```
+
+### Scheduled Flow (Existing Stores)
+
+For existing stores, `payload_fetch_all` schedule runs every 4 hours:
+
+```
+TaskScheduler (every 60s)
+  └─► Checks task_schedules for due schedules
+  └─► payload_fetch_all is due
+  └─► Generates payload_fetch task for each dispensary
+
+payload_fetch
+  └─► Hits Dutchie GraphQL API
+  └─► Saves raw JSON to /storage/payloads/
+  └─► Queues product_refresh task with payload_id
+
+product_refresh
+  └─► Loads payload from filesystem (NOT API)
+  └─► Normalizes via DutchieNormalizer
+  └─► Upserts to store_products
+  └─► Creates snapshots
+```
+
+**Complete Scheduled Chain:**
+```
+payload_fetch → product_refresh
+  (queues)        (reads local)
+```
+
+### Chaining Implementation
+
+Task chaining is handled in three places:
+
+1. **Internal chaining (handler calls handler):**
+   - `product_discovery` calls `handlePayloadFetch()` directly
+
+2. **External chaining (chainNextTask() in task-service.ts):**
+   - Called after task completion
+   - `store_discovery` → queues `product_discovery` for each newStoreId
+
+3. **Queue-based chaining (taskService.createTask):**
+   - `payload_fetch` queues `product_refresh` with `payload: { payload_id }`
+
+---
+
+## Payload API Endpoints
+
+Raw crawl payloads can be accessed via the Payloads API:
+
+| Endpoint | Method | Description |
+|----------|--------|-------------|
+| `GET /api/payloads` | GET | List payload metadata (paginated) |
+| `GET /api/payloads/:id` | GET | Get payload metadata by ID |
+| `GET /api/payloads/:id/data` | GET | Get full payload JSON (decompressed) |
+| `GET /api/payloads/store/:dispensaryId` | GET | List payloads for a store |
+| `GET /api/payloads/store/:dispensaryId/latest` | GET | Get latest payload for a store |
+| `GET /api/payloads/store/:dispensaryId/diff` | GET | Diff two payloads for changes |
+
+### Payload Diff Response
+
+The diff endpoint returns:
+```json
+{
+  "success": true,
+  "from": { "id": 123, "fetchedAt": "...", "productCount": 100 },
+  "to": { "id": 456, "fetchedAt": "...", "productCount": 105 },
+  "diff": {
+    "added": 10,
+    "removed": 5,
+    "priceChanges": 8,
+    "stockChanges": 12
+  },
+  "details": {
+    "added": [...],
+    "removed": [...],
+    "priceChanges": [...],
+    "stockChanges": [...]
+  }
+}
+```
+
+---
+
+## API Endpoints
+
+### Schedules (NEW)
+
+| Endpoint | Method | Description |
+|----------|--------|-------------|
+| `GET /api/schedules` | GET | List all schedules |
+| `PUT /api/schedules/:id` | PUT | Update schedule |
+| `POST /api/schedules/:id/trigger` | POST | Run schedule immediately |
+
+### Task Creation (rewired 2024-12-10)
+
+| Endpoint | Method | Description |
+|----------|--------|-------------|
+| `POST /api/job-queue/enqueue` | POST | Create single task |
+| `POST /api/job-queue/enqueue-batch` | POST | Create batch tasks |
+| `POST /api/job-queue/enqueue-state` | POST | Create tasks for state |
+| `POST /api/tasks` | POST | Direct task creation |
+
+### Task Management
+
+| Endpoint | Method | Description |
+|----------|--------|-------------|
+| `GET /api/tasks` | GET | List tasks |
+| `GET /api/tasks/:id` | GET | Get single task |
+| `GET /api/tasks/counts` | GET | Task counts by status |
+| `POST /api/tasks/recover-stale` | POST | Recover stale tasks |
+
+---
+
+## Key Files
+
+| File | Purpose |
+|------|---------|
+| `src/services/task-scheduler.ts` | **NEW** - DB-driven scheduler |
+| `src/tasks/task-worker.ts` | Worker that processes tasks |
+| `src/tasks/task-service.ts` | Task CRUD operations |
+| `src/tasks/handlers/payload-fetch.ts` | **NEW** - Fetches from API, saves to disk |
+| `src/tasks/handlers/product-refresh.ts` | **CHANGED** - Reads from disk, processes to DB |
+| `src/utils/payload-storage.ts` | **NEW** - Payload save/load utilities |
+| `src/routes/tasks.ts` | Task API endpoints |
+| `src/routes/job-queue.ts` | Job Queue UI endpoints (rewired) |
+| `migrations/079_task_schedules.sql` | Schedule table |
+| `migrations/080_raw_crawl_payloads.sql` | Payload metadata table |
+| `migrations/081_payload_fetch_columns.sql` | payload, last_fetch_at columns |
+| `migrations/074_worker_task_queue.sql` | Task queue table |
+
+---
+
+## Legacy Code (DEPRECATED)
+
+| File | Status | Replacement |
+|------|--------|-------------|
+| `src/services/scheduler.ts` | DEPRECATED | `task-scheduler.ts` |
+| `dispensary_crawl_jobs` table | ORPHANED | `worker_tasks` |
+| `job_schedules` table | LEGACY | `task_schedules` |
+
+---
+
+## Dashboard Integration
+
+Both pages remain wired to the dashboard:
+
+| Page | Data Source | Actions |
+|------|-------------|---------|
+| **Job Queue** | `worker_tasks`, `task_schedules` | Create tasks, view schedules |
+| **Task Queue** | `worker_tasks` | View tasks, recover stale |
+
+---
+
+## Multi-Replica Safety
+
+The scheduler uses `SELECT FOR UPDATE SKIP LOCKED` to ensure:
+
+1. **Only one replica** executes a schedule at a time
+2. **No duplicate tasks** created
+3. **Survives pod restarts** - state in DB, not memory
+4. **Self-healing** - recovers stale tasks on startup
+
+```sql
+-- Row locks make this safe across replicas: rows locked by one replica are skipped by the others
+SELECT * FROM task_schedules
+WHERE enabled = true AND next_run_at <= NOW()
+FOR UPDATE SKIP LOCKED
+```
+
+---
+
+## Worker Scaling (K8s)
+
+Workers run as a StatefulSet in Kubernetes. You can scale from the admin UI or CLI.
+
+### From Admin UI
+
+The Workers page (`/admin/workers`) provides:
+- Current replica count display
+- Scale up/down buttons
+- Target replica input
+
+### API Endpoints
+
+| Endpoint | Method | Description |
+|----------|--------|-------------|
+| `GET /api/workers/k8s/replicas` | GET | Get current/desired replica counts |
+| `POST /api/workers/k8s/scale` | POST | Scale to N replicas (body: `{ replicas: N }`) |
+
+### From CLI
+
+```bash
+# View current replicas
+kubectl get statefulset scraper-worker -n dispensary-scraper
+
+# Scale to 10 workers
+kubectl scale statefulset scraper-worker -n dispensary-scraper --replicas=10
+
+# Scale down to 3 workers
+kubectl scale statefulset scraper-worker -n dispensary-scraper --replicas=3
+```
+
+### Configuration
+
+Environment variables for the API server:
+
+| Variable | Default | Description |
+|----------|---------|-------------|
+| `K8S_NAMESPACE` | `dispensary-scraper` | Kubernetes namespace |
+| `K8S_WORKER_STATEFULSET` | `scraper-worker` | StatefulSet name |
+
+### RBAC Requirements
+
+The API server pod needs these K8s permissions:
+
+```yaml
+apiVersion: rbac.authorization.k8s.io/v1
+kind: Role
+metadata:
+  name: worker-scaler
+  namespace: dispensary-scraper
+rules:
+- apiGroups: ["apps"]
+  resources: ["statefulsets"]
+  verbs: ["get", "patch"]
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: RoleBinding
+metadata:
+  name: scraper-worker-scaler
+  namespace: dispensary-scraper
+subjects:
+- kind: ServiceAccount
+  name: default
+  namespace: dispensary-scraper
+roleRef:
+  kind: Role
+  name: worker-scaler
+  apiGroup: rbac.authorization.k8s.io
+```
--- a/backend/docs/_archive/WORKER_TASK_ARCHITECTURE.md
+++ b/backend/docs/_archive/WORKER_TASK_ARCHITECTURE.md
@@ -0,0 +1,542 @@
+# Worker Task Architecture
+
+This document describes the unified task-based worker system that replaces the legacy fragmented job systems.
+
+## Overview
+
+The task worker architecture provides a single, unified system for managing all background work in CannaiQ:
+
+- **Store discovery** - Find new dispensaries on platforms
+- **Entry point discovery** - Resolve platform IDs from menu URLs
+- **Product discovery** - Initial product fetch for new stores
+- **Product resync** - Regular price/stock updates for existing stores
+- **Analytics refresh** - Refresh materialized views and analytics
+
+## Architecture
+
+### Database Tables
+
+**`worker_tasks`** - Central task queue
+```sql
+CREATE TABLE worker_tasks (
+  id SERIAL PRIMARY KEY,
+  role task_role NOT NULL,           -- What type of work
+  dispensary_id INTEGER,              -- Which store (if applicable)
+  platform VARCHAR(50),               -- Which platform (dutchie, etc.)
+  status task_status DEFAULT 'pending',
+  priority INTEGER DEFAULT 0,         -- Higher = process first
+  scheduled_for TIMESTAMP,            -- Don't process before this time
+  worker_id VARCHAR(100),             -- Which worker claimed it
+  claimed_at TIMESTAMP,
+  started_at TIMESTAMP,
+  completed_at TIMESTAMP,
+  last_heartbeat_at TIMESTAMP,        -- For stale detection
+  result JSONB,                       -- Output from handler
+  error_message TEXT,
+  retry_count INTEGER DEFAULT 0,
+  max_retries INTEGER DEFAULT 3,
+  created_at TIMESTAMP DEFAULT NOW(),
+  updated_at TIMESTAMP DEFAULT NOW()
+);
+```
+
+**Key indexes:**
+- `idx_worker_tasks_pending_priority` - For efficient task claiming
+- `idx_worker_tasks_active_dispensary` - Prevents concurrent tasks per store (partial unique index)
+
+### Task Roles
+
+| Role | Purpose | Per-Store | Scheduled |
+|------|---------|-----------|-----------|
+| `store_discovery` | Find new stores on a platform | No | Daily |
+| `entry_point_discovery` | Resolve platform IDs | Yes | On-demand |
+| `product_discovery` | Initial product fetch | Yes | After entry_point |
+| `product_resync` | Price/stock updates | Yes | Every 4 hours |
+| `analytics_refresh` | Refresh MVs | No | Daily |
+
+### Task Lifecycle
+
+```
+pending → claimed → running → completed
+                  ↓
+                failed
+```
+
+1. **pending** - Task is waiting to be picked up
+2. **claimed** - Worker has claimed it (atomic via SELECT FOR UPDATE SKIP LOCKED)
+3. **running** - Worker is actively processing
+4. **completed** - Task finished successfully
+5. **failed** - Task encountered an error
+6. **stale** - Task lost its worker (recovered automatically)
+
+## Files
+
+### Core Files
+
+| File | Purpose |
+|------|---------|
+| `src/tasks/task-service.ts` | TaskService - CRUD, claiming, capacity metrics |
+| `src/tasks/task-worker.ts` | TaskWorker - Main worker loop |
+| `src/tasks/index.ts` | Module exports |
+| `src/routes/tasks.ts` | API endpoints |
+| `migrations/074_worker_task_queue.sql` | Database schema |
+
+### Task Handlers
+
+| File | Role |
+|------|------|
+| `src/tasks/handlers/store-discovery.ts` | `store_discovery` |
+| `src/tasks/handlers/entry-point-discovery.ts` | `entry_point_discovery` |
+| `src/tasks/handlers/product-discovery.ts` | `product_discovery` |
+| `src/tasks/handlers/product-resync.ts` | `product_resync` |
+| `src/tasks/handlers/analytics-refresh.ts` | `analytics_refresh` |
+
+## Running Workers
+
+### Environment Variables
+
+| Variable | Default | Description |
+|----------|---------|-------------|
+| `WORKER_ROLE` | (required) | Which task role to process |
+| `WORKER_ID` | auto-generated | Custom worker identifier |
+| `POLL_INTERVAL_MS` | 5000 | How often to check for tasks |
+| `HEARTBEAT_INTERVAL_MS` | 30000 | How often to update heartbeat |
+
+### Starting a Worker
+
+```bash
+# Start a product resync worker
+WORKER_ROLE=product_resync npx tsx src/tasks/task-worker.ts
+
+# Start with custom ID
+WORKER_ROLE=product_resync WORKER_ID=resync-1 npx tsx src/tasks/task-worker.ts
+
+# Start multiple workers for different roles
+WORKER_ROLE=store_discovery npx tsx src/tasks/task-worker.ts &
+WORKER_ROLE=product_resync npx tsx src/tasks/task-worker.ts &
+```
+
+### Kubernetes Deployment
+
+```yaml
+# Example Deployment running three workers dedicated to the product_resync role.
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: task-worker-resync
+spec:
+  replicas: 3
+  # apps/v1 requires a selector, and it must match the pod template labels
+  selector:
+    matchLabels:
+      app: task-worker-resync
+  template:
+    metadata:
+      labels:
+        app: task-worker-resync
+    spec:
+      containers:
+      - name: worker
+        image: code.cannabrands.app/creationshop/dispensary-scraper:latest
+        command: ["npx", "tsx", "src/tasks/task-worker.ts"]
+        env:
+        # Selects which task role this worker processes
+        - name: WORKER_ROLE
+          value: "product_resync"
+```
+
+## API Endpoints
+
+### Task Management
+
+| Endpoint | Method | Description |
+|----------|--------|-------------|
+| `/api/tasks` | GET | List tasks with filters |
+| `/api/tasks` | POST | Create a new task |
+| `/api/tasks/:id` | GET | Get task by ID |
+| `/api/tasks/counts` | GET | Get counts by status |
+| `/api/tasks/capacity` | GET | Get capacity metrics |
+| `/api/tasks/capacity/:role` | GET | Get role-specific capacity |
+| `/api/tasks/recover-stale` | POST | Recover tasks from dead workers |
+
+### Task Generation
+
+| Endpoint | Method | Description |
+|----------|--------|-------------|
+| `/api/tasks/generate/resync` | POST | Generate daily resync tasks |
+| `/api/tasks/generate/discovery` | POST | Create store discovery task |
+
+### Migration (from legacy systems)
+
+| Endpoint | Method | Description |
+|----------|--------|-------------|
+| `/api/tasks/migration/status` | GET | Compare old vs new systems |
+| `/api/tasks/migration/disable-old-schedules` | POST | Disable job_schedules |
+| `/api/tasks/migration/cancel-pending-crawl-jobs` | POST | Cancel old crawl jobs |
+| `/api/tasks/migration/create-resync-tasks` | POST | Create tasks for all stores |
+| `/api/tasks/migration/full-migrate` | POST | One-click migration |
+
+### Role-Specific Endpoints
+
+| Endpoint | Method | Description |
+|----------|--------|-------------|
+| `/api/tasks/role/:role/last-completion` | GET | Last completion time |
+| `/api/tasks/role/:role/recent` | GET | Recent completions |
+| `/api/tasks/store/:id/active` | GET | Check if store has active task |
+
+## Capacity Planning
+
+The `v_worker_capacity` view provides real-time metrics:
+
+```sql
+SELECT * FROM v_worker_capacity;
+```
+
+Returns:
+- `pending_tasks` - Tasks waiting to be claimed
+- `ready_tasks` - Tasks ready now (scheduled_for is null or past)
+- `claimed_tasks` - Tasks claimed but not started
+- `running_tasks` - Tasks actively processing
+- `completed_last_hour` - Recent completions
+- `failed_last_hour` - Recent failures
+- `active_workers` - Workers with recent heartbeats
+- `avg_duration_sec` - Average task duration
+- `tasks_per_worker_hour` - Throughput estimate
+- `estimated_hours_to_drain` - Time to clear queue
+
+### Scaling Recommendations
+
+```javascript
+// API: GET /api/tasks/capacity/:role
+{
+  "role": "product_resync",
+  "pending_tasks": 500,
+  "active_workers": 3,
+  "workers_needed": {
+    "for_1_hour": 10,
+    "for_4_hours": 3,
+    "for_8_hours": 2
+  }
+}
+```
+
+## Task Chaining
+
+Tasks can automatically create follow-up tasks:
+
+```
+store_discovery → entry_point_discovery → product_discovery
+                              ↓
+                     (store has platform_dispensary_id)
+                              ↓
+                     Daily resync tasks
+```
+
+The `chainNextTask()` method handles this automatically.
+
+## Stale Task Recovery
+
+Tasks are considered stale if `last_heartbeat_at` is older than the threshold (default 10 minutes).
+
+```sql
+SELECT recover_stale_tasks(10); -- 10 minute threshold
+```
+
+Or via API:
+```bash
+curl -X POST /api/tasks/recover-stale \
+  -H 'Content-Type: application/json' \
+  -d '{"threshold_minutes": 10}'
+```
+
+## Migration from Legacy Systems
+
+### Legacy Systems Replaced
+
+1. **job_schedules + job_run_logs** - Scheduled job definitions
+2. **dispensary_crawl_jobs** - Per-dispensary crawl queue
+3. **SyncOrchestrator + HydrationWorker** - Raw payload processing
+
+### Migration Steps
+
+**Option 1: One-Click Migration**
+```bash
+curl -X POST /api/tasks/migration/full-migrate
+```
+
+This will:
+1. Disable all job_schedules
+2. Cancel pending dispensary_crawl_jobs
+3. Generate resync tasks for all stores
+4. Create discovery and analytics tasks
+
+**Option 2: Manual Migration**
+```bash
+# 1. Check current status
+curl /api/tasks/migration/status
+
+# 2. Disable old schedules
+curl -X POST /api/tasks/migration/disable-old-schedules
+
+# 3. Cancel pending crawl jobs
+curl -X POST /api/tasks/migration/cancel-pending-crawl-jobs
+
+# 4. Create resync tasks
+curl -X POST /api/tasks/migration/create-resync-tasks \
+  -H 'Content-Type: application/json' \
+  -d '{"state_code": "AZ"}'
+
+# 5. Generate daily resync schedule
+curl -X POST /api/tasks/generate/resync \
+  -H 'Content-Type: application/json' \
+  -d '{"batches_per_day": 6}'
+```
+
+## Per-Store Locking
+
+The system prevents concurrent tasks for the same store using a partial unique index:
+
+```sql
+CREATE UNIQUE INDEX idx_worker_tasks_active_dispensary
+ON worker_tasks (dispensary_id)
+WHERE dispensary_id IS NOT NULL
+AND status IN ('claimed', 'running');
+```
+
+This ensures only one task can be active per store at any time.
+
+## Task Priority
+
+Tasks are claimed in priority order (higher first), then by creation time:
+
+```sql
+ORDER BY priority DESC, created_at ASC
+```
+
+Default priorities:
+- `store_discovery`: 0
+- `entry_point_discovery`: 10 (high - new stores)
+- `product_discovery`: 10 (high - new stores)
+- `product_resync`: 0
+- `analytics_refresh`: 0
+
+## Scheduled Tasks
+
+Tasks can be scheduled for future execution:
+
+```javascript
+await taskService.createTask({
+  role: 'product_resync',
+  dispensary_id: 123,
+  scheduled_for: new Date('2025-01-10T06:00:00Z'),
+});
+```
+
+The `generate_resync_tasks()` function creates staggered tasks throughout the day:
+
+```sql
+SELECT generate_resync_tasks(6, '2025-01-10'); -- 6 batches = every 4 hours
+```
+
+## Dashboard Integration
+
+The admin dashboard shows task queue status in the main overview:
+
+```
+Task Queue Summary
+------------------
+Pending:   45
+Running:   3
+Completed: 1,234
+Failed:    12
+```
+
+Full task management is available at `/admin/tasks`.
+
+## Error Handling
+
+Failed tasks include the error message in `error_message` and can be retried:
+
+```sql
+-- View failed tasks
+SELECT id, role, dispensary_id, error_message, retry_count
+FROM worker_tasks
+WHERE status = 'failed'
+ORDER BY completed_at DESC
+LIMIT 20;
+
+-- Retry failed tasks
+UPDATE worker_tasks
+SET status = 'pending', retry_count = retry_count + 1
+WHERE status = 'failed' AND retry_count < max_retries;
+```
+
+## Concurrent Task Processing (Added 2024-12)
+
+Workers can now process multiple tasks concurrently within a single worker instance. This improves throughput by utilizing async I/O efficiently.
+
+### Architecture
+
+```
+┌─────────────────────────────────────────────────────────────┐
+│                         Pod (K8s)                           │
+│                                                             │
+│  ┌─────────────────────────────────────────────────────┐   │
+│  │                    TaskWorker                        │   │
+│  │                                                      │   │
+│  │  ┌─────────┐  ┌─────────┐  ┌─────────┐             │   │
+│  │  │ Task 1  │  │ Task 2  │  │ Task 3  │  (concurrent)│   │
+│  │  └─────────┘  └─────────┘  └─────────┘             │   │
+│  │                                                      │   │
+│  │  Resource Monitor                                    │   │
+│  │  ├── Memory: 65% (threshold: 85%)                   │   │
+│  │  ├── CPU: 45% (threshold: 90%)                      │   │
+│  │  └── Status: Normal                                  │   │
+│  └─────────────────────────────────────────────────────┘   │
+└─────────────────────────────────────────────────────────────┘
+```
+
+### Environment Variables
+
+| Variable | Default | Description |
+|----------|---------|-------------|
+| `MAX_CONCURRENT_TASKS` | 3 | Maximum tasks a worker will run concurrently |
+| `MEMORY_BACKOFF_THRESHOLD` | 0.85 | Back off when heap memory exceeds 85% |
+| `CPU_BACKOFF_THRESHOLD` | 0.90 | Back off when CPU exceeds 90% |
+| `BACKOFF_DURATION_MS` | 10000 | How long to wait when backing off (10s) |
+
+### How It Works
+
+1. **Main Loop**: Worker continuously tries to fill up to `MAX_CONCURRENT_TASKS`
+2. **Resource Monitoring**: Before claiming a new task, worker checks memory and CPU
+3. **Backoff**: If resources exceed thresholds, worker pauses and stops claiming new tasks
+4. **Concurrent Execution**: Tasks run concurrently as independent Promises on the Node.js event loop - a task waiting on I/O doesn't block the others
+5. **Graceful Shutdown**: On SIGTERM/decommission, worker stops claiming but waits for active tasks
+
+### Resource Monitoring
+
+```typescript
+// ResourceStats interface
+interface ResourceStats {
+  memoryPercent: number;    // Current heap usage as decimal (0.0-1.0)
+  memoryMb: number;         // Current heap used in MB
+  memoryTotalMb: number;    // Total heap available in MB
+  cpuPercent: number;       // CPU usage as percentage (0-100)
+  isBackingOff: boolean;    // True if worker is in backoff state
+  backoffReason: string;    // Why the worker is backing off
+}
+```
+
+### Heartbeat Data
+
+Workers report the following in their heartbeat:
+
+```json
+{
+  "worker_id": "worker-abc123",
+  "current_task_id": 456,
+  "current_task_ids": [456, 457, 458],
+  "active_task_count": 3,
+  "max_concurrent_tasks": 3,
+  "status": "active",
+  "resources": {
+    "memory_mb": 256,
+    "memory_total_mb": 512,
+    "memory_rss_mb": 320,
+    "memory_percent": 50,
+    "cpu_user_ms": 12500,
+    "cpu_system_ms": 3200,
+    "cpu_percent": 45,
+    "is_backing_off": false,
+    "backoff_reason": null
+  }
+}
+```
+
+### Backoff Behavior
+
+When resources exceed thresholds:
+
+1. Worker logs the backoff reason:
+   ```
+   [TaskWorker] MyWorker backing off: Memory at 87.3% (threshold: 85%)
+   ```
+
+2. Worker stops claiming new tasks but continues existing tasks
+
+3. After `BACKOFF_DURATION_MS`, worker rechecks resources
+
+4. When resources return to normal:
+   ```
+   [TaskWorker] MyWorker resuming normal operation
+   ```
+
+### UI Display
+
+The Workers Dashboard shows:
+
+- **Tasks Column**: `2/3 tasks` (active/max concurrent)
+- **Resources Column**: Memory % and CPU % with color coding
+  - Green: < 50%
+  - Yellow: 50-74%
+  - Amber: 75-89%
+  - Red: 90%+
+- **Backing Off**: Orange warning badge when worker is in backoff state
+
+### Task Count Badge Details
+
+```
+┌─────────────────────────────────────────────┐
+│ Worker: "MyWorker"                          │
+│ Tasks: 2/3 tasks  #456, #457                │
+│ Resources: 🧠 65%  💻 45%                    │
+│ Status: ● Active                            │
+└─────────────────────────────────────────────┘
+```
+
+### Best Practices
+
+1. **Start Conservative**: Use `MAX_CONCURRENT_TASKS=3` initially
+2. **Monitor Resources**: Watch for frequent backoffs in logs
+3. **Tune Per Workload**: I/O-bound tasks benefit from higher concurrency
+4. **Scale Horizontally**: Add more pods rather than cranking concurrency too high
+
+### Code References
+
+| File | Purpose |
+|------|---------|
+| `src/tasks/task-worker.ts:68-71` | Concurrency environment variables |
+| `src/tasks/task-worker.ts:104-111` | ResourceStats interface |
+| `src/tasks/task-worker.ts:149-179` | getResourceStats() method |
+| `src/tasks/task-worker.ts:184-196` | shouldBackOff() method |
+| `src/tasks/task-worker.ts:462-516` | mainLoop() with concurrent claiming |
+| `src/routes/worker-registry.ts:148-195` | Heartbeat endpoint handling |
+| `cannaiq/src/pages/WorkersDashboard.tsx:233-305` | UI components for resources |
+
+## Monitoring
+
+### Logs
+
+Workers log to stdout:
+```
+[TaskWorker] Starting worker worker-product_resync-a1b2c3d4 for role: product_resync
+[TaskWorker] Claimed task 123 (product_resync) for dispensary 456
+[TaskWorker] Task 123 completed successfully
+```
+
+### Health Check
+
+Check if workers are active:
+```sql
+SELECT worker_id, role, COUNT(*), MAX(last_heartbeat_at)
+FROM worker_tasks
+WHERE last_heartbeat_at > NOW() - INTERVAL '5 minutes'
+GROUP BY worker_id, role;
+```
+
+### Metrics
+
+```sql
+-- Tasks by status
+SELECT status, COUNT(*) FROM worker_tasks GROUP BY status;
+
+-- Tasks by role
+SELECT role, status, COUNT(*) FROM worker_tasks GROUP BY role, status;
+
+-- Average duration by role
+SELECT role, AVG(EXTRACT(EPOCH FROM (completed_at - started_at))) as avg_seconds
+FROM worker_tasks
+WHERE status = 'completed' AND completed_at > NOW() - INTERVAL '24 hours'
+GROUP BY role;
+```
--- a/backend/k8s/cronjob-ip2location.yaml
+++ b/backend/k8s/cronjob-ip2location.yaml
@@ -0,0 +1,69 @@
+# Monthly job that downloads the IP2Location LITE DB5 database and publishes
+# it on the ip2location-pvc volume as /data/IP2LOCATION-LITE-DB5.BIN.
+apiVersion: batch/v1
+kind: CronJob
+metadata:
+  name: ip2location-update
+  namespace: default
+spec:
+  # Run on the 1st of every month at 3am UTC
+  schedule: "0 3 1 * *"
+  # Never start a new run while the previous one is still going
+  concurrencyPolicy: Forbid
+  successfulJobsHistoryLimit: 3
+  failedJobsHistoryLimit: 3
+  jobTemplate:
+    spec:
+      template:
+        spec:
+          containers:
+          - name: ip2location-updater
+            image: curlimages/curl:latest
+            command:
+            - /bin/sh
+            - -c
+            - |
+              set -e
+              echo "Downloading IP2Location LITE DB5..."
+
+              # Download to temp. --fail makes curl exit non-zero on an HTTP
+              # error instead of saving the error page as the archive.
+              cd /tmp
+              curl --fail -L -o ip2location.zip "https://www.ip2location.com/download/?token=${IP2LOCATION_TOKEN}&file=DB5LITEBIN"
+
+              # Extract
+              unzip -o ip2location.zip
+
+              # Find and copy the BIN file
+              BIN_FILE=$(ls *.BIN 2>/dev/null | head -1)
+              if [ -z "$BIN_FILE" ]; then
+                echo "ERROR: No BIN file found"
+                exit 1
+              fi
+
+              # Copy to shared volume under a temporary name, then rename.
+              # The rename happens inside one filesystem, so anything reading
+              # the database never sees a partially written file.
+              cp "$BIN_FILE" /data/IP2LOCATION-LITE-DB5.BIN.tmp
+              mv /data/IP2LOCATION-LITE-DB5.BIN.tmp /data/IP2LOCATION-LITE-DB5.BIN
+
+              echo "Done! Database updated: /data/IP2LOCATION-LITE-DB5.BIN"
+            env:
+            # Download token, read from the backend secret
+            - name: IP2LOCATION_TOKEN
+              valueFrom:
+                secretKeyRef:
+                  name: dutchie-backend-secret
+                  key: IP2LOCATION_TOKEN
+            volumeMounts:
+            - name: ip2location-data
+              mountPath: /data
+          restartPolicy: OnFailure
+          volumes:
+          - name: ip2location-data
+            persistentVolumeClaim:
+              claimName: ip2location-pvc
+---
+# Claim for the volume that stores the IP2Location database file written by
+# the ip2location-update CronJob.
+# NOTE(review): ReadWriteOnce lets the volume be mounted read-write by a
+# single node only. Every pod that mounts this claim has to be scheduled on
+# that node - confirm the storage class and scheduling allow for this.
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: ip2location-pvc
+  namespace: default
+spec:
+  accessModes:
+    - ReadWriteOnce
+  resources:
+    requests:
+      storage: 100Mi
--- a/backend/k8s/deployment.yaml
+++ b/backend/k8s/deployment.yaml
@@ -26,6 +26,12 @@ spec:
            name: dutchie-backend-config
        - secretRef:
            name: dutchie-backend-secret
+        env:
+        - name: IP2LOCATION_DB_PATH
+          value: /data/ip2location/IP2LOCATION-LITE-DB5.BIN
+        volumeMounts:
+        - name: ip2location-data
+          mountPath: /data/ip2location
        resources:
          requests:
            memory: "256Mi"
@@ -45,3 +51,7 @@ spec:
            port: 3010
          initialDelaySeconds: 5
          periodSeconds: 5
+      volumes:
+      - name: ip2location-data
+        persistentVolumeClaim:
+          claimName: ip2location-pvc
--- a/backend/k8s/scraper-worker-statefulset.yaml
+++ b/backend/k8s/scraper-worker-statefulset.yaml
@@ -0,0 +1,77 @@
+# Governing Service for the scraper-worker StatefulSet (referenced by its
+# serviceName). It selects the worker pods by the app=scraper-worker label.
+apiVersion: v1
+kind: Service
+metadata:
+  name: scraper-worker
+  namespace: dispensary-scraper
+  labels:
+    app: scraper-worker
+spec:
+  clusterIP: None  # Headless service required for StatefulSet
+  selector:
+    app: scraper-worker
+  ports:
+  - port: 3010
+    name: http
+---
+# Task workers, run as a StatefulSet so each pod keeps a stable name
+# (exposed to the process as POD_NAME).
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+  name: scraper-worker
+  namespace: dispensary-scraper
+spec:
+  serviceName: scraper-worker
+  replicas: 8
+  podManagementPolicy: Parallel  # Start all pods at once
+  updateStrategy:
+    type: OnDelete  # Pods only update when manually deleted - no automatic restarts
+  selector:
+    matchLabels:
+      app: scraper-worker
+  template:
+    metadata:
+      labels:
+        app: scraper-worker
+    spec:
+      # Time allowed between SIGTERM and SIGKILL when a pod is stopped
+      terminationGracePeriodSeconds: 60
+      imagePullSecrets:
+      - name: regcred
+      containers:
+      - name: worker
+        image: code.cannabrands.app/creationshop/dispensary-scraper:latest
+        imagePullPolicy: Always
+        command: ["node"]
+        args: ["dist/tasks/task-worker.js"]
+        env:
+        - name: WORKER_MODE
+          value: "true"
+        - name: POD_NAME
+          valueFrom:
+            fieldRef:
+              fieldPath: metadata.name
+        - name: MAX_CONCURRENT_TASKS
+          value: "50"
+        - name: API_BASE_URL
+          value: http://scraper
+        # V8 old-space heap cap in MB, kept below the 2Gi memory limit
+        - name: NODE_OPTIONS
+          value: --max-old-space-size=1500
+        envFrom:
+        - configMapRef:
+            name: scraper-config
+        - secretRef:
+            name: scraper-secrets
+        resources:
+          requests:
+            cpu: 100m
+            memory: 1Gi
+          limits:
+            cpu: 500m
+            memory: 2Gi
+        livenessProbe:
+          exec:
+            command:
+            - /bin/sh
+            - -c
+            # The [t] bracket keeps the pattern from matching this probe's own
+            # "sh -c ..." command line, which also contains the search text;
+            # without it the probe can pass even when the worker is gone.
+            - pgrep -f '[t]ask-worker' > /dev/null
+          initialDelaySeconds: 10
+          periodSeconds: 30
+          failureThreshold: 3
--- a/backend/migrations/051_worker_definitions.sql
+++ b/backend/migrations/051_worker_definitions.sql
@@ -0,0 +1,119 @@
+-- Migration 051: Worker Definitions
+-- Creates a dedicated workers table for named workers with roles and assignments
+
+-- Workers table - defines named workers with roles
+-- One row per named worker: its schedule, assignment scope, job
+-- configuration, last-run status and running totals.
+CREATE TABLE IF NOT EXISTS workers (
+  id SERIAL PRIMARY KEY,
+  name VARCHAR(100) NOT NULL UNIQUE, -- conflict target of the job_schedules import below
+  role VARCHAR(100) NOT NULL,
+  description TEXT,
+  enabled BOOLEAN DEFAULT TRUE,
+
+  -- Schedule configuration (for dedicated crawl workers)
+  schedule_type VARCHAR(50) DEFAULT 'interval', -- 'interval', 'cron', 'manual'
+  interval_minutes INTEGER DEFAULT 240, -- 240 minutes = 4 hours
+  cron_expression VARCHAR(100), -- e.g., '0 */4 * * *'
+  jitter_minutes INTEGER DEFAULT 30,
+
+  -- Assignment scope
+  assignment_type VARCHAR(50) DEFAULT 'all', -- 'all', 'state', 'dispensary', 'chain'
+  assigned_state_codes TEXT[], -- e.g., ARRAY['AZ', 'CA']
+  assigned_dispensary_ids INTEGER[],
+  assigned_chain_ids INTEGER[],
+
+  -- Job configuration
+  job_type VARCHAR(50) NOT NULL DEFAULT 'dutchie_product_crawl',
+  job_config JSONB DEFAULT '{}',
+  priority INTEGER DEFAULT 0,
+  max_concurrent INTEGER DEFAULT 1,
+
+  -- Status tracking
+  status VARCHAR(50) DEFAULT 'idle', -- 'idle', 'running', 'paused', 'error'
+  last_run_at TIMESTAMPTZ,
+  last_status VARCHAR(50),
+  last_error TEXT,
+  last_duration_ms INTEGER,
+  next_run_at TIMESTAMPTZ,
+  current_job_id INTEGER, -- plain integer: no foreign key is declared here
+
+  -- Metrics
+  total_runs INTEGER DEFAULT 0,
+  successful_runs INTEGER DEFAULT 0,
+  failed_runs INTEGER DEFAULT 0,
+  avg_duration_ms INTEGER,
+
+  created_at TIMESTAMPTZ DEFAULT NOW(),
+  updated_at TIMESTAMPTZ DEFAULT NOW()
+);
+
+-- Worker run history
+-- One row per run of a worker. Rows are removed together with their worker
+-- (ON DELETE CASCADE).
+CREATE TABLE IF NOT EXISTS worker_runs (
+  id SERIAL PRIMARY KEY,
+  worker_id INTEGER NOT NULL REFERENCES workers(id) ON DELETE CASCADE,
+  started_at TIMESTAMPTZ DEFAULT NOW(),
+  completed_at TIMESTAMPTZ,
+  status VARCHAR(50) DEFAULT 'running', -- 'running', 'success', 'error', 'cancelled'
+  duration_ms INTEGER,
+
+  -- What was processed
+  jobs_created INTEGER DEFAULT 0,
+  jobs_completed INTEGER DEFAULT 0,
+  jobs_failed INTEGER DEFAULT 0,
+  dispensaries_crawled INTEGER DEFAULT 0,
+  products_found INTEGER DEFAULT 0,
+
+  error_message TEXT,
+  metadata JSONB DEFAULT '{}',
+
+  created_at TIMESTAMPTZ DEFAULT NOW()
+);
+
+-- Index for efficient lookups
+-- The first two are partial indexes that only cover enabled workers.
+CREATE INDEX IF NOT EXISTS idx_workers_enabled ON workers(enabled) WHERE enabled = TRUE;
+CREATE INDEX IF NOT EXISTS idx_workers_next_run ON workers(next_run_at) WHERE enabled = TRUE;
+CREATE INDEX IF NOT EXISTS idx_workers_status ON workers(status);
+CREATE INDEX IF NOT EXISTS idx_worker_runs_worker_id ON worker_runs(worker_id);
+-- Descending so the most recent runs come first
+CREATE INDEX IF NOT EXISTS idx_worker_runs_started_at ON worker_runs(started_at DESC);
+
+-- Add assigned_worker_id to dispensary_crawl_jobs if not exists.
+-- ADD COLUMN IF NOT EXISTS checks the table being altered itself; a lookup in
+-- information_schema.columns by table name alone could be satisfied by a
+-- same-named table in another schema and wrongly skip the column.
+ALTER TABLE dispensary_crawl_jobs
+  ADD COLUMN IF NOT EXISTS assigned_worker_id INTEGER REFERENCES workers(id);
+
+-- Migrate existing job_schedules workers to new workers table
+-- Copies every job_schedules row that has a worker_name. When a worker with
+-- that name already exists only updated_at is bumped; its other columns are
+-- left untouched.
+-- NOTE(review): assumes worker_name is unique within job_schedules and that
+-- worker_role is never NULL for those rows (workers.role is NOT NULL) - verify
+-- against the job_schedules schema.
+INSERT INTO workers (name, role, description, enabled, interval_minutes, jitter_minutes, job_type, job_config, last_run_at, last_status, last_error, last_duration_ms, next_run_at)
+SELECT
+  worker_name,
+  worker_role,
+  description,
+  enabled,
+  base_interval_minutes,
+  jitter_minutes,
+  job_name,
+  job_config,
+  last_run_at,
+  last_status,
+  last_error_message,
+  last_duration_ms,
+  next_run_at
+FROM job_schedules
+WHERE worker_name IS NOT NULL
+ON CONFLICT (name) DO UPDATE SET
+  updated_at = NOW();
+
+-- Available worker roles (reference)
+-- Stored as the table comment; the role column itself is free-form VARCHAR.
+COMMENT ON TABLE workers IS 'Named workers with specific roles and assignments. Roles include:
+- product_sync: Crawls products from dispensary menus
+- store_discovery: Discovers new dispensary locations
+- entry_point_finder: Detects menu providers and resolves platform IDs
+- analytics_refresh: Refreshes materialized views and analytics
+- price_monitor: Monitors price changes and triggers alerts
+- inventory_sync: Syncs inventory levels
+- image_processor: Downloads and processes product images
+- data_validator: Validates data integrity';
--- a/backend/migrations/052_seo_settings.sql
+++ b/backend/migrations/052_seo_settings.sql
@@ -0,0 +1,49 @@
+-- Migration 052: SEO Settings Table
+-- Key/value store for SEO Orchestrator configuration
+
+-- One row per setting: a unique text key mapped to a JSONB value.
+CREATE TABLE IF NOT EXISTS seo_settings (
+  id SERIAL PRIMARY KEY,
+  key TEXT UNIQUE NOT NULL,
+  value JSONB NOT NULL,
+  created_at TIMESTAMP DEFAULT NOW(),
+  updated_at TIMESTAMP DEFAULT NOW()
+);
+
+-- Create index on key for fast lookups
+-- NOTE(review): the UNIQUE constraint on key already creates an index on this
+-- column, so this one looks redundant - confirm before keeping both.
+CREATE INDEX IF NOT EXISTS idx_seo_settings_key ON seo_settings(key);
+
+-- Seed with default settings
+-- Each value is a JSONB literal: text values carry their own double quotes
+-- inside the SQL string, booleans and numbers are written bare.
+-- ON CONFLICT DO NOTHING leaves any key that already exists unchanged, so
+-- re-running the migration does not overwrite edited settings.
+INSERT INTO seo_settings (key, value) VALUES
+  -- Section 1: Global Content Generation Settings
+  ('primary_prompt_template', '"You are a cannabis industry content expert. Generate SEO-optimized content for {{page_type}} pages about {{subject}}. Focus on: {{focus_areas}}. Maintain a {{tone}} tone and keep content {{length}}."'),
+  ('regeneration_prompt_template', '"Regenerate the following SEO content with fresh perspectives. Original topic: {{subject}}. Improve upon: {{improvement_areas}}. Maintain compliance with cannabis industry standards."'),
+  ('default_content_length', '"medium"'),
+  ('tone_voice', '"informational"'),
+
+  -- Section 2: Automatic Refresh Rules
+  ('auto_refresh_interval', '"weekly"'),
+  ('trigger_pct_product_change', 'true'),
+  ('trigger_pct_brand_change', 'true'),
+  ('trigger_new_stores', 'true'),
+  ('trigger_market_shift', 'false'),
+  ('webhook_url', '""'),
+  ('notify_on_trigger', 'false'),
+
+  -- Section 3: Page-Level Defaults
+  ('default_title_template', '"{{state_name}} Dispensaries | Find Cannabis Near You | CannaiQ"'),
+  ('default_meta_description_template', '"Discover the best dispensaries in {{state_name}}. Browse {{dispensary_count}}+ licensed retailers, compare prices, and find cannabis products near you."'),
+  ('default_slug_template', '"dispensaries-{{state_code_lower}}"'),
+  ('default_og_image_template', '"/images/seo/og-{{state_code_lower}}.jpg"'),
+  ('enable_ai_images', 'false'),
+
+  -- Section 4: Crawl / Dataset Configuration
+  ('primary_data_provider', '"cannaiq"'),
+  ('fallback_data_provider', '"dutchie"'),
+  ('min_data_freshness_hours', '24'),
+  ('stale_data_behavior', '"allow_with_warning"')
+ON CONFLICT (key) DO NOTHING;
+
+-- Record migration
+-- Skipped when version 052 is already recorded.
+INSERT INTO schema_migrations (version, name, applied_at)
+VALUES ('052', 'seo_settings', NOW())
+ON CONFLICT (version) DO NOTHING;
--- a/backend/migrations/066_dutchie_field_alignment.sql
+++ b/backend/migrations/066_dutchie_field_alignment.sql
@@ -0,0 +1,140 @@
+-- Migration 066: Align dispensaries and discovery_locations tables with Dutchie field names
+-- Uses snake_case convention (Postgres standard) mapped from Dutchie's camelCase
+--
+-- Changes:
+-- 1. dispensaries: rename address→address1, zip→zipcode, remove company_name
+-- 2. dispensaries: add missing Dutchie fields
+-- 3. dutchie_discovery_locations: add missing Dutchie fields
+
+-- ============================================================================
+-- DISPENSARIES TABLE
+-- ============================================================================
+
+-- Rename address to address1 (matches Dutchie's address1) and zip to zipcode
+-- (matches Dutchie's zip, but we use zipcode for clarity).
+-- RENAME COLUMN has no IF EXISTS form, so each rename is guarded: it runs only
+-- while the old column is still present and the new one is not. That keeps
+-- this migration re-runnable, like its IF [NOT] EXISTS statements.
+DO $$
+BEGIN
+  IF EXISTS (
+    SELECT 1 FROM information_schema.columns
+    WHERE table_schema = current_schema()
+      AND table_name = 'dispensaries' AND column_name = 'address'
+  ) AND NOT EXISTS (
+    SELECT 1 FROM information_schema.columns
+    WHERE table_schema = current_schema()
+      AND table_name = 'dispensaries' AND column_name = 'address1'
+  ) THEN
+    ALTER TABLE dispensaries RENAME COLUMN address TO address1;
+  END IF;
+
+  IF EXISTS (
+    SELECT 1 FROM information_schema.columns
+    WHERE table_schema = current_schema()
+      AND table_name = 'dispensaries' AND column_name = 'zip'
+  ) AND NOT EXISTS (
+    SELECT 1 FROM information_schema.columns
+    WHERE table_schema = current_schema()
+      AND table_name = 'dispensaries' AND column_name = 'zipcode'
+  ) THEN
+    ALTER TABLE dispensaries RENAME COLUMN zip TO zipcode;
+  END IF;
+END $$;
+
+-- Drop company_name (redundant with name)
+ALTER TABLE dispensaries DROP COLUMN IF EXISTS company_name;
+
+-- Add the missing Dutchie fields in one statement. Each clause is skipped
+-- when its column already exists. The trailing note on a line gives the
+-- Dutchie source field.
+ALTER TABLE dispensaries
+  ADD COLUMN IF NOT EXISTS address2 VARCHAR(255),
+  ADD COLUMN IF NOT EXISTS country VARCHAR(100) DEFAULT 'United States',
+  ADD COLUMN IF NOT EXISTS timezone VARCHAR(50),
+  ADD COLUMN IF NOT EXISTS email VARCHAR(255),
+  ADD COLUMN IF NOT EXISTS description TEXT,
+  ADD COLUMN IF NOT EXISTS logo_image TEXT,                             -- logoImage
+  ADD COLUMN IF NOT EXISTS banner_image TEXT,                           -- bannerImage
+  ADD COLUMN IF NOT EXISTS offer_pickup BOOLEAN DEFAULT TRUE,           -- offerPickup
+  ADD COLUMN IF NOT EXISTS offer_delivery BOOLEAN DEFAULT FALSE,        -- offerDelivery
+  ADD COLUMN IF NOT EXISTS offer_curbside_pickup BOOLEAN DEFAULT FALSE, -- offerCurbsidePickup
+  ADD COLUMN IF NOT EXISTS is_medical BOOLEAN DEFAULT FALSE,            -- isMedical
+  ADD COLUMN IF NOT EXISTS is_recreational BOOLEAN DEFAULT FALSE,       -- isRecreational
+  ADD COLUMN IF NOT EXISTS chain_slug VARCHAR(255),                     -- chain
+  ADD COLUMN IF NOT EXISTS enterprise_id VARCHAR(100),                  -- retailer.enterpriseId
+  ADD COLUMN IF NOT EXISTS status VARCHAR(50),                          -- status (open/closed)
+  ADD COLUMN IF NOT EXISTS c_name VARCHAR(255);                         -- cName, the URL slug used in embedded menus
+
+-- ============================================================================
+-- DUTCHIE_DISCOVERY_LOCATIONS TABLE
+-- ============================================================================
+
+-- Add the missing Dutchie fields in one statement. Each clause is skipped
+-- when its column already exists.
+ALTER TABLE dutchie_discovery_locations
+  ADD COLUMN IF NOT EXISTS phone VARCHAR(50),
+  ADD COLUMN IF NOT EXISTS website TEXT,                 -- Dutchie: embedBackUrl
+  ADD COLUMN IF NOT EXISTS email VARCHAR(255),
+  ADD COLUMN IF NOT EXISTS description TEXT,
+  ADD COLUMN IF NOT EXISTS logo_image TEXT,
+  ADD COLUMN IF NOT EXISTS banner_image TEXT,
+  ADD COLUMN IF NOT EXISTS chain_slug VARCHAR(255),
+  ADD COLUMN IF NOT EXISTS enterprise_id VARCHAR(100),
+  ADD COLUMN IF NOT EXISTS c_name VARCHAR(255),
+  ADD COLUMN IF NOT EXISTS country VARCHAR(100) DEFAULT 'United States',
+  ADD COLUMN IF NOT EXISTS store_status VARCHAR(50);     -- store status
+
+-- ============================================================================
+-- INDEXES
+-- ============================================================================
+
+-- All indexes below are partial: rows where the indexed column is NULL are
+-- left out of the index.
+
+-- Index for chain lookups
+CREATE INDEX IF NOT EXISTS idx_dispensaries_chain_slug ON dispensaries(chain_slug) WHERE chain_slug IS NOT NULL;
+CREATE INDEX IF NOT EXISTS idx_discovery_locations_chain_slug ON dutchie_discovery_locations(chain_slug) WHERE chain_slug IS NOT NULL;
+
+-- Index for enterprise lookups (for multi-location chains)
+CREATE INDEX IF NOT EXISTS idx_dispensaries_enterprise_id ON dispensaries(enterprise_id) WHERE enterprise_id IS NOT NULL;
+CREATE INDEX IF NOT EXISTS idx_discovery_locations_enterprise_id ON dutchie_discovery_locations(enterprise_id) WHERE enterprise_id IS NOT NULL;
+
+-- Index for c_name lookups
+CREATE INDEX IF NOT EXISTS idx_dispensaries_c_name ON dispensaries(c_name) WHERE c_name IS NOT NULL;
+
+-- ============================================================================
+-- COMMENTS
+-- ============================================================================
+
+-- Column comments record which Dutchie field each dispensaries column maps to.
+COMMENT ON COLUMN dispensaries.address1 IS 'Street address line 1 (Dutchie: address1)';
+COMMENT ON COLUMN dispensaries.address2 IS 'Street address line 2 (Dutchie: address2)';
+COMMENT ON COLUMN dispensaries.zipcode IS 'ZIP/postal code (Dutchie: zip)';
+COMMENT ON COLUMN dispensaries.c_name IS 'Dutchie URL slug for embedded menus (Dutchie: cName)';
+COMMENT ON COLUMN dispensaries.chain_slug IS 'Chain identifier slug (Dutchie: chain)';
+COMMENT ON COLUMN dispensaries.enterprise_id IS 'Parent enterprise UUID (Dutchie: retailer.enterpriseId)';
+COMMENT ON COLUMN dispensaries.logo_image IS 'Logo image URL (Dutchie: logoImage)';
+COMMENT ON COLUMN dispensaries.banner_image IS 'Banner image URL (Dutchie: bannerImage)';
+COMMENT ON COLUMN dispensaries.offer_pickup IS 'Offers in-store pickup (Dutchie: offerPickup)';
+COMMENT ON COLUMN dispensaries.offer_delivery IS 'Offers delivery (Dutchie: offerDelivery)';
+COMMENT ON COLUMN dispensaries.offer_curbside_pickup IS 'Offers curbside pickup (Dutchie: offerCurbsidePickup)';
+COMMENT ON COLUMN dispensaries.is_medical IS 'Licensed for medical sales (Dutchie: isMedical)';
+COMMENT ON COLUMN dispensaries.is_recreational IS 'Licensed for recreational sales (Dutchie: isRecreational)';
+
+-- Returns a one-row status message; it changes no data.
+SELECT 'Migration 066 completed: Dutchie field alignment' as status;
--- a/backend/migrations/067_promotion_log.sql
+++ b/backend/migrations/067_promotion_log.sql
@@ -0,0 +1,24 @@
+-- Audit trail for the discovery -> dispensary promotion pipeline.
+-- Each validation or promotion decision is recorded here for later review.
+
+CREATE TABLE IF NOT EXISTS dutchie_promotion_log (
+  id                SERIAL PRIMARY KEY,
+  -- Source discovery row and target dispensary; set to NULL if either row is deleted
+  discovery_id      INT REFERENCES dutchie_discovery_locations (id) ON DELETE SET NULL,
+  dispensary_id     INT REFERENCES dispensaries (id) ON DELETE SET NULL,
+  -- One of: 'validated', 'rejected', 'promoted_create', 'promoted_update', 'skipped'
+  action            VARCHAR(50) NOT NULL,
+  state_code        VARCHAR(10),
+  store_name        VARCHAR(255),
+  -- Error messages collected when a location is rejected
+  validation_errors TEXT[],
+  -- Before/after snapshot of the fields that changed
+  field_changes     JSONB,
+  -- Origin of the action: 'auto', 'manual', 'api'
+  triggered_by      VARCHAR(100) DEFAULT 'auto',
+  created_at        TIMESTAMPTZ DEFAULT NOW()
+);
+
+-- Lookup indexes: by linked row, by action/state filter, and newest-first
+CREATE INDEX IF NOT EXISTS idx_promotion_log_discovery_id
+  ON dutchie_promotion_log (discovery_id);
+CREATE INDEX IF NOT EXISTS idx_promotion_log_dispensary_id
+  ON dutchie_promotion_log (dispensary_id);
+CREATE INDEX IF NOT EXISTS idx_promotion_log_action
+  ON dutchie_promotion_log (action);
+CREATE INDEX IF NOT EXISTS idx_promotion_log_state_code
+  ON dutchie_promotion_log (state_code);
+CREATE INDEX IF NOT EXISTS idx_promotion_log_created_at
+  ON dutchie_promotion_log (created_at DESC);
+
+COMMENT ON TABLE dutchie_promotion_log IS 'Audit log for discovery location validation and promotion to dispensaries';
--- a/backend/migrations/068_crawler_status_alerts.sql
+++ b/backend/migrations/068_crawler_status_alerts.sql
@@ -0,0 +1,95 @@
+-- Migration 068: Crawler Status Alerts
+-- Creates status_alerts table for dashboard notifications and status change logging
+
+-- ============================================================
+-- STATUS ALERTS TABLE
+-- ============================================================
+
+CREATE TABLE IF NOT EXISTS crawler_status_alerts (
+  id               SERIAL PRIMARY KEY,
+
+  -- Rows this alert refers to. No ON DELETE action is declared here.
+  dispensary_id    INT REFERENCES dispensaries (id),
+  profile_id       INT REFERENCES dispensary_crawler_profiles (id),
+
+  -- Classification
+  -- alert_type: 'status_change', 'crawl_error', 'validation_failed', 'promoted', 'demoted'
+  alert_type       VARCHAR(50) NOT NULL,
+  -- severity: 'info', 'warning', 'error', 'critical'
+  severity         VARCHAR(20) DEFAULT 'info',
+
+  -- Status before and after the change
+  previous_status  VARCHAR(50),
+  new_status       VARCHAR(50),
+
+  -- Human-readable message plus structured details
+  message          TEXT,
+  error_details    JSONB,
+  -- Additional context (product counts, error codes, etc.)
+  metadata         JSONB,
+
+  -- Acknowledgement tracking
+  acknowledged     BOOL DEFAULT false,
+  acknowledged_at  TIMESTAMPTZ,
+  acknowledged_by  VARCHAR(100),
+
+  created_at       TIMESTAMPTZ DEFAULT NOW()
+);
+
+-- Indexes for the common alert filters
+CREATE INDEX IF NOT EXISTS idx_crawler_status_alerts_dispensary
+  ON crawler_status_alerts (dispensary_id);
+CREATE INDEX IF NOT EXISTS idx_crawler_status_alerts_type
+  ON crawler_status_alerts (alert_type);
+CREATE INDEX IF NOT EXISTS idx_crawler_status_alerts_severity
+  ON crawler_status_alerts (severity);
+-- Partial index: only unacknowledged alerts are indexed
+CREATE INDEX IF NOT EXISTS idx_crawler_status_alerts_unack
+  ON crawler_status_alerts (acknowledged)
+  WHERE acknowledged = FALSE;
+CREATE INDEX IF NOT EXISTS idx_crawler_status_alerts_created
+  ON crawler_status_alerts (created_at DESC);
+
+-- ============================================================
+-- CATALOG COMMENTS (allowed values, for reference)
+-- ============================================================
+
+COMMENT ON TABLE crawler_status_alerts IS 'Crawler status change notifications for dashboard alerting';
+COMMENT ON COLUMN crawler_status_alerts.alert_type IS 'Type: status_change, crawl_error, validation_failed, promoted, demoted';
+COMMENT ON COLUMN crawler_status_alerts.severity IS 'Severity: info, warning, error, critical';
+COMMENT ON COLUMN crawler_status_alerts.previous_status IS 'Previous crawler status before change';
+COMMENT ON COLUMN crawler_status_alerts.new_status IS 'New crawler status after change';
+
+-- ============================================================
+-- STATUS TRACKING ON PROFILES
+-- ============================================================
+
+-- Add status-tracking columns to crawler profiles (safe to re-run).
+-- ADD COLUMN IF NOT EXISTS checks the exact table being altered. The previous
+-- information_schema.columns lookup filtered on table_name only, so a table with
+-- the same name in another schema could make the check pass and the column be
+-- silently skipped.
+ALTER TABLE dispensary_crawler_profiles
+  -- Consecutive success count for auto-promotion
+  ADD COLUMN IF NOT EXISTS consecutive_successes INTEGER DEFAULT 0,
+  -- Consecutive failure count for auto-demotion
+  ADD COLUMN IF NOT EXISTS consecutive_failures INTEGER DEFAULT 0,
+  -- Last status change timestamp
+  ADD COLUMN IF NOT EXISTS status_changed_at TIMESTAMP WITH TIME ZONE,
+  -- Status change reason
+  ADD COLUMN IF NOT EXISTS status_reason TEXT;
+
+-- ============================================================
+-- VALID STATUS VALUES
+-- ============================================================
+-- Status values for dispensary_crawler_profiles.status:
+--   'sandbox'      - Newly created, being validated
+--   'production'   - Healthy, actively crawled
+--   'needs_manual' - Requires human intervention
+--   'failing'      - Multiple consecutive failures
+--   'disabled'     - Manually disabled
+--   'legacy'       - No profile, uses default method (virtual status)
--- a/backend/migrations/069_six_stage_status.sql
+++ b/backend/migrations/069_six_stage_status.sql
@@ -0,0 +1,163 @@
+-- Migration 069: Seven-Stage Status System
+--
+-- Implements explicit 7-stage pipeline for store lifecycle:
+--   1. discovered - Found via Dutchie API, raw data
+--   2. validated  - Passed field checks, ready for promotion
+--   3. promoted   - In dispensaries table, has crawler profile
+--   4. sandbox    - First crawl attempted, testing
+--   5. hydrating  - Products are being loaded/updated
+--   6. production - Healthy, scheduled crawls via Horizon
+--   7. failing    - Crawl errors, needs attention
+
+-- ============================================================
+-- STAGE ENUM TYPE
+-- ============================================================
+
+DO $$
+BEGIN
+  -- PostgreSQL has no CREATE TYPE IF NOT EXISTS, so probe the catalog first.
+  -- The lookup matches on type name only (any schema).
+  PERFORM 1 FROM pg_type WHERE typname = 'store_stage';
+
+  IF NOT FOUND THEN
+    -- Labels are declared in pipeline order
+    CREATE TYPE store_stage AS ENUM (
+      'discovered', 'validated', 'promoted',
+      'sandbox', 'hydrating', 'production',
+      'failing'
+    );
+  END IF;
+END $$;
+
+-- ============================================================
+-- UPDATE DISCOVERY LOCATIONS TABLE
+-- ============================================================
+
+-- Add stage column to discovery locations (replaces status).
+-- The column is deliberately added WITHOUT a default: adding it with
+-- DEFAULT 'discovered' fills every existing row immediately, so the backfill
+-- below (WHERE stage IS NULL OR stage = '') would never match and the legacy
+-- status values would never be carried over.
+ALTER TABLE dutchie_discovery_locations ADD COLUMN IF NOT EXISTS stage VARCHAR(20);
+
+-- Migrate existing status values to stage (only rows that have no stage yet,
+-- so re-running the migration never overwrites a stage set later)
+UPDATE dutchie_discovery_locations
+SET stage = CASE
+  WHEN status = 'discovered' THEN 'discovered'
+  WHEN status = 'verified' THEN 'validated'
+  WHEN status = 'rejected' THEN 'failing'
+  WHEN status = 'merged' THEN 'validated'
+  ELSE 'discovered'
+END
+WHERE stage IS NULL OR stage = '';
+
+-- Apply the default for rows inserted from now on
+ALTER TABLE dutchie_discovery_locations ALTER COLUMN stage SET DEFAULT 'discovered';
+
+-- ============================================================
+-- UPDATE CRAWLER PROFILES TABLE
+-- ============================================================
+
+-- Remap legacy crawler profile status values onto the pipeline stages.
+-- (This statement assumes the status column already exists; it does not create it.)
+--
+--   'needs_manual', 'disabled'        -> 'failing'
+--   NULL or any other unknown value   -> 'promoted'
+--
+-- Rows that already hold a valid stage are left untouched. Without the WHERE
+-- clause, re-running this migration would reset stages such as 'hydrating' or
+-- 'validated' back to 'promoted'.
+UPDATE dispensary_crawler_profiles
+SET status = CASE
+  WHEN status IN ('needs_manual', 'disabled') THEN 'failing'
+  ELSE 'promoted'
+END
+WHERE status IS NULL
+   OR status NOT IN (
+     'discovered', 'validated', 'promoted', 'sandbox',
+     'hydrating', 'production', 'failing'
+   );
+
+-- ============================================================
+-- ADD STAGE TRACKING TO DISPENSARIES
+-- ============================================================
+
+-- Stage-tracking columns on dispensaries (safe to re-run).
+-- stage is deliberately added WITHOUT a default: adding it with DEFAULT 'promoted'
+-- fills every existing row immediately, so the backfill below
+-- (WHERE d.stage IS NULL OR d.stage = '') would never match and no dispensary
+-- would inherit the status of its crawler profile.
+ALTER TABLE dispensaries
+  -- Current pipeline stage, for quick filtering
+  ADD COLUMN IF NOT EXISTS stage VARCHAR(20),
+  -- When the stage last changed
+  ADD COLUMN IF NOT EXISTS stage_changed_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
+  -- First crawl attempt, to track the sandbox -> production transition
+  ADD COLUMN IF NOT EXISTS first_crawl_at TIMESTAMP WITH TIME ZONE,
+  -- Most recent successful crawl
+  ADD COLUMN IF NOT EXISTS last_successful_crawl_at TIMESTAMP WITH TIME ZONE;
+
+-- Set initial stage for existing dispensaries from their most recently updated
+-- enabled crawler profile; dispensaries without one start as 'promoted'.
+UPDATE dispensaries d
+SET stage = COALESCE(
+  (SELECT dcp.status FROM dispensary_crawler_profiles dcp
+   WHERE dcp.dispensary_id = d.id AND dcp.enabled = true
+   ORDER BY dcp.updated_at DESC LIMIT 1),
+  'promoted'
+)
+WHERE d.stage IS NULL OR d.stage = '';
+
+-- Apply the default for rows inserted from now on
+ALTER TABLE dispensaries ALTER COLUMN stage SET DEFAULT 'promoted';
+
+-- ============================================================
+-- INDEXES FOR STAGE-BASED QUERIES
+-- ============================================================
+
+-- Dispensaries: filter by stage alone, or by stage within a state
+CREATE INDEX IF NOT EXISTS idx_dispensaries_stage
+  ON dispensaries (stage);
+CREATE INDEX IF NOT EXISTS idx_dispensaries_stage_state
+  ON dispensaries (stage, state);
+-- Discovery locations and crawler profiles: filter by stage / status
+CREATE INDEX IF NOT EXISTS idx_discovery_locations_stage
+  ON dutchie_discovery_locations (stage);
+CREATE INDEX IF NOT EXISTS idx_crawler_profiles_status
+  ON dispensary_crawler_profiles (status);
+
+-- ============================================================
+-- STAGE TRANSITION LOG
+-- ============================================================
+
+CREATE TABLE IF NOT EXISTS stage_transitions (
+  id               SERIAL PRIMARY KEY,
+
+  -- Entity whose stage changed; entity_type is 'discovery_location' or 'dispensary'.
+  -- entity_id carries no foreign key.
+  entity_type      VARCHAR(20) NOT NULL,
+  entity_id        INT NOT NULL,
+
+  -- Stage before (nullable) and after the transition
+  from_stage       VARCHAR(20),
+  to_stage         VARCHAR(20) NOT NULL,
+
+  -- What caused the transition: 'api', 'scheduler', 'manual', 'auto'
+  trigger_type     VARCHAR(50) NOT NULL,
+  trigger_endpoint VARCHAR(200),
+
+  -- Result of the transition attempt
+  success          BOOL DEFAULT true,
+  error_message    TEXT,
+  metadata         JSONB,
+
+  -- Timing
+  duration_ms      INT,
+  created_at       TIMESTAMPTZ DEFAULT NOW()
+);
+
+-- History per entity, transitions into a given stage, and newest-first listing
+CREATE INDEX IF NOT EXISTS idx_stage_transitions_entity
+  ON stage_transitions (entity_type, entity_id);
+CREATE INDEX IF NOT EXISTS idx_stage_transitions_to_stage
+  ON stage_transitions (to_stage);
+CREATE INDEX IF NOT EXISTS idx_stage_transitions_created
+  ON stage_transitions (created_at DESC);
+
+-- ============================================================
+-- COMMENTS
+-- ============================================================
+
+-- Catalog documentation. The stage list must match the seven stages of the
+-- store_stage enum, including 'hydrating'.
+COMMENT ON TABLE stage_transitions IS 'Audit log for all stage transitions in the pipeline';
+COMMENT ON COLUMN dispensaries.stage IS 'Current pipeline stage: discovered, validated, promoted, sandbox, hydrating, production, failing';
+COMMENT ON COLUMN dispensaries.stage_changed_at IS 'When the stage was last changed';
+COMMENT ON COLUMN dispensaries.first_crawl_at IS 'When the first crawl was attempted (sandbox stage)';
+COMMENT ON COLUMN dispensaries.last_successful_crawl_at IS 'When the last successful crawl completed';
--- a/backend/migrations/070_product_variants.sql
+++ b/backend/migrations/070_product_variants.sql
@@ -0,0 +1,239 @@
+-- ============================================================================
+-- Migration 070: Product Variants Tables
+-- ============================================================================
+--
+-- Purpose: Store variant-level pricing and inventory as first-class entities
+--          to enable time-series analytics, price comparisons, and sale tracking.
+--
+-- Enables queries like:
+--   - Price history for a specific variant (1g Blue Dream over time)
+--   - Sale frequency analysis (how often is this on special?)
+--   - Cross-store price comparison (who has cheapest 1g flower?)
+--   - Current specials across all stores
+--
+-- RULES:
+--   - STRICTLY ADDITIVE (no DROP, DELETE, TRUNCATE)
+--   - All new tables use IF NOT EXISTS
+--   - All indexes use IF NOT EXISTS
+--
+-- ============================================================================
+
+-- ============================================================================
+-- SECTION 1: PRODUCT_VARIANTS TABLE (Current State)
+-- ============================================================================
+-- One row per product+option combination. Tracks current pricing/inventory.
+
+CREATE TABLE IF NOT EXISTS product_variants (
+    id                   SERIAL PRIMARY KEY,
+    -- Owning product and store; variant rows are removed together with either parent
+    store_product_id     INT NOT NULL REFERENCES store_products (id) ON DELETE CASCADE,
+    dispensary_id        INT NOT NULL REFERENCES dispensaries (id) ON DELETE CASCADE,
+
+    -- Variant identity (from Dutchie POSMetaData.children)
+    -- option examples: "1g", "3.5g", "1/8oz", "100mg"
+    option               VARCHAR(100) NOT NULL,
+    canonical_sku        VARCHAR(100),   -- Dutchie canonicalSKU
+    canonical_id         VARCHAR(100),   -- Dutchie canonicalID
+    canonical_name       VARCHAR(500),   -- Dutchie canonicalName
+
+    -- Current prices, stored in dollars (not cents)
+    price_rec            DECIMAL(10, 2),
+    price_med            DECIMAL(10, 2),
+    price_rec_special    DECIMAL(10, 2),
+    price_med_special    DECIMAL(10, 2),
+
+    -- Current inventory
+    quantity             INT,
+    quantity_available   INT,
+    in_stock             BOOL DEFAULT true,
+
+    -- Whether the variant is currently on special/sale
+    is_on_special        BOOL DEFAULT false,
+
+    -- Parsed weight/size for analytics: value (1, 3.5, 28, ...) and unit (g, oz, mg, ml, ...)
+    weight_value         DECIMAL(10, 2),
+    weight_unit          VARCHAR(20),
+
+    -- Observation and change timestamps
+    first_seen_at        TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
+    last_seen_at         TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
+    last_price_change_at TIMESTAMP WITH TIME ZONE,
+    last_stock_change_at TIMESTAMP WITH TIME ZONE,
+
+    created_at           TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
+    updated_at           TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
+
+    -- At most one row per product + option
+    UNIQUE (store_product_id, option)
+);
+
+-- Lookup indexes; the partial ones index only the rows matching their predicate
+CREATE INDEX IF NOT EXISTS idx_variants_store_product
+    ON product_variants (store_product_id);
+CREATE INDEX IF NOT EXISTS idx_variants_dispensary
+    ON product_variants (dispensary_id);
+CREATE INDEX IF NOT EXISTS idx_variants_option
+    ON product_variants (option);
+CREATE INDEX IF NOT EXISTS idx_variants_in_stock
+    ON product_variants (dispensary_id, in_stock)
+    WHERE in_stock = TRUE;
+CREATE INDEX IF NOT EXISTS idx_variants_on_special
+    ON product_variants (dispensary_id, is_on_special)
+    WHERE is_on_special = TRUE;
+CREATE INDEX IF NOT EXISTS idx_variants_canonical_sku
+    ON product_variants (canonical_sku)
+    WHERE canonical_sku IS NOT NULL;
+CREATE INDEX IF NOT EXISTS idx_variants_price_rec
+    ON product_variants (price_rec)
+    WHERE price_rec IS NOT NULL;
+
+COMMENT ON TABLE product_variants IS 'Current state of each product variant (weight/size option). One row per product+option.';
+COMMENT ON COLUMN product_variants.option IS 'Weight/size option string from Dutchie (e.g., "1g", "3.5g", "1/8oz")';
+COMMENT ON COLUMN product_variants.canonical_sku IS 'Dutchie POS SKU for cross-store matching';
+
+
+-- ============================================================================
+-- SECTION 2: PRODUCT_VARIANT_SNAPSHOTS TABLE (Historical Data)
+-- ============================================================================
+-- Time-series data for variant pricing. One row per variant per crawl.
+-- CRITICAL: NEVER DELETE from this table.
+
+CREATE TABLE IF NOT EXISTS product_variant_snapshots (
+    id                 SERIAL PRIMARY KEY,
+    -- NOTE(review): the two CASCADE keys below remove snapshot rows when the parent
+    -- variant or dispensary is deleted, which appears to conflict with the
+    -- "NEVER DELETE" rule stated for this table -- confirm this is intended.
+    product_variant_id INT NOT NULL REFERENCES product_variants (id) ON DELETE CASCADE,
+    store_product_id   INT REFERENCES store_products (id) ON DELETE SET NULL,
+    dispensary_id      INT NOT NULL REFERENCES dispensaries (id) ON DELETE CASCADE,
+    crawl_run_id       INT REFERENCES crawl_runs (id) ON DELETE SET NULL,
+
+    -- Copy of the variant's option string (denormalized for query performance)
+    option             VARCHAR(100) NOT NULL,
+
+    -- Prices as captured
+    price_rec          DECIMAL(10, 2),
+    price_med          DECIMAL(10, 2),
+    price_rec_special  DECIMAL(10, 2),
+    price_med_special  DECIMAL(10, 2),
+
+    -- Inventory as captured
+    quantity           INT,
+    in_stock           BOOL DEFAULT true,
+
+    -- Special status as captured
+    is_on_special      BOOL DEFAULT false,
+
+    -- FALSE means the variant was missing from the crawled feed
+    is_present_in_feed BOOL DEFAULT true,
+
+    -- When the snapshot was taken
+    captured_at        TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT CURRENT_TIMESTAMP,
+
+    created_at         TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP
+);
+
+-- Time-series indexes, newest capture first
+CREATE INDEX IF NOT EXISTS idx_variant_snapshots_variant
+    ON product_variant_snapshots (product_variant_id, captured_at DESC);
+CREATE INDEX IF NOT EXISTS idx_variant_snapshots_dispensary
+    ON product_variant_snapshots (dispensary_id, captured_at DESC);
+CREATE INDEX IF NOT EXISTS idx_variant_snapshots_crawl
+    ON product_variant_snapshots (crawl_run_id)
+    WHERE crawl_run_id IS NOT NULL;
+CREATE INDEX IF NOT EXISTS idx_variant_snapshots_captured
+    ON product_variant_snapshots (captured_at DESC);
+CREATE INDEX IF NOT EXISTS idx_variant_snapshots_special
+    ON product_variant_snapshots (is_on_special, captured_at DESC)
+    WHERE is_on_special = TRUE;
+CREATE INDEX IF NOT EXISTS idx_variant_snapshots_option
+    ON product_variant_snapshots (option, captured_at DESC);
+
+COMMENT ON TABLE product_variant_snapshots IS 'Historical variant pricing/inventory. One row per variant per crawl. NEVER DELETE.';
+
+
+-- ============================================================================
+-- SECTION 3: USEFUL VIEWS
+-- ============================================================================
+
+-- View: in-stock variants whose recreational special price is below the regular
+-- price, joined to their product and store.
+CREATE OR REPLACE VIEW v_current_specials AS
+SELECT
+    variant.id                AS variant_id,
+    product.id                AS product_id,
+    product.name_raw          AS product_name,
+    product.brand_name_raw    AS brand_name,
+    product.category_raw      AS category,
+    store.id                  AS dispensary_id,
+    store.name                AS dispensary_name,
+    store.city,
+    store.state,
+    variant.option,
+    variant.price_rec,
+    variant.price_rec_special,
+    -- Percentage off the regular price; NULLIF guards against division by zero
+    ROUND(
+        ((variant.price_rec - variant.price_rec_special) / NULLIF(variant.price_rec, 0)) * 100,
+        1
+    )                         AS discount_percent,
+    variant.quantity,
+    variant.in_stock,
+    variant.last_seen_at
+FROM product_variants AS variant
+INNER JOIN store_products AS product ON variant.store_product_id = product.id
+INNER JOIN dispensaries AS store ON variant.dispensary_id = store.id
+WHERE variant.is_on_special = TRUE
+  AND variant.in_stock = TRUE
+  AND variant.price_rec_special IS NOT NULL
+  AND variant.price_rec_special < variant.price_rec;
+
+COMMENT ON VIEW v_current_specials IS 'All products currently on special across all stores';
+
+
+-- View: in-stock variants ranked by effective recreational price
+-- (special price when present, otherwise regular price) within each
+-- raw product name + option group.
+CREATE OR REPLACE VIEW v_price_comparison AS
+SELECT
+    product.name_raw          AS product_name,
+    product.brand_name_raw    AS brand_name,
+    product.category_raw      AS category,
+    variant.option,
+    store.id                  AS dispensary_id,
+    store.name                AS dispensary_name,
+    store.city,
+    variant.price_rec,
+    variant.price_rec_special,
+    variant.is_on_special,
+    variant.in_stock,
+    variant.quantity,
+    RANK() OVER (
+        PARTITION BY product.name_raw, variant.option
+        ORDER BY COALESCE(variant.price_rec_special, variant.price_rec)
+    )                         AS price_rank
+FROM product_variants AS variant
+INNER JOIN store_products AS product ON variant.store_product_id = product.id
+INNER JOIN dispensaries AS store ON variant.dispensary_id = store.id
+WHERE variant.in_stock = TRUE
+  -- keep only rows that have at least one recreational price
+  AND COALESCE(variant.price_rec_special, variant.price_rec) IS NOT NULL;
+
+COMMENT ON VIEW v_price_comparison IS 'Compare prices for same product across stores, ranked by price';
+
+
+-- View: the most recently captured snapshot row for each variant.
+-- DISTINCT ON keeps the first row per product_variant_id under the ORDER BY below.
+CREATE OR REPLACE VIEW v_latest_variant_snapshots AS
+SELECT DISTINCT ON (pvs.product_variant_id)
+    pvs.*
+FROM product_variant_snapshots AS pvs
+ORDER BY
+    pvs.product_variant_id,
+    pvs.captured_at DESC;
+
+
+-- ============================================================================
+-- SECTION 4: HELPER FUNCTION FOR SALE FREQUENCY
+-- ============================================================================
+
+-- Sale-frequency and price statistics for one variant over a trailing window.
+--   p_variant_id  product_variants.id to analyse
+--   p_days        window length in days, counted back from NOW() (default 30)
+-- Returns a single row aggregated over the variant's snapshots in that window.
+CREATE OR REPLACE FUNCTION get_variant_sale_stats(p_variant_id INTEGER, p_days INTEGER DEFAULT 30)
+RETURNS TABLE (
+    total_snapshots BIGINT,
+    times_on_special BIGINT,
+    special_frequency_pct NUMERIC,
+    avg_discount_pct NUMERIC,
+    min_price NUMERIC,
+    max_price NUMERIC,
+    avg_price NUMERIC
+)
+LANGUAGE plpgsql
+AS $$
+BEGIN
+    RETURN QUERY
+    WITH window_rows AS (
+        -- Snapshots of this variant captured within the last p_days days
+        SELECT
+            snap.is_on_special     AS on_special,
+            snap.price_rec         AS regular_price,
+            snap.price_rec_special AS special_price
+        FROM product_variant_snapshots AS snap
+        WHERE snap.product_variant_id = p_variant_id
+          AND snap.captured_at >= NOW() - (p_days * INTERVAL '1 day')
+    )
+    SELECT
+        COUNT(*)::BIGINT,
+        COUNT(*) FILTER (WHERE w.on_special)::BIGINT,
+        -- Share of snapshots on special; NULLIF yields NULL when the window is empty
+        ROUND((COUNT(*) FILTER (WHERE w.on_special)::NUMERIC / NULLIF(COUNT(*), 0)) * 100, 1),
+        -- Average discount, counting only on-special snapshots that have both prices
+        ROUND(AVG(
+            CASE WHEN w.on_special AND w.special_price IS NOT NULL AND w.regular_price IS NOT NULL
+            THEN ((w.regular_price - w.special_price) / NULLIF(w.regular_price, 0)) * 100
+            END
+        ), 1),
+        -- Effective price = special price when present, otherwise regular price
+        MIN(COALESCE(w.special_price, w.regular_price)),
+        MAX(w.regular_price),
+        ROUND(AVG(COALESCE(w.special_price, w.regular_price)), 2)
+    FROM window_rows AS w;
+END;
+$$;
+
+COMMENT ON FUNCTION get_variant_sale_stats IS 'Get sale frequency and price stats for a variant over N days';
+
+
+-- ============================================================================
+-- DONE
+-- ============================================================================
+
+SELECT 'Migration 070 completed. Product variants tables ready for time-series analytics.' AS status;
--- a/backend/migrations/071_harmonize_store_products.sql
+++ b/backend/migrations/071_harmonize_store_products.sql
@@ -0,0 +1,53 @@
+-- Migration 071: Harmonize store_products with dutchie_products
+-- Adds missing columns to store_products to consolidate on a single canonical table
+
+-- Each group below is a single ALTER TABLE; IF NOT EXISTS makes every clause
+-- a no-op when the column is already present.
+
+-- Product details
+ALTER TABLE store_products
+  ADD COLUMN IF NOT EXISTS description TEXT,
+  ADD COLUMN IF NOT EXISTS weight VARCHAR(50),
+  ADD COLUMN IF NOT EXISTS weights JSONB,
+  ADD COLUMN IF NOT EXISTS measurements JSONB;
+
+-- Cannabinoid/terpene data
+ALTER TABLE store_products
+  ADD COLUMN IF NOT EXISTS effects JSONB,
+  ADD COLUMN IF NOT EXISTS terpenes JSONB,
+  ADD COLUMN IF NOT EXISTS cannabinoids_v2 JSONB,
+  ADD COLUMN IF NOT EXISTS thc_content DECIMAL(10, 4),
+  ADD COLUMN IF NOT EXISTS cbd_content DECIMAL(10, 4);
+
+-- Images
+ALTER TABLE store_products
+  ADD COLUMN IF NOT EXISTS images JSONB,
+  ADD COLUMN IF NOT EXISTS primary_image_url TEXT;
+
+-- Inventory
+ALTER TABLE store_products
+  ADD COLUMN IF NOT EXISTS total_quantity_available INT DEFAULT 0;
+
+-- Status/flags
+ALTER TABLE store_products
+  ADD COLUMN IF NOT EXISTS status VARCHAR(50),
+  ADD COLUMN IF NOT EXISTS featured BOOL DEFAULT false,
+  ADD COLUMN IF NOT EXISTS coming_soon BOOL DEFAULT false,
+  ADD COLUMN IF NOT EXISTS visibility_lost BOOL DEFAULT false,
+  ADD COLUMN IF NOT EXISTS visibility_lost_at TIMESTAMPTZ,
+  ADD COLUMN IF NOT EXISTS visibility_restored_at TIMESTAMPTZ;
+
+-- Threshold flags (Dutchie-specific)
+ALTER TABLE store_products
+  ADD COLUMN IF NOT EXISTS is_below_threshold BOOL DEFAULT false,
+  ADD COLUMN IF NOT EXISTS is_below_kiosk_threshold BOOL DEFAULT false,
+  ADD COLUMN IF NOT EXISTS options_below_threshold BOOL DEFAULT false,
+  ADD COLUMN IF NOT EXISTS options_below_kiosk_threshold BOOL DEFAULT false,
+  ADD COLUMN IF NOT EXISTS certificate_of_analysis_enabled BOOL DEFAULT false;
+
+-- Platform metadata
+ALTER TABLE store_products
+  ADD COLUMN IF NOT EXISTS external_product_id VARCHAR(100),
+  ADD COLUMN IF NOT EXISTS c_name VARCHAR(500),
+  ADD COLUMN IF NOT EXISTS past_c_names TEXT[],
+  ADD COLUMN IF NOT EXISTS latest_raw_payload JSONB,
+  ADD COLUMN IF NOT EXISTS created_at_platform TIMESTAMPTZ,
+  ADD COLUMN IF NOT EXISTS updated_at_platform TIMESTAMPTZ;
+
+-- Indexes for common queries (visibility_lost is indexed only where TRUE)
+CREATE INDEX IF NOT EXISTS idx_store_products_external_id
+  ON store_products (external_product_id);
+CREATE INDEX IF NOT EXISTS idx_store_products_visibility_lost
+  ON store_products (visibility_lost)
+  WHERE visibility_lost = TRUE;
+CREATE INDEX IF NOT EXISTS idx_store_products_status
+  ON store_products (status);
+
+-- Table-level catalog comment
+COMMENT ON TABLE store_products IS 'Canonical product table - consolidated from dutchie_products';
--- a/backend/migrations/072_product_views.sql
+++ b/backend/migrations/072_product_views.sql
@@ -0,0 +1,74 @@
+-- Migration 072: Create compatibility views for store_products and store_product_snapshots
+-- These views provide backward-compatible column names for API routes
+
+-- v_products: exposes store_products under the legacy dutchie_products column names.
+-- provider_product_id is published twice (external_product_id, dutchie_id); the
+-- table's own external_product_id column is published as external_id.
+CREATE OR REPLACE VIEW v_products AS
+SELECT
+  sp.id,
+  sp.dispensary_id,
+  sp.provider_product_id  AS external_product_id,
+  sp.provider_product_id  AS dutchie_id,
+  sp.name_raw             AS name,
+  sp.brand_name_raw       AS brand_name,
+  sp.category_raw         AS type,
+  sp.subcategory_raw      AS subcategory,
+  sp.strain_type,
+  sp.thc_percent          AS thc,
+  sp.cbd_percent          AS cbd,
+  sp.stock_status,
+  sp.is_in_stock,
+  sp.stock_quantity,
+  sp.image_url,
+  sp.primary_image_url,
+  sp.images,
+  sp.effects,
+  sp.description,
+  sp.is_on_special,
+  sp.featured,
+  sp.medical_only,
+  sp.rec_only,
+  sp.external_product_id  AS external_id,
+  sp.provider,
+  sp.created_at,
+  sp.updated_at
+FROM store_products AS sp;
+
+-- v_product_snapshots: exposes store_product_snapshots under the legacy column names.
+-- Dollar prices are published as integer cents; a NULL price stays NULL because
+-- NULL propagates through the multiplication and the cast.
+CREATE OR REPLACE VIEW v_product_snapshots AS
+SELECT
+  snap.id,
+  snap.store_product_id,
+  snap.dispensary_id,
+  snap.provider,
+  snap.provider_product_id,
+  snap.crawl_run_id,
+  snap.captured_at                            AS crawled_at,
+  snap.name_raw,
+  snap.brand_name_raw,
+  snap.category_raw,
+  snap.subcategory_raw,
+  -- The same source price feeds both the min and the max legacy column
+  CAST(snap.price_rec * 100 AS integer)         AS rec_min_price_cents,
+  CAST(snap.price_rec * 100 AS integer)         AS rec_max_price_cents,
+  CAST(snap.price_rec_special * 100 AS integer) AS rec_min_special_price_cents,
+  CAST(snap.price_med * 100 AS integer)         AS med_min_price_cents,
+  CAST(snap.price_med * 100 AS integer)         AS med_max_price_cents,
+  CAST(snap.price_med_special * 100 AS integer) AS med_min_special_price_cents,
+  snap.is_on_special                          AS special,
+  snap.discount_percent,
+  snap.is_in_stock,
+  snap.stock_quantity,
+  snap.stock_status,
+  snap.stock_quantity                         AS total_quantity_available,
+  snap.thc_percent,
+  snap.cbd_percent,
+  snap.image_url,
+  snap.raw_data                               AS options,
+  snap.created_at
+FROM store_product_snapshots AS snap;
+
+-- Indexes on the tables behind v_products and v_product_snapshots
+CREATE INDEX IF NOT EXISTS idx_store_products_dispensary
+  ON store_products (dispensary_id);
+CREATE INDEX IF NOT EXISTS idx_store_products_stock
+  ON store_products (stock_status);
+CREATE INDEX IF NOT EXISTS idx_store_snapshots_product
+  ON store_product_snapshots (store_product_id);
+CREATE INDEX IF NOT EXISTS idx_store_snapshots_captured
+  ON store_product_snapshots (captured_at DESC);
--- a/backend/migrations/073_proxy_timezone.sql
+++ b/backend/migrations/073_proxy_timezone.sql
@@ -0,0 +1,12 @@
+-- Store a timezone per proxy so fingerprinting can stay geo-consistent
+-- (e.g. matching Accept-Language and other headers to the proxy's location).
+
+-- Active proxies
+ALTER TABLE proxies ADD COLUMN IF NOT EXISTS timezone VARCHAR(50);
+
+-- Failed proxies carry the same column
+ALTER TABLE failed_proxies ADD COLUMN IF NOT EXISTS timezone VARCHAR(50);
+
+-- Catalog comment describing the expected value format
+COMMENT ON COLUMN proxies.timezone IS 'IANA timezone (e.g., America/Phoenix) for geo-consistent fingerprinting';
--- a/backend/migrations/074_worker_commands.sql
+++ b/backend/migrations/074_worker_commands.sql
@@ -0,0 +1,27 @@
+-- Migration: Worker Commands Table
+-- Purpose: Store commands for workers (decommission, etc.)
+-- Workers poll this table after each task to check for commands
+
+CREATE TABLE IF NOT EXISTS worker_commands (
+  id              SERIAL PRIMARY KEY,
+  worker_id       TEXT NOT NULL,
+  -- 'decommission', 'pause', 'resume'
+  command         TEXT NOT NULL,
+  reason          TEXT,
+  issued_by       TEXT,
+  issued_at       TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
+  acknowledged_at TIMESTAMP WITH TIME ZONE,
+  executed_at     TIMESTAMP WITH TIME ZONE,
+  -- 'pending', 'acknowledged', 'executed', 'cancelled'
+  status          TEXT DEFAULT 'pending'
+);
+
+-- Per-worker lookups; the second index covers only commands still pending
+CREATE INDEX IF NOT EXISTS idx_worker_commands_worker_id
+  ON worker_commands (worker_id);
+CREATE INDEX IF NOT EXISTS idx_worker_commands_pending
+  ON worker_commands (worker_id, status)
+  WHERE status = 'pending';
+
+-- Decommission flags on worker_registry for quick checks
+ALTER TABLE worker_registry
+  ADD COLUMN IF NOT EXISTS decommission_requested BOOL DEFAULT false,
+  ADD COLUMN IF NOT EXISTS decommission_reason TEXT,
+  ADD COLUMN IF NOT EXISTS decommission_requested_at TIMESTAMP WITH TIME ZONE;
+
+-- Catalog comment
+COMMENT ON TABLE worker_commands IS 'Commands issued to workers (decommission after task, pause, etc.)';
--- a/backend/migrations/074_worker_task_queue.sql
+++ b/backend/migrations/074_worker_task_queue.sql
@@ -0,0 +1,322 @@
+-- Migration 074: Worker Task Queue System
+-- Implements role-based task queue with per-store locking and capacity tracking
+
+-- Task queue table
+CREATE TABLE IF NOT EXISTS worker_tasks (
+  id SERIAL PRIMARY KEY,
+
+  -- Task identification
+  role VARCHAR(50) NOT NULL,           -- store_discovery, entry_point_discovery, product_discovery, product_resync, analytics_refresh
+  dispensary_id INTEGER REFERENCES dispensaries(id) ON DELETE CASCADE,
+  platform VARCHAR(20),                 -- dutchie, jane, treez, etc.
+
+  -- Task state
+  status VARCHAR(20) NOT NULL DEFAULT 'pending',
+  priority INTEGER DEFAULT 0,           -- Higher = more urgent
+
+  -- Scheduling
+  scheduled_for TIMESTAMPTZ,            -- For batch scheduling (e.g., every 4 hours)
+
+  -- Ownership
+  worker_id VARCHAR(100),               -- Pod name or worker ID
+  claimed_at TIMESTAMPTZ,
+  started_at TIMESTAMPTZ,
+  completed_at TIMESTAMPTZ,
+  last_heartbeat_at TIMESTAMPTZ,
+
+  -- Results
+  result JSONB,                         -- Task output data
+  error_message TEXT,
+  retry_count INTEGER DEFAULT 0,
+  max_retries INTEGER DEFAULT 3,
+
+  -- Metadata
+  created_at TIMESTAMPTZ DEFAULT NOW(),
+  updated_at TIMESTAMPTZ DEFAULT NOW(),
+
+  -- Constraints
+  CONSTRAINT valid_status CHECK (status IN ('pending', 'claimed', 'running', 'completed', 'failed', 'stale'))
+);
+
+-- Indexes for efficient task claiming
+CREATE INDEX IF NOT EXISTS idx_worker_tasks_pending
+  ON worker_tasks(role, priority DESC, created_at ASC)
+  WHERE status = 'pending';
+
+CREATE INDEX IF NOT EXISTS idx_worker_tasks_claimed
+  ON worker_tasks(worker_id, claimed_at)
+  WHERE status = 'claimed';
+
+CREATE INDEX IF NOT EXISTS idx_worker_tasks_running
+  ON worker_tasks(worker_id, last_heartbeat_at)
+  WHERE status = 'running';
+
+CREATE INDEX IF NOT EXISTS idx_worker_tasks_dispensary
+  ON worker_tasks(dispensary_id)
+  WHERE dispensary_id IS NOT NULL;
+
+CREATE INDEX IF NOT EXISTS idx_worker_tasks_scheduled
+  ON worker_tasks(scheduled_for)
+  WHERE status = 'pending' AND scheduled_for IS NOT NULL;
+
+CREATE INDEX IF NOT EXISTS idx_worker_tasks_history
+  ON worker_tasks(role, completed_at DESC)
+  WHERE status IN ('completed', 'failed');
+
+-- Partial unique index to prevent duplicate active tasks per store
+-- Only one task can be claimed/running for a given dispensary at a time
+CREATE UNIQUE INDEX IF NOT EXISTS idx_worker_tasks_unique_active_store
+  ON worker_tasks(dispensary_id)
+  WHERE status IN ('claimed', 'running') AND dispensary_id IS NOT NULL;
+
+-- Worker registration table (tracks active workers)
+CREATE TABLE IF NOT EXISTS worker_registry (
+  id SERIAL PRIMARY KEY,
+  worker_id VARCHAR(100) UNIQUE NOT NULL,
+  role VARCHAR(50) NOT NULL,
+  pod_name VARCHAR(100),
+  hostname VARCHAR(100),
+  started_at TIMESTAMPTZ DEFAULT NOW(),
+  last_heartbeat_at TIMESTAMPTZ DEFAULT NOW(),
+  tasks_completed INTEGER DEFAULT 0,
+  tasks_failed INTEGER DEFAULT 0,
+  status VARCHAR(20) DEFAULT 'active',
+
+  CONSTRAINT valid_worker_status CHECK (status IN ('active', 'idle', 'offline'))
+);
+
+CREATE INDEX IF NOT EXISTS idx_worker_registry_role
+  ON worker_registry(role, status);
+
+CREATE INDEX IF NOT EXISTS idx_worker_registry_heartbeat
+  ON worker_registry(last_heartbeat_at)
+  WHERE status = 'active';
+
+-- Task completion tracking (summarized history)
+CREATE TABLE IF NOT EXISTS task_completion_log (
+  id SERIAL PRIMARY KEY,
+  role VARCHAR(50) NOT NULL,
+  date DATE NOT NULL DEFAULT CURRENT_DATE,
+  hour INTEGER NOT NULL DEFAULT EXTRACT(HOUR FROM NOW()),
+
+  tasks_created INTEGER DEFAULT 0,
+  tasks_completed INTEGER DEFAULT 0,
+  tasks_failed INTEGER DEFAULT 0,
+
+  avg_duration_sec NUMERIC(10,2),
+  min_duration_sec NUMERIC(10,2),
+  max_duration_sec NUMERIC(10,2),
+
+  updated_at TIMESTAMPTZ DEFAULT NOW(),
+
+  UNIQUE(role, date, hour)
+);
+
+-- Capacity planning view
+-- One row per role: queue depth, throughput over the last hour, and a rough
+-- estimate of how long the pending backlog will take to drain.
+-- The raw aggregates are computed once in the CTE; the derived ratios reuse them.
+CREATE OR REPLACE VIEW v_worker_capacity AS
+WITH per_role AS (
+  SELECT
+    role,
+    COUNT(*) FILTER (WHERE status = 'pending') AS pending_tasks,
+    COUNT(*) FILTER (
+      WHERE status = 'pending'
+        AND (scheduled_for IS NULL OR scheduled_for <= NOW())
+    ) AS ready_tasks,
+    COUNT(*) FILTER (WHERE status = 'claimed') AS claimed_tasks,
+    COUNT(*) FILTER (WHERE status = 'running') AS running_tasks,
+    COUNT(*) FILTER (
+      WHERE status = 'completed' AND completed_at > NOW() - INTERVAL '1 hour'
+    ) AS completed_last_hour,
+    COUNT(*) FILTER (
+      WHERE status = 'failed' AND completed_at > NOW() - INTERVAL '1 hour'
+    ) AS failed_last_hour,
+    COUNT(DISTINCT worker_id) FILTER (
+      WHERE status IN ('claimed', 'running')
+    ) AS active_workers,
+    AVG(EXTRACT(EPOCH FROM (completed_at - started_at))) FILTER (
+      WHERE status = 'completed' AND completed_at > NOW() - INTERVAL '1 hour'
+    ) AS avg_duration_sec
+  FROM worker_tasks
+  GROUP BY role
+)
+SELECT
+  role,
+  pending_tasks,
+  ready_tasks,
+  claimed_tasks,
+  running_tasks,
+  completed_last_hour,
+  failed_last_hour,
+  active_workers,
+  avg_duration_sec,
+  -- Capacity planning metrics: tasks one worker finishes per hour at the recent average pace
+  CASE
+    WHEN completed_last_hour > 0
+    THEN 3600.0 / NULLIF(avg_duration_sec, 0)
+  END AS tasks_per_worker_hour,
+  -- Estimated time to drain queue: pending / (active workers * per-worker hourly rate)
+  CASE
+    WHEN active_workers > 0 AND completed_last_hour > 0
+    THEN pending_tasks / NULLIF(
+      active_workers * (3600.0 / NULLIF(avg_duration_sec, 0)),
+      0
+    )
+  END AS estimated_hours_to_drain
+FROM per_role;
+
+-- Task history view (for UI)
+-- Every task, newest first, with the dispensary name joined in (NULL when the
+-- task has no dispensary) and the run time (completed_at - started_at) in seconds.
+CREATE OR REPLACE VIEW v_task_history AS
+SELECT
+  task.id,
+  task.role,
+  task.dispensary_id,
+  disp.name AS dispensary_name,
+  task.platform,
+  task.status,
+  task.priority,
+  task.worker_id,
+  task.scheduled_for,
+  task.claimed_at,
+  task.started_at,
+  task.completed_at,
+  task.error_message,
+  task.retry_count,
+  task.created_at,
+  EXTRACT(EPOCH FROM (task.completed_at - task.started_at)) AS duration_sec
+FROM worker_tasks AS task
+LEFT JOIN dispensaries AS disp
+  ON disp.id = task.dispensary_id
+ORDER BY task.created_at DESC;
+
+-- Function to claim a task atomically
+--
+-- Picks one pending task for p_role whose scheduled_for is NULL or already due,
+-- preferring higher priority and then older created_at, and flips it to 'claimed'
+-- for p_worker_id. Tasks whose dispensary already has a claimed/running task are
+-- skipped, as are candidate rows currently locked by another transaction.
+--
+-- Parameters:
+--   p_role      - task role to claim from
+--   p_worker_id - identifier stored in worker_tasks.worker_id
+-- Returns: the updated worker_tasks row. When nothing matches, the UPDATE touches
+--   no row and claimed_task is returned unassigned (presumably an all-NULL row -
+--   callers should check id).
+--
+-- NOTE(review): last_heartbeat_at is not written here; recover_stale_tasks compares
+--   on that column, so confirm the worker writes a heartbeat promptly after claiming.
+-- NOTE(review): two sessions can pass the NOT IN check for different pending tasks of
+--   the same dispensary at the same time; the partial unique index
+--   idx_worker_tasks_unique_active_store would then presumably reject the second
+--   UPDATE with a unique violation - confirm callers handle that error.
+CREATE OR REPLACE FUNCTION claim_task(
+  p_role VARCHAR(50),
+  p_worker_id VARCHAR(100)
+) RETURNS worker_tasks AS $$
+DECLARE
+  claimed_task worker_tasks;
+BEGIN
+  UPDATE worker_tasks
+  SET
+    status = 'claimed',
+    worker_id = p_worker_id,
+    claimed_at = NOW(),
+    updated_at = NOW()
+  WHERE id = (
+    SELECT id FROM worker_tasks
+    WHERE role = p_role
+      AND status = 'pending'
+      AND (scheduled_for IS NULL OR scheduled_for <= NOW())
+      -- Exclude stores that already have an active task
+      AND (dispensary_id IS NULL OR dispensary_id NOT IN (
+        SELECT dispensary_id FROM worker_tasks
+        WHERE status IN ('claimed', 'running')
+        AND dispensary_id IS NOT NULL
+      ))
+    ORDER BY priority DESC, created_at ASC
+    LIMIT 1
+    -- Lock the chosen row; rows locked by concurrent claimers are skipped, not waited on
+    FOR UPDATE SKIP LOCKED
+  )
+  RETURNING * INTO claimed_task;
+
+  RETURN claimed_task;
+END;
+$$ LANGUAGE plpgsql;
+
+-- Function to mark stale tasks (workers that died)
+--
+-- A claimed/running task is stale when its latest sign of life - the newest of
+-- last_heartbeat_at, started_at and claimed_at - is older than the threshold.
+-- Stale tasks with retries left go back to 'pending' (retry_count + 1); stale tasks
+-- that have used up max_retries are marked 'failed'.
+--
+-- Parameters:
+--   stale_threshold_minutes - minutes without any sign of life before a task is stale
+-- Returns: number of tasks released back to 'pending' (failed tasks are not counted).
+CREATE OR REPLACE FUNCTION recover_stale_tasks(
+  stale_threshold_minutes INTEGER DEFAULT 10
+) RETURNS INTEGER AS $$
+DECLARE
+  recovered_count INTEGER;
+  -- Computed once so both statements below use exactly the same cutoff
+  stale_cutoff TIMESTAMPTZ := NOW() - (stale_threshold_minutes || ' minutes')::INTERVAL;
+BEGIN
+  WITH stale AS (
+    UPDATE worker_tasks
+    SET
+      status = 'pending',
+      worker_id = NULL,
+      claimed_at = NULL,
+      started_at = NULL,
+      -- Clear the old heartbeat so the next claimer does not inherit a stale timestamp
+      last_heartbeat_at = NULL,
+      retry_count = retry_count + 1,
+      updated_at = NOW()
+    WHERE status IN ('claimed', 'running')
+      -- GREATEST ignores NULLs, so a task that was claimed but never sent a heartbeat
+      -- is judged by claimed_at instead of being skipped forever
+      AND GREATEST(last_heartbeat_at, started_at, claimed_at) < stale_cutoff
+      AND retry_count < max_retries
+    RETURNING id
+  )
+  SELECT COUNT(*) INTO recovered_count FROM stale;
+
+  -- Mark tasks that exceeded retries as failed
+  UPDATE worker_tasks
+  SET
+    status = 'failed',
+    error_message = 'Exceeded max retries after worker failures',
+    completed_at = NOW(),
+    updated_at = NOW()
+  WHERE status IN ('claimed', 'running')
+    AND GREATEST(last_heartbeat_at, started_at, claimed_at) < stale_cutoff
+    AND retry_count >= max_retries;
+
+  RETURN recovered_count;
+END;
+$$ LANGUAGE plpgsql;
+
+-- Function to generate daily resync tasks
+--
+-- Splits all crawl-enabled Dutchie stores that have a platform_dispensary_id into
+-- p_batches_per_day equally sized batches (ordered by id) and inserts one pending
+-- 'product_resync' task per store, with the batches spread evenly across p_date.
+--
+-- Parameters:
+--   p_batches_per_day - number of batches in the day; must be >= 1
+--   p_date            - day whose schedule is generated (batch 0 starts at midnight)
+-- Returns: number of worker_tasks rows inserted.
+-- Raises: an exception when p_batches_per_day is NULL or less than 1.
+--
+-- NOTE(review): no unique constraint visible in this migration covers pending rows,
+-- so ON CONFLICT DO NOTHING presumably never fires and calling this twice for the
+-- same date inserts duplicates - confirm the caller guards against that.
+CREATE OR REPLACE FUNCTION generate_resync_tasks(
+  p_batches_per_day INTEGER DEFAULT 6,  -- 6 batches => one every 4 hours
+  p_date DATE DEFAULT CURRENT_DATE
+) RETURNS INTEGER AS $$
+DECLARE
+  store_count INTEGER;
+  stores_per_batch INTEGER;
+  batch_num INTEGER;
+  batch_rows INTEGER;
+  scheduled_time TIMESTAMPTZ;
+  created_count INTEGER := 0;
+BEGIN
+  -- Fail with a clear message instead of a division-by-zero further down
+  IF p_batches_per_day IS NULL OR p_batches_per_day < 1 THEN
+    RAISE EXCEPTION 'p_batches_per_day must be >= 1 (got %)', p_batches_per_day;
+  END IF;
+
+  -- Count active stores that need resync
+  SELECT COUNT(*) INTO store_count
+  FROM dispensaries
+  WHERE crawl_enabled = true
+    AND menu_type = 'dutchie'
+    AND platform_dispensary_id IS NOT NULL;
+
+  IF store_count = 0 THEN
+    RETURN 0;
+  END IF;
+
+  stores_per_batch := CEIL(store_count::NUMERIC / p_batches_per_day);
+
+  FOR batch_num IN 0..(p_batches_per_day - 1) LOOP
+    -- Spread batches evenly over 24 hours (4 hours apart for the default of 6)
+    scheduled_time := p_date + INTERVAL '1 hour' * (batch_num * 24.0 / p_batches_per_day);
+
+    INSERT INTO worker_tasks (role, dispensary_id, platform, scheduled_for, priority)
+    SELECT
+      'product_resync',
+      d.id,
+      'dutchie',
+      scheduled_time,
+      0
+    FROM (
+      SELECT id, ROW_NUMBER() OVER (ORDER BY id) as rn
+      FROM dispensaries
+      WHERE crawl_enabled = true
+        AND menu_type = 'dutchie'
+        AND platform_dispensary_id IS NOT NULL
+    ) d
+    WHERE d.rn > (batch_num * stores_per_batch)
+      AND d.rn <= ((batch_num + 1) * stores_per_batch)
+    ON CONFLICT DO NOTHING;
+
+    -- GET DIAGNOSTICS only accepts "variable = item", so accumulate in a second step
+    GET DIAGNOSTICS batch_rows = ROW_COUNT;
+    created_count := created_count + batch_rows;
+  END LOOP;
+
+  RETURN created_count;
+END;
+$$ LANGUAGE plpgsql;
+
+-- Trigger to update timestamp
+-- Row-level BEFORE UPDATE hook: stamps updated_at with the current time on every
+-- update of worker_tasks.
+CREATE OR REPLACE FUNCTION update_worker_tasks_timestamp()
+RETURNS TRIGGER
+LANGUAGE plpgsql
+AS $$
+BEGIN
+  NEW.updated_at := NOW();
+  RETURN NEW;
+END;
+$$;
+
+-- Drop first so re-running the migration does not fail on an existing trigger
+DROP TRIGGER IF EXISTS worker_tasks_updated_at ON worker_tasks;
+CREATE TRIGGER worker_tasks_updated_at
+  BEFORE UPDATE
+  ON worker_tasks
+  FOR EACH ROW
+  EXECUTE FUNCTION update_worker_tasks_timestamp();
+
+-- Comments
+COMMENT ON TABLE worker_tasks IS 'Central task queue for all worker roles';
+COMMENT ON TABLE worker_registry IS 'Registry of active workers and their stats';
+COMMENT ON TABLE task_completion_log IS 'Hourly aggregated task completion metrics';
+COMMENT ON VIEW v_worker_capacity IS 'Real-time capacity planning metrics per role';
+COMMENT ON VIEW v_task_history IS 'Task history with dispensary details for UI';
+COMMENT ON FUNCTION claim_task IS 'Atomically claim a task for a worker, respecting per-store locking';
+COMMENT ON FUNCTION recover_stale_tasks IS 'Release tasks from dead workers back to pending';
+COMMENT ON FUNCTION generate_resync_tasks IS 'Generate daily product resync tasks in batches';
--- a/backend/migrations/075_consecutive_misses.sql
+++ b/backend/migrations/075_consecutive_misses.sql
@@ -0,0 +1,13 @@
+-- Migration 075: Add consecutive_misses column to store_products
+-- Used to track how many consecutive crawls a product has been missing from the feed
+-- After 3 consecutive misses, product is marked as OOS
+
+ALTER TABLE store_products
+ADD COLUMN IF NOT EXISTS consecutive_misses INTEGER NOT NULL DEFAULT 0;
+
+-- Index for finding products that need OOS check
+CREATE INDEX IF NOT EXISTS idx_store_products_consecutive_misses
+ON store_products (dispensary_id, consecutive_misses)
+WHERE consecutive_misses > 0;
+
+COMMENT ON COLUMN store_products.consecutive_misses IS 'Number of consecutive crawls where product was not in feed. Reset to 0 when seen. At 3, mark OOS.';
--- a/backend/migrations/076_visitor_analytics.sql
+++ b/backend/migrations/076_visitor_analytics.sql
@@ -0,0 +1,71 @@
+-- Visitor location analytics for Findagram
+-- Tracks visitor locations to understand popular areas
+
+CREATE TABLE IF NOT EXISTS visitor_locations (
+  id SERIAL PRIMARY KEY,
+
+  -- Location data (from IP lookup)
+  ip_hash VARCHAR(64),           -- Hashed IP for privacy (SHA256)
+  city VARCHAR(100),
+  state VARCHAR(100),
+  state_code VARCHAR(10),
+  country VARCHAR(100),
+  country_code VARCHAR(10),
+  latitude DECIMAL(10, 7),
+  longitude DECIMAL(10, 7),
+
+  -- Visit metadata
+  domain VARCHAR(50) NOT NULL,    -- 'findagram.co', 'findadispo.com', etc.
+  page_path VARCHAR(255),         -- '/products', '/dispensaries/123', etc.
+  referrer VARCHAR(500),
+  user_agent VARCHAR(500),
+
+  -- Session tracking
+  session_id VARCHAR(64),         -- For grouping page views in a session
+
+  -- Timestamps
+  created_at TIMESTAMPTZ DEFAULT NOW()
+);
+
+-- Indexes for analytics queries
+CREATE INDEX IF NOT EXISTS idx_visitor_locations_domain ON visitor_locations(domain);
+CREATE INDEX IF NOT EXISTS idx_visitor_locations_city_state ON visitor_locations(city, state_code);
+CREATE INDEX IF NOT EXISTS idx_visitor_locations_created_at ON visitor_locations(created_at);
+CREATE INDEX IF NOT EXISTS idx_visitor_locations_session ON visitor_locations(session_id);
+
+-- Aggregated daily stats (materialized for performance)
+-- One row per (date, domain, city, state_code, country_code) holding visit counts.
+-- Nothing in this migration populates the table.
+-- NOTE(review): city, state_code and country_code are nullable, and PostgreSQL treats
+-- NULLs as distinct in UNIQUE constraints by default, so rows with an unknown
+-- location can presumably be inserted more than once for the same date/domain -
+-- confirm how the writer upserts.
+CREATE TABLE IF NOT EXISTS visitor_location_stats (
+  id SERIAL PRIMARY KEY,
+  date DATE NOT NULL,
+  domain VARCHAR(50) NOT NULL,
+  city VARCHAR(100),
+  state VARCHAR(100),
+  state_code VARCHAR(10),
+  country_code VARCHAR(10),
+
+  -- Metrics
+  visit_count INTEGER DEFAULT 0,
+  unique_sessions INTEGER DEFAULT 0,
+
+  UNIQUE(date, domain, city, state_code, country_code)
+);
+
+CREATE INDEX IF NOT EXISTS idx_visitor_stats_date ON visitor_location_stats(date);
+CREATE INDEX IF NOT EXISTS idx_visitor_stats_domain ON visitor_location_stats(domain);
+CREATE INDEX IF NOT EXISTS idx_visitor_stats_state ON visitor_location_stats(state_code);
+
+-- View for easy querying of top locations
+CREATE OR REPLACE VIEW v_top_visitor_locations AS
+SELECT
+  domain,
+  city,
+  state,
+  state_code,
+  country_code,
+  COUNT(*) as total_visits,
+  COUNT(DISTINCT session_id) as unique_sessions,
+  MAX(created_at) as last_visit
+FROM visitor_locations
+WHERE created_at > NOW() - INTERVAL '30 days'
+GROUP BY domain, city, state, state_code, country_code
+ORDER BY total_visits DESC;
--- a/backend/migrations/076_worker_registry.sql
+++ b/backend/migrations/076_worker_registry.sql
@@ -0,0 +1,141 @@
+-- Migration 076: Worker Registry for Dynamic Workers
+-- Workers register on startup, receive a friendly name, and report heartbeats
+
+-- Name pool for workers (expandable, no hardcoding)
+CREATE TABLE IF NOT EXISTS worker_name_pool (
+  id SERIAL PRIMARY KEY,
+  name VARCHAR(50) UNIQUE NOT NULL,
+  in_use BOOLEAN DEFAULT FALSE,
+  assigned_to VARCHAR(100), -- worker_id
+  assigned_at TIMESTAMPTZ,
+  created_at TIMESTAMPTZ DEFAULT NOW()
+);
+
+-- Seed with initial names (can add more via API)
+INSERT INTO worker_name_pool (name) VALUES
+  ('Alice'), ('Bella'), ('Clara'), ('Diana'), ('Elena'),
+  ('Fiona'), ('Grace'), ('Hazel'), ('Iris'), ('Julia'),
+  ('Katie'), ('Luna'), ('Mia'), ('Nora'), ('Olive'),
+  ('Pearl'), ('Quinn'), ('Rosa'), ('Sara'), ('Tara'),
+  ('Uma'), ('Vera'), ('Wendy'), ('Xena'), ('Yuki'), ('Zara'),
+  ('Amber'), ('Blake'), ('Coral'), ('Dawn'), ('Echo'),
+  ('Fleur'), ('Gem'), ('Haven'), ('Ivy'), ('Jade'),
+  ('Kira'), ('Lotus'), ('Maple'), ('Nova'), ('Onyx'),
+  ('Pixel'), ('Quest'), ('Raven'), ('Sage'), ('Terra'),
+  ('Unity'), ('Violet'), ('Willow'), ('Xylo'), ('Yara'), ('Zen')
+ON CONFLICT (name) DO NOTHING;
+
+-- Worker registry - tracks active workers
+-- One row per registered worker: identity, assigned friendly name, lifecycle status,
+-- heartbeat timestamps and task counters.
+CREATE TABLE IF NOT EXISTS worker_registry (
+  id SERIAL PRIMARY KEY,
+  worker_id VARCHAR(100) UNIQUE NOT NULL,  -- e.g., "pod-abc123" or uuid
+  friendly_name VARCHAR(50),                -- assigned from pool
+  role VARCHAR(50) NOT NULL,                -- task role
+  pod_name VARCHAR(100),                    -- k8s pod name
+  hostname VARCHAR(100),                    -- machine hostname
+  ip_address VARCHAR(50),                   -- worker IP
+  status VARCHAR(20) DEFAULT 'starting',    -- starting, active, idle, offline, terminated
+  started_at TIMESTAMPTZ DEFAULT NOW(),
+  last_heartbeat_at TIMESTAMPTZ DEFAULT NOW(),
+  last_task_at TIMESTAMPTZ,
+  tasks_completed INTEGER DEFAULT 0,
+  tasks_failed INTEGER DEFAULT 0,
+  current_task_id INTEGER,
+  metadata JSONB DEFAULT '{}',
+  created_at TIMESTAMPTZ DEFAULT NOW(),
+  updated_at TIMESTAMPTZ DEFAULT NOW()
+);
+
+-- 074_worker_task_queue.sql already creates a narrower worker_registry. When that
+-- table exists the CREATE above is a no-op, so add the columns this migration relies
+-- on (friendly_name, updated_at, ...) explicitly. All statements are idempotent and
+-- do nothing on a database where the CREATE above built the table.
+ALTER TABLE worker_registry ADD COLUMN IF NOT EXISTS friendly_name VARCHAR(50);
+ALTER TABLE worker_registry ADD COLUMN IF NOT EXISTS ip_address VARCHAR(50);
+ALTER TABLE worker_registry ADD COLUMN IF NOT EXISTS last_task_at TIMESTAMPTZ;
+ALTER TABLE worker_registry ADD COLUMN IF NOT EXISTS current_task_id INTEGER;
+ALTER TABLE worker_registry ADD COLUMN IF NOT EXISTS metadata JSONB DEFAULT '{}';
+ALTER TABLE worker_registry ADD COLUMN IF NOT EXISTS created_at TIMESTAMPTZ DEFAULT NOW();
+ALTER TABLE worker_registry ADD COLUMN IF NOT EXISTS updated_at TIMESTAMPTZ DEFAULT NOW();
+ALTER TABLE worker_registry ALTER COLUMN status SET DEFAULT 'starting';
+
+-- The 074 definition restricts status to ('active', 'idle', 'offline'), which rejects
+-- the 'starting' and 'terminated' values used from this migration onwards.
+ALTER TABLE worker_registry DROP CONSTRAINT IF EXISTS valid_worker_status;
+
+-- Indexes for worker registry
+CREATE INDEX IF NOT EXISTS idx_worker_registry_status ON worker_registry(status);
+CREATE INDEX IF NOT EXISTS idx_worker_registry_role ON worker_registry(role);
+CREATE INDEX IF NOT EXISTS idx_worker_registry_heartbeat ON worker_registry(last_heartbeat_at);
+
+-- Function to assign a name to a new worker
+--
+-- Marks one randomly chosen free row of worker_name_pool as used by p_worker_id and
+-- returns its name. Rows locked by a concurrent caller are skipped. When no free row
+-- can be taken, the result is 'Worker-' followed by the first 8 characters of
+-- p_worker_id (nothing is written to the pool in that case).
+CREATE OR REPLACE FUNCTION assign_worker_name(p_worker_id VARCHAR(100))
+RETURNS VARCHAR(50)
+LANGUAGE plpgsql
+AS $$
+DECLARE
+  picked_name VARCHAR(50);
+BEGIN
+  -- Reserve a free pool entry, if there is one
+  UPDATE worker_name_pool
+  SET
+    in_use = TRUE,
+    assigned_to = p_worker_id,
+    assigned_at = NOW()
+  WHERE id = (
+    SELECT id
+    FROM worker_name_pool
+    WHERE in_use = FALSE
+    ORDER BY RANDOM()
+    LIMIT 1
+    FOR UPDATE SKIP LOCKED
+  )
+  RETURNING name INTO picked_name;
+
+  -- Pool exhausted: derive a name from the worker id instead
+  RETURN COALESCE(picked_name, 'Worker-' || SUBSTRING(p_worker_id FROM 1 FOR 8));
+END;
+$$;
+
+-- Function to release a worker's name back to the pool
+-- Clears the in_use flag and the assignment fields on every pool row currently
+-- assigned to p_worker_id; does nothing when the worker holds no pool name.
+CREATE OR REPLACE FUNCTION release_worker_name(p_worker_id VARCHAR(100))
+RETURNS VOID
+LANGUAGE plpgsql
+AS $$
+BEGIN
+  UPDATE worker_name_pool
+  SET
+    in_use = FALSE,
+    assigned_to = NULL,
+    assigned_at = NULL
+  WHERE assigned_to = p_worker_id;
+END;
+$$;
+
+-- Function to mark stale workers as offline
+--
+-- Sets status = 'offline' on every active/idle/starting worker whose last heartbeat
+-- is older than stale_threshold_minutes, then hands pool names back for offline
+-- workers that have been silent for more than 30 minutes.
+--
+-- Parameters:
+--   stale_threshold_minutes - minutes without a heartbeat before a worker is offline
+-- Returns: number of workers switched to 'offline' by this call.
+CREATE OR REPLACE FUNCTION mark_stale_workers(stale_threshold_minutes INTEGER DEFAULT 5)
+RETURNS INTEGER AS $$
+DECLARE
+  v_count INTEGER;
+BEGIN
+  UPDATE worker_registry
+  SET status = 'offline', updated_at = NOW()
+  WHERE status IN ('active', 'idle', 'starting')
+    AND last_heartbeat_at < NOW() - (stale_threshold_minutes || ' minutes')::INTERVAL;
+
+  -- Aggregates are not allowed in RETURNING; read the affected-row count instead
+  GET DIAGNOSTICS v_count = ROW_COUNT;
+
+  -- Release names from offline workers
+  PERFORM release_worker_name(worker_id)
+  FROM worker_registry
+  WHERE status = 'offline'
+    AND last_heartbeat_at < NOW() - INTERVAL '30 minutes';
+
+  RETURN COALESCE(v_count, 0);
+END;
+$$ LANGUAGE plpgsql;
+
+-- View for dashboard
+-- All workers except those with status 'terminated', with the heartbeat age in
+-- seconds and a derived health_status (offline / stale / busy / ready).
+-- Rows whose status is 'active' sort first, then most recent heartbeat first.
+CREATE OR REPLACE VIEW v_active_workers AS
+SELECT
+  w.id,
+  w.worker_id,
+  w.friendly_name,
+  w.role,
+  w.status,
+  w.pod_name,
+  w.hostname,
+  w.started_at,
+  w.last_heartbeat_at,
+  w.last_task_at,
+  w.tasks_completed,
+  w.tasks_failed,
+  w.current_task_id,
+  EXTRACT(EPOCH FROM (NOW() - w.last_heartbeat_at)) AS seconds_since_heartbeat,
+  CASE
+    WHEN w.status = 'offline' THEN 'offline'
+    -- No heartbeat for more than two minutes
+    WHEN w.last_heartbeat_at < NOW() - INTERVAL '2 minutes' THEN 'stale'
+    WHEN w.current_task_id IS NOT NULL THEN 'busy'
+    ELSE 'ready'
+  END AS health_status
+FROM worker_registry AS w
+WHERE w.status != 'terminated'
+ORDER BY
+  (w.status = 'active') DESC,
+  w.last_heartbeat_at DESC;
+
+COMMENT ON TABLE worker_registry IS 'Tracks all workers that have registered with the system';
+COMMENT ON TABLE worker_name_pool IS 'Pool of friendly names for workers - expandable via API';
--- a/backend/migrations/077_click_events_location.sql
+++ b/backend/migrations/077_click_events_location.sql
@@ -0,0 +1,35 @@
+-- Migration: Add visitor location and dispensary name to click events
+-- Captures where visitors are clicking from and which dispensary
+
+-- Add visitor location columns
+ALTER TABLE product_click_events
+ADD COLUMN IF NOT EXISTS visitor_city VARCHAR(100);
+
+ALTER TABLE product_click_events
+ADD COLUMN IF NOT EXISTS visitor_state VARCHAR(10);
+
+ALTER TABLE product_click_events
+ADD COLUMN IF NOT EXISTS visitor_lat DECIMAL(10, 7);
+
+ALTER TABLE product_click_events
+ADD COLUMN IF NOT EXISTS visitor_lng DECIMAL(10, 7);
+
+-- Add dispensary name for easier reporting
+ALTER TABLE product_click_events
+ADD COLUMN IF NOT EXISTS dispensary_name VARCHAR(255);
+
+-- Create index for location-based analytics
+CREATE INDEX IF NOT EXISTS idx_product_click_events_visitor_state
+ON product_click_events(visitor_state)
+WHERE visitor_state IS NOT NULL;
+
+CREATE INDEX IF NOT EXISTS idx_product_click_events_visitor_city
+ON product_click_events(visitor_city)
+WHERE visitor_city IS NOT NULL;
+
+-- Add comments
+COMMENT ON COLUMN product_click_events.visitor_city IS 'City where the visitor is located (from IP geolocation)';
+COMMENT ON COLUMN product_click_events.visitor_state IS 'State where the visitor is located (from IP geolocation)';
+COMMENT ON COLUMN product_click_events.visitor_lat IS 'Visitor latitude (from IP geolocation)';
+COMMENT ON COLUMN product_click_events.visitor_lng IS 'Visitor longitude (from IP geolocation)';
+COMMENT ON COLUMN product_click_events.dispensary_name IS 'Name of the dispensary (denormalized for easier reporting)';
--- a/backend/migrations/078_proxy_consecutive_403.sql
+++ b/backend/migrations/078_proxy_consecutive_403.sql
@@ -0,0 +1,8 @@
+-- Migration 078: Add consecutive_403_count to proxies table
+-- Per workflow-12102025.md: Track consecutive 403s per proxy
+-- After 3 consecutive 403s with different fingerprints → disable proxy
+
+ALTER TABLE proxies ADD COLUMN IF NOT EXISTS consecutive_403_count INTEGER DEFAULT 0;
+
+-- Add comment explaining the column
+COMMENT ON COLUMN proxies.consecutive_403_count IS 'Tracks consecutive 403 blocks. Reset to 0 on success. Proxy disabled at 3.';
--- a/backend/migrations/079_task_schedules.sql
+++ b/backend/migrations/079_task_schedules.sql
@@ -0,0 +1,49 @@
+-- Migration 079: Task Schedules for Database-Driven Scheduler
+-- Per TASK_WORKFLOW_2024-12-10.md: Replaces node-cron with DB-driven scheduling
+--
+-- 2024-12-10: Created for reliable, multi-replica-safe task scheduling
+
+-- task_schedules: Stores schedule definitions and state
+CREATE TABLE IF NOT EXISTS task_schedules (
+  id SERIAL PRIMARY KEY,
+  name VARCHAR(100) NOT NULL UNIQUE,
+  role VARCHAR(50) NOT NULL,  -- TaskRole: product_refresh, store_discovery, etc.
+  description TEXT,
+
+  -- Schedule configuration
+  enabled BOOLEAN DEFAULT TRUE,
+  interval_hours INTEGER NOT NULL DEFAULT 4,
+  priority INTEGER DEFAULT 0,
+
+  -- Optional scope filters
+  state_code VARCHAR(2),  -- NULL = all states
+  platform VARCHAR(50),   -- NULL = all platforms
+
+  -- Execution state (updated by scheduler)
+  last_run_at TIMESTAMPTZ,
+  next_run_at TIMESTAMPTZ,
+  last_task_count INTEGER DEFAULT 0,
+  last_error TEXT,
+
+  created_at TIMESTAMPTZ DEFAULT NOW(),
+  updated_at TIMESTAMPTZ DEFAULT NOW()
+);
+
+-- Indexes for scheduler queries
+CREATE INDEX IF NOT EXISTS idx_task_schedules_enabled ON task_schedules(enabled) WHERE enabled = TRUE;
+CREATE INDEX IF NOT EXISTS idx_task_schedules_next_run ON task_schedules(next_run_at) WHERE enabled = TRUE;
+
+-- Insert default schedules
+INSERT INTO task_schedules (name, role, interval_hours, priority, description, next_run_at)
+VALUES
+  ('product_refresh_all', 'product_refresh', 4, 0, 'Generate product refresh tasks for all crawl-enabled stores every 4 hours', NOW()),
+  ('store_discovery_dutchie', 'store_discovery', 24, 5, 'Discover new Dutchie stores daily', NOW()),
+  ('analytics_refresh', 'analytics_refresh', 6, 0, 'Refresh analytics materialized views every 6 hours', NOW())
+ON CONFLICT (name) DO NOTHING;
+
+-- Comment for documentation
+COMMENT ON TABLE task_schedules IS 'Database-driven task scheduler configuration. Per TASK_WORKFLOW_2024-12-10.md:
+- Schedules persist in DB (survive restarts)
+- Uses SELECT FOR UPDATE SKIP LOCKED for multi-replica safety
+- Scheduler polls every 60s and executes due schedules
+- Creates tasks in worker_tasks for task-worker.ts to process';
--- a/backend/migrations/080_raw_crawl_payloads.sql
+++ b/backend/migrations/080_raw_crawl_payloads.sql
@@ -0,0 +1,58 @@
+-- Migration 080: Raw Crawl Payloads Metadata Table
+-- Per TASK_WORKFLOW_2024-12-10.md: Store full GraphQL payloads for historical analysis
+--
+-- Design Pattern: Metadata/Payload Separation
+-- - Metadata (this table): Small, indexed, queryable
+-- - Payload (filesystem): Gzipped JSON at storage_path
+--
+-- Benefits:
+-- - Compare any two crawls to see what changed
+-- - Replay/re-normalize historical data if logic changes
+-- - Debug issues by seeing exactly what the API returned
+-- - DB stays small, backups stay fast
+--
+-- Storage location: /storage/payloads/{year}/{month}/{day}/store_{id}_{timestamp}.json.gz
+-- Compression: ~90% reduction (1.5MB -> 150KB per crawl)
+
+CREATE TABLE IF NOT EXISTS raw_crawl_payloads (
+  id SERIAL PRIMARY KEY,
+
+  -- Links to crawl tracking
+  crawl_run_id INTEGER REFERENCES crawl_runs(id) ON DELETE SET NULL,
+  dispensary_id INTEGER NOT NULL REFERENCES dispensaries(id) ON DELETE CASCADE,
+
+  -- File location (gzipped JSON)
+  storage_path TEXT NOT NULL,
+
+  -- Metadata for quick queries without loading file
+  product_count INTEGER NOT NULL DEFAULT 0,
+  size_bytes INTEGER,                    -- Compressed size
+  size_bytes_raw INTEGER,                -- Uncompressed size
+
+  -- Timestamps
+  fetched_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
+  created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
+
+  -- Optional: checksum for integrity verification
+  checksum_sha256 VARCHAR(64)
+);
+
+-- Indexes for common queries
+CREATE INDEX IF NOT EXISTS idx_raw_crawl_payloads_dispensary
+  ON raw_crawl_payloads(dispensary_id);
+
+CREATE INDEX IF NOT EXISTS idx_raw_crawl_payloads_dispensary_fetched
+  ON raw_crawl_payloads(dispensary_id, fetched_at DESC);
+
+CREATE INDEX IF NOT EXISTS idx_raw_crawl_payloads_fetched
+  ON raw_crawl_payloads(fetched_at DESC);
+
+CREATE INDEX IF NOT EXISTS idx_raw_crawl_payloads_crawl_run
+  ON raw_crawl_payloads(crawl_run_id)
+  WHERE crawl_run_id IS NOT NULL;
+
+-- Comments
+COMMENT ON TABLE raw_crawl_payloads IS 'Metadata for raw GraphQL payloads stored on filesystem. Per TASK_WORKFLOW_2024-12-10.md: Full payloads enable historical diffs and replay.';
+COMMENT ON COLUMN raw_crawl_payloads.storage_path IS 'Path to gzipped JSON file, e.g. /storage/payloads/2024/12/10/store_123_1702234567.json.gz';
+COMMENT ON COLUMN raw_crawl_payloads.size_bytes IS 'Compressed file size in bytes';
+COMMENT ON COLUMN raw_crawl_payloads.size_bytes_raw IS 'Uncompressed payload size in bytes';
--- a/backend/migrations/081_payload_fetch_columns.sql
+++ b/backend/migrations/081_payload_fetch_columns.sql
@@ -0,0 +1,37 @@
+-- Migration 081: Payload Fetch Columns
+-- Per TASK_WORKFLOW_2024-12-10.md: Separates API fetch from data processing
+--
+-- New architecture:
+-- - payload_fetch: Hits Dutchie API, saves raw payload to disk
+-- - product_refresh: Reads local payload, normalizes, upserts to DB
+--
+-- This migration adds:
+-- 1. payload column to worker_tasks (for task chaining data)
+-- 2. processed_at column to raw_crawl_payloads (track when payload was processed)
+-- 3. last_fetch_at column to dispensaries (track when last payload was fetched)
+
+-- Add payload column to worker_tasks for task chaining
+-- Used by payload_fetch to pass payload_id to product_refresh
+ALTER TABLE worker_tasks
+ADD COLUMN IF NOT EXISTS payload JSONB DEFAULT NULL;
+
+COMMENT ON COLUMN worker_tasks.payload IS 'Per TASK_WORKFLOW_2024-12-10.md: Task chaining data (e.g., payload_id from payload_fetch to product_refresh)';
+
+-- Add processed_at to raw_crawl_payloads
+-- Tracks when the payload was processed by product_refresh
+ALTER TABLE raw_crawl_payloads
+ADD COLUMN IF NOT EXISTS processed_at TIMESTAMPTZ DEFAULT NULL;
+
+COMMENT ON COLUMN raw_crawl_payloads.processed_at IS 'When this payload was processed by product_refresh handler';
+
+-- Index for finding unprocessed payloads
+CREATE INDEX IF NOT EXISTS idx_raw_crawl_payloads_unprocessed
+  ON raw_crawl_payloads(dispensary_id, fetched_at DESC)
+  WHERE processed_at IS NULL;
+
+-- Add last_fetch_at to dispensaries
+-- Tracks when the last payload was fetched (separate from last_crawl_at which is when processing completed)
+ALTER TABLE dispensaries
+ADD COLUMN IF NOT EXISTS last_fetch_at TIMESTAMPTZ DEFAULT NULL;
+
+COMMENT ON COLUMN dispensaries.last_fetch_at IS 'Per TASK_WORKFLOW_2024-12-10.md: When last payload was fetched from API (separate from last_crawl_at which is when processing completed)';
--- a/backend/migrations/082_proxy_notification_trigger.sql
+++ b/backend/migrations/082_proxy_notification_trigger.sql
@@ -0,0 +1,27 @@
+-- Migration: 082_proxy_notification_trigger
+-- Date: 2024-12-11
+-- Description: Add PostgreSQL NOTIFY trigger to alert workers when proxies are added
+
+-- Create function to notify workers when active proxy is added/activated
+-- Trigger body: publishes the proxy id on the 'proxy_added' channel whenever the
+-- row being written has active = true; inactive (or NULL) rows are ignored.
+CREATE OR REPLACE FUNCTION notify_proxy_added()
+RETURNS TRIGGER
+LANGUAGE plpgsql
+AS $$
+BEGIN
+  -- Nothing to announce unless the proxy is active
+  IF NEW.active IS NOT TRUE THEN
+    RETURN NEW;
+  END IF;
+
+  PERFORM pg_notify('proxy_added', NEW.id::text);
+  RETURN NEW;
+END;
+$$;
+
+-- Drop existing trigger if any
+DROP TRIGGER IF EXISTS proxy_added_trigger ON proxies;
+
+-- Create trigger on insert and update of active column
+CREATE TRIGGER proxy_added_trigger
+  AFTER INSERT OR UPDATE OF active
+  ON proxies
+  FOR EACH ROW
+  EXECUTE FUNCTION notify_proxy_added();
+
+COMMENT ON FUNCTION notify_proxy_added() IS
+'Sends PostgreSQL NOTIFY to proxy_added channel when an active proxy is added or activated. Workers LISTEN on this channel to wake up immediately.';
--- a/backend/migrations/083_discovery_runs.sql
+++ b/backend/migrations/083_discovery_runs.sql
@@ -0,0 +1,88 @@
+-- Migration 083: Discovery Run Tracking
+-- Tracks progress of store discovery runs step-by-step
+
+-- Main discovery runs table
+-- One row per store-discovery run: platform, lifecycle status, start/finish times,
+-- running totals and the ids of newly found stores.
+CREATE TABLE IF NOT EXISTS discovery_runs (
+  id SERIAL PRIMARY KEY,
+  platform VARCHAR(50) NOT NULL DEFAULT 'dutchie',
+  status VARCHAR(20) NOT NULL DEFAULT 'running', -- running, completed, failed
+  started_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
+  finished_at TIMESTAMPTZ,
+  -- NOTE(review): the task queue table created in 074_worker_task_queue.sql is named
+  -- worker_tasks; confirm that worker_task_queue exists, otherwise this CREATE fails.
+  task_id INTEGER REFERENCES worker_task_queue(id),
+
+  -- Totals
+  states_total INTEGER DEFAULT 0,
+  states_completed INTEGER DEFAULT 0,
+  locations_discovered INTEGER DEFAULT 0,
+  locations_promoted INTEGER DEFAULT 0,
+  new_store_ids INTEGER[] DEFAULT '{}',
+
+  -- Error info
+  error_message TEXT,
+
+  created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
+);
+
+-- Per-state progress within a run
+CREATE TABLE IF NOT EXISTS discovery_run_states (
+  id SERIAL PRIMARY KEY,
+  run_id INTEGER NOT NULL REFERENCES discovery_runs(id) ON DELETE CASCADE,
+  state_code VARCHAR(2) NOT NULL,
+  status VARCHAR(20) NOT NULL DEFAULT 'pending', -- pending, running, completed, failed
+  started_at TIMESTAMPTZ,
+  finished_at TIMESTAMPTZ,
+
+  -- Results
+  cities_found INTEGER DEFAULT 0,
+  locations_found INTEGER DEFAULT 0,
+  locations_upserted INTEGER DEFAULT 0,
+  new_dispensary_ids INTEGER[] DEFAULT '{}',
+
+  -- Error info
+  error_message TEXT,
+
+  created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
+
+  UNIQUE(run_id, state_code)
+);
+
+-- Step-by-step log for detailed progress tracking
+CREATE TABLE IF NOT EXISTS discovery_run_steps (
+  id SERIAL PRIMARY KEY,
+  run_id INTEGER NOT NULL REFERENCES discovery_runs(id) ON DELETE CASCADE,
+  state_code VARCHAR(2),
+  step_name VARCHAR(100) NOT NULL,
+  status VARCHAR(20) NOT NULL DEFAULT 'started', -- started, completed, failed
+  started_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
+  finished_at TIMESTAMPTZ,
+
+  -- Details (JSON for flexibility)
+  details JSONB DEFAULT '{}',
+
+  created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
+);
+
+-- Indexes for querying
+CREATE INDEX IF NOT EXISTS idx_discovery_runs_status ON discovery_runs(status);
+CREATE INDEX IF NOT EXISTS idx_discovery_runs_platform ON discovery_runs(platform);
+CREATE INDEX IF NOT EXISTS idx_discovery_runs_started_at ON discovery_runs(started_at DESC);
+CREATE INDEX IF NOT EXISTS idx_discovery_run_states_run_id ON discovery_run_states(run_id);
+CREATE INDEX IF NOT EXISTS idx_discovery_run_steps_run_id ON discovery_run_steps(run_id);
+
+-- View for latest run status per platform
+-- DISTINCT ON (platform) together with ORDER BY platform, started_at DESC keeps only
+-- the most recently started run of each platform. duration_seconds is measured up
+-- to NOW() while the run has no finished_at.
+CREATE OR REPLACE VIEW v_latest_discovery_runs AS
+SELECT DISTINCT ON (platform)
+  id,
+  platform,
+  status,
+  started_at,
+  finished_at,
+  states_total,
+  states_completed,
+  locations_discovered,
+  locations_promoted,
+  -- array_length() returns NULL for the default empty array '{}'; cardinality()
+  -- returns 0 there, and COALESCE covers a NULL array, so the count is never NULL
+  COALESCE(cardinality(new_store_ids), 0) as new_stores_count,
+  error_message,
+  EXTRACT(EPOCH FROM (COALESCE(finished_at, NOW()) - started_at)) as duration_seconds
+FROM discovery_runs
+ORDER BY platform, started_at DESC;
--- a/backend/migrations/084_dual_transport_preflight.sql
+++ b/backend/migrations/084_dual_transport_preflight.sql
@@ -0,0 +1,253 @@
+-- Migration 084: Dual Transport Preflight System
+-- Workers run both curl and http (Puppeteer) preflights on startup
+-- Tasks can require a specific transport method
+
+-- ===================================================================
+-- PART 1: Add preflight columns to worker_registry
+-- ===================================================================
+
+-- Per-transport preflight bookkeeping on worker_registry.
+--   curl = curl/axios transport (proxy-based)
+--   http = http/Puppeteer transport (browser-based)
+-- All columns are added by one multi-action ALTER TABLE, in the same order as
+-- before; IF NOT EXISTS keeps the statement safe to re-run.
+ALTER TABLE worker_registry
+  -- Outcome of each preflight; defaults to 'pending'
+  ADD COLUMN IF NOT EXISTS preflight_curl_status VARCHAR(20) DEFAULT 'pending',
+  ADD COLUMN IF NOT EXISTS preflight_http_status VARCHAR(20) DEFAULT 'pending',
+  -- When each preflight completed
+  ADD COLUMN IF NOT EXISTS preflight_curl_at TIMESTAMPTZ,
+  ADD COLUMN IF NOT EXISTS preflight_http_at TIMESTAMPTZ,
+  -- Error message recorded for a failed preflight
+  ADD COLUMN IF NOT EXISTS preflight_curl_error TEXT,
+  ADD COLUMN IF NOT EXISTS preflight_http_error TEXT,
+  -- Response time of a successful preflight, in milliseconds
+  ADD COLUMN IF NOT EXISTS preflight_curl_ms INTEGER,
+  ADD COLUMN IF NOT EXISTS preflight_http_ms INTEGER;
+
+-- Restrict each status column to the known values. Each constraint is dropped
+-- (if present) and re-created within a single statement so a re-run succeeds.
+ALTER TABLE worker_registry
+  DROP CONSTRAINT IF EXISTS valid_preflight_curl_status,
+  ADD CONSTRAINT valid_preflight_curl_status
+    CHECK (preflight_curl_status IN ('pending', 'passed', 'failed', 'skipped'));
+
+ALTER TABLE worker_registry
+  DROP CONSTRAINT IF EXISTS valid_preflight_http_status,
+  ADD CONSTRAINT valid_preflight_http_status
+    CHECK (preflight_http_status IN ('pending', 'passed', 'failed', 'skipped'));
+
+-- ===================================================================
+-- PART 2: Add method column to worker_tasks
+-- ===================================================================
+
+-- Transport method requirement for the task
+-- NULL = no preference (any worker can claim)
+-- 'curl' = requires curl/axios transport (proxy-based, fast)
+-- 'http' = requires http/Puppeteer transport (browser-based, anti-detect)
+-- The column is added without a DEFAULT, so rows inserted without a method
+-- keep NULL.
+ALTER TABLE worker_tasks
+ADD COLUMN IF NOT EXISTS method VARCHAR(10);
+
+-- Constraint for valid method values (dropped first so a re-run can re-create it)
+ALTER TABLE worker_tasks
+DROP CONSTRAINT IF EXISTS valid_task_method;
+
+ALTER TABLE worker_tasks
+ADD CONSTRAINT valid_task_method
+CHECK (method IS NULL OR method IN ('curl', 'http'));
+
+-- Index for method-based task claiming
+-- Partial index: only rows with status = 'pending' are indexed
+CREATE INDEX IF NOT EXISTS idx_worker_tasks_method
+  ON worker_tasks(method)
+  WHERE status = 'pending';
+
+-- Backfill: set method = 'http' on every existing task whose method is NULL.
+-- The WHERE clause does not filter on status, so tasks in any state are
+-- updated, not only pending ones.
+-- ALL current tasks require Puppeteer/browser-based transport
+-- Re-running this migration also converts NULL-method tasks created since
+-- the previous run.
+UPDATE worker_tasks
+SET method = 'http'
+WHERE method IS NULL;
+
+-- ===================================================================
+-- PART 3: Update claim_task function for method compatibility
+-- ===================================================================
+
+-- Claim at most one pending task for a worker and return the claimed row.
+--
+-- Parameters:
+--   p_role         only tasks with this role are considered
+--   p_worker_id    written to worker_tasks.worker_id on the claimed row
+--   p_curl_passed  TRUE allows claiming tasks with method = 'curl'
+--   p_http_passed  TRUE allows claiming tasks with method = 'http'
+-- Returns: the updated worker_tasks row; when no task qualifies the UPDATE
+--   affects no rows and the returned record holds NULLs.
+--
+-- With the defaults (curl TRUE, http FALSE) a caller that passes only role and
+-- worker id can claim NULL-method and 'curl' tasks but never 'http' tasks.
+--
+-- NOTE(review): CREATE OR REPLACE only replaces a function with the same
+-- argument types. If an earlier claim_task(VARCHAR, VARCHAR) exists, this
+-- statement adds an overload instead of replacing it -- confirm and drop the
+-- old signature if so.
+CREATE OR REPLACE FUNCTION claim_task(
+  p_role VARCHAR(50),
+  p_worker_id VARCHAR(100),
+  p_curl_passed BOOLEAN DEFAULT TRUE,
+  p_http_passed BOOLEAN DEFAULT FALSE
+) RETURNS worker_tasks AS $$
+DECLARE
+  claimed_task worker_tasks;
+BEGIN
+  -- Select one candidate in the subquery, then mark exactly that row as claimed
+  UPDATE worker_tasks
+  SET
+    status = 'claimed',
+    worker_id = p_worker_id,
+    claimed_at = NOW(),
+    updated_at = NOW()
+  WHERE id = (
+    SELECT id FROM worker_tasks
+    WHERE role = p_role
+      AND status = 'pending'
+      -- Unscheduled tasks, or tasks whose scheduled time has arrived
+      AND (scheduled_for IS NULL OR scheduled_for <= NOW())
+      -- Method compatibility: worker must have passed the required preflight
+      AND (
+        method IS NULL  -- No preference, any worker can claim
+        OR (method = 'curl' AND p_curl_passed = TRUE)
+        OR (method = 'http' AND p_http_passed = TRUE)
+      )
+      -- Exclude stores that already have an active task
+      -- NOTE(review): this check only sees committed rows, so two concurrent
+      -- callers could presumably each claim a different pending task for the
+      -- same dispensary -- confirm whether that matters.
+      AND (dispensary_id IS NULL OR dispensary_id NOT IN (
+        SELECT dispensary_id FROM worker_tasks
+        WHERE status IN ('claimed', 'running')
+        AND dispensary_id IS NOT NULL
+      ))
+    -- Highest priority first; among equal priorities the oldest task wins
+    ORDER BY priority DESC, created_at ASC
+    LIMIT 1
+    -- Lock the chosen row; rows already locked by another transaction are skipped
+    FOR UPDATE SKIP LOCKED
+  )
+  RETURNING * INTO claimed_task;
+
+  RETURN claimed_task;
+END;
+$$ LANGUAGE plpgsql;
+
+-- ===================================================================
+-- PART 4: Update v_active_workers view
+-- ===================================================================
+
+-- Recreate v_active_workers with the per-transport preflight columns.
+-- The view is dropped and created again rather than replaced in place.
+DROP VIEW IF EXISTS v_active_workers;
+
+CREATE VIEW v_active_workers AS
+SELECT
+  id,
+  worker_id,
+  friendly_name,
+  role,
+  status,
+  pod_name,
+  hostname,
+  started_at,
+  last_heartbeat_at,
+  last_task_at,
+  tasks_completed,
+  tasks_failed,
+  current_task_id,
+  -- Preflight results, one set of columns per transport
+  preflight_curl_status,
+  preflight_http_status,
+  preflight_curl_at,
+  preflight_http_at,
+  preflight_curl_error,
+  preflight_http_error,
+  preflight_curl_ms,
+  preflight_http_ms,
+  -- Derived: seconds elapsed since the last heartbeat
+  EXTRACT(EPOCH FROM (NOW() - last_heartbeat_at)) AS seconds_since_heartbeat,
+  -- Derived: first matching rule wins (offline, then stale, then busy, else ready)
+  CASE
+    WHEN status = 'offline' THEN 'offline'
+    WHEN last_heartbeat_at < NOW() - INTERVAL '2 minutes' THEN 'stale'
+    WHEN current_task_id IS NOT NULL THEN 'busy'
+    ELSE 'ready'
+  END AS health_status,
+  -- Derived: whether the worker passed the preflight for each transport
+  (preflight_curl_status = 'passed') AS can_curl,
+  (preflight_http_status = 'passed') AS can_http
+FROM worker_registry
+WHERE status <> 'terminated'
+ORDER BY (status = 'active') DESC, last_heartbeat_at DESC;
+
+-- ===================================================================
+-- PART 5: View for task queue with method info
+-- ===================================================================
+
+-- Recreate v_task_history so each task row also carries its transport method.
+DROP VIEW IF EXISTS v_task_history;
+
+CREATE VIEW v_task_history AS
+SELECT
+  wt.id,
+  wt.role,
+  wt.dispensary_id,
+  -- NULL when the task has no dispensary or the dispensary row is missing
+  disp.name AS dispensary_name,
+  wt.platform,
+  wt.status,
+  wt.priority,
+  wt.method,
+  wt.worker_id,
+  wt.scheduled_for,
+  wt.claimed_at,
+  wt.started_at,
+  wt.completed_at,
+  wt.error_message,
+  wt.retry_count,
+  wt.created_at,
+  -- Run time in seconds; NULL unless both timestamps are set
+  EXTRACT(EPOCH FROM (wt.completed_at - wt.started_at)) AS duration_sec
+FROM worker_tasks AS wt
+LEFT OUTER JOIN dispensaries AS disp
+  ON wt.dispensary_id = disp.id
+ORDER BY wt.created_at DESC;
+
+-- ===================================================================
+-- PART 6: Helper function to update worker preflight status
+-- ===================================================================
+
+-- Store the outcome of one preflight on the matching worker_registry row.
+--   p_worker_id    worker whose row is updated
+--   p_transport    'curl' or 'http'; any other value (or NULL) changes nothing
+--   p_status       written to the transport's status column
+--   p_response_ms  written to the transport's *_ms column
+--   p_error        written to the transport's *_error column
+-- The transport's *_at column and updated_at are both set to NOW().
+CREATE OR REPLACE FUNCTION update_worker_preflight(
+  p_worker_id VARCHAR(100),
+  p_transport VARCHAR(10),  -- 'curl' or 'http'
+  p_status VARCHAR(20),     -- 'passed', 'failed', 'skipped'
+  p_response_ms INTEGER DEFAULT NULL,
+  p_error TEXT DEFAULT NULL
+) RETURNS VOID AS $$
+BEGIN
+  CASE p_transport
+    WHEN 'curl' THEN
+      UPDATE worker_registry
+      SET
+        preflight_curl_status = p_status,
+        preflight_curl_at = NOW(),
+        preflight_curl_ms = p_response_ms,
+        preflight_curl_error = p_error,
+        updated_at = NOW()
+      WHERE worker_id = p_worker_id;
+    WHEN 'http' THEN
+      UPDATE worker_registry
+      SET
+        preflight_http_status = p_status,
+        preflight_http_at = NOW(),
+        preflight_http_ms = p_response_ms,
+        preflight_http_error = p_error,
+        updated_at = NOW()
+      WHERE worker_id = p_worker_id;
+    ELSE
+      -- Unrecognised or NULL transport: deliberately a no-op
+      NULL;
+  END CASE;
+END;
+$$ LANGUAGE plpgsql;
+
+-- ===================================================================
+-- Comments
+-- ===================================================================
+
+-- Catalog descriptions for the columns and functions defined in this migration.
+COMMENT ON COLUMN worker_registry.preflight_curl_status IS 'Status of curl/axios preflight: pending, passed, failed, skipped';
+COMMENT ON COLUMN worker_registry.preflight_http_status IS 'Status of http/Puppeteer preflight: pending, passed, failed, skipped';
+COMMENT ON COLUMN worker_registry.preflight_curl_at IS 'When curl preflight completed';
+COMMENT ON COLUMN worker_registry.preflight_http_at IS 'When http preflight completed';
+COMMENT ON COLUMN worker_registry.preflight_curl_error IS 'Error message if curl preflight failed';
+COMMENT ON COLUMN worker_registry.preflight_http_error IS 'Error message if http preflight failed';
+COMMENT ON COLUMN worker_registry.preflight_curl_ms IS 'Response time of successful curl preflight (ms)';
+COMMENT ON COLUMN worker_registry.preflight_http_ms IS 'Response time of successful http preflight (ms)';
+
+COMMENT ON COLUMN worker_tasks.method IS 'Transport method required: NULL=any, curl=proxy-based, http=browser-based';
+
+-- Functions are named with their argument types: COMMENT ON FUNCTION without
+-- an argument list fails when more than one function shares the name, which
+-- happens if an older signature of either function is still present.
+COMMENT ON FUNCTION claim_task(VARCHAR, VARCHAR, BOOLEAN, BOOLEAN) IS 'Atomically claim a task, respecting method requirements and per-store locking';
+COMMENT ON FUNCTION update_worker_preflight(VARCHAR, VARCHAR, VARCHAR, INTEGER, TEXT) IS 'Update a workers preflight status for a given transport';
--- a/backend/migrations/085_preflight_ip_fingerprint.sql
+++ b/backend/migrations/085_preflight_ip_fingerprint.sql
@@ -0,0 +1,168 @@
+-- Migration 085: Add IP and fingerprint columns for preflight reporting
+-- These columns were missing from migration 084
+
+-- ===================================================================
+-- PART 1: Add IP address columns to worker_registry
+-- ===================================================================
+
+-- IP address detected during curl/axios preflight
+-- (VARCHAR(45): presumably sized to hold a textual IPv6 address -- confirm)
+ALTER TABLE worker_registry
+ADD COLUMN IF NOT EXISTS curl_ip VARCHAR(45);
+
+-- IP address detected during http/Puppeteer preflight
+ALTER TABLE worker_registry
+ADD COLUMN IF NOT EXISTS http_ip VARCHAR(45);
+
+-- ===================================================================
+-- PART 2: Add fingerprint data column
+-- ===================================================================
+
+-- Browser fingerprint data captured during Puppeteer preflight
+-- Written only by the 'http' branch of update_worker_preflight(); passing a
+-- NULL fingerprint there keeps the previously stored value.
+ALTER TABLE worker_registry
+ADD COLUMN IF NOT EXISTS fingerprint_data JSONB;
+
+-- ===================================================================
+-- PART 3: Add combined preflight status/timestamp for convenience
+-- ===================================================================
+
+-- Overall preflight status, derived by update_worker_preflight() from the two
+-- per-transport statuses. Values as that function computes them:
+--   - 'passed':  both curl and http are 'passed'
+--   - 'partial': exactly one of curl / http is 'passed'
+--   - 'failed':  neither is 'passed' and at least one is 'failed'
+--   - 'pending': any other combination (e.g. both 'pending' or 'skipped')
+-- No CHECK constraint is added for this column in this migration.
+ALTER TABLE worker_registry
+ADD COLUMN IF NOT EXISTS preflight_status VARCHAR(20) DEFAULT 'pending';
+
+-- Most recent preflight completion timestamp
+-- (written by update_worker_preflight() when it refreshes preflight_status)
+ALTER TABLE worker_registry
+ADD COLUMN IF NOT EXISTS preflight_at TIMESTAMPTZ;
+
+-- ===================================================================
+-- PART 4: Update function to set preflight status
+-- ===================================================================
+
+-- Migration 084 defined update_worker_preflight(VARCHAR, VARCHAR, VARCHAR,
+-- INTEGER, TEXT). CREATE OR REPLACE only replaces a function whose argument
+-- types match, so without this DROP the seven-argument version below would be
+-- added as a second overload: the old function would remain callable and
+-- calls relying on defaults (e.g. three arguments) would fail as ambiguous.
+DROP FUNCTION IF EXISTS update_worker_preflight(VARCHAR, VARCHAR, VARCHAR, INTEGER, TEXT);
+
+-- Record the result of one preflight and refresh the worker's overall status.
+--
+-- Parameters:
+--   p_worker_id    worker whose worker_registry row is updated
+--   p_transport    'curl' or 'http'; any other value (or NULL) is ignored
+--   p_status       written to the transport's status column
+--   p_ip           written to curl_ip / http_ip
+--   p_response_ms  written to the transport's *_ms column
+--   p_error        written to the transport's *_error column
+--   p_fingerprint  http only; NULL keeps the stored fingerprint_data
+--
+-- Overall preflight_status after the call:
+--   'passed'  both transports passed
+--   'partial' exactly one transport passed
+--   'failed'  none passed and at least one failed
+--   'pending' anything else (e.g. only 'pending' / 'skipped' values)
+CREATE OR REPLACE FUNCTION update_worker_preflight(
+  p_worker_id VARCHAR(100),
+  p_transport VARCHAR(10),  -- 'curl' or 'http'
+  p_status VARCHAR(20),     -- 'passed', 'failed', 'skipped'
+  p_ip VARCHAR(45) DEFAULT NULL,
+  p_response_ms INTEGER DEFAULT NULL,
+  p_error TEXT DEFAULT NULL,
+  p_fingerprint JSONB DEFAULT NULL
+) RETURNS VOID AS $$
+DECLARE
+  v_curl_status VARCHAR(20);
+  v_http_status VARCHAR(20);
+  v_overall_status VARCHAR(20);
+BEGIN
+  IF p_transport = 'curl' THEN
+    UPDATE worker_registry
+    SET
+      preflight_curl_status = p_status,
+      preflight_curl_at = NOW(),
+      preflight_curl_ms = p_response_ms,
+      preflight_curl_error = p_error,
+      curl_ip = p_ip,
+      updated_at = NOW()
+    WHERE worker_id = p_worker_id;
+  ELSIF p_transport = 'http' THEN
+    UPDATE worker_registry
+    SET
+      preflight_http_status = p_status,
+      preflight_http_at = NOW(),
+      preflight_http_ms = p_response_ms,
+      preflight_http_error = p_error,
+      http_ip = p_ip,
+      -- Keep the stored fingerprint when the caller supplies none
+      fingerprint_data = COALESCE(p_fingerprint, fingerprint_data),
+      updated_at = NOW()
+    WHERE worker_id = p_worker_id;
+  ELSE
+    -- Unknown or NULL transport: nothing was recorded, so do not touch
+    -- preflight_status or move preflight_at forward.
+    RETURN;
+  END IF;
+
+  -- Re-read both per-transport statuses, including the one just written
+  SELECT preflight_curl_status, preflight_http_status
+  INTO v_curl_status, v_http_status
+  FROM worker_registry
+  WHERE worker_id = p_worker_id;
+
+  -- Compute overall status
+  IF v_curl_status = 'passed' AND v_http_status = 'passed' THEN
+    v_overall_status := 'passed';
+  ELSIF v_curl_status = 'passed' OR v_http_status = 'passed' THEN
+    v_overall_status := 'partial';
+  ELSIF v_curl_status = 'failed' OR v_http_status = 'failed' THEN
+    v_overall_status := 'failed';
+  ELSE
+    v_overall_status := 'pending';
+  END IF;
+
+  UPDATE worker_registry
+  SET
+    preflight_status = v_overall_status,
+    preflight_at = NOW()
+  WHERE worker_id = p_worker_id;
+END;
+$$ LANGUAGE plpgsql;
+
+-- ===================================================================
+-- PART 5: Update v_active_workers view
+-- ===================================================================
+
+-- Recreate v_active_workers with the IP, combined-status and fingerprint
+-- columns. The new columns sit in the middle of the select list, so the view
+-- is dropped and created again instead of being replaced in place.
+DROP VIEW IF EXISTS v_active_workers;
+
+CREATE VIEW v_active_workers AS
+SELECT
+  id,
+  worker_id,
+  friendly_name,
+  role,
+  status,
+  pod_name,
+  hostname,
+  started_at,
+  last_heartbeat_at,
+  last_task_at,
+  tasks_completed,
+  tasks_failed,
+  current_task_id,
+  -- IPs recorded by each preflight
+  curl_ip,
+  http_ip,
+  -- Overall preflight result and when it was last written
+  preflight_status,
+  preflight_at,
+  -- Per-transport preflight details
+  preflight_curl_status,
+  preflight_http_status,
+  preflight_curl_at,
+  preflight_http_at,
+  preflight_curl_error,
+  preflight_http_error,
+  preflight_curl_ms,
+  preflight_http_ms,
+  -- Stored browser fingerprint
+  fingerprint_data,
+  -- Derived: seconds elapsed since the last heartbeat
+  EXTRACT(EPOCH FROM (NOW() - last_heartbeat_at)) AS seconds_since_heartbeat,
+  -- Derived: first matching rule wins (offline, then stale, then busy, else ready)
+  CASE
+    WHEN status = 'offline' THEN 'offline'
+    WHEN last_heartbeat_at < NOW() - INTERVAL '2 minutes' THEN 'stale'
+    WHEN current_task_id IS NOT NULL THEN 'busy'
+    ELSE 'ready'
+  END AS health_status,
+  -- Derived: whether the worker passed the preflight for each transport
+  (preflight_curl_status = 'passed') AS can_curl,
+  (preflight_http_status = 'passed') AS can_http
+FROM worker_registry
+WHERE status <> 'terminated'
+ORDER BY (status = 'active') DESC, last_heartbeat_at DESC;
+
+-- ===================================================================
+-- Comments
+-- ===================================================================
+
+-- Catalog descriptions for the five worker_registry columns added above
+COMMENT ON COLUMN worker_registry.curl_ip IS 'IP address detected during curl/axios preflight';
+COMMENT ON COLUMN worker_registry.http_ip IS 'IP address detected during Puppeteer preflight';
+COMMENT ON COLUMN worker_registry.fingerprint_data IS 'Browser fingerprint captured during Puppeteer preflight';
+COMMENT ON COLUMN worker_registry.preflight_status IS 'Overall preflight status: pending, passed, partial, failed';
+COMMENT ON COLUMN worker_registry.preflight_at IS 'Most recent preflight completion timestamp';
--- a/backend/node_modules/.package-lock.json
+++ b/backend/node_modules/.package-lock.json
@@ -1,6 +1,6 @@
 {
  "name": "dutchie-menus-backend",
-  "version": "1.5.1",
+  "version": "1.6.0",
  "lockfileVersion": 3,
  "requires": true,
  "packages": {
@@ -46,6 +46,97 @@
      "resolved": "https://registry.npmjs.org/@ioredis/commands/-/commands-1.4.0.tgz",
      "integrity": "sha512-aFT2yemJJo+TZCmieA7qnYGQooOS7QfNmYrzGtsYd3g9j5iDP8AimYYAesf79ohjbLG12XxC4nG5DyEnC88AsQ=="
    },
+    "node_modules/@jsep-plugin/assignment": {
+      "version": "1.3.0",
+      "resolved": "https://registry.npmjs.org/@jsep-plugin/assignment/-/assignment-1.3.0.tgz",
+      "integrity": "sha512-VVgV+CXrhbMI3aSusQyclHkenWSAm95WaiKrMxRFam3JSUiIaQjoMIw2sEs/OX4XifnqeQUN4DYbJjlA8EfktQ==",
+      "engines": {
+        "node": ">= 10.16.0"
+      },
+      "peerDependencies": {
+        "jsep": "^0.4.0||^1.0.0"
+      }
+    },
+    "node_modules/@jsep-plugin/regex": {
+      "version": "1.0.4",
+      "resolved": "https://registry.npmjs.org/@jsep-plugin/regex/-/regex-1.0.4.tgz",
+      "integrity": "sha512-q7qL4Mgjs1vByCaTnDFcBnV9HS7GVPJX5vyVoCgZHNSC9rjwIlmbXG5sUuorR5ndfHAIlJ8pVStxvjXHbNvtUg==",
+      "engines": {
+        "node": ">= 10.16.0"
+      },
+      "peerDependencies": {
+        "jsep": "^0.4.0||^1.0.0"
+      }
+    },
+    "node_modules/@kubernetes/client-node": {
+      "version": "1.4.0",
+      "resolved": "https://registry.npmjs.org/@kubernetes/client-node/-/client-node-1.4.0.tgz",
+      "integrity": "sha512-Zge3YvF7DJi264dU1b3wb/GmzR99JhUpqTvp+VGHfwZT+g7EOOYNScDJNZwXy9cszyIGPIs0VHr+kk8e95qqrA==",
+      "dependencies": {
+        "@types/js-yaml": "^4.0.1",
+        "@types/node": "^24.0.0",
+        "@types/node-fetch": "^2.6.13",
+        "@types/stream-buffers": "^3.0.3",
+        "form-data": "^4.0.0",
+        "hpagent": "^1.2.0",
+        "isomorphic-ws": "^5.0.0",
+        "js-yaml": "^4.1.0",
+        "jsonpath-plus": "^10.3.0",
+        "node-fetch": "^2.7.0",
+        "openid-client": "^6.1.3",
+        "rfc4648": "^1.3.0",
+        "socks-proxy-agent": "^8.0.4",
+        "stream-buffers": "^3.0.2",
+        "tar-fs": "^3.0.9",
+        "ws": "^8.18.2"
+      }
+    },
+    "node_modules/@kubernetes/client-node/node_modules/@types/node": {
+      "version": "24.10.3",
+      "resolved": "https://registry.npmjs.org/@types/node/-/node-24.10.3.tgz",
+      "integrity": "sha512-gqkrWUsS8hcm0r44yn7/xZeV1ERva/nLgrLxFRUGb7aoNMIJfZJ3AC261zDQuOAKC7MiXai1WCpYc48jAHoShQ==",
+      "dependencies": {
+        "undici-types": "~7.16.0"
+      }
+    },
+    "node_modules/@kubernetes/client-node/node_modules/tar-fs": {
+      "version": "3.1.1",
+      "resolved": "https://registry.npmjs.org/tar-fs/-/tar-fs-3.1.1.tgz",
+      "integrity": "sha512-LZA0oaPOc2fVo82Txf3gw+AkEd38szODlptMYejQUhndHMLQ9M059uXR+AfS7DNo0NpINvSqDsvyaCrBVkptWg==",
+      "dependencies": {
+        "pump": "^3.0.0",
+        "tar-stream": "^3.1.5"
+      },
+      "optionalDependencies": {
+        "bare-fs": "^4.0.1",
+        "bare-path": "^3.0.0"
+      }
+    },
+    "node_modules/@kubernetes/client-node/node_modules/undici-types": {
+      "version": "7.16.0",
+      "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.16.0.tgz",
+      "integrity": "sha512-Zz+aZWSj8LE6zoxD+xrjh4VfkIG8Ya6LvYkZqtUQGJPZjYl53ypCaUwWqo7eI0x66KBGeRo+mlBEkMSeSZ38Nw=="
+    },
+    "node_modules/@kubernetes/client-node/node_modules/ws": {
+      "version": "8.18.3",
+      "resolved": "https://registry.npmjs.org/ws/-/ws-8.18.3.tgz",
+      "integrity": "sha512-PEIGCY5tSlUt50cqyMXfCzX+oOPqN0vuGqWzbcJ2xvnkzkq46oOpz7dQaTDBdfICb4N14+GARUDw2XV2N4tvzg==",
+      "engines": {
+        "node": ">=10.0.0"
+      },
+      "peerDependencies": {
+        "bufferutil": "^4.0.1",
+        "utf-8-validate": ">=5.0.2"
+      },
+      "peerDependenciesMeta": {
+        "bufferutil": {
+          "optional": true
+        },
+        "utf-8-validate": {
+          "optional": true
+        }
+      }
+    },
    "node_modules/@mapbox/node-pre-gyp": {
      "version": "1.0.11",
      "resolved": "https://registry.npmjs.org/@mapbox/node-pre-gyp/-/node-pre-gyp-1.0.11.tgz",
@@ -251,6 +342,11 @@
      "integrity": "sha512-r8Tayk8HJnX0FztbZN7oVqGccWgw98T/0neJphO91KkmOzug1KkofZURD4UaD5uH8AqcFLfdPErnBod0u71/qg==",
      "dev": true
    },
+    "node_modules/@types/js-yaml": {
+      "version": "4.0.9",
+      "resolved": "https://registry.npmjs.org/@types/js-yaml/-/js-yaml-4.0.9.tgz",
+      "integrity": "sha512-k4MGaQl5TGo/iipqb2UDG2UwjXziSWkh0uysQelTlJpX1qGlpUZYm8PnO4DxG1qBomtJUdYJ6qR6xdIah10JLg=="
+    },
    "node_modules/@types/jsonwebtoken": {
      "version": "9.0.10",
      "resolved": "https://registry.npmjs.org/@types/jsonwebtoken/-/jsonwebtoken-9.0.10.tgz",
@@ -276,7 +372,6 @@
      "version": "20.19.25",
      "resolved": "https://registry.npmjs.org/@types/node/-/node-20.19.25.tgz",
      "integrity": "sha512-ZsJzA5thDQMSQO788d7IocwwQbI8B5OPzmqNvpf3NY/+MHDAS759Wo0gd2WQeXYt5AAAQjzcrTVC6SKCuYgoCQ==",
-      "devOptional": true,
      "dependencies": {
        "undici-types": "~6.21.0"
      }
@@ -287,6 +382,15 @@
      "integrity": "sha512-0ikrnug3/IyneSHqCBeslAhlK2aBfYek1fGo4bP4QnZPmiqSGRK+Oy7ZMisLWkesffJvQ1cqAcBnJC+8+nxIAg==",
      "dev": true
    },
+    "node_modules/@types/node-fetch": {
+      "version": "2.6.13",
+      "resolved": "https://registry.npmjs.org/@types/node-fetch/-/node-fetch-2.6.13.tgz",
+      "integrity": "sha512-QGpRVpzSaUs30JBSGPjOg4Uveu384erbHBoT1zeONvyCfwQxIkUshLAOqN/k9EjGviPRmWTTe6aH2qySWKTVSw==",
+      "dependencies": {
+        "@types/node": "*",
+        "form-data": "^4.0.4"
+      }
+    },
    "node_modules/@types/pg": {
      "version": "8.15.6",
      "resolved": "https://registry.npmjs.org/@types/pg/-/pg-8.15.6.tgz",
@@ -340,6 +444,14 @@
        "@types/node": "*"
      }
    },
+    "node_modules/@types/stream-buffers": {
+      "version": "3.0.8",
+      "resolved": "https://registry.npmjs.org/@types/stream-buffers/-/stream-buffers-3.0.8.tgz",
+      "integrity": "sha512-J+7VaHKNvlNPJPEJXX/fKa9DZtR/xPMwuIbe+yNOwp1YB+ApUOBv2aUpEoBJEi8nJgbgs1x8e73ttg0r1rSUdw==",
+      "dependencies": {
+        "@types/node": "*"
+      }
+    },
    "node_modules/@types/uuid": {
      "version": "9.0.8",
      "resolved": "https://registry.npmjs.org/@types/uuid/-/uuid-9.0.8.tgz",
@@ -520,6 +632,78 @@
        }
      }
    },
+    "node_modules/bare-fs": {
+      "version": "4.5.2",
+      "resolved": "https://registry.npmjs.org/bare-fs/-/bare-fs-4.5.2.tgz",
+      "integrity": "sha512-veTnRzkb6aPHOvSKIOy60KzURfBdUflr5VReI+NSaPL6xf+XLdONQgZgpYvUuZLVQ8dCqxpBAudaOM1+KpAUxw==",
+      "optional": true,
+      "dependencies": {
+        "bare-events": "^2.5.4",
+        "bare-path": "^3.0.0",
+        "bare-stream": "^2.6.4",
+        "bare-url": "^2.2.2",
+        "fast-fifo": "^1.3.2"
+      },
+      "engines": {
+        "bare": ">=1.16.0"
+      },
+      "peerDependencies": {
+        "bare-buffer": "*"
+      },
+      "peerDependenciesMeta": {
+        "bare-buffer": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/bare-os": {
+      "version": "3.6.2",
+      "resolved": "https://registry.npmjs.org/bare-os/-/bare-os-3.6.2.tgz",
+      "integrity": "sha512-T+V1+1srU2qYNBmJCXZkUY5vQ0B4FSlL3QDROnKQYOqeiQR8UbjNHlPa+TIbM4cuidiN9GaTaOZgSEgsvPbh5A==",
+      "optional": true,
+      "engines": {
+        "bare": ">=1.14.0"
+      }
+    },
+    "node_modules/bare-path": {
+      "version": "3.0.0",
+      "resolved": "https://registry.npmjs.org/bare-path/-/bare-path-3.0.0.tgz",
+      "integrity": "sha512-tyfW2cQcB5NN8Saijrhqn0Zh7AnFNsnczRcuWODH0eYAXBsJ5gVxAUuNr7tsHSC6IZ77cA0SitzT+s47kot8Mw==",
+      "optional": true,
+      "dependencies": {
+        "bare-os": "^3.0.1"
+      }
+    },
+    "node_modules/bare-stream": {
+      "version": "2.7.0",
+      "resolved": "https://registry.npmjs.org/bare-stream/-/bare-stream-2.7.0.tgz",
+      "integrity": "sha512-oyXQNicV1y8nc2aKffH+BUHFRXmx6VrPzlnaEvMhram0nPBrKcEdcyBg5r08D0i8VxngHFAiVyn1QKXpSG0B8A==",
+      "optional": true,
+      "dependencies": {
+        "streamx": "^2.21.0"
+      },
+      "peerDependencies": {
+        "bare-buffer": "*",
+        "bare-events": "*"
+      },
+      "peerDependenciesMeta": {
+        "bare-buffer": {
+          "optional": true
+        },
+        "bare-events": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/bare-url": {
+      "version": "2.3.2",
+      "resolved": "https://registry.npmjs.org/bare-url/-/bare-url-2.3.2.tgz",
+      "integrity": "sha512-ZMq4gd9ngV5aTMa5p9+UfY0b3skwhHELaDkhEHetMdX0LRkW9kzaym4oo/Eh+Ghm0CCDuMTsRIGM/ytUc1ZYmw==",
+      "optional": true,
+      "dependencies": {
+        "bare-path": "^3.0.0"
+      }
+    },
    "node_modules/base64-js": {
      "version": "1.5.1",
      "resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz",
@@ -1026,6 +1210,17 @@
        "url": "https://github.com/sponsors/fb55"
      }
    },
+    "node_modules/csv-parser": {
+      "version": "3.2.0",
+      "resolved": "https://registry.npmjs.org/csv-parser/-/csv-parser-3.2.0.tgz",
+      "integrity": "sha512-fgKbp+AJbn1h2dcAHKIdKNSSjfp43BZZykXsCjzALjKy80VXQNHPFJ6T9Afwdzoj24aMkq8GwDS7KGcDPpejrA==",
+      "bin": {
+        "csv-parser": "bin/csv-parser"
+      },
+      "engines": {
+        "node": ">= 10"
+      }
+    },
    "node_modules/data-uri-to-buffer": {
      "version": "6.0.2",
      "resolved": "https://registry.npmjs.org/data-uri-to-buffer/-/data-uri-to-buffer-6.0.2.tgz",
@@ -2008,6 +2203,14 @@
        "node": ">=16.0.0"
      }
    },
+    "node_modules/hpagent": {
+      "version": "1.2.0",
+      "resolved": "https://registry.npmjs.org/hpagent/-/hpagent-1.2.0.tgz",
+      "integrity": "sha512-A91dYTeIB6NoXG+PxTQpCCDDnfHsW9kc06Lvpu1TEe9gnd6ZFeiBoRO9JvzEv6xK7EX97/dUE8g/vBMTqTS3CA==",
+      "engines": {
+        "node": ">=14"
+      }
+    },
    "node_modules/htmlparser2": {
      "version": "10.0.0",
      "resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-10.0.0.tgz",
@@ -2235,6 +2438,14 @@
        "node": ">= 12"
      }
    },
+    "node_modules/ip2location-nodejs": {
+      "version": "9.7.0",
+      "resolved": "https://registry.npmjs.org/ip2location-nodejs/-/ip2location-nodejs-9.7.0.tgz",
+      "integrity": "sha512-eQ4T5TXm1cx0+pQcRycPiuaiRuoDEMd9O89Be7Ugk555qi9UY9enXSznkkqr3kQRyUaXx7zj5dORC5LGTPOttA==",
+      "dependencies": {
+        "csv-parser": "^3.0.0"
+      }
+    },
    "node_modules/ipaddr.js": {
      "version": "2.2.0",
      "resolved": "https://registry.npmjs.org/ipaddr.js/-/ipaddr.js-2.2.0.tgz",
@@ -2363,6 +2574,22 @@
        "node": ">=0.10.0"
      }
    },
+    "node_modules/isomorphic-ws": {
+      "version": "5.0.0",
+      "resolved": "https://registry.npmjs.org/isomorphic-ws/-/isomorphic-ws-5.0.0.tgz",
+      "integrity": "sha512-muId7Zzn9ywDsyXgTIafTry2sV3nySZeUDe6YedVd1Hvuuep5AsIlqK+XefWpYTyJG5e503F2xIuT2lcU6rCSw==",
+      "peerDependencies": {
+        "ws": "*"
+      }
+    },
+    "node_modules/jose": {
+      "version": "6.1.3",
+      "resolved": "https://registry.npmjs.org/jose/-/jose-6.1.3.tgz",
+      "integrity": "sha512-0TpaTfihd4QMNwrz/ob2Bp7X04yuxJkjRGi4aKmOqwhov54i6u79oCv7T+C7lo70MKH6BesI3vscD1yb/yzKXQ==",
+      "funding": {
+        "url": "https://github.com/sponsors/panva"
+      }
+    },
    "node_modules/js-tokens": {
      "version": "4.0.0",
      "resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz",
@@ -2379,6 +2606,14 @@
        "js-yaml": "bin/js-yaml.js"
      }
    },
+    "node_modules/jsep": {
+      "version": "1.4.0",
+      "resolved": "https://registry.npmjs.org/jsep/-/jsep-1.4.0.tgz",
+      "integrity": "sha512-B7qPcEVE3NVkmSJbaYxvv4cHkVW7DQsZz13pUMrfS8z8Q/BuShN+gcTXrUlPiGqM2/t/EEaI030bpxMqY8gMlw==",
+      "engines": {
+        "node": ">= 10.16.0"
+      }
+    },
    "node_modules/json-parse-even-better-errors": {
      "version": "2.3.1",
      "resolved": "https://registry.npmjs.org/json-parse-even-better-errors/-/json-parse-even-better-errors-2.3.1.tgz",
@@ -2400,6 +2635,23 @@
        "graceful-fs": "^4.1.6"
      }
    },
+    "node_modules/jsonpath-plus": {
+      "version": "10.3.0",
+      "resolved": "https://registry.npmjs.org/jsonpath-plus/-/jsonpath-plus-10.3.0.tgz",
+      "integrity": "sha512-8TNmfeTCk2Le33A3vRRwtuworG/L5RrgMvdjhKZxvyShO+mBu2fP50OWUjRLNtvw344DdDarFh9buFAZs5ujeA==",
+      "dependencies": {
+        "@jsep-plugin/assignment": "^1.3.0",
+        "@jsep-plugin/regex": "^1.0.4",
+        "jsep": "^1.4.0"
+      },
+      "bin": {
+        "jsonpath": "bin/jsonpath-cli.js",
+        "jsonpath-plus": "bin/jsonpath-cli.js"
+      },
+      "engines": {
+        "node": ">=18.0.0"
+      }
+    },
    "node_modules/jsonwebtoken": {
      "version": "9.0.2",
      "resolved": "https://registry.npmjs.org/jsonwebtoken/-/jsonwebtoken-9.0.2.tgz",
@@ -2474,6 +2726,11 @@
      "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz",
      "integrity": "sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg=="
    },
+    "node_modules/lodash.clonedeep": {
+      "version": "4.5.0",
+      "resolved": "https://registry.npmjs.org/lodash.clonedeep/-/lodash.clonedeep-4.5.0.tgz",
+      "integrity": "sha512-H5ZhCF25riFd9uB5UCkVKo61m3S/xZk1x4wA6yp/L3RFP6Z/eHH1ymQcGLo7J3GMPfm0V/7m1tryHuGVxpqEBQ=="
+    },
    "node_modules/lodash.defaults": {
      "version": "4.2.0",
      "resolved": "https://registry.npmjs.org/lodash.defaults/-/lodash.defaults-4.2.0.tgz",
@@ -2923,6 +3180,14 @@
        "url": "https://github.com/fb55/nth-check?sponsor=1"
      }
    },
+    "node_modules/oauth4webapi": {
+      "version": "3.8.3",
+      "resolved": "https://registry.npmjs.org/oauth4webapi/-/oauth4webapi-3.8.3.tgz",
+      "integrity": "sha512-pQ5BsX3QRTgnt5HxgHwgunIRaDXBdkT23tf8dfzmtTIL2LTpdmxgbpbBm0VgFWAIDlezQvQCTgnVIUmHupXHxw==",
+      "funding": {
+        "url": "https://github.com/sponsors/panva"
+      }
+    },
    "node_modules/object-assign": {
      "version": "4.1.1",
      "resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz",
@@ -2961,6 +3226,18 @@
        "wrappy": "1"
      }
    },
+    "node_modules/openid-client": {
+      "version": "6.8.1",
+      "resolved": "https://registry.npmjs.org/openid-client/-/openid-client-6.8.1.tgz",
+      "integrity": "sha512-VoYT6enBo6Vj2j3Q5Ec0AezS+9YGzQo1f5Xc42lreMGlfP4ljiXPKVDvCADh+XHCV/bqPu/wWSiCVXbJKvrODw==",
+      "dependencies": {
+        "jose": "^6.1.0",
+        "oauth4webapi": "^3.8.2"
+      },
+      "funding": {
+        "url": "https://github.com/sponsors/panva"
+      }
+    },
    "node_modules/pac-proxy-agent": {
      "version": "7.2.0",
      "resolved": "https://registry.npmjs.org/pac-proxy-agent/-/pac-proxy-agent-7.2.0.tgz",
@@ -3864,6 +4141,11 @@
        "url": "https://github.com/privatenumber/resolve-pkg-maps?sponsor=1"
      }
    },
+    "node_modules/rfc4648": {
+      "version": "1.5.4",
+      "resolved": "https://registry.npmjs.org/rfc4648/-/rfc4648-1.5.4.tgz",
+      "integrity": "sha512-rRg/6Lb+IGfJqO05HZkN50UtY7K/JhxJag1kP23+zyMfrvoB0B7RWv06MbOzoc79RgCdNTiUaNsTT1AJZ7Z+cg=="
+    },
    "node_modules/rimraf": {
      "version": "3.0.2",
      "resolved": "https://registry.npmjs.org/rimraf/-/rimraf-3.0.2.tgz",
@@ -4294,6 +4576,14 @@
        "node": ">= 0.8"
      }
    },
+    "node_modules/stream-buffers": {
+      "version": "3.0.3",
+      "resolved": "https://registry.npmjs.org/stream-buffers/-/stream-buffers-3.0.3.tgz",
+      "integrity": "sha512-pqMqwQCso0PBJt2PQmDO0cFj0lyqmiwOMiMSkVtRokl7e+ZTRYgDHKnuZNbqjiJXgsg4nuqtD/zxuo9KqTp0Yw==",
+      "engines": {
+        "node": ">= 0.10.0"
+      }
+    },
    "node_modules/streamx": {
      "version": "2.23.0",
      "resolved": "https://registry.npmjs.org/streamx/-/streamx-2.23.0.tgz",
@@ -4513,8 +4803,7 @@
    "node_modules/undici-types": {
      "version": "6.21.0",
      "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.21.0.tgz",
-      "integrity": "sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ==",
-      "devOptional": true
+      "integrity": "sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ=="
    },
    "node_modules/universalify": {
      "version": "2.0.1",
@@ -4537,6 +4826,14 @@
      "resolved": "https://registry.npmjs.org/urlpattern-polyfill/-/urlpattern-polyfill-10.0.0.tgz",
      "integrity": "sha512-H/A06tKD7sS1O1X2SshBVeA5FLycRpjqiBeqGKmBwBDBy28EnRjORxTNe269KSSr5un5qyWi1iL61wLxpd+ZOg=="
    },
+    "node_modules/user-agents": {
+      "version": "1.1.669",
+      "resolved": "https://registry.npmjs.org/user-agents/-/user-agents-1.1.669.tgz",
+      "integrity": "sha512-pbIzG+AOqCaIpySKJ4IAm1l0VyE4jMnK4y1thV8lm8PYxI+7X5uWcppOK7zY79TCKKTAnJH3/4gaVIZHsjrmJA==",
+      "dependencies": {
+        "lodash.clonedeep": "^4.5.0"
+      }
+    },
    "node_modules/util": {
      "version": "0.12.5",
      "resolved": "https://registry.npmjs.org/util/-/util-0.12.5.tgz",
--- a/backend/package-lock.json
+++ b/backend/package-lock.json
@@ -1,13 +1,14 @@
 {
  "name": "dutchie-menus-backend",
-  "version": "1.5.1",
+  "version": "1.6.0",
  "lockfileVersion": 3,
  "requires": true,
  "packages": {
    "": {
      "name": "dutchie-menus-backend",
-      "version": "1.5.1",
+      "version": "1.6.0",
      "dependencies": {
+        "@kubernetes/client-node": "^1.4.0",
        "@types/bcryptjs": "^3.0.0",
        "axios": "^1.6.2",
        "bcrypt": "^5.1.1",
@@ -21,6 +22,7 @@
        "helmet": "^7.1.0",
        "https-proxy-agent": "^7.0.2",
        "ioredis": "^5.8.2",
+        "ip2location-nodejs": "^9.7.0",
        "ipaddr.js": "^2.2.0",
        "jsonwebtoken": "^9.0.2",
        "minio": "^7.1.3",
@@ -33,6 +35,7 @@
        "puppeteer-extra-plugin-stealth": "^2.11.2",
        "sharp": "^0.32.0",
        "socks-proxy-agent": "^8.0.2",
+        "user-agents": "^1.1.669",
        "uuid": "^9.0.1",
        "zod": "^3.22.4"
      },
@@ -491,6 +494,97 @@
      "resolved": "https://registry.npmjs.org/@ioredis/commands/-/commands-1.4.0.tgz",
      "integrity": "sha512-aFT2yemJJo+TZCmieA7qnYGQooOS7QfNmYrzGtsYd3g9j5iDP8AimYYAesf79ohjbLG12XxC4nG5DyEnC88AsQ=="
    },
+    "node_modules/@jsep-plugin/assignment": {
+      "version": "1.3.0",
+      "resolved": "https://registry.npmjs.org/@jsep-plugin/assignment/-/assignment-1.3.0.tgz",
+      "integrity": "sha512-VVgV+CXrhbMI3aSusQyclHkenWSAm95WaiKrMxRFam3JSUiIaQjoMIw2sEs/OX4XifnqeQUN4DYbJjlA8EfktQ==",
+      "engines": {
+        "node": ">= 10.16.0"
+      },
+      "peerDependencies": {
+        "jsep": "^0.4.0||^1.0.0"
+      }
+    },
+    "node_modules/@jsep-plugin/regex": {
+      "version": "1.0.4",
+      "resolved": "https://registry.npmjs.org/@jsep-plugin/regex/-/regex-1.0.4.tgz",
+      "integrity": "sha512-q7qL4Mgjs1vByCaTnDFcBnV9HS7GVPJX5vyVoCgZHNSC9rjwIlmbXG5sUuorR5ndfHAIlJ8pVStxvjXHbNvtUg==",
+      "engines": {
+        "node": ">= 10.16.0"
+      },
+      "peerDependencies": {
+        "jsep": "^0.4.0||^1.0.0"
+      }
+    },
+    "node_modules/@kubernetes/client-node": {
+      "version": "1.4.0",
+      "resolved": "https://registry.npmjs.org/@kubernetes/client-node/-/client-node-1.4.0.tgz",
+      "integrity": "sha512-Zge3YvF7DJi264dU1b3wb/GmzR99JhUpqTvp+VGHfwZT+g7EOOYNScDJNZwXy9cszyIGPIs0VHr+kk8e95qqrA==",
+      "dependencies": {
+        "@types/js-yaml": "^4.0.1",
+        "@types/node": "^24.0.0",
+        "@types/node-fetch": "^2.6.13",
+        "@types/stream-buffers": "^3.0.3",
+        "form-data": "^4.0.0",
+        "hpagent": "^1.2.0",
+        "isomorphic-ws": "^5.0.0",
+        "js-yaml": "^4.1.0",
+        "jsonpath-plus": "^10.3.0",
+        "node-fetch": "^2.7.0",
+        "openid-client": "^6.1.3",
+        "rfc4648": "^1.3.0",
+        "socks-proxy-agent": "^8.0.4",
+        "stream-buffers": "^3.0.2",
+        "tar-fs": "^3.0.9",
+        "ws": "^8.18.2"
+      }
+    },
+    "node_modules/@kubernetes/client-node/node_modules/@types/node": {
+      "version": "24.10.3",
+      "resolved": "https://registry.npmjs.org/@types/node/-/node-24.10.3.tgz",
+      "integrity": "sha512-gqkrWUsS8hcm0r44yn7/xZeV1ERva/nLgrLxFRUGb7aoNMIJfZJ3AC261zDQuOAKC7MiXai1WCpYc48jAHoShQ==",
+      "dependencies": {
+        "undici-types": "~7.16.0"
+      }
+    },
+    "node_modules/@kubernetes/client-node/node_modules/tar-fs": {
+      "version": "3.1.1",
+      "resolved": "https://registry.npmjs.org/tar-fs/-/tar-fs-3.1.1.tgz",
+      "integrity": "sha512-LZA0oaPOc2fVo82Txf3gw+AkEd38szODlptMYejQUhndHMLQ9M059uXR+AfS7DNo0NpINvSqDsvyaCrBVkptWg==",
+      "dependencies": {
+        "pump": "^3.0.0",
+        "tar-stream": "^3.1.5"
+      },
+      "optionalDependencies": {
+        "bare-fs": "^4.0.1",
+        "bare-path": "^3.0.0"
+      }
+    },
+    "node_modules/@kubernetes/client-node/node_modules/undici-types": {
+      "version": "7.16.0",
+      "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.16.0.tgz",
+      "integrity": "sha512-Zz+aZWSj8LE6zoxD+xrjh4VfkIG8Ya6LvYkZqtUQGJPZjYl53ypCaUwWqo7eI0x66KBGeRo+mlBEkMSeSZ38Nw=="
+    },
+    "node_modules/@kubernetes/client-node/node_modules/ws": {
+      "version": "8.18.3",
+      "resolved": "https://registry.npmjs.org/ws/-/ws-8.18.3.tgz",
+      "integrity": "sha512-PEIGCY5tSlUt50cqyMXfCzX+oOPqN0vuGqWzbcJ2xvnkzkq46oOpz7dQaTDBdfICb4N14+GARUDw2XV2N4tvzg==",
+      "engines": {
+        "node": ">=10.0.0"
+      },
+      "peerDependencies": {
+        "bufferutil": "^4.0.1",
+        "utf-8-validate": ">=5.0.2"
+      },
+      "peerDependenciesMeta": {
+        "bufferutil": {
+          "optional": true
+        },
+        "utf-8-validate": {
+          "optional": true
+        }
+      }
+    },
    "node_modules/@mapbox/node-pre-gyp": {
      "version": "1.0.11",
      "resolved": "https://registry.npmjs.org/@mapbox/node-pre-gyp/-/node-pre-gyp-1.0.11.tgz",
@@ -756,6 +850,11 @@
      "integrity": "sha512-r8Tayk8HJnX0FztbZN7oVqGccWgw98T/0neJphO91KkmOzug1KkofZURD4UaD5uH8AqcFLfdPErnBod0u71/qg==",
      "dev": true
    },
+    "node_modules/@types/js-yaml": {
+      "version": "4.0.9",
+      "resolved": "https://registry.npmjs.org/@types/js-yaml/-/js-yaml-4.0.9.tgz",
+      "integrity": "sha512-k4MGaQl5TGo/iipqb2UDG2UwjXziSWkh0uysQelTlJpX1qGlpUZYm8PnO4DxG1qBomtJUdYJ6qR6xdIah10JLg=="
+    },
    "node_modules/@types/jsonwebtoken": {
      "version": "9.0.10",
      "resolved": "https://registry.npmjs.org/@types/jsonwebtoken/-/jsonwebtoken-9.0.10.tgz",
@@ -781,7 +880,6 @@
      "version": "20.19.25",
      "resolved": "https://registry.npmjs.org/@types/node/-/node-20.19.25.tgz",
      "integrity": "sha512-ZsJzA5thDQMSQO788d7IocwwQbI8B5OPzmqNvpf3NY/+MHDAS759Wo0gd2WQeXYt5AAAQjzcrTVC6SKCuYgoCQ==",
-      "devOptional": true,
      "dependencies": {
        "undici-types": "~6.21.0"
      }
@@ -792,6 +890,15 @@
      "integrity": "sha512-0ikrnug3/IyneSHqCBeslAhlK2aBfYek1fGo4bP4QnZPmiqSGRK+Oy7ZMisLWkesffJvQ1cqAcBnJC+8+nxIAg==",
      "dev": true
    },
+    "node_modules/@types/node-fetch": {
+      "version": "2.6.13",
+      "resolved": "https://registry.npmjs.org/@types/node-fetch/-/node-fetch-2.6.13.tgz",
+      "integrity": "sha512-QGpRVpzSaUs30JBSGPjOg4Uveu384erbHBoT1zeONvyCfwQxIkUshLAOqN/k9EjGviPRmWTTe6aH2qySWKTVSw==",
+      "dependencies": {
+        "@types/node": "*",
+        "form-data": "^4.0.4"
+      }
+    },
    "node_modules/@types/pg": {
      "version": "8.15.6",
      "resolved": "https://registry.npmjs.org/@types/pg/-/pg-8.15.6.tgz",
@@ -845,6 +952,14 @@
        "@types/node": "*"
      }
    },
+    "node_modules/@types/stream-buffers": {
+      "version": "3.0.8",
+      "resolved": "https://registry.npmjs.org/@types/stream-buffers/-/stream-buffers-3.0.8.tgz",
+      "integrity": "sha512-J+7VaHKNvlNPJPEJXX/fKa9DZtR/xPMwuIbe+yNOwp1YB+ApUOBv2aUpEoBJEi8nJgbgs1x8e73ttg0r1rSUdw==",
+      "dependencies": {
+        "@types/node": "*"
+      }
+    },
    "node_modules/@types/uuid": {
      "version": "9.0.8",
      "resolved": "https://registry.npmjs.org/@types/uuid/-/uuid-9.0.8.tgz",
@@ -1025,6 +1140,78 @@
        }
      }
    },
+    "node_modules/bare-fs": {
+      "version": "4.5.2",
+      "resolved": "https://registry.npmjs.org/bare-fs/-/bare-fs-4.5.2.tgz",
+      "integrity": "sha512-veTnRzkb6aPHOvSKIOy60KzURfBdUflr5VReI+NSaPL6xf+XLdONQgZgpYvUuZLVQ8dCqxpBAudaOM1+KpAUxw==",
+      "optional": true,
+      "dependencies": {
+        "bare-events": "^2.5.4",
+        "bare-path": "^3.0.0",
+        "bare-stream": "^2.6.4",
+        "bare-url": "^2.2.2",
+        "fast-fifo": "^1.3.2"
+      },
+      "engines": {
+        "bare": ">=1.16.0"
+      },
+      "peerDependencies": {
+        "bare-buffer": "*"
+      },
+      "peerDependenciesMeta": {
+        "bare-buffer": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/bare-os": {
+      "version": "3.6.2",
+      "resolved": "https://registry.npmjs.org/bare-os/-/bare-os-3.6.2.tgz",
+      "integrity": "sha512-T+V1+1srU2qYNBmJCXZkUY5vQ0B4FSlL3QDROnKQYOqeiQR8UbjNHlPa+TIbM4cuidiN9GaTaOZgSEgsvPbh5A==",
+      "optional": true,
+      "engines": {
+        "bare": ">=1.14.0"
+      }
+    },
+    "node_modules/bare-path": {
+      "version": "3.0.0",
+      "resolved": "https://registry.npmjs.org/bare-path/-/bare-path-3.0.0.tgz",
+      "integrity": "sha512-tyfW2cQcB5NN8Saijrhqn0Zh7AnFNsnczRcuWODH0eYAXBsJ5gVxAUuNr7tsHSC6IZ77cA0SitzT+s47kot8Mw==",
+      "optional": true,
+      "dependencies": {
+        "bare-os": "^3.0.1"
+      }
+    },
+    "node_modules/bare-stream": {
+      "version": "2.7.0",
+      "resolved": "https://registry.npmjs.org/bare-stream/-/bare-stream-2.7.0.tgz",
+      "integrity": "sha512-oyXQNicV1y8nc2aKffH+BUHFRXmx6VrPzlnaEvMhram0nPBrKcEdcyBg5r08D0i8VxngHFAiVyn1QKXpSG0B8A==",
+      "optional": true,
+      "dependencies": {
+        "streamx": "^2.21.0"
+      },
+      "peerDependencies": {
+        "bare-buffer": "*",
+        "bare-events": "*"
+      },
+      "peerDependenciesMeta": {
+        "bare-buffer": {
+          "optional": true
+        },
+        "bare-events": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/bare-url": {
+      "version": "2.3.2",
+      "resolved": "https://registry.npmjs.org/bare-url/-/bare-url-2.3.2.tgz",
+      "integrity": "sha512-ZMq4gd9ngV5aTMa5p9+UfY0b3skwhHELaDkhEHetMdX0LRkW9kzaym4oo/Eh+Ghm0CCDuMTsRIGM/ytUc1ZYmw==",
+      "optional": true,
+      "dependencies": {
+        "bare-path": "^3.0.0"
+      }
+    },
    "node_modules/base64-js": {
      "version": "1.5.1",
      "resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz",
@@ -1531,6 +1718,17 @@
        "url": "https://github.com/sponsors/fb55"
      }
    },
+    "node_modules/csv-parser": {
+      "version": "3.2.0",
+      "resolved": "https://registry.npmjs.org/csv-parser/-/csv-parser-3.2.0.tgz",
+      "integrity": "sha512-fgKbp+AJbn1h2dcAHKIdKNSSjfp43BZZykXsCjzALjKy80VXQNHPFJ6T9Afwdzoj24aMkq8GwDS7KGcDPpejrA==",
+      "bin": {
+        "csv-parser": "bin/csv-parser"
+      },
+      "engines": {
+        "node": ">= 10"
+      }
+    },
    "node_modules/data-uri-to-buffer": {
      "version": "6.0.2",
      "resolved": "https://registry.npmjs.org/data-uri-to-buffer/-/data-uri-to-buffer-6.0.2.tgz",
@@ -2527,6 +2725,14 @@
        "node": ">=16.0.0"
      }
    },
+    "node_modules/hpagent": {
+      "version": "1.2.0",
+      "resolved": "https://registry.npmjs.org/hpagent/-/hpagent-1.2.0.tgz",
+      "integrity": "sha512-A91dYTeIB6NoXG+PxTQpCCDDnfHsW9kc06Lvpu1TEe9gnd6ZFeiBoRO9JvzEv6xK7EX97/dUE8g/vBMTqTS3CA==",
+      "engines": {
+        "node": ">=14"
+      }
+    },
    "node_modules/htmlparser2": {
      "version": "10.0.0",
      "resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-10.0.0.tgz",
@@ -2754,6 +2960,14 @@
        "node": ">= 12"
      }
    },
+    "node_modules/ip2location-nodejs": {
+      "version": "9.7.0",
+      "resolved": "https://registry.npmjs.org/ip2location-nodejs/-/ip2location-nodejs-9.7.0.tgz",
+      "integrity": "sha512-eQ4T5TXm1cx0+pQcRycPiuaiRuoDEMd9O89Be7Ugk555qi9UY9enXSznkkqr3kQRyUaXx7zj5dORC5LGTPOttA==",
+      "dependencies": {
+        "csv-parser": "^3.0.0"
+      }
+    },
    "node_modules/ipaddr.js": {
      "version": "2.2.0",
      "resolved": "https://registry.npmjs.org/ipaddr.js/-/ipaddr.js-2.2.0.tgz",
@@ -2882,6 +3096,22 @@
        "node": ">=0.10.0"
      }
    },
+    "node_modules/isomorphic-ws": {
+      "version": "5.0.0",
+      "resolved": "https://registry.npmjs.org/isomorphic-ws/-/isomorphic-ws-5.0.0.tgz",
+      "integrity": "sha512-muId7Zzn9ywDsyXgTIafTry2sV3nySZeUDe6YedVd1Hvuuep5AsIlqK+XefWpYTyJG5e503F2xIuT2lcU6rCSw==",
+      "peerDependencies": {
+        "ws": "*"
+      }
+    },
+    "node_modules/jose": {
+      "version": "6.1.3",
+      "resolved": "https://registry.npmjs.org/jose/-/jose-6.1.3.tgz",
+      "integrity": "sha512-0TpaTfihd4QMNwrz/ob2Bp7X04yuxJkjRGi4aKmOqwhov54i6u79oCv7T+C7lo70MKH6BesI3vscD1yb/yzKXQ==",
+      "funding": {
+        "url": "https://github.com/sponsors/panva"
+      }
+    },
    "node_modules/js-tokens": {
      "version": "4.0.0",
      "resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz",
@@ -2898,6 +3128,14 @@
        "js-yaml": "bin/js-yaml.js"
      }
    },
+    "node_modules/jsep": {
+      "version": "1.4.0",
+      "resolved": "https://registry.npmjs.org/jsep/-/jsep-1.4.0.tgz",
+      "integrity": "sha512-B7qPcEVE3NVkmSJbaYxvv4cHkVW7DQsZz13pUMrfS8z8Q/BuShN+gcTXrUlPiGqM2/t/EEaI030bpxMqY8gMlw==",
+      "engines": {
+        "node": ">= 10.16.0"
+      }
+    },
    "node_modules/json-parse-even-better-errors": {
      "version": "2.3.1",
      "resolved": "https://registry.npmjs.org/json-parse-even-better-errors/-/json-parse-even-better-errors-2.3.1.tgz",
@@ -2919,6 +3157,23 @@
        "graceful-fs": "^4.1.6"
      }
    },
+    "node_modules/jsonpath-plus": {
+      "version": "10.3.0",
+      "resolved": "https://registry.npmjs.org/jsonpath-plus/-/jsonpath-plus-10.3.0.tgz",
+      "integrity": "sha512-8TNmfeTCk2Le33A3vRRwtuworG/L5RrgMvdjhKZxvyShO+mBu2fP50OWUjRLNtvw344DdDarFh9buFAZs5ujeA==",
+      "dependencies": {
+        "@jsep-plugin/assignment": "^1.3.0",
+        "@jsep-plugin/regex": "^1.0.4",
+        "jsep": "^1.4.0"
+      },
+      "bin": {
+        "jsonpath": "bin/jsonpath-cli.js",
+        "jsonpath-plus": "bin/jsonpath-cli.js"
+      },
+      "engines": {
+        "node": ">=18.0.0"
+      }
+    },
    "node_modules/jsonwebtoken": {
      "version": "9.0.2",
      "resolved": "https://registry.npmjs.org/jsonwebtoken/-/jsonwebtoken-9.0.2.tgz",
@@ -2993,6 +3248,11 @@
      "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz",
      "integrity": "sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg=="
    },
+    "node_modules/lodash.clonedeep": {
+      "version": "4.5.0",
+      "resolved": "https://registry.npmjs.org/lodash.clonedeep/-/lodash.clonedeep-4.5.0.tgz",
+      "integrity": "sha512-H5ZhCF25riFd9uB5UCkVKo61m3S/xZk1x4wA6yp/L3RFP6Z/eHH1ymQcGLo7J3GMPfm0V/7m1tryHuGVxpqEBQ=="
+    },
    "node_modules/lodash.defaults": {
      "version": "4.2.0",
      "resolved": "https://registry.npmjs.org/lodash.defaults/-/lodash.defaults-4.2.0.tgz",
@@ -3442,6 +3702,14 @@
        "url": "https://github.com/fb55/nth-check?sponsor=1"
      }
    },
+    "node_modules/oauth4webapi": {
+      "version": "3.8.3",
+      "resolved": "https://registry.npmjs.org/oauth4webapi/-/oauth4webapi-3.8.3.tgz",
+      "integrity": "sha512-pQ5BsX3QRTgnt5HxgHwgunIRaDXBdkT23tf8dfzmtTIL2LTpdmxgbpbBm0VgFWAIDlezQvQCTgnVIUmHupXHxw==",
+      "funding": {
+        "url": "https://github.com/sponsors/panva"
+      }
+    },
    "node_modules/object-assign": {
      "version": "4.1.1",
      "resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz",
@@ -3480,6 +3748,18 @@
        "wrappy": "1"
      }
    },
+    "node_modules/openid-client": {
+      "version": "6.8.1",
+      "resolved": "https://registry.npmjs.org/openid-client/-/openid-client-6.8.1.tgz",
+      "integrity": "sha512-VoYT6enBo6Vj2j3Q5Ec0AezS+9YGzQo1f5Xc42lreMGlfP4ljiXPKVDvCADh+XHCV/bqPu/wWSiCVXbJKvrODw==",
+      "dependencies": {
+        "jose": "^6.1.0",
+        "oauth4webapi": "^3.8.2"
+      },
+      "funding": {
+        "url": "https://github.com/sponsors/panva"
+      }
+    },
    "node_modules/pac-proxy-agent": {
      "version": "7.2.0",
      "resolved": "https://registry.npmjs.org/pac-proxy-agent/-/pac-proxy-agent-7.2.0.tgz",
@@ -4396,6 +4676,11 @@
        "url": "https://github.com/privatenumber/resolve-pkg-maps?sponsor=1"
      }
    },
+    "node_modules/rfc4648": {
+      "version": "1.5.4",
+      "resolved": "https://registry.npmjs.org/rfc4648/-/rfc4648-1.5.4.tgz",
+      "integrity": "sha512-rRg/6Lb+IGfJqO05HZkN50UtY7K/JhxJag1kP23+zyMfrvoB0B7RWv06MbOzoc79RgCdNTiUaNsTT1AJZ7Z+cg=="
+    },
    "node_modules/rimraf": {
      "version": "3.0.2",
      "resolved": "https://registry.npmjs.org/rimraf/-/rimraf-3.0.2.tgz",
@@ -4826,6 +5111,14 @@
        "node": ">= 0.8"
      }
    },
+    "node_modules/stream-buffers": {
+      "version": "3.0.3",
+      "resolved": "https://registry.npmjs.org/stream-buffers/-/stream-buffers-3.0.3.tgz",
+      "integrity": "sha512-pqMqwQCso0PBJt2PQmDO0cFj0lyqmiwOMiMSkVtRokl7e+ZTRYgDHKnuZNbqjiJXgsg4nuqtD/zxuo9KqTp0Yw==",
+      "engines": {
+        "node": ">= 0.10.0"
+      }
+    },
    "node_modules/streamx": {
      "version": "2.23.0",
      "resolved": "https://registry.npmjs.org/streamx/-/streamx-2.23.0.tgz",
@@ -5045,8 +5338,7 @@
    "node_modules/undici-types": {
      "version": "6.21.0",
      "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.21.0.tgz",
-      "integrity": "sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ==",
-      "devOptional": true
+      "integrity": "sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ=="
    },
    "node_modules/universalify": {
      "version": "2.0.1",
@@ -5069,6 +5361,14 @@
      "resolved": "https://registry.npmjs.org/urlpattern-polyfill/-/urlpattern-polyfill-10.0.0.tgz",
      "integrity": "sha512-H/A06tKD7sS1O1X2SshBVeA5FLycRpjqiBeqGKmBwBDBy28EnRjORxTNe269KSSr5un5qyWi1iL61wLxpd+ZOg=="
    },
+    "node_modules/user-agents": {
+      "version": "1.1.669",
+      "resolved": "https://registry.npmjs.org/user-agents/-/user-agents-1.1.669.tgz",
+      "integrity": "sha512-pbIzG+AOqCaIpySKJ4IAm1l0VyE4jMnK4y1thV8lm8PYxI+7X5uWcppOK7zY79TCKKTAnJH3/4gaVIZHsjrmJA==",
+      "dependencies": {
+        "lodash.clonedeep": "^4.5.0"
+      }
+    },
    "node_modules/util": {
      "version": "0.12.5",
      "resolved": "https://registry.npmjs.org/util/-/util-0.12.5.tgz",
--- a/backend/package.json
+++ b/backend/package.json
@@ -1,6 +1,6 @@
 {
  "name": "dutchie-menus-backend",
-  "version": "1.5.1",
+  "version": "1.6.0",
  "description": "Backend API for Dutchie Menus scraper and management",
  "main": "dist/index.js",
  "scripts": {
@@ -22,6 +22,7 @@
    "seed:dt:cities:bulk": "tsx src/scripts/seed-dt-cities-bulk.ts"
  },
  "dependencies": {
+    "@kubernetes/client-node": "^1.4.0",
    "@types/bcryptjs": "^3.0.0",
    "axios": "^1.6.2",
    "bcrypt": "^5.1.1",
@@ -35,6 +36,7 @@
    "helmet": "^7.1.0",
    "https-proxy-agent": "^7.0.2",
    "ioredis": "^5.8.2",
+    "ip2location-nodejs": "^9.7.0",
    "ipaddr.js": "^2.2.0",
    "jsonwebtoken": "^9.0.2",
    "minio": "^7.1.3",
@@ -47,6 +49,7 @@
    "puppeteer-extra-plugin-stealth": "^2.11.2",
    "sharp": "^0.32.0",
    "socks-proxy-agent": "^8.0.2",
+    "user-agents": "^1.1.669",
    "uuid": "^9.0.1",
    "zod": "^3.22.4"
  },
--- a/backend/public/downloads/cannaiq-menus-1.5.4.zip
+++ b/backend/public/downloads/cannaiq-menus-1.5.4.zip
--- a/backend/public/downloads/cannaiq-menus-1.6.0.zip
+++ b/backend/public/downloads/cannaiq-menus-1.6.0.zip
--- a/backend/public/downloads/cannaiq-menus-latest.zip
+++ b/backend/public/downloads/cannaiq-menus-latest.zip
@@ -0,0 +1 @@
+cannaiq-menus-1.6.0.zip
--- a/backend/scripts/download-ip2location.sh
+++ b/backend/scripts/download-ip2location.sh
@@ -0,0 +1,65 @@
+#!/bin/bash
+# Download IP2Location LITE DB3 (City-level) database
+# Free for commercial use with attribution
+# https://lite.ip2location.com/database/db3-ip-country-region-city
+#
+# Usage: IP2LOCATION_TOKEN=your_token ./scripts/download-ip2location.sh [DATA_DIR]
+#   DATA_DIR  target directory (default: ./data/ip2location), created if missing
+#
+# Exits non-zero when the token is missing, the download fails, the downloaded
+# file is not a valid zip, or the archive contains no .BIN file.
+
+# Abort on errors, on unset variables, and on failures inside pipelines.
+set -euo pipefail
+
+DATA_DIR="${1:-./data/ip2location}"
+DB_FILE="IP2LOCATION-LITE-DB3.BIN"
+ARCHIVE="ip2location.zip"
+
+mkdir -p "$DATA_DIR"
+cd "$DATA_DIR"
+
+echo "Downloading IP2Location LITE DB3 database..."
+
+# IP2Location LITE DB3 - includes city, region, country, lat/lng
+# You need to register at https://lite.ip2location.com/ to get a download token
+# Then set IP2LOCATION_TOKEN environment variable
+
+# ":-" keeps this check working under "set -u" when the variable is unset.
+if [ -z "${IP2LOCATION_TOKEN:-}" ]; then
+  echo ""
+  echo "ERROR: IP2LOCATION_TOKEN not set"
+  echo ""
+  echo "To download the database:"
+  echo "1. Register free at https://lite.ip2location.com/"
+  echo "2. Get your download token from the dashboard"
+  echo "3. Run: IP2LOCATION_TOKEN=your_token ./scripts/download-ip2location.sh"
+  echo ""
+  exit 1
+fi
+
+# Download DB3.LITE (IPv4 + City)
+DOWNLOAD_URL="https://www.ip2location.com/download/?token=${IP2LOCATION_TOKEN}&file=DB3LITEBIN"
+
+echo "Downloading from IP2Location..."
+# -f: fail on HTTP errors instead of saving the error page as the archive.
+curl -fL -o "$ARCHIVE" "$DOWNLOAD_URL"
+
+# The server may still answer with a non-zip body (e.g. an error message for a
+# rejected token), so test the archive before extracting and clean up if bad.
+if ! unzip -tqq "$ARCHIVE" >/dev/null 2>&1; then
+  echo "ERROR: Downloaded file is not a valid zip archive (check IP2LOCATION_TOKEN)"
+  rm -f "$ARCHIVE"
+  exit 1
+fi
+
+echo "Extracting..."
+unzip -o "$ARCHIVE"
+
+# Rename to standard name
+if [ ! -f "$DB_FILE" ]; then
+  BIN_FILE=""
+  if [ -f "IP-COUNTRY-REGION-CITY.BIN" ]; then
+    BIN_FILE="IP-COUNTRY-REGION-CITY.BIN"
+  else
+    # Find whatever BIN file was extracted (first glob match; avoids parsing ls)
+    for candidate in *.BIN; do
+      if [ -f "$candidate" ]; then
+        BIN_FILE="$candidate"
+        break
+      fi
+    done
+  fi
+
+  if [ -z "$BIN_FILE" ]; then
+    echo "ERROR: No BIN file found in archive"
+    ls -la
+    exit 1
+  fi
+
+  mv "$BIN_FILE" "$DB_FILE"
+fi
+echo "Database ready: $DATA_DIR/$DB_FILE"
+
+# Cleanup
+rm -f "$ARCHIVE" *.txt LICENSE* README*
+
+echo ""
+echo "Done! Database saved to: $DATA_DIR/$DB_FILE"
+echo "Update monthly by re-running this script."
--- a/backend/src/_deprecated/DONT_USE.md
+++ b/backend/src/_deprecated/DONT_USE.md
@@ -0,0 +1,46 @@
+# DEPRECATED CODE - DO NOT USE
+
+**These directories contain OLD, ABANDONED code.**
+
+## What's Here
+
+| Directory | What It Was | Why Deprecated |
+|-----------|-------------|----------------|
+| `hydration/` | Old pipeline for processing crawl data | Replaced by `src/tasks/handlers/` |
+| `scraper-v2/` | Old Puppeteer-based scraper engine | Replaced by curl-based `src/platforms/dutchie/client.ts` |
+| `canonical-hydration/` | Intermediate step toward canonical schema | Merged into task handlers |
+
+## What to Use Instead
+
+| Old (DONT USE) | New (USE THIS) |
+|----------------|----------------|
+| `hydration/normalizers/dutchie.ts` | `src/tasks/handlers/product-refresh.ts` |
+| `hydration/producer.ts` | `src/tasks/handlers/payload-fetch.ts` |
+| `scraper-v2/engine.ts` | `src/platforms/dutchie/client.ts` |
+| `scraper-v2/scheduler.ts` | `src/services/task-scheduler.ts` |
+
+## Why Keep This Code?
+
+- Historical reference only
+- Some patterns may be useful for debugging
+- Will be deleted once it is confirmed to be no longer needed
+
+## Claude Instructions
+
+**IF YOU ARE CLAUDE:**
+
+1. NEVER import from `src/_deprecated/`
+2. NEVER reference these files as examples
+3. NEVER try to "fix" or "update" code in here
+4. If you see imports from these directories, suggest replacing them
+
+**Correct imports:**
+```typescript
+// GOOD
+import { executeGraphQL } from '../platforms/dutchie/client';
+import { pool } from '../db/pool';
+
+// BAD - DO NOT USE
+import { something } from '../_deprecated/hydration/...';
+import { something } from '../_deprecated/scraper-v2/...';
+```
--- a/backend/src/_deprecated/canonical-hydration/RUNBOOK.md
+++ b/backend/src/_deprecated/canonical-hydration/RUNBOOK.md
--- a/backend/src/_deprecated/canonical-hydration/cli/backfill.ts
+++ b/backend/src/_deprecated/canonical-hydration/cli/backfill.ts
--- a/backend/src/_deprecated/canonical-hydration/cli/incremental.ts
+++ b/backend/src/_deprecated/canonical-hydration/cli/incremental.ts
--- a/backend/src/_deprecated/canonical-hydration/cli/products-only.ts
+++ b/backend/src/_deprecated/canonical-hydration/cli/products-only.ts
--- a/backend/src/_deprecated/canonical-hydration/crawl-run-recorder.ts
+++ b/backend/src/_deprecated/canonical-hydration/crawl-run-recorder.ts
--- a/backend/src/_deprecated/canonical-hydration/hydration-service.ts
+++ b/backend/src/_deprecated/canonical-hydration/hydration-service.ts
--- a/backend/src/_deprecated/canonical-hydration/index.ts
+++ b/backend/src/_deprecated/canonical-hydration/index.ts
--- a/backend/src/_deprecated/canonical-hydration/snapshot-writer.ts
+++ b/backend/src/_deprecated/canonical-hydration/snapshot-writer.ts
--- a/backend/src/_deprecated/canonical-hydration/store-product-normalizer.ts
+++ b/backend/src/_deprecated/canonical-hydration/store-product-normalizer.ts
--- a/backend/src/_deprecated/canonical-hydration/types.ts
+++ b/backend/src/_deprecated/canonical-hydration/types.ts
--- a/backend/src/_deprecated/routes/crawler-sandbox.ts
+++ b/backend/src/_deprecated/routes/crawler-sandbox.ts
--- a/backend/src/_deprecated/scraper-v2/README.md
+++ b/backend/src/_deprecated/scraper-v2/README.md
--- a/backend/src/_deprecated/scraper-v2/canonical-pipeline.ts
+++ b/backend/src/_deprecated/scraper-v2/canonical-pipeline.ts
@@ -0,0 +1,353 @@
+/**
+ * Canonical Database Pipeline
+ *
+ * Writes scraped products to the canonical tables:
+ * - store_products (current state)
+ * - store_product_snapshots (historical)
+ * - product_variants (per-weight pricing)
+ * - product_variant_snapshots (variant history)
+ *
+ * This replaces the legacy DatabasePipeline that wrote to `products` table.
+ */
+
+import { ItemPipeline, Product } from './types';
+import { logger } from '../services/logger';
+import { pool } from '../db/pool';
+import { v4 as uuidv4 } from 'uuid';
+
+/**
+ * Per-variant data read by CanonicalDatabasePipeline.process from
+ * `item.variants` (metadata set by the spider; defaults to an empty list).
+ * Each entry is upserted into `product_variants`.
+ */
+interface VariantData {
+  // Variant label such as "1g" or "1/8oz"; passed to parseWeight() to derive
+  // the weight value and unit.
+  option: string;
+  // Prices; presumably map to the price_rec / price_med columns and their
+  // *_special counterparts — TODO confirm against the query parameters.
+  priceRec: number | null;
+  priceMed: number | null;
+  priceRecSpecial: number | null;
+  priceMedSpecial: number | null;
+  // Quantity for this variant, or null when not provided.
+  quantity: number | null;
+  inStock: boolean;
+  isOnSpecial: boolean;
+}
+
+/**
+ * Parse weight string like "1g", "3.5g", "1/8oz" into value and unit.
+ *
+ * @param option - Variant label, e.g. "1g", "3.5g", "1/8oz", "100mg".
+ * @returns `value` as a number (fractions such as "1/8" are divided out) and
+ *   `unit` lower-cased. `unit` is null when the label has no recognised unit
+ *   suffix; both fields are null when the label is empty or does not match.
+ *   `value` is null whenever the numeric part does not evaluate to a finite
+ *   number (e.g. "1/0", "0/0", "/").
+ */
+function parseWeight(option: string): { value: number | null; unit: string | null } {
+  if (!option) return { value: null, unit: null };
+
+  // Match patterns like "1g", "3.5g", "1/8oz", "100mg"
+  const match = option.match(/^([\d.\/]+)\s*(g|oz|mg|ml|each|pk|ct)?$/i);
+  if (!match) return { value: null, unit: null };
+
+  const rawValue = match[1];
+  const unit = match[2]?.toLowerCase() || null;
+
+  // Handle fractions like "1/8"
+  let parsed: number;
+  if (rawValue.includes('/')) {
+    const [num, denom] = rawValue.split('/');
+    parsed = parseFloat(num) / parseFloat(denom);
+  } else {
+    parsed = parseFloat(rawValue);
+  }
+
+  // Number.isFinite rejects NaN and also +/-Infinity: a zero denominator
+  // ("1/0") divides to Infinity, which an isNaN check alone would let through.
+  const value = Number.isFinite(parsed) ? parsed : null;
+
+  return { value, unit };
+}
+
+/**
+ * Canonical Database Pipeline - saves items to canonical tables
+ *
+ * TABLES:
+ * - store_products: Current product state per store
+ * - store_product_snapshots: Historical snapshot per crawl
+ * - product_variants: Current variant state (per-weight pricing)
+ * - product_variant_snapshots: Historical variant snapshots
+ */
+export class CanonicalDatabasePipeline implements ItemPipeline<Product> {
+  name = 'CanonicalDatabasePipeline';
+  priority = 10; // Low priority - runs last
+
+  private crawlRunId: number | null = null;
+
+  setCrawlRunId(id: number): void {
+    this.crawlRunId = id;
+  }
+
+  async process(item: Product, spider: string): Promise<Product | null> {
+    const client = await pool.connect();
+
+    try {
+      // Extract metadata set by spider
+      const dispensaryId = (item as any).dispensaryId;
+      const categoryName = (item as any).categoryName;
+      const variants: VariantData[] = (item as any).variants || [];
+
+      if (!dispensaryId) {
+        logger.error('pipeline', `Missing dispensaryId for ${item.name}`);
+        return null;
+      }
+
+      const externalProductId = item.dutchieProductId || null;
+      const provider = 'dutchie';
+
+      // Determine stock status
+      const isInStock = (item as any).inStock !== false;
+      const stockQuantity = (item as any).stockQuantity || null;
+
+      // Extract pricing
+      const priceRec = item.price || null;
+      const priceMed = (item as any).priceMed || null;
+
+      let storeProductId: number | null = null;
+      let isNewProduct = false;
+
+      // ============================================================
+      // UPSERT store_products
+      // ============================================================
+
+      const upsertResult = await client.query(`
+        INSERT INTO store_products (
+          dispensary_id, provider, provider_product_id,
+          name_raw, brand_name_raw, category_raw,
+          price_rec, price_med,
+          thc_percent, cbd_percent,
+          is_in_stock, stock_quantity,
+          image_url, source_url,
+          raw_data,
+          first_seen_at, last_seen_at,
+          created_at, updated_at
+        ) VALUES (
+          $1, $2, $3,
+          $4, $5, $6,
+          $7, $8,
+          $9, $10,
+          $11, $12,
+          $13, $14,
+          $15,
+          NOW(), NOW(),
+          NOW(), NOW()
+        )
+        ON CONFLICT (dispensary_id, provider, provider_product_id)
+        DO UPDATE SET
+          name_raw = EXCLUDED.name_raw,
+          brand_name_raw = EXCLUDED.brand_name_raw,
+          category_raw = EXCLUDED.category_raw,
+          price_rec = EXCLUDED.price_rec,
+          price_med = EXCLUDED.price_med,
+          thc_percent = EXCLUDED.thc_percent,
+          cbd_percent = EXCLUDED.cbd_percent,
+          is_in_stock = EXCLUDED.is_in_stock,
+          stock_quantity = EXCLUDED.stock_quantity,
+          image_url = COALESCE(EXCLUDED.image_url, store_products.image_url),
+          source_url = EXCLUDED.source_url,
+          raw_data = EXCLUDED.raw_data,
+          last_seen_at = NOW(),
+          updated_at = NOW()
+        RETURNING id, (xmax = 0) as is_new
+      `, [
+        dispensaryId, provider, externalProductId,
+        item.name, item.brand || null, categoryName || null,
+        priceRec, priceMed,
+        item.thcPercentage || null, item.cbdPercentage || null,
+        isInStock, stockQuantity,
+        item.imageUrl || null, item.dutchieUrl || null,
+        JSON.stringify(item.metadata || {}),
+      ]);
+
+      storeProductId = upsertResult.rows[0].id;
+      isNewProduct = upsertResult.rows[0].is_new;
+
+      logger.debug('pipeline', `${isNewProduct ? 'Inserted' : 'Updated'} canonical product: ${item.name} (ID: ${storeProductId})`);
+
+      // ============================================================
+      // INSERT store_product_snapshots
+      // ============================================================
+
+      await client.query(`
+        INSERT INTO store_product_snapshots (
+          store_product_id, dispensary_id, crawl_run_id,
+          price_rec, price_med,
+          is_in_stock, stock_quantity,
+          is_present_in_feed,
+          captured_at, created_at
+        ) VALUES (
+          $1, $2, $3,
+          $4, $5,
+          $6, $7,
+          TRUE,
+          NOW(), NOW()
+        )
+        ON CONFLICT (store_product_id, crawl_run_id) WHERE crawl_run_id IS NOT NULL
+        DO UPDATE SET
+          price_rec = EXCLUDED.price_rec,
+          price_med = EXCLUDED.price_med,
+          is_in_stock = EXCLUDED.is_in_stock,
+          stock_quantity = EXCLUDED.stock_quantity
+      `, [
+        storeProductId, dispensaryId, this.crawlRunId,
+        priceRec, priceMed,
+        isInStock, stockQuantity,
+      ]);
+
+      // ============================================================
+      // UPSERT product_variants (if variants exist)
+      // ============================================================
+
+      if (variants.length > 0) {
+        for (const variant of variants) {
+          const { value: weightValue, unit: weightUnit } = parseWeight(variant.option);
+
+          const variantResult = await client.query(`
+            INSERT INTO product_variants (
+              store_product_id, dispensary_id,
+              option,
+              price_rec, price_med, price_rec_special, price_med_special,
+              quantity, quantity_available, in_stock, is_on_special,
+              weight_value, weight_unit,
+              first_seen_at, last_seen_at,
+              created_at, updated_at
+            ) VALUES (
+              $1, $2,
+              $3,
+              $4, $5, $6, $7,
+              $8, $8, $9, $10,
+              $11, $12,
+              NOW(), NOW(),
+              NOW(), NOW()
+            )
+            ON CONFLICT (store_product_id, option)
+            DO UPDATE SET
+              price_rec = EXCLUDED.price_rec,
+              price_med = EXCLUDED.price_med,
+              price_rec_special = EXCLUDED.price_rec_special,
+              price_med_special = EXCLUDED.price_med_special,
+              quantity = EXCLUDED.quantity,
+              quantity_available = EXCLUDED.quantity_available,
+              in_stock = EXCLUDED.in_stock,
+              is_on_special = EXCLUDED.is_on_special,
+              weight_value = EXCLUDED.weight_value,
+              weight_unit = EXCLUDED.weight_unit,
+              last_seen_at = NOW(),
+              last_price_change_at = CASE
+                WHEN product_variants.price_rec IS DISTINCT FROM EXCLUDED.price_rec
+                  OR product_variants.price_rec_special IS DISTINCT FROM EXCLUDED.price_rec_special
+                THEN NOW()
+                ELSE product_variants.last_price_change_at
+              END,
+              last_stock_change_at = CASE
+                WHEN product_variants.in_stock IS DISTINCT FROM EXCLUDED.in_stock
+                THEN NOW()
+                ELSE product_variants.last_stock_change_at
+              END,
+              updated_at = NOW()
+            RETURNING id
+          `, [
+            storeProductId, dispensaryId,
+            variant.option,
+            variant.priceRec, variant.priceMed, variant.priceRecSpecial, variant.priceMedSpecial,
+            variant.quantity, variant.inStock, variant.isOnSpecial,
+            weightValue, weightUnit,
+          ]);
+
+          const variantId = variantResult.rows[0].id;
+
+          // Insert variant snapshot
+          await client.query(`
+            INSERT INTO product_variant_snapshots (
+              product_variant_id, store_product_id, dispensary_id, crawl_run_id,
+              option,
+              price_rec, price_med, price_rec_special, price_med_special,
+              quantity, in_stock, is_on_special,
+              is_present_in_feed,
+              captured_at, created_at
+            ) VALUES (
+              $1, $2, $3, $4,
+              $5,
+              $6, $7, $8, $9,
+              $10, $11, $12,
+              TRUE,
+              NOW(), NOW()
+            )
+          `, [
+            variantId, storeProductId, dispensaryId, this.crawlRunId,
+            variant.option,
+            variant.priceRec, variant.priceMed, variant.priceRecSpecial, variant.priceMedSpecial,
+            variant.quantity, variant.inStock, variant.isOnSpecial,
+          ]);
+        }
+
+        logger.debug('pipeline', `Upserted ${variants.length} variants for ${item.name}`);
+      }
+
+      // Attach metadata for stats tracking
+      (item as any).isNewProduct = isNewProduct;
+      (item as any).storeProductId = storeProductId;
+
+      return item;
+
+    } catch (error) {
+      logger.error('pipeline', `Failed to save canonical product ${item.name}: ${error}`);
+      return null;
+    } finally {
+      client.release();
+    }
+  }
+}
+
+/**
+ * Insert a new crawl_runs row in the 'running' state and return its id.
+ *
+ * Call this before a crawl starts so later writes can reference the run.
+ *
+ * @param dispensaryId - Stored in crawl_runs.dispensary_id
+ * @param provider - Stored in crawl_runs.provider (defaults to 'dutchie')
+ * @param triggerType - Stored in crawl_runs.trigger_type (defaults to 'manual')
+ * @returns The id of the inserted crawl_runs row
+ */
+export async function createCrawlRun(
+  dispensaryId: number,
+  provider: string = 'dutchie',
+  triggerType: string = 'manual'
+): Promise<number> {
+  const insertSql = `
+    INSERT INTO crawl_runs (
+      dispensary_id, provider,
+      started_at, status, trigger_type
+    ) VALUES ($1, $2, NOW(), 'running', $3)
+    RETURNING id
+  `;
+  const insertParams = [dispensaryId, provider, triggerType];
+
+  // started_at and status are set by the statement itself, not by the caller.
+  const { rows } = await pool.query(insertSql, insertParams);
+  return rows[0].id;
+}
+
+/**
+ * Mark a crawl run as finished and record its final statistics.
+ *
+ * Sets finished_at to NOW(), writes the status and the product/snapshot
+ * counters, and replaces the row's metadata with a JSON object holding
+ * `variants_upserted` and `error`.
+ *
+ * @param crawlRunId - id of the crawl_runs row to update
+ * @param stats - Final counters for the run. When omitted, `status` falls back
+ *   to 'completed', `variantsUpserted` to 0 and `error` to null.
+ */
+export async function completeCrawlRun(
+  crawlRunId: number,
+  stats: {
+    productsFound: number;
+    productsNew: number;
+    productsUpdated: number;
+    snapshotsWritten: number;
+    variantsUpserted?: number;
+    status?: 'completed' | 'failed' | 'partial';
+    error?: string;
+  }
+): Promise<void> {
+  // jsonb_build_object() is declared VARIADIC "any", so PostgreSQL cannot infer
+  // a type for a bare bind parameter and rejects the statement with
+  // "could not determine data type of parameter". The explicit casts on $7 and
+  // $8 give the planner the types it needs.
+  await pool.query(`
+    UPDATE crawl_runs SET
+      finished_at = NOW(),
+      status = $2,
+      products_found = $3,
+      products_new = $4,
+      products_updated = $5,
+      snapshots_written = $6,
+      metadata = jsonb_build_object(
+        'variants_upserted', $7::int,
+        'error', $8::text
+      )
+    WHERE id = $1
+  `, [
+    crawlRunId,
+    stats.status || 'completed',
+    stats.productsFound,
+    stats.productsNew,
+    stats.productsUpdated,
+    stats.snapshotsWritten,
+    stats.variantsUpserted || 0,
+    stats.error || null,
+  ]);
+}
--- a/backend/src/_deprecated/scraper-v2/downloader.ts
+++ b/backend/src/_deprecated/scraper-v2/downloader.ts
--- a/backend/src/_deprecated/scraper-v2/engine.ts
+++ b/backend/src/_deprecated/scraper-v2/engine.ts
@@ -2,6 +2,7 @@ import { RequestScheduler } from './scheduler';
 import { Downloader } from './downloader';
 import { MiddlewareEngine, UserAgentMiddleware, ProxyMiddleware, RateLimitMiddleware, RetryMiddleware, BotDetectionMiddleware, StealthMiddleware } from './middlewares';
 import { PipelineEngine, ValidationPipeline, SanitizationPipeline, DeduplicationPipeline, ImagePipeline, DatabasePipeline, StatsPipeline } from './pipelines';
+import { CanonicalDatabasePipeline, createCrawlRun, completeCrawlRun } from './canonical-pipeline';
 import { ScraperRequest, ScraperResponse, ParseResult, Product, ScraperStats } from './types';
 import { logger } from '../services/logger';
 import { pool } from '../db/pool';
@@ -65,6 +66,9 @@ export class ScraperEngine {
    this.pipelineEngine.use(new DeduplicationPipeline());
    this.pipelineEngine.use(new ImagePipeline());
    this.pipelineEngine.use(new StatsPipeline());
+    // Use canonical pipeline for writing to store_products/product_variants
+    this.pipelineEngine.use(new CanonicalDatabasePipeline());
+    // Keep legacy pipeline for backwards compatibility with existing stores table
    this.pipelineEngine.use(new DatabasePipeline());
  }

--- a/backend/src/_deprecated/scraper-v2/index.ts
+++ b/backend/src/_deprecated/scraper-v2/index.ts
@@ -39,6 +39,11 @@ export {
  DatabasePipeline,
  StatsPipeline
 } from './pipelines';
+export {
+  CanonicalDatabasePipeline,
+  createCrawlRun,
+  completeCrawlRun
+} from './canonical-pipeline';
 export * from './types';

 // Main API functions
--- a/backend/src/_deprecated/scraper-v2/middlewares.ts
+++ b/backend/src/_deprecated/scraper-v2/middlewares.ts
--- a/backend/src/_deprecated/scraper-v2/navigation.ts
+++ b/backend/src/_deprecated/scraper-v2/navigation.ts
--- a/backend/src/_deprecated/scraper-v2/pipelines.ts
+++ b/backend/src/_deprecated/scraper-v2/pipelines.ts
--- a/backend/src/_deprecated/scraper-v2/scheduler.ts
+++ b/backend/src/_deprecated/scraper-v2/scheduler.ts
--- a/backend/src/_deprecated/scraper-v2/types.ts
+++ b/backend/src/_deprecated/scraper-v2/types.ts
--- a/backend/src/_deprecated/scripts/queue-dispensaries.ts
+++ b/backend/src/_deprecated/scripts/queue-dispensaries.ts
--- a/backend/src/_deprecated/scripts/run-backfill.ts
+++ b/backend/src/_deprecated/scripts/run-backfill.ts
--- a/backend/src/_deprecated/scripts/run-hydration.ts
+++ b/backend/src/_deprecated/scripts/run-hydration.ts
--- a/backend/src/_deprecated/scripts/test-crawl-to-canonical.ts
+++ b/backend/src/_deprecated/scripts/test-crawl-to-canonical.ts
@@ -0,0 +1,277 @@
+#!/usr/bin/env npx tsx
+/**
+ * Test Script: Crawl a single dispensary and write to canonical tables
+ *
+ * This script:
+ * 1. Fetches products from Dutchie GraphQL
+ * 2. Normalizes via DutchieNormalizer
+ * 3. Writes to store_products, product_variants, snapshots via hydrateToCanonical
+ *
+ * Usage:
+ *   npx tsx src/scripts/test-crawl-to-canonical.ts <dispensaryId>
+ *   npx tsx src/scripts/test-crawl-to-canonical.ts 235
+ */
+
+import { Pool } from 'pg';
+import dotenv from 'dotenv';
+import {
+  executeGraphQL,
+  GRAPHQL_HASHES,
+  DUTCHIE_CONFIG,
+} from '../platforms/dutchie';
+import {
+  DutchieNormalizer,
+  hydrateToCanonical,
+} from '../hydration';
+import { initializeImageStorage } from '../utils/image-storage';
+
+dotenv.config();
+
+// ============================================================
+// DATABASE CONNECTION
+// ============================================================
+
+/**
+ * Resolve the PostgreSQL connection string used by this script.
+ *
+ * Precedence:
+ *   1. CANNAIQ_DB_URL, returned verbatim
+ *   2. DATABASE_URL, returned verbatim
+ *   3. A URI assembled from CANNAIQ_DB_HOST / _PORT / _NAME / _USER / _PASS,
+ *      each falling back to the local development default below.
+ *
+ * @returns A `postgresql://` connection URI
+ */
+function getConnectionString(): string {
+  if (process.env.CANNAIQ_DB_URL) {
+    return process.env.CANNAIQ_DB_URL;
+  }
+  if (process.env.DATABASE_URL) {
+    return process.env.DATABASE_URL;
+  }
+  const host = process.env.CANNAIQ_DB_HOST || 'localhost';
+  const port = process.env.CANNAIQ_DB_PORT || '54320';
+  const name = process.env.CANNAIQ_DB_NAME || 'dutchie_menus';
+  // Percent-encode the credentials: characters such as '@', ':', '/' or '#'
+  // in a user name or password would otherwise be read as URI delimiters.
+  const user = encodeURIComponent(process.env.CANNAIQ_DB_USER || 'dutchie');
+  const pass = encodeURIComponent(process.env.CANNAIQ_DB_PASS || 'dutchie_local_pass');
+  return `postgresql://${user}:${pass}@${host}:${port}/${name}`;
+}
+
+const pool = new Pool({ connectionString: getConnectionString() });
+
+// ============================================================
+// FETCH PRODUCTS FROM DUTCHIE
+// ============================================================
+
+/** Outcome of paging through the Dutchie FilteredProducts query. */
+interface FetchResult {
+  /** Raw product objects accumulated across every fetched page. */
+  products: any[];
+  /** Page count derived from the last reported totalCount and DUTCHIE_CONFIG.perPage. */
+  totalPages: number;
+  /** totalCount reported by the most recently fetched page (0 when absent). */
+  totalProducts: number;
+}
+
+/**
+ * Fetch all 'rec' / 'Active' products for a dispensary, page by page.
+ *
+ * Pagination stops when any of the following happens:
+ *   - the page index reaches the computed page count or DUTCHIE_CONFIG.maxPages
+ *   - a response carries no `data.filteredProducts`
+ *   - a page comes back with zero products
+ *   - a request throws (the error is logged, not rethrown)
+ *
+ * In every case the products gathered so far are returned, so the result may
+ * be partial.
+ *
+ * @param platformDispensaryId - Sent as productsFilter.dispensaryId
+ * @param cName - Passed to executeGraphQL in its options object
+ * @returns Accumulated products plus the last known page and product counts
+ */
+async function fetchAllProducts(platformDispensaryId: string, cName: string): Promise<FetchResult> {
+  const allProducts: any[] = [];
+  let page = 0;
+  // Start at 1 so the first request is always made; the real value is
+  // computed from the first response.
+  let totalPages = 1;
+  let totalProducts = 0;
+
+  console.log(`[Fetch] Starting fetch for ${platformDispensaryId} (cName: ${cName})`);
+
+  while (page < totalPages && page < DUTCHIE_CONFIG.maxPages) {
+    const variables = {
+      includeEnterpriseSpecials: false,
+      productsFilter: {
+        dispensaryId: platformDispensaryId,
+        pricingType: 'rec',
+        Status: 'Active',  // 'Active' = in-stock products with pricing
+        types: [],
+        useCache: true,
+        isDefaultSort: true,
+        sortBy: 'popularSortIdx',
+        sortDirection: 1,
+        bypassOnlineThresholds: true,
+        isKioskMenu: false,
+        removeProductsBelowOptionThresholds: false,
+      },
+      page,
+      perPage: DUTCHIE_CONFIG.perPage,
+    };
+
+    try {
+      const result = await executeGraphQL(
+        'FilteredProducts',
+        variables,
+        GRAPHQL_HASHES.FilteredProducts,
+        { cName, maxRetries: 3 }
+      );
+
+      const data = result?.data?.filteredProducts;
+      if (!data) {
+        console.error(`[Fetch] No data returned for page ${page}`);
+        break;
+      }
+
+      const products = data.products || [];
+      totalProducts = data.queryInfo?.totalCount || 0;
+      totalPages = Math.ceil(totalProducts / DUTCHIE_CONFIG.perPage);
+
+      // An empty page means there is nothing further to read. Without this
+      // guard an overstated totalCount would keep the loop requesting (and
+      // sleeping between) empty pages until maxPages is reached.
+      if (products.length === 0) {
+        console.log(`[Fetch] Page ${page + 1}/${totalPages}: 0 products, stopping pagination`);
+        break;
+      }
+
+      allProducts.push(...products);
+      console.log(`[Fetch] Page ${page + 1}/${totalPages}: ${products.length} products (total so far: ${allProducts.length})`);
+
+      page++;
+
+      // Throttle between pages, but not after the last one.
+      if (page < totalPages) {
+        await new Promise(r => setTimeout(r, DUTCHIE_CONFIG.pageDelayMs));
+      }
+    } catch (error: any) {
+      console.error(`[Fetch] Error on page ${page}: ${error.message}`);
+      break;
+    }
+  }
+
+  return { products: allProducts, totalPages, totalProducts };
+}
+
+// ============================================================
+// MAIN
+// ============================================================
+
+/**
+ * Script entry point: crawl one dispensary and write it to the canonical tables.
+ *
+ * Reads the dispensary id from process.argv[2], then:
+ *   1. loads the dispensary row from `dispensaries`
+ *   2. fetches its products via fetchAllProducts
+ *   3. validates and normalizes them with DutchieNormalizer
+ *   4. writes them through hydrateToCanonical (no crawl_run_id)
+ *   5. prints row counts from store_products, product_variants and
+ *      store_product_snapshots for that dispensary
+ *
+ * Exits with code 1 on a missing/invalid argument, a failed validation or any
+ * thrown error, and with code 0 when no products were fetched.
+ */
+async function main() {
+  const dispensaryId = parseInt(process.argv[2], 10);
+
+  // parseInt yields NaN for a missing or non-numeric argument; NaN and 0 are
+  // both falsy, so either one prints the usage text and exits.
+  if (!dispensaryId) {
+    console.error('Usage: npx tsx src/scripts/test-crawl-to-canonical.ts <dispensaryId>');
+    console.error('Example: npx tsx src/scripts/test-crawl-to-canonical.ts 235');
+    process.exit(1);
+  }
+
+  console.log('============================================================');
+  console.log(`Test Crawl to Canonical - Dispensary ${dispensaryId}`);
+  console.log('============================================================\n');
+
+  // Initialize image storage
+  // NOTE(review): this await sits outside the try block below, so a rejection
+  // here is not reported by that catch and pool.end() is not reached.
+  console.log('[Init] Initializing image storage...');
+  await initializeImageStorage();
+  console.log('  Image storage ready\n');
+
+  try {
+    // Step 1: Get dispensary info
+    console.log('[Step 1] Getting dispensary info...');
+    const dispResult = await pool.query(`
+      SELECT id, name, platform_dispensary_id, menu_url
+      FROM dispensaries
+      WHERE id = $1
+    `, [dispensaryId]);
+
+    if (dispResult.rows.length === 0) {
+      throw new Error(`Dispensary ${dispensaryId} not found`);
+    }
+
+    const disp = dispResult.rows[0];
+    console.log(`  Name: ${disp.name}`);
+    console.log(`  Platform ID: ${disp.platform_dispensary_id}`);
+    console.log(`  Menu URL: ${disp.menu_url}`);
+
+    if (!disp.platform_dispensary_id) {
+      throw new Error('Dispensary does not have a platform_dispensary_id');
+    }
+
+    // Extract cName from menu_url: the path segment following
+    // /embedded-menu/ or /dispensary/. Falls back to the literal 'dispensary'
+    // when menu_url is missing or does not match.
+    const cNameMatch = disp.menu_url?.match(/\/(?:embedded-menu|dispensary)\/([^/?]+)/);
+    const cName = cNameMatch ? cNameMatch[1] : 'dispensary';
+    console.log(`  cName: ${cName}\n`);
+
+    // Step 2: Fetch products from Dutchie
+    console.log('[Step 2] Fetching products from Dutchie GraphQL...');
+    const fetchResult = await fetchAllProducts(disp.platform_dispensary_id, cName);
+    console.log(`  Total products fetched: ${fetchResult.products.length}\n`);
+
+    if (fetchResult.products.length === 0) {
+      console.log('No products fetched. Exiting.');
+      // NOTE(review): process.exit() ends the process immediately, so the
+      // finally block (pool.end()) presumably never runs on this path.
+      process.exit(0);
+    }
+
+    // Step 3: Normalize
+    console.log('[Step 3] Normalizing products...');
+    const normalizer = new DutchieNormalizer();
+
+    // Construct a RawPayload structure that the normalizer expects
+    // The normalizer.normalize() expects: { raw_json, dispensary_id, ... }
+    // This inner object is validated on its own first and then embedded as
+    // raw_json in the full payload below.
+    const rawPayloadForValidation = {
+      products: fetchResult.products,
+      queryInfo: {
+        totalCount: fetchResult.totalProducts,
+      },
+    };
+
+    const validation = normalizer.validatePayload(rawPayloadForValidation);
+    if (!validation.valid) {
+      console.error(`  Validation failed: ${validation.errors?.join(', ')}`);
+      process.exit(1);
+    }
+    console.log(`  Validation: PASS`);
+
+    // Build proper RawPayload for normalize()
+    // The id is synthetic (timestamp based). product_count carries the total
+    // reported by the API, which can differ from the number of products
+    // actually fetched.
+    const rawPayload = {
+      id: `test-${Date.now()}`,
+      dispensary_id: dispensaryId,
+      crawl_run_id: null,
+      platform: 'dutchie',
+      payload_version: 1,
+      raw_json: rawPayloadForValidation,
+      product_count: fetchResult.totalProducts,
+      pricing_type: 'rec',
+      crawl_mode: 'active',
+      fetched_at: new Date(),
+      processed: false,
+      normalized_at: null,
+      hydration_error: null,
+      hydration_attempts: 0,
+      created_at: new Date(),
+    };
+
+    const normResult = normalizer.normalize(rawPayload);
+    console.log(`  Normalized products: ${normResult.products.length}`);
+    console.log(`  Brands extracted: ${normResult.brands.length}`);
+    console.log(`  Sample product: ${normResult.products[0]?.name}\n`);
+
+    // Step 4: Write to canonical tables
+    console.log('[Step 4] Writing to canonical tables via hydrateToCanonical...');
+    const hydrateResult = await hydrateToCanonical(
+      pool,
+      dispensaryId,
+      normResult,
+      null // no crawl_run_id for this test
+    );
+
+    console.log(`  Products upserted: ${hydrateResult.productsUpserted}`);
+    console.log(`  Products new: ${hydrateResult.productsNew}`);
+    console.log(`  Snapshots created: ${hydrateResult.snapshotsCreated}`);
+    console.log(`  Variants upserted: ${hydrateResult.variantsUpserted}`);
+    console.log(`  Brands created: ${hydrateResult.brandsCreated}\n`);
+
+    // Step 5: Verify
+    // These counts cover every row for the dispensary, not only the rows
+    // written by this run.
+    console.log('[Step 5] Verifying data in canonical tables...');
+
+    const productCount = await pool.query(`
+      SELECT COUNT(*) as count FROM store_products WHERE dispensary_id = $1
+    `, [dispensaryId]);
+    console.log(`  store_products count: ${productCount.rows[0].count}`);
+
+    const variantCount = await pool.query(`
+      SELECT COUNT(*) as count FROM product_variants WHERE dispensary_id = $1
+    `, [dispensaryId]);
+    console.log(`  product_variants count: ${variantCount.rows[0].count}`);
+
+    const snapshotCount = await pool.query(`
+      SELECT COUNT(*) as count FROM store_product_snapshots WHERE dispensary_id = $1
+    `, [dispensaryId]);
+    console.log(`  store_product_snapshots count: ${snapshotCount.rows[0].count}`);
+
+    console.log('\n============================================================');
+    console.log('SUCCESS - Crawl and hydration complete!');
+    console.log('============================================================');
+
+  } catch (error: any) {
+    // Any thrown error is printed with its stack and the script exits with
+    // code 1.
+    console.error('\n============================================================');
+    console.error('ERROR:', error.message);
+    console.error('============================================================');
+    if (error.stack) {
+      console.error(error.stack);
+    }
+    process.exit(1);
+  } finally {
+    // Close the pool on the paths that fall through without process.exit().
+    await pool.end();
+  }
+}
+
+main();
--- a/backend/src/_deprecated/services/DiscoveryGeoService.ts
+++ b/backend/src/_deprecated/services/DiscoveryGeoService.ts
--- a/backend/src/_deprecated/services/GeoValidationService.ts
+++ b/backend/src/_deprecated/services/GeoValidationService.ts
--- a/backend/src/_deprecated/services/availability.ts
+++ b/backend/src/_deprecated/services/availability.ts
--- a/backend/src/_deprecated/services/crawler-jobs.ts
+++ b/backend/src/_deprecated/services/crawler-jobs.ts
--- a/backend/src/_deprecated/services/crawler-logger.ts
+++ b/backend/src/_deprecated/services/crawler-logger.ts
--- a/backend/src/_deprecated/services/crawler-profiles.ts
+++ b/backend/src/_deprecated/services/crawler-profiles.ts
--- a/backend/src/_deprecated/services/intelligence-detector.ts
+++ b/backend/src/_deprecated/services/intelligence-detector.ts
--- a/backend/src/_deprecated/services/menu-provider-detector.ts
+++ b/backend/src/_deprecated/services/menu-provider-detector.ts
--- a/backend/src/_deprecated/services/scraper-debug.ts
+++ b/backend/src/_deprecated/services/scraper-debug.ts
--- a/backend/src/_deprecated/services/scraper.ts
+++ b/backend/src/_deprecated/services/scraper.ts
@@ -3,7 +3,7 @@ import StealthPlugin from 'puppeteer-extra-plugin-stealth';
 import { Browser, Page } from 'puppeteer';
 import { SocksProxyAgent } from 'socks-proxy-agent';
 import { pool } from '../db/pool';
-import { uploadImageFromUrl, getImageUrl } from '../utils/minio';
+import { downloadProductImageLegacy } from '../utils/image-storage';
 import { logger } from './logger';
 import { registerScraper, updateScraperStats, completeScraper } from '../routes/scraper-monitor';
 import { incrementProxyFailure, getActiveProxy, isBotDetectionError, putProxyInTimeout } from './proxy';
@@ -767,7 +767,8 @@ export async function saveProducts(storeId: number, categoryId: number, products
        
        if (product.imageUrl && !localImagePath) {
          try {
-            localImagePath = await uploadImageFromUrl(product.imageUrl, productId);
+            const result = await downloadProductImageLegacy(product.imageUrl, 0, productId);
+            localImagePath = result.urls?.original || null;
            await client.query(`
              UPDATE products
              SET local_image_path = $1
--- a/backend/src/_deprecated/system/routes/index.ts
+++ b/backend/src/_deprecated/system/routes/index.ts
@@ -0,0 +1,584 @@
+/**
+ * System API Routes
+ *
+ * Provides REST API endpoints for system monitoring and control:
+ * - /api/system/sync/* - Sync orchestrator
+ * - /api/system/dlq/* - Dead-letter queue
+ * - /api/system/integrity/* - Integrity checks
+ * - /api/system/fix/* - Auto-fix routines
+ * - /api/system/alerts/* - System alerts
+ * - /metrics - Prometheus metrics
+ *
+ * Phase 5: Full Production Sync + Monitoring
+ */
+
+import { Router, Request, Response } from 'express';
+import { Pool } from 'pg';
+import {
+  SyncOrchestrator,
+  MetricsService,
+  DLQService,
+  AlertService,
+  IntegrityService,
+  AutoFixService,
+} from '../services';
+
+export function createSystemRouter(pool: Pool): Router {
+  const router = Router();
+
+  // Initialize services
+  const metrics = new MetricsService(pool);
+  const dlq = new DLQService(pool);
+  const alerts = new AlertService(pool);
+  const integrity = new IntegrityService(pool, alerts);
+  const autoFix = new AutoFixService(pool, alerts);
+  const orchestrator = new SyncOrchestrator(pool, metrics, dlq, alerts);
+
+  // ============================================================
+  // SYNC ORCHESTRATOR ENDPOINTS
+  // ============================================================
+
+  /**
+   * GET /api/system/sync/status
+   * Get current sync status
+   */
+  router.get('/sync/status', async (_req: Request, res: Response) => {
+    try {
+      const status = await orchestrator.getStatus();
+      res.json(status);
+    } catch (error) {
+      console.error('[System] Sync status error:', error);
+      res.status(500).json({ error: 'Failed to get sync status' });
+    }
+  });
+
+  /**
+   * POST /api/system/sync/run
+   * Trigger a sync run
+   */
+  router.post('/sync/run', async (req: Request, res: Response) => {
+    try {
+      const triggeredBy = req.body.triggeredBy || 'api';
+      const result = await orchestrator.runSync();
+      res.json({
+        success: true,
+        triggeredBy,
+        metrics: result,
+      });
+    } catch (error) {
+      console.error('[System] Sync run error:', error);
+      res.status(500).json({
+        success: false,
+        error: error instanceof Error ? error.message : 'Sync run failed',
+      });
+    }
+  });
+
+  /**
+   * GET /api/system/sync/queue-depth
+   * Get queue depth information
+   */
+  router.get('/sync/queue-depth', async (_req: Request, res: Response) => {
+    try {
+      const depth = await orchestrator.getQueueDepth();
+      res.json(depth);
+    } catch (error) {
+      console.error('[System] Queue depth error:', error);
+      res.status(500).json({ error: 'Failed to get queue depth' });
+    }
+  });
+
+  /**
+   * GET /api/system/sync/health
+   * Get sync health status
+   */
+  router.get('/sync/health', async (_req: Request, res: Response) => {
+    try {
+      const health = await orchestrator.getHealth();
+      res.status(health.healthy ? 200 : 503).json(health);
+    } catch (error) {
+      console.error('[System] Health check error:', error);
+      res.status(500).json({ healthy: false, error: 'Health check failed' });
+    }
+  });
+
+  /**
+   * POST /api/system/sync/pause
+   * Pause the orchestrator
+   */
+  router.post('/sync/pause', async (req: Request, res: Response) => {
+    try {
+      const reason = req.body.reason || 'Manual pause';
+      await orchestrator.pause(reason);
+      res.json({ success: true, message: 'Orchestrator paused' });
+    } catch (error) {
+      console.error('[System] Pause error:', error);
+      res.status(500).json({ error: 'Failed to pause orchestrator' });
+    }
+  });
+
+  /**
+   * POST /api/system/sync/resume
+   * Resume the orchestrator
+   */
+  router.post('/sync/resume', async (_req: Request, res: Response) => {
+    try {
+      await orchestrator.resume();
+      res.json({ success: true, message: 'Orchestrator resumed' });
+    } catch (error) {
+      console.error('[System] Resume error:', error);
+      res.status(500).json({ error: 'Failed to resume orchestrator' });
+    }
+  });
+
+  // ============================================================
+  // DLQ ENDPOINTS
+  // ============================================================
+
+  /**
+   * GET /api/system/dlq
+   * List DLQ payloads
+   */
+  router.get('/dlq', async (req: Request, res: Response) => {
+    try {
+      const options = {
+        status: req.query.status as string,
+        errorType: req.query.errorType as string,
+        dispensaryId: req.query.dispensaryId ? parseInt(req.query.dispensaryId as string) : undefined,
+        limit: req.query.limit ? parseInt(req.query.limit as string) : 50,
+        offset: req.query.offset ? parseInt(req.query.offset as string) : 0,
+      };
+
+      const result = await dlq.listPayloads(options);
+      res.json(result);
+    } catch (error) {
+      console.error('[System] DLQ list error:', error);
+      res.status(500).json({ error: 'Failed to list DLQ payloads' });
+    }
+  });
+
+  /**
+   * GET /api/system/dlq/stats
+   * Get DLQ statistics
+   */
+  router.get('/dlq/stats', async (_req: Request, res: Response) => {
+    try {
+      const stats = await dlq.getStats();
+      res.json(stats);
+    } catch (error) {
+      console.error('[System] DLQ stats error:', error);
+      res.status(500).json({ error: 'Failed to get DLQ stats' });
+    }
+  });
+
+  /**
+   * GET /api/system/dlq/summary
+   * Get DLQ summary by error type
+   */
+  router.get('/dlq/summary', async (_req: Request, res: Response) => {
+    try {
+      const summary = await dlq.getSummary();
+      res.json(summary);
+    } catch (error) {
+      console.error('[System] DLQ summary error:', error);
+      res.status(500).json({ error: 'Failed to get DLQ summary' });
+    }
+  });
+
+  /**
+   * GET /api/system/dlq/:id
+   * Get a specific DLQ payload
+   */
+  router.get('/dlq/:id', async (req: Request, res: Response) => {
+    try {
+      const payload = await dlq.getPayload(req.params.id);
+      if (!payload) {
+        return res.status(404).json({ error: 'Payload not found' });
+      }
+      res.json(payload);
+    } catch (error) {
+      console.error('[System] DLQ get error:', error);
+      res.status(500).json({ error: 'Failed to get DLQ payload' });
+    }
+  });
+
+  /**
+   * POST /api/system/dlq/:id/retry
+   * Retry a DLQ payload
+   */
+  router.post('/dlq/:id/retry', async (req: Request, res: Response) => {
+    try {
+      const result = await dlq.retryPayload(req.params.id);
+      if (result.success) {
+        res.json(result);
+      } else {
+        res.status(400).json(result);
+      }
+    } catch (error) {
+      console.error('[System] DLQ retry error:', error);
+      res.status(500).json({ error: 'Failed to retry payload' });
+    }
+  });
+
+  /**
+   * POST /api/system/dlq/:id/abandon
+   * Abandon a DLQ payload
+   */
+  router.post('/dlq/:id/abandon', async (req: Request, res: Response) => {
+    try {
+      const reason = req.body.reason || 'Manually abandoned';
+      const abandonedBy = req.body.abandonedBy || 'api';
+      const success = await dlq.abandonPayload(req.params.id, reason, abandonedBy);
+      res.json({ success });
+    } catch (error) {
+      console.error('[System] DLQ abandon error:', error);
+      res.status(500).json({ error: 'Failed to abandon payload' });
+    }
+  });
+
+  /**
+   * POST /api/system/dlq/bulk-retry
+   * Bulk retry payloads by error type
+   */
+  router.post('/dlq/bulk-retry', async (req: Request, res: Response) => {
+    try {
+      const { errorType } = req.body;
+      if (!errorType) {
+        return res.status(400).json({ error: 'errorType is required' });
+      }
+      const result = await dlq.bulkRetryByErrorType(errorType);
+      res.json(result);
+    } catch (error) {
+      console.error('[System] DLQ bulk retry error:', error);
+      res.status(500).json({ error: 'Failed to bulk retry' });
+    }
+  });
+
+  // ============================================================
+  // INTEGRITY CHECK ENDPOINTS
+  // ============================================================
+
+  /**
+   * POST /api/system/integrity/run
+   * Run all integrity checks
+   */
+  router.post('/integrity/run', async (req: Request, res: Response) => {
+    try {
+      const triggeredBy = req.body.triggeredBy || 'api';
+      const result = await integrity.runAllChecks(triggeredBy);
+      res.json(result);
+    } catch (error) {
+      console.error('[System] Integrity run error:', error);
+      res.status(500).json({ error: 'Failed to run integrity checks' });
+    }
+  });
+
+  /**
+   * GET /api/system/integrity/runs
+   * Get recent integrity check runs
+   */
+  router.get('/integrity/runs', async (req: Request, res: Response) => {
+    try {
+      const limit = req.query.limit ? parseInt(req.query.limit as string) : 10;
+      const runs = await integrity.getRecentRuns(limit);
+      res.json(runs);
+    } catch (error) {
+      console.error('[System] Integrity runs error:', error);
+      res.status(500).json({ error: 'Failed to get integrity runs' });
+    }
+  });
+
+  /**
+   * GET /api/system/integrity/runs/:runId
+   * Get results for a specific integrity run
+   */
+  router.get('/integrity/runs/:runId', async (req: Request, res: Response) => {
+    try {
+      const results = await integrity.getRunResults(req.params.runId);
+      res.json(results);
+    } catch (error) {
+      console.error('[System] Integrity run results error:', error);
+      res.status(500).json({ error: 'Failed to get run results' });
+    }
+  });
+
+  // ============================================================
+  // AUTO-FIX ENDPOINTS
+  // ============================================================
+
+  /**
+   * GET /api/system/fix/routines
+   * Get available fix routines
+   */
+  router.get('/fix/routines', (_req: Request, res: Response) => {
+    try {
+      const routines = autoFix.getAvailableRoutines();
+      res.json(routines);
+    } catch (error) {
+      console.error('[System] Get routines error:', error);
+      res.status(500).json({ error: 'Failed to get routines' });
+    }
+  });
+
+  /**
+   * POST /api/system/fix/:routine
+   * Run a fix routine
+   */
+  router.post('/fix/:routine', async (req: Request, res: Response) => {
+    try {
+      const routineName = req.params.routine;
+      const dryRun = req.body.dryRun === true;
+      const triggeredBy = req.body.triggeredBy || 'api';
+
+      const result = await autoFix.runRoutine(routineName as any, triggeredBy, { dryRun });
+      res.json(result);
+    } catch (error) {
+      console.error('[System] Fix routine error:', error);
+      res.status(500).json({ error: 'Failed to run fix routine' });
+    }
+  });
+
+  /**
+   * GET /api/system/fix/runs
+   * Get recent fix runs
+   */
+  router.get('/fix/runs', async (req: Request, res: Response) => {
+    try {
+      const limit = req.query.limit ? parseInt(req.query.limit as string) : 20;
+      const runs = await autoFix.getRecentRuns(limit);
+      res.json(runs);
+    } catch (error) {
+      console.error('[System] Fix runs error:', error);
+      res.status(500).json({ error: 'Failed to get fix runs' });
+    }
+  });
+
+  // ============================================================
+  // ALERTS ENDPOINTS
+  // ============================================================
+
+  /**
+   * GET /api/system/alerts
+   * List alerts
+   *
+   * Query: `status`, `severity`, `type` (passed through unchanged),
+   * `limit` (default 50) and `offset` (default 0). Paging values that do
+   * not parse to a non-negative integer fall back to their defaults so NaN
+   * never reaches alerts.listAlerts.
+   */
+  router.get('/alerts', async (req: Request, res: Response) => {
+    try {
+      const requestedLimit = req.query.limit ? parseInt(req.query.limit as string, 10) : 50;
+      const requestedOffset = req.query.offset ? parseInt(req.query.offset as string, 10) : 0;
+
+      const options = {
+        status: req.query.status as any,
+        severity: req.query.severity as any,
+        type: req.query.type as string,
+        limit: Number.isNaN(requestedLimit) || requestedLimit < 0 ? 50 : requestedLimit,
+        offset: Number.isNaN(requestedOffset) || requestedOffset < 0 ? 0 : requestedOffset,
+      };
+
+      const result = await alerts.listAlerts(options);
+      res.json(result);
+    } catch (error) {
+      console.error('[System] Alerts list error:', error);
+      res.status(500).json({ error: 'Failed to list alerts' });
+    }
+  });
+
+  /**
+   * GET /api/system/alerts/active
+   * Responds with the result of alerts.getActiveAlerts(); 500 on failure.
+   */
+  router.get('/alerts/active', async (_req: Request, res: Response) => {
+    try {
+      res.json(await alerts.getActiveAlerts());
+    } catch (err) {
+      console.error('[System] Active alerts error:', err);
+      res.status(500).json({ error: 'Failed to get active alerts' });
+    }
+  });
+
+  /**
+   * GET /api/system/alerts/summary
+   * Responds with the result of alerts.getSummary(); 500 on failure.
+   */
+  router.get('/alerts/summary', async (_req: Request, res: Response) => {
+    try {
+      res.json(await alerts.getSummary());
+    } catch (err) {
+      console.error('[System] Alerts summary error:', err);
+      res.status(500).json({ error: 'Failed to get alerts summary' });
+    }
+  });
+
+  /**
+   * POST /api/system/alerts/:id/acknowledge
+   * Acknowledge an alert
+   *
+   * `:id` must parse as an integer; otherwise the request is rejected with
+   * 400 instead of passing NaN to alerts.acknowledgeAlert.
+   * Body: `acknowledgedBy` (optional, defaults to 'api').
+   */
+  router.post('/alerts/:id/acknowledge', async (req: Request, res: Response) => {
+    try {
+      const alertId = parseInt(req.params.id, 10);
+      if (Number.isNaN(alertId)) {
+        return res.status(400).json({ error: 'Invalid alert id' });
+      }
+      const acknowledgedBy = req.body.acknowledgedBy || 'api';
+      const success = await alerts.acknowledgeAlert(alertId, acknowledgedBy);
+      res.json({ success });
+    } catch (error) {
+      console.error('[System] Acknowledge alert error:', error);
+      res.status(500).json({ error: 'Failed to acknowledge alert' });
+    }
+  });
+
+  /**
+   * POST /api/system/alerts/:id/resolve
+   * Resolve an alert
+   *
+   * `:id` must parse as an integer; otherwise the request is rejected with
+   * 400 instead of passing NaN to alerts.resolveAlert.
+   * Body: `resolvedBy` (optional, defaults to 'api').
+   */
+  router.post('/alerts/:id/resolve', async (req: Request, res: Response) => {
+    try {
+      const alertId = parseInt(req.params.id, 10);
+      if (Number.isNaN(alertId)) {
+        return res.status(400).json({ error: 'Invalid alert id' });
+      }
+      const resolvedBy = req.body.resolvedBy || 'api';
+      const success = await alerts.resolveAlert(alertId, resolvedBy);
+      res.json({ success });
+    } catch (error) {
+      console.error('[System] Resolve alert error:', error);
+      res.status(500).json({ error: 'Failed to resolve alert' });
+    }
+  });
+
+  /**
+   * POST /api/system/alerts/bulk-acknowledge
+   * Acknowledges several alerts in one call.
+   *
+   * Body: `ids` (required array) and `acknowledgedBy` (optional, defaults
+   * to 'api'). Responds 400 when `ids` is not an array, otherwise with
+   * `{ acknowledged }` holding the value alerts.bulkAcknowledge resolves to.
+   */
+  router.post('/alerts/bulk-acknowledge', async (req: Request, res: Response) => {
+    try {
+      const ids = req.body.ids;
+      const actor = req.body.acknowledgedBy || 'api';
+      if (!Array.isArray(ids)) {
+        return res.status(400).json({ error: 'ids array is required' });
+      }
+      const acknowledged = await alerts.bulkAcknowledge(ids, actor);
+      res.json({ acknowledged });
+    } catch (err) {
+      console.error('[System] Bulk acknowledge error:', err);
+      res.status(500).json({ error: 'Failed to bulk acknowledge' });
+    }
+  });
+
+  // ============================================================
+  // METRICS ENDPOINTS
+  // ============================================================
+
+  /**
+   * GET /api/system/metrics
+   * Responds with the result of metrics.getAllMetrics(); 500 on failure.
+   */
+  router.get('/metrics', async (_req: Request, res: Response) => {
+    try {
+      res.json(await metrics.getAllMetrics());
+    } catch (err) {
+      console.error('[System] Metrics error:', err);
+      res.status(500).json({ error: 'Failed to get metrics' });
+    }
+  });
+
+  /**
+   * GET /api/system/metrics/:name
+   * Looks up one metric by name; 404 when metrics.getMetric yields a falsy
+   * value, 500 when the lookup throws.
+   */
+  router.get('/metrics/:name', async (req: Request, res: Response) => {
+    try {
+      const found = await metrics.getMetric(req.params.name);
+      if (found) {
+        res.json(found);
+      } else {
+        res.status(404).json({ error: 'Metric not found' });
+      }
+    } catch (err) {
+      console.error('[System] Metric error:', err);
+      res.status(500).json({ error: 'Failed to get metric' });
+    }
+  });
+
+  /**
+   * GET /api/system/metrics/:name/history
+   * Get metric time series
+   *
+   * Query: `hours` (optional, default 24). A value that does not parse to a
+   * non-negative integer falls back to the default so NaN never reaches
+   * metrics.getMetricHistory.
+   */
+  router.get('/metrics/:name/history', async (req: Request, res: Response) => {
+    try {
+      const requested = req.query.hours ? parseInt(req.query.hours as string, 10) : 24;
+      const hours = Number.isNaN(requested) || requested < 0 ? 24 : requested;
+      const history = await metrics.getMetricHistory(req.params.name, hours);
+      res.json(history);
+    } catch (error) {
+      console.error('[System] Metric history error:', error);
+      res.status(500).json({ error: 'Failed to get metric history' });
+    }
+  });
+
+  /**
+   * GET /api/system/errors
+   * Responds with the result of metrics.getErrorSummary(); 500 on failure.
+   */
+  router.get('/errors', async (_req: Request, res: Response) => {
+    try {
+      res.json(await metrics.getErrorSummary());
+    } catch (err) {
+      console.error('[System] Error summary error:', err);
+      res.status(500).json({ error: 'Failed to get error summary' });
+    }
+  });
+
+  /**
+   * GET /api/system/errors/recent
+   * Get recent errors
+   *
+   * Query: `limit` (optional, default 50) and `type` (optional, passed
+   * through unchanged). A `limit` that does not parse to a non-negative
+   * integer falls back to the default so NaN never reaches
+   * metrics.getRecentErrors.
+   */
+  router.get('/errors/recent', async (req: Request, res: Response) => {
+    try {
+      const requested = req.query.limit ? parseInt(req.query.limit as string, 10) : 50;
+      const limit = Number.isNaN(requested) || requested < 0 ? 50 : requested;
+      const errorType = req.query.type as string;
+      const errors = await metrics.getRecentErrors(limit, errorType);
+      res.json(errors);
+    } catch (error) {
+      console.error('[System] Recent errors error:', error);
+      res.status(500).json({ error: 'Failed to get recent errors' });
+    }
+  });
+
+  /**
+   * POST /api/system/errors/acknowledge
+   * Acknowledges a batch of recorded errors.
+   *
+   * Body: `ids` (required array) and `acknowledgedBy` (optional, defaults
+   * to 'api'). Responds 400 when `ids` is not an array, otherwise with
+   * `{ acknowledged }` holding the value metrics.acknowledgeErrors resolves to.
+   */
+  router.post('/errors/acknowledge', async (req: Request, res: Response) => {
+    try {
+      const ids = req.body.ids;
+      const actor = req.body.acknowledgedBy || 'api';
+      if (!Array.isArray(ids)) {
+        return res.status(400).json({ error: 'ids array is required' });
+      }
+      const acknowledged = await metrics.acknowledgeErrors(ids, actor);
+      res.json({ acknowledged });
+    } catch (err) {
+      console.error('[System] Acknowledge errors error:', err);
+      res.status(500).json({ error: 'Failed to acknowledge errors' });
+    }
+  });
+
+  return router;
+}
+
+/**
+ * Build a standalone router exposing metrics in Prometheus text format.
+ *
+ * @param pool - Database pool handed to the MetricsService backing the route
+ * @returns Router with a single `GET /` handler
+ */
+export function createPrometheusRouter(pool: Pool): Router {
+  const prometheusRouter = Router();
+  const metricsService = new MetricsService(pool);
+
+  /**
+   * GET /metrics
+   * Sends the text produced by getPrometheusMetrics(); on failure responds
+   * 500 with a comment line.
+   */
+  prometheusRouter.get('/', async (_req: Request, res: Response) => {
+    try {
+      const body = await metricsService.getPrometheusMetrics();
+      res.set('Content-Type', 'text/plain; version=0.0.4').send(body);
+    } catch (err) {
+      console.error('[Prometheus] Metrics error:', err);
+      res.status(500).send('# Error generating metrics');
+    }
+  });
+
+  return prometheusRouter;
+}
--- a/backend/src/_deprecated/system/services/sync-orchestrator.ts
+++ b/backend/src/_deprecated/system/services/sync-orchestrator.ts
@@ -17,6 +17,7 @@ import { Pool } from 'pg';
 import { MetricsService } from './metrics';
 import { DLQService } from './dlq';
 import { AlertService } from './alerts';
+import { DutchieNormalizer, hydrateToCanonical } from '../../hydration';

 export type OrchestratorStatus = 'RUNNING' | 'SLEEPING' | 'LOCKED' | 'PAUSED' | 'ERROR';

@@ -90,6 +91,7 @@ export class SyncOrchestrator {
  private workerId: string;
  private isRunning: boolean = false;
  private pollInterval: NodeJS.Timeout | null = null;
+  private normalizer: DutchieNormalizer;

  constructor(
    pool: Pool,
@@ -103,6 +105,7 @@ export class SyncOrchestrator {
    this.dlq = dlq;
    this.alerts = alerts;
    this.workerId = workerId || `orchestrator-${process.env.HOSTNAME || process.pid}`;
+    this.normalizer = new DutchieNormalizer();
  }

  /**
@@ -503,7 +506,7 @@ export class SyncOrchestrator {
  }

  /**
-   * Process a single payload
+   * Process a single payload - now uses canonical tables via hydration pipeline
   */
  private async processPayload(
    payload: any,
@@ -518,25 +521,52 @@ export class SyncOrchestrator {

    // Parse products from raw JSON
    const rawData = payload.raw_json;
-    const products = this.extractProducts(rawData);

-    if (!products || products.length === 0) {
+    // Validate the payload using normalizer
+    const validation = this.normalizer.validatePayload(rawData);
+    if (!validation.valid) {
      // Mark as processed with warning
      await this.pool.query(`
        UPDATE raw_payloads
        SET processed = TRUE,
            normalized_at = NOW(),
-            hydration_error = 'No products found in payload'
+            hydration_error = $2
+        WHERE id = $1
+      `, [payload.id, validation.errors.join('; ')]);
+
+      return { productsUpserted: 0, productsInserted: 0, productsUpdated: 0, snapshotsCreated: 0 };
+    }
+
+    // Normalize the payload using the hydration normalizer
+    const normResult = this.normalizer.normalize(rawData);
+
+    if (normResult.products.length === 0) {
+      // Mark as processed with warning
+      await this.pool.query(`
+        UPDATE raw_payloads
+        SET processed = TRUE,
+            normalized_at = NOW(),
+            hydration_error = 'No products found in payload after normalization'
        WHERE id = $1
      `, [payload.id]);

      return { productsUpserted: 0, productsInserted: 0, productsUpdated: 0, snapshotsCreated: 0 };
    }

-    // Upsert products to canonical table
-    const result = await this.upsertProducts(payload.dispensary_id, products);
+    // Get or create crawl_run for this payload
+    const crawlRunId = await this.getOrCreateCrawlRun(payload.dispensary_id, payload.id);

-    // Create snapshots
+    // Use canonical hydration to write to store_products, product_variants, etc.
+    const hydrateResult = await hydrateToCanonical(
+      this.pool,
+      payload.dispensary_id,
+      normResult,
+      crawlRunId
+    );
+
+    // Also write to legacy tables for backwards compatibility
+    const products = this.extractProducts(rawData);
+    await this.upsertProducts(payload.dispensary_id, products);
    const snapshotsCreated = await this.createSnapshots(payload.dispensary_id, products, payload.id);

    // Calculate latency
@@ -551,14 +581,32 @@ export class SyncOrchestrator {
      WHERE id = $1
    `, [payload.id]);

+    // Return combined metrics (canonical + legacy)
    return {
-      productsUpserted: result.upserted,
-      productsInserted: result.inserted,
-      productsUpdated: result.updated,
-      snapshotsCreated,
+      productsUpserted: hydrateResult.productsUpserted,
+      productsInserted: hydrateResult.productsNew,
+      productsUpdated: hydrateResult.productsUpdated,
+      snapshotsCreated: hydrateResult.snapshotsCreated + snapshotsCreated,
    };
  }

+  /**
+   * Insert a crawl_runs row (provider 'dutchie', status 'running',
+   * trigger_type 'hydration') that records the payload id in its metadata.
+   *
+   * Despite the name, this always inserts a new row; it never looks up an
+   * existing one.
+   *
+   * @returns the new row's id, or null when the insert fails (the failure
+   *          is logged as a warning and swallowed)
+   */
+  private async getOrCreateCrawlRun(dispensaryId: number, payloadId: string): Promise<number | null> {
+    try {
+      const insertSql = `
+        INSERT INTO crawl_runs (dispensary_id, provider, started_at, status, trigger_type, metadata)
+        VALUES ($1, 'dutchie', NOW(), 'running', 'hydration', jsonb_build_object('payload_id', $2))
+        RETURNING id
+      `;
+      const { rows } = await this.pool.query(insertSql, [dispensaryId, payloadId]);
+      return rows[0].id;
+    } catch (err) {
+      console.warn('[SyncOrchestrator] Could not create crawl_run:', err);
+      return null;
+    }
+  }
+
  /**
   * Extract products from raw payload
   */
--- a/backend/src/_deprecated/utils/HomepageValidator.ts
+++ b/backend/src/_deprecated/utils/HomepageValidator.ts
--- a/backend/src/_deprecated/utils/age-gate-playwright.ts
+++ b/backend/src/_deprecated/utils/age-gate-playwright.ts
--- a/backend/src/_deprecated/utils/stealthBrowser.ts
+++ b/backend/src/_deprecated/utils/stealthBrowser.ts
--- a/backend/src/auth/middleware.ts
+++ b/backend/src/auth/middleware.ts
@@ -29,6 +29,12 @@ const TRUSTED_ORIGINS = [
  'http://localhost:5173',
 ];

+// Pattern-based trusted origins (wildcards).
+// The subdomain part is limited to hostname label characters so the wildcard
+// cannot match '/', '@', '?', '#' or whitespace inside the tested string.
+const TRUSTED_ORIGIN_PATTERNS = [
+  /^https:\/\/(?:[A-Za-z0-9_-]+\.)+cannabrands\.app$/,  // *.cannabrands.app
+  /^https:\/\/(?:[A-Za-z0-9_-]+\.)+cannaiq\.co$/,       // *.cannaiq.co
+];
+
 // Trusted IPs for internal pod-to-pod communication
 const TRUSTED_IPS = [
  '127.0.0.1',
@@ -42,8 +48,16 @@ const TRUSTED_IPS = [
 function isTrustedRequest(req: Request): boolean {
  // Check origin header
  const origin = req.headers.origin;
-  if (origin && TRUSTED_ORIGINS.includes(origin)) {
-    return true;
+  if (origin) {
+    if (TRUSTED_ORIGINS.includes(origin)) {
+      return true;
+    }
+    // Check pattern-based origins (wildcards like *.cannabrands.app)
+    for (const pattern of TRUSTED_ORIGIN_PATTERNS) {
+      if (pattern.test(origin)) {
+        return true;
+      }
+    }
  }

  // Check referer header (for same-origin requests without CORS)
@@ -54,6 +68,18 @@ function isTrustedRequest(req: Request): boolean {
        return true;
      }
    }
+    // Check pattern-based referers
+    try {
+      const refererUrl = new URL(referer);
+      const refererOrigin = refererUrl.origin;
+      for (const pattern of TRUSTED_ORIGIN_PATTERNS) {
+        if (pattern.test(refererOrigin)) {
+          return true;
+        }
+      }
+    } catch {
+      // Invalid referer URL, skip
+    }
  }

  // Check IP for internal requests (pod-to-pod, localhost)
@@ -127,7 +153,53 @@ export async function authenticateUser(email: string, password: string): Promise
 }

 export async function authMiddleware(req: AuthRequest, res: Response, next: NextFunction) {
-  // Allow trusted origins/IPs to bypass auth (internal services, same-origin)
+  const authHeader = req.headers.authorization;
+
+  // If a Bearer token is provided, always try to use it first (logged-in user)
+  if (authHeader && authHeader.startsWith('Bearer ')) {
+    const token = authHeader.substring(7);
+
+    // Try JWT first
+    const jwtUser = verifyToken(token);
+
+    if (jwtUser) {
+      req.user = jwtUser;
+      return next();
+    }
+
+    // If JWT fails, try API token
+    let apiToken: any;
+    try {
+      const result = await pool.query(`
+        SELECT id, name, rate_limit, active, expires_at, allowed_endpoints
+        FROM api_tokens
+        WHERE token = $1
+      `, [token]);
+      apiToken = result.rows[0];
+    } catch (err) {
+      // A failed lookup is a server-side fault, not evidence that the token
+      // is bad, so report it as 500 rather than 401.
+      console.error('API token lookup error:', err);
+      return res.status(500).json({ error: 'Authentication failed' });
+    }
+
+    if (!apiToken) {
+      // Token provided but invalid
+      return res.status(401).json({ error: 'Invalid token' });
+    }
+    if (!apiToken.active) {
+      return res.status(401).json({ error: 'API token is inactive' });
+    }
+    if (apiToken.expires_at && new Date(apiToken.expires_at) < new Date()) {
+      return res.status(401).json({ error: 'API token has expired' });
+    }
+
+    // Enforce the token's endpoint allow-list. An empty or missing list
+    // means the token is not restricted to particular endpoints.
+    if (apiToken.allowed_endpoints && apiToken.allowed_endpoints.length > 0) {
+      const isAllowed = apiToken.allowed_endpoints.some((pattern: string) => {
+        // Simple wildcard matching
+        const regex = new RegExp('^' + pattern.replace('*', '.*') + '$');
+        return regex.test(req.path);
+      });
+
+      if (!isAllowed) {
+        return res.status(403).json({ error: 'Endpoint not allowed for this token' });
+      }
+    }
+
+    req.user = {
+      id: 0,
+      email: `api:${apiToken.name}`,
+      role: 'api_token'
+    };
+    req.apiToken = apiToken;
+    return next();
+  }
+
+  // No token provided - check trusted origins for API access (WordPress, etc.)
  if (isTrustedRequest(req)) {
    req.user = {
      id: 0,
@@ -137,80 +209,10 @@ export async function authMiddleware(req: AuthRequest, res: Response, next: Next
    return next();
  }

-  const authHeader = req.headers.authorization;
-
-  if (!authHeader || !authHeader.startsWith('Bearer ')) {
-    return res.status(401).json({ error: 'No token provided' });
-  }
-
-  const token = authHeader.substring(7);
-
-  // Try JWT first
-  const jwtUser = verifyToken(token);
-
-  if (jwtUser) {
-    req.user = jwtUser;
-    return next();
-  }
-
-  // If JWT fails, try API token
-  try {
-    const result = await pool.query(`
-      SELECT id, name, rate_limit, active, expires_at, allowed_endpoints
-      FROM api_tokens
-      WHERE token = $1
-    `, [token]);
-
-    if (result.rows.length === 0) {
-      return res.status(401).json({ error: 'Invalid token' });
-    }
-
-    const apiToken = result.rows[0];
-
-    // Check if token is active
-    if (!apiToken.active) {
-      return res.status(401).json({ error: 'Token is disabled' });
-    }
-
-    // Check if token is expired
-    if (apiToken.expires_at && new Date(apiToken.expires_at) < new Date()) {
-      return res.status(401).json({ error: 'Token has expired' });
-    }
-
-    // Check allowed endpoints
-    if (apiToken.allowed_endpoints && apiToken.allowed_endpoints.length > 0) {
-      const isAllowed = apiToken.allowed_endpoints.some((pattern: string) => {
-        // Simple wildcard matching
-        const regex = new RegExp('^' + pattern.replace('*', '.*') + '$');
-        return regex.test(req.path);
-      });
-
-      if (!isAllowed) {
-        return res.status(403).json({ error: 'Endpoint not allowed for this token' });
-      }
-    }
-
-    // Set API token on request for tracking
-    req.apiToken = {
-      id: apiToken.id,
-      name: apiToken.name,
-      rate_limit: apiToken.rate_limit
-    };
-
-    // Set a generic user for compatibility with existing code
-    req.user = {
-      id: apiToken.id,
-      email: `api-token-${apiToken.id}@system`,
-      role: 'api'
-    };
-
-    next();
-  } catch (error) {
-    console.error('Error verifying API token:', error);
-    return res.status(500).json({ error: 'Authentication failed' });
-  }
+  return res.status(401).json({ error: 'No token provided' });
 }

+
 /**
 * Require specific role(s) to access endpoint.
 *
--- a/backend/src/db/auto-migrate.ts
+++ b/backend/src/db/auto-migrate.ts
@@ -0,0 +1,141 @@
+/**
+ * Auto-Migration System
+ *
+ * Runs SQL migration files from the migrations/ folder automatically on server startup.
+ * Uses a schema_migrations table to track which migrations have been applied.
+ *
+ * Safe to run multiple times - only applies new migrations.
+ */
+
+import { Pool } from 'pg';
+import fs from 'fs';
+import path from 'path';
+
+const MIGRATIONS_DIR = path.join(__dirname, '../../migrations');
+
+/**
+ * Create the schema_migrations tracking table (id, unique name, applied_at)
+ * if it is not already present.
+ */
+async function ensureMigrationsTable(pool: Pool): Promise<void> {
+  const createTrackingTable = `
+    CREATE TABLE IF NOT EXISTS schema_migrations (
+      id SERIAL PRIMARY KEY,
+      name VARCHAR(255) UNIQUE NOT NULL,
+      applied_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
+    )
+  `;
+  await pool.query(createTrackingTable);
+}
+
+/**
+ * Read the `name` column of every schema_migrations row into a Set.
+ */
+async function getAppliedMigrations(pool: Pool): Promise<Set<string>> {
+  const { rows } = await pool.query('SELECT name FROM schema_migrations');
+  const appliedNames = new Set<string>();
+  for (const row of rows) {
+    appliedNames.add(row.name);
+  }
+  return appliedNames;
+}
+
+/**
+ * List the `.sql` file names in MIGRATIONS_DIR in default string sort order
+ * (so 001_ sorts before 002_). Returns an empty list, after logging, when
+ * the directory does not exist.
+ */
+function getMigrationFiles(): string[] {
+  if (fs.existsSync(MIGRATIONS_DIR)) {
+    const sqlFiles = fs.readdirSync(MIGRATIONS_DIR).filter(entry => entry.endsWith('.sql'));
+    sqlFiles.sort();
+    return sqlFiles;
+  }
+
+  console.log('[AutoMigrate] No migrations directory found');
+  return [];
+}
+
+/**
+ * Run a single migration file inside one transaction.
+ *
+ * Executes the file's SQL, records the filename in schema_migrations, and
+ * commits. On any failure the transaction is rolled back and the original
+ * error is rethrown.
+ *
+ * @param pool - Database connection pool
+ * @param filename - File name (not path) inside MIGRATIONS_DIR
+ * @throws the error raised by the migration SQL or the bookkeeping insert
+ */
+async function runMigration(pool: Pool, filename: string): Promise<void> {
+  const filepath = path.join(MIGRATIONS_DIR, filename);
+  const sql = fs.readFileSync(filepath, 'utf8');
+
+  const client = await pool.connect();
+  try {
+    await client.query('BEGIN');
+
+    // Run the migration SQL
+    await client.query(sql);
+
+    // Record that this migration was applied
+    await client.query(
+      'INSERT INTO schema_migrations (name) VALUES ($1) ON CONFLICT (name) DO NOTHING',
+      [filename]
+    );
+
+    await client.query('COMMIT');
+    console.log(`[AutoMigrate] ✓ Applied: ${filename}`);
+  } catch (error: any) {
+    // A failing ROLLBACK (e.g. dropped connection) must not replace the
+    // error that actually broke the migration.
+    try {
+      await client.query('ROLLBACK');
+    } catch (rollbackError: any) {
+      console.error(`[AutoMigrate] ✗ Rollback failed for ${filename}:`, rollbackError.message);
+    }
+    console.error(`[AutoMigrate] ✗ Failed: ${filename}`);
+    throw error;
+  } finally {
+    client.release();
+  }
+}
+
+/**
+ * Apply every migration file that is not yet recorded in schema_migrations.
+ *
+ * Failures are logged and swallowed so that server startup continues; the
+ * loop stops at the first failing migration.
+ *
+ * @param pool - Database connection pool
+ * @returns Number of migrations applied (0 when nothing was pending), or -1
+ *          when any step failed
+ */
+export async function runAutoMigrations(pool: Pool): Promise<number> {
+  console.log('[AutoMigrate] Checking for pending migrations...');
+
+  try {
+    await ensureMigrationsTable(pool);
+
+    // Anything on disk that the tracking table does not list is pending
+    const alreadyApplied = await getAppliedMigrations(pool);
+    const pending = getMigrationFiles().filter(file => !alreadyApplied.has(file));
+    const total = pending.length;
+
+    if (total === 0) {
+      console.log('[AutoMigrate] No pending migrations');
+      return 0;
+    }
+
+    console.log(`[AutoMigrate] Found ${total} pending migrations`);
+
+    // Sequential on purpose: migrations run in file order
+    for (let i = 0; i < total; i++) {
+      await runMigration(pool, pending[i]);
+    }
+
+    console.log(`[AutoMigrate] Successfully applied ${total} migrations`);
+    return total;
+
+  } catch (error: any) {
+    console.error('[AutoMigrate] Migration failed:', error.message);
+    // Don't crash the server - log and continue
+    // The specific failing migration will have been rolled back
+    return -1;
+  }
+}
+
+/**
+ * Report which migration files on disk are recorded as applied and which
+ * are still pending. Runs no migrations, but does create the tracking table
+ * if it is missing.
+ */
+export async function checkMigrationStatus(pool: Pool): Promise<{
+  applied: string[];
+  pending: string[];
+}> {
+  await ensureMigrationsTable(pool);
+
+  const appliedNames = await getAppliedMigrations(pool);
+  const status = { applied: [] as string[], pending: [] as string[] };
+
+  for (const file of getMigrationFiles()) {
+    (appliedNames.has(file) ? status.applied : status.pending).push(file);
+  }
+
+  return status;
+}
--- a/backend/src/db/migrate.ts
+++ b/backend/src/db/migrate.ts
@@ -372,6 +372,51 @@ async function runMigrations() {
      ON CONFLICT (key) DO NOTHING;
    `);

+    // SEO Pages table
+    await client.query(`
+      CREATE TABLE IF NOT EXISTS seo_pages (
+        id SERIAL PRIMARY KEY,
+        type VARCHAR(50) NOT NULL,
+        slug VARCHAR(255) NOT NULL UNIQUE,
+        page_key VARCHAR(255) NOT NULL,
+        primary_keyword VARCHAR(255),
+        status VARCHAR(50) DEFAULT 'pending_generation',
+        data_source VARCHAR(100),
+        meta_title VARCHAR(255),
+        meta_description TEXT,
+        last_generated_at TIMESTAMPTZ,
+        last_reviewed_at TIMESTAMPTZ,
+        created_at TIMESTAMPTZ DEFAULT NOW(),
+        updated_at TIMESTAMPTZ DEFAULT NOW()
+      );
+      CREATE INDEX IF NOT EXISTS idx_seo_pages_type ON seo_pages(type);
+      CREATE INDEX IF NOT EXISTS idx_seo_pages_status ON seo_pages(status);
+      CREATE INDEX IF NOT EXISTS idx_seo_pages_slug ON seo_pages(slug);
+    `);
+
+    // SEO Page Contents table
+    await client.query(`
+      CREATE TABLE IF NOT EXISTS seo_page_contents (
+        id SERIAL PRIMARY KEY,
+        page_id INTEGER NOT NULL REFERENCES seo_pages(id) ON DELETE CASCADE,
+        version INTEGER DEFAULT 1,
+        blocks JSONB NOT NULL DEFAULT '[]',
+        meta JSONB NOT NULL DEFAULT '{}',
+        meta_title VARCHAR(255),
+        meta_description TEXT,
+        h1 VARCHAR(255),
+        canonical_url TEXT,
+        og_title VARCHAR(255),
+        og_description TEXT,
+        og_image_url TEXT,
+        generated_by VARCHAR(50) DEFAULT 'claude',
+        created_at TIMESTAMPTZ DEFAULT NOW(),
+        updated_at TIMESTAMPTZ DEFAULT NOW(),
+        UNIQUE(page_id, version)
+      );
+      CREATE INDEX IF NOT EXISTS idx_seo_page_contents_page ON seo_page_contents(page_id);
+    `);
+
    await client.query('COMMIT');
    console.log('✅ Migrations completed successfully');
  } catch (error) {
--- a/backend/src/db/run-migrations.ts
+++ b/backend/src/db/run-migrations.ts
@@ -0,0 +1,200 @@
+#!/usr/bin/env npx tsx
+/**
+ * Database Migration Runner
+ *
+ * Runs SQL migrations from backend/migrations/*.sql in order.
+ * Tracks applied migrations in schema_migrations table.
+ *
+ * Usage:
+ *   npx tsx src/db/run-migrations.ts
+ *
+ * Environment:
+ *   DATABASE_URL or CANNAIQ_DB_* variables
+ */
+
+import { Pool } from 'pg';
+import * as fs from 'fs/promises';
+import * as path from 'path';
+import dotenv from 'dotenv';
+
+dotenv.config();
+
+/**
+ * Resolve the PostgreSQL connection string.
+ *
+ * Precedence: DATABASE_URL, then CANNAIQ_DB_URL, then a URI assembled from
+ * the individual CANNAIQ_DB_* variables (with local-development defaults).
+ * User and password are percent-encoded so that characters such as '@',
+ * ':' or '/' in a credential cannot corrupt the assembled URI.
+ */
+function getConnectionString(): string {
+  if (process.env.DATABASE_URL) {
+    return process.env.DATABASE_URL;
+  }
+  if (process.env.CANNAIQ_DB_URL) {
+    return process.env.CANNAIQ_DB_URL;
+  }
+
+  const host = process.env.CANNAIQ_DB_HOST || 'localhost';
+  const port = process.env.CANNAIQ_DB_PORT || '54320';
+  const name = process.env.CANNAIQ_DB_NAME || 'dutchie_menus';
+  const user = encodeURIComponent(process.env.CANNAIQ_DB_USER || 'dutchie');
+  const pass = encodeURIComponent(process.env.CANNAIQ_DB_PASS || 'dutchie_local_pass');
+
+  return `postgresql://${user}:${pass}@${host}:${port}/${name}`;
+}
+
+/** One migration file found on disk. */
+interface MigrationFile {
+  // File name including the numeric prefix and .sql extension
+  filename: string;
+  // Numeric prefix parsed from the file name; used for ordering
+  number: number;
+  // migrationsDir joined with filename
+  path: string;
+}
+
+/**
+ * Collect the migration files in `migrationsDir`, ordered by numeric prefix.
+ *
+ * Only names ending in `.sql` that start with digits followed by an
+ * underscore (e.g. "005_api_tokens.sql") are kept; everything else is
+ * ignored.
+ */
+async function getMigrationFiles(migrationsDir: string): Promise<MigrationFile[]> {
+  const migrations: MigrationFile[] = [];
+
+  for (const filename of await fs.readdir(migrationsDir)) {
+    if (!filename.endsWith('.sql')) continue;
+
+    // Extract number from filename like "005_api_tokens.sql" or "073_proxy_timezone.sql"
+    const prefix = filename.match(/^(\d+)_/);
+    if (!prefix) continue;
+
+    migrations.push({
+      filename,
+      number: parseInt(prefix[1], 10),
+      path: path.join(migrationsDir, filename),
+    });
+  }
+
+  migrations.sort((left, right) => left.number - right.number);
+  return migrations;
+}
+
+/**
+ * Make sure schema_migrations can be tracked by filename.
+ *
+ * If no PRIMARY KEY constraint is found for a table named
+ * schema_migrations, the table is created with `filename` as its primary
+ * key. Otherwise a `filename` column is added when missing and backfilled
+ * from `version` and `name`.
+ *
+ * NOTE(review): the backfill assumes `version` and `name` columns exist.
+ * backend/src/db/auto-migrate.ts creates schema_migrations as
+ * (id, name, applied_at) with no `version` column, so the UPDATE below
+ * looks like it would fail against a table created by that module -
+ * confirm which runner owns this table.
+ */
+async function ensureMigrationsTable(pool: Pool): Promise<void> {
+  // Migrate to filename-based tracking (handles duplicate version numbers)
+  // Check if old version-based PK exists
+  // NOTE(review): the lookup is not schema-qualified, and a table that
+  // exists without any primary key also yields zero rows - verify.
+  const pkCheck = await pool.query(`
+    SELECT constraint_name FROM information_schema.table_constraints
+    WHERE table_name = 'schema_migrations' AND constraint_type = 'PRIMARY KEY'
+  `);
+
+  if (pkCheck.rows.length === 0) {
+    // Table doesn't exist, create with filename as PK
+    await pool.query(`
+      CREATE TABLE IF NOT EXISTS schema_migrations (
+        filename VARCHAR(255) NOT NULL PRIMARY KEY,
+        version VARCHAR(10),
+        name VARCHAR(255),
+        applied_at TIMESTAMPTZ DEFAULT NOW()
+      )
+    `);
+  } else {
+    // Table exists - add filename column if missing
+    // (added as a plain nullable column; no unique constraint is created here)
+    await pool.query(`
+      ALTER TABLE schema_migrations ADD COLUMN IF NOT EXISTS filename VARCHAR(255)
+    `);
+    // Populate filename from version+name for existing rows
+    await pool.query(`
+      UPDATE schema_migrations SET filename = version || '_' || name || '.sql'
+      WHERE filename IS NULL
+    `);
+  }
+}
+
+/**
+ * Return the set of migration filenames already recorded. Rows without a
+ * `filename` value are reconstructed as `<version>_<name>.sql`.
+ */
+async function getAppliedMigrations(pool: Pool): Promise<Set<string>> {
+  // Try filename first, fall back to version_name combo
+  const { rows } = await pool.query(`
+    SELECT COALESCE(filename, version || '_' || name || '.sql') as filename
+    FROM schema_migrations
+  `);
+
+  const appliedFilenames = new Set<string>();
+  for (const row of rows) {
+    appliedFilenames.add(row.filename);
+  }
+  return appliedFilenames;
+}
+
+/**
+ * Apply one migration inside a single transaction and record it.
+ *
+ * Runs the file's SQL, inserts (filename, version, name) into
+ * schema_migrations, and commits. On failure the transaction is rolled
+ * back and the original error is rethrown.
+ *
+ * @param pool - Database connection pool
+ * @param migration - File descriptor produced by getMigrationFiles
+ * @throws the error raised by the migration SQL or the bookkeeping insert
+ */
+async function applyMigration(pool: Pool, migration: MigrationFile): Promise<void> {
+  const sql = await fs.readFile(migration.path, 'utf-8');
+
+  // Extract version and name from filename like "005_api_tokens.sql"
+  const version = String(migration.number).padStart(3, '0');
+  const name = migration.filename.replace(/^\d+_/, '').replace(/\.sql$/, '');
+
+  const client = await pool.connect();
+  try {
+    await client.query('BEGIN');
+
+    // Run the migration SQL
+    await client.query(sql);
+
+    // Record that it was applied - use INSERT with ON CONFLICT for safety
+    await client.query(`
+      INSERT INTO schema_migrations (filename, version, name)
+      VALUES ($1, $2, $3)
+      ON CONFLICT DO NOTHING
+    `, [migration.filename, version, name]);
+
+    await client.query('COMMIT');
+  } catch (error) {
+    // A failing ROLLBACK (e.g. dropped connection) must not replace the
+    // error that actually broke the migration.
+    try {
+      await client.query('ROLLBACK');
+    } catch (rollbackError: any) {
+      console.error(`Rollback failed for ${migration.filename}:`, rollbackError.message);
+    }
+    throw error;
+  } finally {
+    client.release();
+  }
+}
+
+/**
+ * CLI entry point: apply every pending migration in order and report
+ * progress on stdout.
+ *
+ * The pool is closed exactly once, in `finally`. Failures set
+ * `process.exitCode = 1` instead of calling `process.exit`, so that the
+ * cleanup still runs and the process then exits with a non-zero status.
+ */
+async function main() {
+  const pool = new Pool({ connectionString: getConnectionString() });
+
+  // Migrations directory relative to this file
+  const migrationsDir = path.resolve(__dirname, '../../migrations');
+
+  console.log('╔════════════════════════════════════════════════════════════╗');
+  console.log('║           DATABASE MIGRATION RUNNER                        ║');
+  console.log('╚════════════════════════════════════════════════════════════╝');
+  console.log(`Migrations dir: ${migrationsDir}`);
+  console.log('');
+
+  try {
+    // Ensure tracking table exists
+    await ensureMigrationsTable(pool);
+
+    // Get all migration files
+    const allMigrations = await getMigrationFiles(migrationsDir);
+    console.log(`Found ${allMigrations.length} migration files`);
+
+    // Get already-applied migrations
+    const applied = await getAppliedMigrations(pool);
+    console.log(`Already applied: ${applied.size} migrations`);
+    console.log('');
+
+    // Find pending migrations (compare by filename)
+    const pending = allMigrations.filter(m => !applied.has(m.filename));
+
+    if (pending.length === 0) {
+      console.log('✅ No pending migrations. Database is up to date.');
+      // The pool is closed by `finally`; ending it here as well would make
+      // the second pool.end() reject.
+      return;
+    }
+
+    console.log(`Pending migrations: ${pending.length}`);
+    console.log('─'.repeat(60));
+
+    // Apply each pending migration
+    for (const migration of pending) {
+      process.stdout.write(`  ${migration.filename}... `);
+      try {
+        await applyMigration(pool, migration);
+        console.log('✅');
+      } catch (error: any) {
+        console.log('❌');
+        console.error(`\nError applying ${migration.filename}:`);
+        console.error(error.message);
+        // Stop at the first failure; later migrations may depend on it
+        process.exitCode = 1;
+        return;
+      }
+    }
+
+    console.log('');
+    console.log('═'.repeat(60));
+    console.log(`✅ Applied ${pending.length} migrations successfully`);
+
+  } catch (error: any) {
+    console.error('Migration runner failed:', error.message);
+    process.exitCode = 1;
+  } finally {
+    await pool.end();
+  }
+}
+
+main();
--- a/backend/src/discovery/discovery-crawler.ts
+++ b/backend/src/discovery/discovery-crawler.ts
@@ -3,14 +3,23 @@
 *
 * Main orchestrator for the Dutchie store discovery pipeline.
 *
- * Flow:
- * 1. Discover cities from Dutchie (or use seeded cities)
- * 2. For each city, discover store locations
- * 3. Upsert all data to discovery tables
- * 4. Admin verifies locations manually
- * 5. Verified locations are promoted to canonical dispensaries
+ * AUTOMATED FLOW (as of 2025-01):
+ * 1. Fetch cities dynamically from Dutchie GraphQL (getAllCitiesByState)
+ * 2. For each city, discover store locations via ConsumerDispensaries query
+ * 3. Upsert locations to dutchie_discovery_locations (keyed by platform_location_id)
+ * 4. AUTO-VALIDATE: Check required fields (name, city, state, platform_menu_url, platform_location_id)
+ * 5. AUTO-PROMOTE: Valid locations are upserted to dispensaries table with crawl_enabled=true
+ * 6. All actions logged to dutchie_promotion_log for audit
 *
- * This module does NOT create canonical dispensaries automatically.
+ * Tables involved:
+ * - dutchie_discovery_cities: Known cities for each state
+ * - dutchie_discovery_locations: Raw discovered store data
+ * - dispensaries: Canonical store records (promoted from discovery)
+ * - dutchie_promotion_log: Audit trail for validation/promotion
+ *
+ * Usage:
+ *   npx tsx src/scripts/run-discovery.ts discover:state AZ
+ *   npx tsx src/scripts/run-discovery.ts discover:state CA
 */

 import { Pool } from 'pg';
@@ -24,11 +33,12 @@ import {
  getCitiesToCrawl,
  getCityBySlug,
  seedKnownCities,
-  ARIZONA_CITIES,
 } from './city-discovery';
 import {
  discoverLocationsForCity,
+  getCitiesForState,
 } from './location-discovery';
+import { promoteDiscoveredLocations } from './promotion';

 // ============================================================
 // FULL DISCOVERY
@@ -162,12 +172,161 @@ export async function runFullDiscovery(
    console.log(`Errors: ${totalErrors}`);
  }

+  // Per TASK_WORKFLOW_2024-12-10.md: Track new dispensary IDs for task chaining
+  let newDispensaryIds: number[] = [];
+
+  // Step 4: Auto-validate and promote discovered locations
+  if (!dryRun && totalLocationsUpserted > 0) {
+    console.log('\n[Discovery] Step 4: Auto-promoting discovered locations...');
+    const promotionResult = await promoteDiscoveredLocations(stateCode, false);
+    console.log(`[Discovery] Promotion complete:`);
+    console.log(`  Created: ${promotionResult.created} new dispensaries`);
+    console.log(`  Updated: ${promotionResult.updated} existing dispensaries`);
+    console.log(`  Rejected: ${promotionResult.rejected} (validation failed)`);
+
+    // Per TASK_WORKFLOW_2024-12-10.md: Capture new IDs for task chaining
+    newDispensaryIds = promotionResult.newDispensaryIds;
+    if (newDispensaryIds.length > 0) {
+      console.log(`  New store IDs for crawl: [${newDispensaryIds.join(', ')}]`);
+    }
+
+    if (promotionResult.rejectedRecords.length > 0) {
+      console.log(`  Rejection reasons:`);
+      promotionResult.rejectedRecords.slice(0, 5).forEach(r => {
+        console.log(`    - ${r.name}: ${r.errors.join(', ')}`);
+      });
+      if (promotionResult.rejectedRecords.length > 5) {
+        console.log(`    ... and ${promotionResult.rejectedRecords.length - 5} more`);
+      }
+    }
+  }
+
+  // Step 5: Detect dropped stores (in DB but not in discovery results)
+  if (!dryRun) {
+    console.log('\n[Discovery] Step 5: Detecting dropped stores...');
+    const droppedResult = await detectDroppedStores(pool, stateCode);
+    if (droppedResult.droppedCount > 0) {
+      console.log(`[Discovery] Found ${droppedResult.droppedCount} dropped stores:`);
+      droppedResult.droppedStores.slice(0, 10).forEach(s => {
+        console.log(`    - ${s.name} (${s.city}, ${s.state}) - last seen: ${s.lastSeenAt}`);
+      });
+      if (droppedResult.droppedCount > 10) {
+        console.log(`    ... and ${droppedResult.droppedCount - 10} more`);
+      }
+    } else {
+      console.log(`[Discovery] No dropped stores detected`);
+    }
+  }
+
  return {
    cities: cityResult,
    locations: locationResults,
    totalLocationsFound,
    totalLocationsUpserted,
    durationMs,
+    // Per TASK_WORKFLOW_2024-12-10.md: Return new IDs for task chaining
+    newDispensaryIds,
+  };
+}
+
+// ============================================================
+// DROPPED STORE DETECTION
+// ============================================================
+
+export interface DroppedStoreResult { // Result shape returned by detectDroppedStores
+  droppedCount: number; // Number of entries in droppedStores
+  droppedStores: Array<{
+    id: number; // dispensaries.id
+    name: string;
+    city: string;
+    state: string;
+    platformDispensaryId: string; // dispensaries.platform_dispensary_id
+    lastSeenAt: string; // Filled from dispensaries.updated_at (selected as last_seen_at). NOTE(review): the driver may return a Date here - confirm
+  }>;
+}
+
+/**
+ * Detect stores that exist in dispensaries but were not found by discovery,
+ * and mark them as status='dropped' for manual review.
+ *
+ * A store is considered "dropped" when all of these hold:
+ * 1. It is a Dutchie store with a platform_dispensary_id
+ * 2. It is crawl-enabled and its status is 'open' (or not set at all)
+ * 3. No active dutchie_discovery_locations row carrying the same platform
+ *    location id has a last_seen_at within the past 24 hours
+ *
+ * Every dropped store is also recorded in dutchie_promotion_log with
+ * action='dropped' and triggered_by='discovery_detection'.
+ *
+ * @param pool      Database pool used for the lookup, the status update and the audit rows
+ * @param stateCode Optional state filter, matched against dispensaries.state
+ * @returns The number of stores marked as dropped plus a summary of each one
+ */
+export async function detectDroppedStores(
+  pool: Pool,
+  stateCode?: string
+): Promise<DroppedStoreResult> {
+  const params: any[] = [];
+  let stateFilter = '';
+
+  if (stateCode) {
+    stateFilter = ` AND d.state = $1`;
+    params.push(stateCode);
+  }
+
+  // NOT EXISTS is used instead of NOT IN on purpose: with NOT IN, a single
+  // NULL platform_location_id in the subquery result makes the predicate
+  // evaluate to NULL for every row, so no dropped store would ever be found.
+  const query = `
+    SELECT
+      d.id,
+      d.name,
+      d.city,
+      d.state,
+      d.platform_dispensary_id,
+      d.updated_at as last_seen_at
+    FROM dispensaries d
+    WHERE d.platform_dispensary_id IS NOT NULL
+      AND d.platform = 'dutchie'
+      AND (d.status = 'open' OR d.status IS NULL)
+      AND d.crawl_enabled = true
+      AND NOT EXISTS (
+        SELECT 1
+        FROM dutchie_discovery_locations l
+        WHERE l.platform_location_id = d.platform_dispensary_id
+          AND l.last_seen_at > NOW() - INTERVAL '24 hours'
+          AND l.active = true
+      )
+      ${stateFilter}
+    ORDER BY d.name
+  `;
+
+  const result = await pool.query(query, params);
+  const droppedStores = result.rows;
+
+  // Mark these stores as 'dropped' status
+  if (droppedStores.length > 0) {
+    const ids = droppedStores.map(s => s.id);
+    await pool.query(`
+      UPDATE dispensaries
+      SET status = 'dropped', updated_at = NOW()
+      WHERE id = ANY($1::int[])
+    `, [ids]);
+
+    // Log to promotion log for audit (one row per dropped store)
+    for (const store of droppedStores) {
+      await pool.query(`
+        INSERT INTO dutchie_promotion_log
+        (dispensary_id, action, state_code, store_name, triggered_by)
+        VALUES ($1, 'dropped', $2, $3, 'discovery_detection')
+      `, [store.id, store.state, store.name]);
+    }
+  }
+
+  // Rows were read before the UPDATE above, so lastSeenAt carries the
+  // updated_at value from before the store was marked as dropped.
+  return {
+    droppedCount: droppedStores.length,
+    droppedStores: droppedStores.map(s => ({
+      id: s.id,
+      name: s.name,
+      city: s.city,
+      state: s.state,
+      platformDispensaryId: s.platform_dispensary_id,
+      lastSeenAt: s.last_seen_at,
+    })),
   };
 }

@@ -235,11 +394,19 @@ export async function discoverState(

  console.log(`[Discovery] Discovering state: ${stateCode}`);

-  // Seed known cities for this state
-  if (stateCode === 'AZ') {
-    console.log('[Discovery] Seeding Arizona cities...');
-    const seeded = await seedKnownCities(pool, ARIZONA_CITIES);
-    console.log(`[Discovery] Seeded ${seeded.created} new cities, ${seeded.updated} updated`);
+  // Dynamically fetch and seed cities for this state
+  console.log(`[Discovery] Fetching cities for ${stateCode} from Dutchie...`);
+  const cityNames = await getCitiesForState(stateCode);
+  if (cityNames.length > 0) {
+    const cities = cityNames.map(name => ({
+      name,
+      slug: name.toLowerCase().replace(/\s+/g, '-').replace(/[^a-z0-9-]/g, ''),
+      stateCode,
+    }));
+    const seeded = await seedKnownCities(pool, cities);
+    console.log(`[Discovery] Seeded ${seeded.created} new cities, ${seeded.updated} updated for ${stateCode}`);
+  } else {
+    console.log(`[Discovery] No cities found for ${stateCode}`);
  }

  // Run full discovery for this state
--- a/backend/src/discovery/index.ts
+++ b/backend/src/discovery/index.ts
@@ -13,7 +13,6 @@ export {
  getCitiesToCrawl,
  getCityBySlug,
  seedKnownCities,
-  ARIZONA_CITIES,
 } from './city-discovery';

 // Location Discovery
@@ -33,5 +32,17 @@ export {
  DiscoveryStats,
 } from './discovery-crawler';

+// Promotion
+export {
+  validateForPromotion,
+  validateDiscoveredLocations,
+  promoteDiscoveredLocations,
+  promoteSingleLocation,
+  ValidationResult,
+  ValidationSummary,
+  PromotionResult,
+  PromotionSummary,
+} from './promotion';
+
 // Routes
 export { createDiscoveryRoutes } from './routes';
--- a/backend/src/discovery/location-discovery.ts
+++ b/backend/src/discovery/location-discovery.ts
@@ -134,10 +134,10 @@ export interface StateWithCities {
 }

 /**
- * Fetch all states with their cities from Dutchie's __NEXT_DATA__
+ * Fetch all states with their cities via direct GraphQL query
 *
- * This fetches a city page and extracts the statesWithDispensaries data
- * which contains all states and their cities where Dutchie has dispensaries.
+ * Uses the getAllCitiesByState persisted query which returns all states
+ * and cities where Dutchie has dispensaries.
 */
 export async function fetchStatesWithDispensaries(
  options: { verbose?: boolean } = {}
@@ -147,84 +147,53 @@ export async function fetchStatesWithDispensaries(
  // Initialize proxy if USE_PROXY=true
  await initDiscoveryProxy();

-  console.log('[LocationDiscovery] Fetching statesWithDispensaries from Dutchie...');
+  console.log('[LocationDiscovery] Fetching statesWithDispensaries via GraphQL...');

-  // Fetch any city page to get the __NEXT_DATA__ with statesWithDispensaries
-  // Using a known city that's likely to exist
-  const result = await fetchPage('/dispensaries/az/phoenix', { maxRetries: 3 });
+  try {
+    // Use direct GraphQL query - much cleaner than scraping __NEXT_DATA__
+    const result = await executeGraphQL(
+      'getAllCitiesByState',
+      {}, // No variables needed
+      GRAPHQL_HASHES.GetAllCitiesByState,
+      { maxRetries: 3, retryOn403: true }
+    );

-  if (!result || result.status !== 200) {
-    console.error('[LocationDiscovery] Failed to fetch city page');
-    return [];
-  }
-
-  const nextData = extractNextData(result.html);
-  if (!nextData) {
-    console.error('[LocationDiscovery] No __NEXT_DATA__ found');
-    return [];
-  }
-
-  // Extract statesWithDispensaries from Apollo state
-  const apolloState = nextData.props?.pageProps?.initialApolloState;
-  if (!apolloState) {
-    console.error('[LocationDiscovery] No initialApolloState found');
-    return [];
-  }
-
-  // Find ROOT_QUERY.statesWithDispensaries
-  const rootQuery = apolloState['ROOT_QUERY'];
-  if (!rootQuery) {
-    console.error('[LocationDiscovery] No ROOT_QUERY found');
-    return [];
-  }
-
-  // The statesWithDispensaries is at ROOT_QUERY.statesWithDispensaries
-  const statesRefs = rootQuery.statesWithDispensaries;
-  if (!Array.isArray(statesRefs)) {
-    console.error('[LocationDiscovery] statesWithDispensaries not found or not an array');
-    return [];
-  }
-
-  // Resolve the references to actual state data
-  const states: StateWithCities[] = [];
-  for (const ref of statesRefs) {
-    // ref might be { __ref: "StateWithDispensaries:0" } or direct object
-    let stateData: any;
-
-    if (ref && ref.__ref) {
-      stateData = apolloState[ref.__ref];
-    } else {
-      stateData = ref;
+    const statesData = result?.data?.statesWithDispensaries;
+    if (!Array.isArray(statesData)) {
+      console.error('[LocationDiscovery] statesWithDispensaries not found in response');
+      return [];
    }

-    if (stateData && stateData.name) {
-      // Parse cities JSON array if it's a string
-      let cities = stateData.cities;
-      if (typeof cities === 'string') {
-        try {
-          cities = JSON.parse(cities);
-        } catch {
-          cities = [];
-        }
+    // Map to our StateWithCities format
+    const states: StateWithCities[] = [];
+    for (const state of statesData) {
+      if (state && state.name) {
+        // Filter out null cities
+        const cities = Array.isArray(state.cities)
+          ? state.cities.filter((c: string | null) => c !== null)
+          : [];
+
+        states.push({
+          name: state.name,
+          country: state.country || 'US',
+          cities,
+        });
      }
-
-      states.push({
-        name: stateData.name,
-        country: stateData.country || 'US',
-        cities: Array.isArray(cities) ? cities : [],
-      });
    }
-  }

-  if (verbose) {
-    console.log(`[LocationDiscovery] Found ${states.length} states`);
-    for (const state of states) {
-      console.log(`  ${state.name}: ${state.cities.length} cities`);
+    if (verbose) {
+      console.log(`[LocationDiscovery] Found ${states.length} states`);
+      for (const state of states) {
+        console.log(`  ${state.name}: ${state.cities.length} cities`);
+      }
    }
-  }

-  console.log(`[LocationDiscovery] Loaded ${states.length} states with cities`);
-  return states;
+    console.log(`[LocationDiscovery] Loaded ${states.length} states with cities`);
+    return states;
+  } catch (error: any) {
+    console.error(`[LocationDiscovery] Failed to fetch states: ${error.message}`);
+    return [];
+  }
 }

 /**
@@ -751,31 +720,57 @@ async function scrapeLocationCards(

 /**
 * Normalize a raw location response to a consistent format.
+ * Maps Dutchie camelCase fields to our snake_case equivalents.
 */
 function normalizeLocationResponse(raw: any): DutchieLocationResponse {
  const slug = raw.slug || raw.cName || raw.urlSlug || '';
  const id = raw.id || raw._id || raw.dispensaryId || '';

+  // Extract location data - GraphQL response nests address info in .location
+  const loc = raw.location || {};
+
+  // Extract coordinates from geometry.coordinates [longitude, latitude]
+  const coords = loc.geometry?.coordinates || [];
+  const longitude = coords[0] || raw.longitude || raw.lng || loc.longitude || loc.lng;
+  const latitude = coords[1] || raw.latitude || raw.lat || loc.latitude || loc.lat;
+
  return {
    id,
    name: raw.name || raw.dispensaryName || '',
    slug,
-    address: raw.address || raw.fullAddress || '',
-    address1: raw.address1 || raw.addressLine1 || raw.streetAddress || '',
-    address2: raw.address2 || raw.addressLine2 || '',
-    city: raw.city || '',
-    state: raw.state || raw.stateCode || '',
-    zip: raw.zip || raw.zipCode || raw.postalCode || '',
-    country: raw.country || raw.countryCode || 'US',
-    latitude: raw.latitude || raw.lat || raw.location?.latitude,
-    longitude: raw.longitude || raw.lng || raw.location?.longitude,
+    cName: raw.cName || raw.slug || '',
+    address: raw.address || raw.fullAddress || loc.ln1 || '',
+    address1: raw.address1 || raw.addressLine1 || raw.streetAddress || loc.ln1 || '',
+    address2: raw.address2 || raw.addressLine2 || loc.ln2 || '',
+    city: raw.city || loc.city || '',
+    state: raw.state || raw.stateCode || loc.state || '',
+    zip: raw.zip || raw.zipCode || raw.postalCode || loc.zipcode || loc.zip || '',
+    country: raw.country || raw.countryCode || loc.country || 'United States',
+    latitude,
+    longitude,
    timezone: raw.timezone || raw.tz || '',
    menuUrl: raw.menuUrl || (slug ? `https://dutchie.com/dispensary/${slug}` : ''),
    retailType: raw.retailType || raw.type || '',
+    // Service offerings
    offerPickup: raw.offerPickup ?? raw.storeSettings?.offerPickup ?? true,
    offerDelivery: raw.offerDelivery ?? raw.storeSettings?.offerDelivery ?? false,
-    isRecreational: raw.isRecreational ?? raw.retailType?.includes('Recreational') ?? true,
-    isMedical: raw.isMedical ?? raw.retailType?.includes('Medical') ?? true,
+    offerCurbsidePickup: raw.offerCurbsidePickup ?? false,
+    // License types
+    isRecreational: raw.isRecreational ?? raw.recDispensary ?? raw.retailType?.includes('Recreational') ?? true,
+    isMedical: raw.isMedical ?? raw.medicalDispensary ?? raw.retailType?.includes('Medical') ?? true,
+    // Contact info
+    phone: raw.phone || '',
+    email: raw.email || '',
+    website: raw.embedBackUrl || '',
+    // Branding
+    description: raw.description || '',
+    logoImage: raw.logoImage || '',
+    bannerImage: raw.bannerImage || '',
+    // Chain/enterprise info
+    chainSlug: raw.chain || '',
+    enterpriseId: raw.retailer?.enterpriseId || '',
+    // Status
+    status: raw.status || '',
    // Preserve raw data
    ...raw,
  };
@@ -826,15 +821,27 @@ export async function upsertLocation(
      offers_pickup,
      is_recreational,
      is_medical,
+      phone,
+      website,
+      email,
+      description,
+      logo_image,
+      banner_image,
+      chain_slug,
+      enterprise_id,
+      c_name,
+      country,
+      store_status,
      last_seen_at,
      updated_at
-    ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20, $21, NOW(), NOW())
+    ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20, $21, $22, $23, $24, $25, $26, $27, $28, $29, $30, $31, $32, NOW(), NOW())
    ON CONFLICT (platform, platform_location_id)
    DO UPDATE SET
      name = EXCLUDED.name,
      platform_menu_url = EXCLUDED.platform_menu_url,
      raw_address = COALESCE(EXCLUDED.raw_address, dutchie_discovery_locations.raw_address),
      address_line1 = COALESCE(EXCLUDED.address_line1, dutchie_discovery_locations.address_line1),
+      address_line2 = COALESCE(EXCLUDED.address_line2, dutchie_discovery_locations.address_line2),
      city = COALESCE(EXCLUDED.city, dutchie_discovery_locations.city),
      state_code = COALESCE(EXCLUDED.state_code, dutchie_discovery_locations.state_code),
      postal_code = COALESCE(EXCLUDED.postal_code, dutchie_discovery_locations.postal_code),
@@ -846,6 +853,17 @@ export async function upsertLocation(
      offers_pickup = COALESCE(EXCLUDED.offers_pickup, dutchie_discovery_locations.offers_pickup),
      is_recreational = COALESCE(EXCLUDED.is_recreational, dutchie_discovery_locations.is_recreational),
      is_medical = COALESCE(EXCLUDED.is_medical, dutchie_discovery_locations.is_medical),
+      phone = COALESCE(EXCLUDED.phone, dutchie_discovery_locations.phone),
+      website = COALESCE(EXCLUDED.website, dutchie_discovery_locations.website),
+      email = COALESCE(EXCLUDED.email, dutchie_discovery_locations.email),
+      description = COALESCE(EXCLUDED.description, dutchie_discovery_locations.description),
+      logo_image = COALESCE(EXCLUDED.logo_image, dutchie_discovery_locations.logo_image),
+      banner_image = COALESCE(EXCLUDED.banner_image, dutchie_discovery_locations.banner_image),
+      chain_slug = COALESCE(EXCLUDED.chain_slug, dutchie_discovery_locations.chain_slug),
+      enterprise_id = COALESCE(EXCLUDED.enterprise_id, dutchie_discovery_locations.enterprise_id),
+      c_name = COALESCE(EXCLUDED.c_name, dutchie_discovery_locations.c_name),
+      country = COALESCE(EXCLUDED.country, dutchie_discovery_locations.country),
+      store_status = COALESCE(EXCLUDED.store_status, dutchie_discovery_locations.store_status),
      last_seen_at = NOW(),
      updated_at = NOW()
    RETURNING id, (xmax = 0) as is_new`,
@@ -861,7 +879,7 @@ export async function upsertLocation(
      location.city || null,
      location.state || null,
      location.zip || null,
-      location.country || 'US',
+      location.country || 'United States',
      location.latitude || null,
      location.longitude || null,
      location.timezone || null,
@@ -871,6 +889,17 @@ export async function upsertLocation(
      location.offerPickup ?? null,
      location.isRecreational ?? null,
      location.isMedical ?? null,
+      location.phone || null,
+      location.website || null,
+      location.email || null,
+      location.description || null,
+      location.logoImage || null,
+      location.bannerImage || null,
+      location.chainSlug || null,
+      location.enterpriseId || null,
+      location.cName || null,
+      location.country || 'United States',
+      location.status || null,
    ]
  );

--- a/backend/src/discovery/promotion.ts
+++ b/backend/src/discovery/promotion.ts
@@ -0,0 +1,587 @@
+/**
+ * Discovery Promotion Service
+ *
+ * Handles the promotion of discovery locations to dispensaries:
+ * 1. Discovery → Raw data in dutchie_discovery_locations (status='discovered')
+ * 2. Validation → Check required fields, reject incomplete records
+ * 3. Promotion → Idempotent upsert to dispensaries, link back via dispensary_id
+ */
+
+import { pool } from '../db/pool';
+import { DiscoveryLocationRow, DiscoveryStatus } from './types';
+
+// ============================================================
+// VALIDATION
+// ============================================================
+
+export interface ValidationResult { // Outcome of validateForPromotion for one discovery location
+  valid: boolean; // true only when errors is empty
+  errors: string[]; // One message per failed check, e.g. 'Missing name'
+}
+
+export interface ValidationSummary { // Aggregate returned by validateDiscoveredLocations
+  totalChecked: number; // Number of discovery rows that were validated
+  validCount: number; // Rows that passed every check
+  invalidCount: number; // Always equals invalidRecords.length
+  invalidRecords: Array<{
+    id: number; // dutchie_discovery_locations.id of the failing row
+    name: string;
+    errors: string[]; // Messages produced by validateForPromotion
+  }>;
+}
+
+/**
+ * Check that a discovery location carries every field promotion depends on.
+ *
+ * name, city and state_code must be non-empty after trimming;
+ * platform_location_id and platform_menu_url only need to be truthy.
+ *
+ * @param loc Discovery row to inspect
+ * @returns valid=true with no errors, or valid=false with one message per missing field
+ */
+export function validateForPromotion(loc: DiscoveryLocationRow): ValidationResult {
+  const isBlank = (value: string | null | undefined): boolean =>
+    !value || value.trim() === '';
+
+  // [check failed?, message] pairs; the order here is the order of the reported errors
+  const checks: Array<[boolean, string]> = [
+    [!loc.platform_location_id, 'Missing platform_location_id'],
+    [isBlank(loc.name), 'Missing name'],
+    [isBlank(loc.city), 'Missing city'],
+    [isBlank(loc.state_code), 'Missing state_code'],
+    [!loc.platform_menu_url, 'Missing platform_menu_url'],
+  ];
+
+  const errors = checks
+    .filter(([failed]) => failed)
+    .map(([, message]) => message);
+
+  return { valid: errors.length === 0, errors };
+}
+
+/**
+ * Run validateForPromotion over every discovery location whose status is
+ * 'discovered' and tally the outcome. Only a SELECT is issued here.
+ *
+ * @param stateCode Optional state_code filter (all states when omitted)
+ * @returns Valid/invalid counts plus the id, name and errors of each invalid row
+ */
+export async function validateDiscoveredLocations(
+  stateCode?: string
+): Promise<ValidationSummary> {
+  const params: string[] = stateCode ? [stateCode] : [];
+  const stateClause = stateCode ? ` AND state_code = $1` : '';
+  const sql = `
+    SELECT * FROM dutchie_discovery_locations
+    WHERE status = 'discovered'
+  ` + stateClause;
+
+  const { rows } = await pool.query(sql, params);
+  const candidates = rows as DiscoveryLocationRow[];
+
+  const invalidRecords: ValidationSummary['invalidRecords'] = [];
+  for (const candidate of candidates) {
+    const { valid, errors } = validateForPromotion(candidate);
+    if (valid) {
+      continue;
+    }
+    invalidRecords.push({ id: candidate.id, name: candidate.name, errors });
+  }
+
+  // Every row is either valid or listed in invalidRecords
+  return {
+    totalChecked: candidates.length,
+    validCount: candidates.length - invalidRecords.length,
+    invalidCount: invalidRecords.length,
+    invalidRecords,
+  };
+}
+
+// ============================================================
+// PROMOTION
+// ============================================================
+
+export interface PromotionResult { // Outcome of promoting one discovery location
+  discoveryId: number; // dutchie_discovery_locations.id of the source row
+  dispensaryId: number; // dispensaries.id returned by the upsert
+  action: 'created' | 'updated' | 'skipped'; // promoteLocation reports 'created' for a fresh insert, 'updated' otherwise
+  name: string; // Store name copied from the discovery row
+}
+
+export interface PromotionSummary { // Aggregate returned by promoteDiscoveredLocations
+  totalProcessed: number;
+  created: number; // Count of newly created dispensaries
+  updated: number; // Count of existing dispensaries that were updated
+  skipped: number;
+  rejected: number; // Count of locations that failed validation
+  results: PromotionResult[];
+  rejectedRecords: Array<{
+    id: number; // dutchie_discovery_locations.id of the rejected row
+    name: string;
+    errors: string[]; // Validation messages explaining the rejection
+  }>;
+  durationMs: number;
+  // Per TASK_WORKFLOW_2024-12-10.md: Track new dispensary IDs for task chaining
+  newDispensaryIds: number[];
+}
+
+/**
+ * Generate a URL-safe slug from name, city and state.
+ *
+ * The parts are joined with hyphens, lower-cased, and every run of characters
+ * outside [a-z0-9] is collapsed into a single hyphen. The result is capped at
+ * 100 characters and never starts or ends with a hyphen.
+ *
+ * @param name  Store name
+ * @param city  City name (may be empty)
+ * @param state State code (may be empty)
+ * @returns The slug, at most 100 characters long
+ */
+function generateSlug(name: string, city: string, state: string): string {
+  return `${name}-${city}-${state}`
+    .toLowerCase()
+    .replace(/[^a-z0-9]+/g, '-')
+    .replace(/^-|-$/g, '')
+    .substring(0, 100)
+    // Truncation can cut right after a separator; drop the hyphen it leaves behind
+    .replace(/-$/, '');
+}
+
+/**
+ * Append one audit row to dutchie_promotion_log.
+ *
+ * @param action           Action label stored in the log (e.g. 'promoted_create')
+ * @param discoveryId      Related discovery location id, or null
+ * @param dispensaryId     Related dispensary id, or null
+ * @param stateCode        State code of the store, or null
+ * @param storeName        Store name, or null
+ * @param validationErrors Validation messages, passed to the driver as-is (null when none)
+ * @param fieldChanges     Extra details; serialized with JSON.stringify when present
+ * @param triggeredBy      Origin of the action, 'auto' unless specified
+ */
+async function logPromotionAction(
+  action: string,
+  discoveryId: number | null,
+  dispensaryId: number | null,
+  stateCode: string | null,
+  storeName: string | null,
+  validationErrors: string[] | null = null,
+  fieldChanges: Record<string, any> | null = null,
+  triggeredBy: string = 'auto'
+): Promise<void> {
+  const serializedChanges = fieldChanges ? JSON.stringify(fieldChanges) : null;
+
+  // Order matches the column list in the INSERT below
+  const values = [
+    discoveryId,
+    dispensaryId,
+    action,
+    stateCode,
+    storeName,
+    validationErrors,
+    serializedChanges,
+    triggeredBy,
+  ];
+
+  await pool.query(`
+    INSERT INTO dutchie_promotion_log
+    (discovery_id, dispensary_id, action, state_code, store_name, validation_errors, field_changes, triggered_by)
+    VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
+  `, values);
+}
+
+/**
+ * Insert one dashboard alert into crawler_status_alerts.
+ *
+ * @param dispensaryId   Dispensary the alert belongs to
+ * @param profileId      Crawler profile the alert belongs to, or null
+ * @param alertType      Alert category label
+ * @param severity       One of 'info', 'warning', 'error', 'critical'
+ * @param message        Human-readable alert text
+ * @param previousStatus Status before the change; empty or missing values are stored as NULL
+ * @param newStatus      Status after the change; empty or missing values are stored as NULL
+ * @param metadata       Extra details; serialized with JSON.stringify when present
+ * @returns The id of the inserted alert row
+ */
+export async function createStatusAlert(
+  dispensaryId: number,
+  profileId: number | null,
+  alertType: string,
+  severity: 'info' | 'warning' | 'error' | 'critical',
+  message: string,
+  previousStatus?: string | null,
+  newStatus?: string | null,
+  metadata?: Record<string, any>
+): Promise<number> {
+  // Order matches the column list in the INSERT below
+  const values = [
+    dispensaryId,
+    profileId,
+    alertType,
+    severity,
+    message,
+    previousStatus ? previousStatus : null,
+    newStatus ? newStatus : null,
+    metadata ? JSON.stringify(metadata) : null,
+  ];
+
+  const { rows } = await pool.query(`
+    INSERT INTO crawler_status_alerts
+    (dispensary_id, profile_id, alert_type, severity, message, previous_status, new_status, metadata)
+    VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
+    RETURNING id
+  `, values);
+
+  return rows[0].id;
+}
+
+/**
+ * Make sure a dispensary has an enabled crawler profile.
+ *
+ * When an enabled profile already exists its id is returned untouched.
+ * Otherwise a 'dutchie' profile is inserted with status 'sandbox' and a
+ * 'promoted' info alert is raised through createStatusAlert.
+ *
+ * @param dispensaryId         Dispensary that owns the profile
+ * @param dispensaryName       Used as profile_name and as the basis of profile_key
+ * @param platformDispensaryId Stored in the profile config and in the alert metadata
+ * @returns The profile id and whether this call created it
+ */
+async function ensureCrawlerProfile(
+  dispensaryId: number,
+  dispensaryName: string,
+  platformDispensaryId: string
+): Promise<{ profileId: number; created: boolean }> {
+  // Reuse an enabled profile when there is one
+  const { rows: enabledProfiles } = await pool.query(`
+    SELECT id FROM dispensary_crawler_profiles
+    WHERE dispensary_id = $1 AND enabled = true
+    LIMIT 1
+  `, [dispensaryId]);
+
+  const [existing] = enabledProfiles;
+  if (existing) {
+    return { profileId: existing.id, created: false };
+  }
+
+  // profile_key: kebab-cased store name, at most 50 characters
+  const kebabName = dispensaryName.toLowerCase().replace(/[^a-z0-9]+/g, '-');
+  const profileKey = kebabName.replace(/^-|-$/g, '').substring(0, 50);
+
+  const initialConfig = JSON.stringify({
+    platformDispensaryId,
+    useBothModes: true,
+    downloadImages: true,
+    trackStock: true,
+  });
+
+  const { rows: insertedProfiles } = await pool.query(`
+    INSERT INTO dispensary_crawler_profiles (
+      dispensary_id,
+      profile_name,
+      profile_key,
+      crawler_type,
+      status,
+      status_reason,
+      status_changed_at,
+      config,
+      enabled,
+      consecutive_successes,
+      consecutive_failures,
+      created_at,
+      updated_at
+    ) VALUES (
+      $1, $2, $3, 'dutchie', 'sandbox', 'Newly promoted from discovery', CURRENT_TIMESTAMP,
+      $4::jsonb, true, 0, 0, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP
+    )
+    RETURNING id
+  `, [dispensaryId, dispensaryName, profileKey, initialConfig]);
+
+  const profileId = insertedProfiles[0].id;
+
+  // Announce the new sandbox store on the dashboard
+  await createStatusAlert(
+    dispensaryId,
+    profileId,
+    'promoted',
+    'info',
+    `${dispensaryName} promoted to sandbox - awaiting first successful crawl`,
+    null,
+    'sandbox',
+    { source: 'discovery_promotion', platformDispensaryId }
+  );
+
+  return { profileId, created: true };
+}
+
+/**
+ * Promote a single discovery location to dispensaries table
+ * Idempotent: uses ON CONFLICT on platform_dispensary_id
+ *
+ * Steps, each issued as its own query (no transaction is opened in this function):
+ * 1. Upsert the dispensaries row. On conflict the descriptive columns are
+ *    overwritten, while platform, slug, menu_type, crawl_enabled,
+ *    dutchie_verified, dutchie_verified_at and created_at keep their stored values.
+ * 2. Link the discovery row to the dispensary and set its status to 'verified'.
+ * 3. For newly inserted dispensaries only, ensure a crawler profile exists.
+ * 4. Write a 'promoted_create' / 'promoted_update' row to the promotion log.
+ *
+ * @param loc Discovery row to promote; callers are expected to have validated it
+ * @returns Discovery id, dispensary id, 'created' or 'updated', and the store name
+ */
+async function promoteLocation(
+  loc: DiscoveryLocationRow
+): Promise<PromotionResult> {
+  const slug = loc.platform_slug || generateSlug(loc.name, loc.city || '', loc.state_code || ''); // generated slug is only a fallback
+
+  // Upsert into dispensaries
+  // ON CONFLICT by platform_dispensary_id ensures idempotency
+  const upsertResult = await pool.query(`
+    INSERT INTO dispensaries (
+      platform,
+      name,
+      slug,
+      city,
+      state,
+      address1,
+      address2,
+      zipcode,
+      postal_code,
+      phone,
+      website,
+      email,
+      latitude,
+      longitude,
+      timezone,
+      platform_dispensary_id,
+      menu_url,
+      menu_type,
+      description,
+      logo_image,
+      banner_image,
+      offer_pickup,
+      offer_delivery,
+      is_medical,
+      is_recreational,
+      chain_slug,
+      enterprise_id,
+      c_name,
+      country,
+      status,
+      crawl_enabled,
+      dutchie_verified,
+      dutchie_verified_at,
+      dutchie_discovery_id,
+      created_at,
+      updated_at
+    ) VALUES (
+      $1, $2, $3, $4, $5, $6, $7, $8, $9, $10,
+      $11, $12, $13, $14, $15, $16, $17, $18, $19, $20,
+      $21, $22, $23, $24, $25, $26, $27, $28, $29, $30,
+      $31, $32, $33, $34, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP
+    )
+    ON CONFLICT (platform_dispensary_id) WHERE platform_dispensary_id IS NOT NULL
+    DO UPDATE SET
+      name = EXCLUDED.name,
+      city = EXCLUDED.city,
+      state = EXCLUDED.state,
+      address1 = EXCLUDED.address1,
+      address2 = EXCLUDED.address2,
+      zipcode = EXCLUDED.zipcode,
+      postal_code = EXCLUDED.postal_code,
+      phone = EXCLUDED.phone,
+      website = EXCLUDED.website,
+      email = EXCLUDED.email,
+      latitude = EXCLUDED.latitude,
+      longitude = EXCLUDED.longitude,
+      timezone = EXCLUDED.timezone,
+      menu_url = EXCLUDED.menu_url,
+      description = EXCLUDED.description,
+      logo_image = EXCLUDED.logo_image,
+      banner_image = EXCLUDED.banner_image,
+      offer_pickup = EXCLUDED.offer_pickup,
+      offer_delivery = EXCLUDED.offer_delivery,
+      is_medical = EXCLUDED.is_medical,
+      is_recreational = EXCLUDED.is_recreational,
+      chain_slug = EXCLUDED.chain_slug,
+      enterprise_id = EXCLUDED.enterprise_id,
+      c_name = EXCLUDED.c_name,
+      country = EXCLUDED.country,
+      status = EXCLUDED.status,
+      dutchie_discovery_id = EXCLUDED.dutchie_discovery_id,
+      updated_at = CURRENT_TIMESTAMP
+    RETURNING id, (xmax = 0) AS inserted
+  `, [
+    loc.platform || 'dutchie',                    // $1 platform
+    loc.name,                                      // $2 name
+    slug,                                          // $3 slug
+    loc.city,                                      // $4 city
+    loc.state_code,                                // $5 state
+    loc.address_line1,                             // $6 address1
+    loc.address_line2,                             // $7 address2
+    loc.postal_code,                               // $8 zipcode
+    loc.postal_code,                               // $9 postal_code (same value as zipcode)
+    loc.phone,                                     // $10 phone
+    loc.website,                                   // $11 website
+    loc.email,                                     // $12 email
+    loc.latitude,                                  // $13 latitude
+    loc.longitude,                                 // $14 longitude
+    loc.timezone,                                  // $15 timezone
+    loc.platform_location_id,                      // $16 platform_dispensary_id
+    loc.platform_menu_url,                         // $17 menu_url
+    'dutchie',                                     // $18 menu_type
+    loc.description,                               // $19 description
+    loc.logo_image,                                // $20 logo_image
+    loc.banner_image,                              // $21 banner_image
+    loc.offers_pickup ?? true,                     // $22 offer_pickup
+    loc.offers_delivery ?? false,                  // $23 offer_delivery
+    loc.is_medical ?? false,                       // $24 is_medical
+    loc.is_recreational ?? true,                   // $25 is_recreational
+    loc.chain_slug,                                // $26 chain_slug
+    loc.enterprise_id,                             // $27 enterprise_id
+    loc.c_name,                                    // $28 c_name
+    loc.country || 'United States',                // $29 country
+    loc.store_status || 'open',                    // $30 status
+    true,                                          // $31 crawl_enabled
+    true,                                          // $32 dutchie_verified
+    new Date(),                                    // $33 dutchie_verified_at
+    loc.id,                                        // $34 dutchie_discovery_id
+  ]);
+
+  const dispensaryId = upsertResult.rows[0].id;
+  const wasInserted = upsertResult.rows[0].inserted; // value of (xmax = 0) from RETURNING; treated as "row was newly inserted"
+
+  // Link discovery location back to dispensary and update status
+  await pool.query(`
+    UPDATE dutchie_discovery_locations
+    SET
+      dispensary_id = $1,
+      status = 'verified',
+      verified_at = CURRENT_TIMESTAMP,
+      verified_by = 'auto-promotion'
+    WHERE id = $2
+  `, [dispensaryId, loc.id]);
+
+  // Create crawler profile with sandbox status for new dispensaries (skipped when the upsert updated an existing row)
+  if (wasInserted && loc.platform_location_id) {
+    await ensureCrawlerProfile(dispensaryId, loc.name, loc.platform_location_id);
+  }
+
+  const action = wasInserted ? 'promoted_create' : 'promoted_update'; // label written to the promotion log
+
+  // Log the promotion
+  await logPromotionAction(
+    action,
+    loc.id,
+    dispensaryId,
+    loc.state_code,
+    loc.name,
+    null,
+    { slug, city: loc.city, platform_location_id: loc.platform_location_id }
+  );
+
+  return {
+    discoveryId: loc.id,
+    dispensaryId,
+    action: wasInserted ? 'created' : 'updated', // the caller-facing result uses the shorter labels
+    name: loc.name,
+  };
+}
+
+/**
+ * Promote every 'discovered' location that passes validation to a dispensary
+ * Invalid rows are marked 'rejected'; the summary reports counts, results, and new dispensary IDs.
+ * @param stateCode Optional filter by state (e.g., 'CA', 'AZ')
+ * @param dryRun If true, only validate: no rows are updated and valid rows are reported as 'skipped'
+ */
+export async function promoteDiscoveredLocations(
+  stateCode?: string,
+  dryRun = false
+): Promise<PromotionSummary> {
+  const startTime = Date.now();
+
+  let query = `
+    SELECT * FROM dutchie_discovery_locations
+    WHERE status = 'discovered'
+  `;
+  const params: string[] = [];  // bound values; stateCode is the only possible entry ($1)
+
+  if (stateCode) {
+    query += ` AND state_code = $1`;
+    params.push(stateCode);
+  }
+
+  query += ` ORDER BY id`;  // process rows in a stable, ascending-id order
+
+  const result = await pool.query(query, params);
+  const locations = result.rows as DiscoveryLocationRow[];
+
+  const results: PromotionResult[] = [];
+  const rejectedRecords: PromotionSummary['rejectedRecords'] = [];
+  // Per TASK_WORKFLOW_2024-12-10.md: Track new dispensary IDs for task chaining
+  const newDispensaryIds: number[] = [];
+  let created = 0;
+  let updated = 0;
+  let skipped = 0;   // valid rows left untouched because dryRun is set
+  let rejected = 0;  // validation failures plus promotions that threw
+
+  for (const loc of locations) {
+    // Step 2: Validation
+    const validation = validateForPromotion(loc);
+
+    if (!validation.valid) {
+      rejected++;
+      rejectedRecords.push({
+        id: loc.id,
+        name: loc.name,
+        errors: validation.errors,
+      });
+
+      // Mark as rejected if not dry run
+      if (!dryRun) {
+        await pool.query(`
+          UPDATE dutchie_discovery_locations
+          SET status = 'rejected', notes = $1
+          WHERE id = $2
+        `, [validation.errors.join('; '), loc.id]);
+
+        // Log the rejection
+        await logPromotionAction(
+          'rejected',
+          loc.id,
+          null,
+          loc.state_code,
+          loc.name,
+          validation.errors
+        );
+      }
+      continue;
+    }
+
+    // Step 3: Promotion (skip if dry run)
+    if (dryRun) {
+      skipped++;
+      results.push({
+        discoveryId: loc.id,
+        dispensaryId: 0,  // 0 = nothing was promoted in a dry run
+        action: 'skipped',
+        name: loc.name,
+      });
+      continue;
+    }
+
+    try {
+      const promotionResult = await promoteLocation(loc);
+      results.push(promotionResult);
+
+      if (promotionResult.action === 'created') {
+        created++;
+        // Per TASK_WORKFLOW_2024-12-10.md: Track new IDs for task chaining
+        newDispensaryIds.push(promotionResult.dispensaryId);
+      } else {
+        updated++;
+      }
+    } catch (error: any) {
+      // One failing location must not abort the batch: record it and keep going
+      console.error(`Failed to promote location ${loc.id} (${loc.name}):`, error.message);
+      rejected++;  // counted as rejected, but this block does not change the row's status
+      rejectedRecords.push({
+        id: loc.id,
+        name: loc.name,
+        errors: [`Promotion error: ${error.message}`],
+      });
+    }
+  }
+
+  return {
+    totalProcessed: locations.length,  // every selected row, whatever its outcome
+    created,
+    updated,
+    skipped,
+    rejected,
+    results,
+    rejectedRecords,
+    durationMs: Date.now() - startTime,
+    // Per TASK_WORKFLOW_2024-12-10.md: Return new IDs for task chaining
+    newDispensaryIds,
+  };
+}
+
+/**
+ * Promote a single discovery location by ID; throws if the row is missing or fails validation
+ */
+export async function promoteSingleLocation(
+  discoveryId: number
+): Promise<PromotionResult> {
+  // Looked up by ID only; the row's current status is not part of the filter
+  const result = await pool.query(
+    `SELECT * FROM dutchie_discovery_locations WHERE id = $1`,
+    [discoveryId]
+  );
+
+  if (result.rows.length === 0) {
+    throw new Error(`Discovery location ${discoveryId} not found`);
+  }
+
+  const loc = result.rows[0] as DiscoveryLocationRow;
+
+  // Validate (a failure throws here; the row is not marked 'rejected' by this function)
+  const validation = validateForPromotion(loc);
+  if (!validation.valid) {
+    throw new Error(`Validation failed: ${validation.errors.join(', ')}`);
+  }
+
+  // Promote
+  return promoteLocation(loc);
+}
--- a/backend/src/discovery/routes.ts
+++ b/backend/src/discovery/routes.ts
@@ -18,8 +18,8 @@ import {
  getCitiesToCrawl,
  getCityBySlug,
  seedKnownCities,
-  ARIZONA_CITIES,
 } from './city-discovery';
+import { getCitiesForState } from './location-discovery';
 import {
  DiscoveryLocation,
  DiscoveryCity,
@@ -27,6 +27,11 @@ import {
  mapLocationRowToLocation,
  mapCityRowToCity,
 } from './types';
+import {
+  validateDiscoveredLocations,
+  promoteDiscoveredLocations,
+  promoteSingleLocation,
+} from './promotion';

 export function createDiscoveryRoutes(pool: Pool): Router {
  const router = Router();
@@ -53,44 +58,44 @@ export function createDiscoveryRoutes(pool: Pool): Router {
        offset = '0',
      } = req.query;

-      let whereClause = 'WHERE platform = $1 AND active = TRUE';
+      let whereClause = 'WHERE dl.platform = $1 AND dl.active = TRUE';
      const params: any[] = [platform];
      let paramIndex = 2;

      if (status) {
-        whereClause += ` AND status = $${paramIndex}`;
+        whereClause += ` AND dl.status = $${paramIndex}`;
        params.push(status);
        paramIndex++;
      }

      if (stateCode) {
-        whereClause += ` AND state_code = $${paramIndex}`;
+        whereClause += ` AND dl.state_code = $${paramIndex}`;
        params.push(stateCode);
        paramIndex++;
      }

      if (countryCode) {
-        whereClause += ` AND country_code = $${paramIndex}`;
+        whereClause += ` AND dl.country_code = $${paramIndex}`;
        params.push(countryCode);
        paramIndex++;
      }

      if (city) {
-        whereClause += ` AND city ILIKE $${paramIndex}`;
+        whereClause += ` AND dl.city ILIKE $${paramIndex}`;
        params.push(`%${city}%`);
        paramIndex++;
      }

      if (search) {
-        whereClause += ` AND (name ILIKE $${paramIndex} OR platform_slug ILIKE $${paramIndex})`;
+        whereClause += ` AND (dl.name ILIKE $${paramIndex} OR dl.platform_slug ILIKE $${paramIndex})`;
        params.push(`%${search}%`);
        paramIndex++;
      }

      if (hasDispensary === 'true') {
-        whereClause += ' AND dispensary_id IS NOT NULL';
+        whereClause += ' AND dl.dispensary_id IS NOT NULL';
      } else if (hasDispensary === 'false') {
-        whereClause += ' AND dispensary_id IS NULL';
+        whereClause += ' AND dl.dispensary_id IS NULL';
      }

      params.push(parseInt(limit as string, 10), parseInt(offset as string, 10));
@@ -705,15 +710,22 @@ export function createDiscoveryRoutes(pool: Pool): Router {
        return res.status(400).json({ error: 'stateCode is required' });
      }

-      let cities: any[] = [];
-      if (stateCode === 'AZ') {
-        cities = ARIZONA_CITIES;
-      } else {
+      // Dynamically fetch cities from Dutchie for any state
+      const cityNames = await getCitiesForState(stateCode as string);
+
+      if (cityNames.length === 0) {
        return res.status(400).json({
-          error: `No predefined cities for state: ${stateCode}. Add cities to city-discovery.ts`,
+          error: `No cities found for state: ${stateCode}`,
        });
      }

+      // Convert to seed format
+      const cities = cityNames.map(name => ({
+        name,
+        slug: name.toLowerCase().replace(/\s+/g, '-').replace(/[^a-z0-9-]/g, ''),
+        stateCode: stateCode as string,
+      }));
+
      const result = await seedKnownCities(pool, cities);

      res.json({
@@ -834,6 +846,136 @@ export function createDiscoveryRoutes(pool: Pool): Router {
    }
  });

+  // ============================================================
+  // PROMOTION ENDPOINTS
+  // ============================================================
+
+  /**
+   * GET /api/discovery/admin/validate
+   * Validate discovered locations before promotion (optional ?stateCode= query filter)
+   */
+  router.get('/admin/validate', async (req: Request, res: Response) => {
+    try {
+      const { stateCode } = req.query;
+      const summary = await validateDiscoveredLocations(stateCode as string | undefined);
+
+      res.json({
+        success: true,
+        ...summary,  // summary fields are spread into the top level of the response
+      });
+    } catch (error: any) {
+      res.status(500).json({ error: error.message });  // any failure is reported as a 500
+    }
+  });
+
+  /**
+   * POST /api/discovery/admin/promote
+   * Promote all valid discovered locations to dispensaries (idempotent)
+   *
+   * Body params (read from req.body, not the query string; a missing body means no filter, no dry run):
+   * - stateCode: Filter by state (e.g., 'CA', 'AZ')
+   * - dryRun: If true, only validate without making changes (the string 'false' counts as false)
+   */
+  router.post('/admin/promote', async (req: Request, res: Response) => {
+    try {
+      const { stateCode, dryRun: dryRunRaw = false } = req.body ?? {};
+      const dryRun = dryRunRaw === 'false' ? false : Boolean(dryRunRaw); // form-encoded 'false' must not force a dry run
+      console.log(`[Discovery API] Starting promotion for ${stateCode || 'all states'} (dryRun=${dryRun})`);
+      const summary = await promoteDiscoveredLocations(stateCode, dryRun);
+
+      res.json({
+        success: true,
+        ...summary,
+      });
+    } catch (error: any) {
+      res.status(500).json({ error: error.message });
+    }
+  });
+
+  /**
+   * POST /api/discovery/admin/promote/:id
+   * Promote a single discovery location by ID (responds 400 when :id is not numeric)
+   */
+  router.post('/admin/promote/:id', async (req: Request, res: Response) => {
+    try {
+      const discoveryId = parseInt(req.params.id, 10);
+      if (Number.isNaN(discoveryId)) return res.status(400).json({ error: 'Invalid discovery location ID' }); // never send NaN to the DB
+      console.log(`[Discovery API] Promoting single location ${discoveryId}`);
+      const result = await promoteSingleLocation(discoveryId);
+
+      res.json({
+        success: true,
+        ...result,
+      });
+    } catch (error: any) {
+      res.status(500).json({ error: error.message });
+    }
+  });
+
+  // ============================================================
+  // PROMOTION LOG
+  // ============================================================
+
+  /**
+   * GET /api/discovery/promotion-log
+   * Get promotion audit log, newest first (query params: state, dispensary_id, limit; limit defaults to 100)
+   */
+  router.get('/promotion-log', async (req: Request, res: Response) => {
+    try {
+      const { state, dispensary_id, limit = '100' } = req.query;
+
+      let whereClause = 'WHERE 1=1';  // neutral base so every filter can be appended with AND
+      const params: any[] = [];
+      let paramIndex = 1;             // next free $n placeholder
+
+      if (state) {
+        whereClause += ` AND pl.state_code = $${paramIndex}`;
+        params.push(state);
+        paramIndex++;
+      }
+
+      if (dispensary_id) {
+        whereClause += ` AND pl.dispensary_id = $${paramIndex}`;
+        params.push(parseInt(dispensary_id as string, 10));  // NOTE(review): non-numeric input still yields NaN here
+        paramIndex++;
+      }
+      // A non-numeric or negative limit would make the LIMIT clause fail, so fall back to the default of 100
+      const parsedLimit = parseInt(limit as string, 10);
+      params.push(Number.isInteger(parsedLimit) && parsedLimit >= 0 ? parsedLimit : 100);
+      const { rows } = await pool.query(`
+        SELECT
+          pl.*,
+          dl.name as discovery_name,
+          d.name as dispensary_name
+        FROM dutchie_promotion_log pl
+        LEFT JOIN dutchie_discovery_locations dl ON pl.discovery_id = dl.id
+        LEFT JOIN dispensaries d ON pl.dispensary_id = d.id
+        ${whereClause}
+        ORDER BY pl.created_at DESC
+        LIMIT $${paramIndex}
+      `, params);
+
+      res.json({
+        // Map snake_case columns to the camelCase shape returned by the API
+        logs: rows.map((r: any) => ({
+          id: r.id,
+          discoveryId: r.discovery_id,
+          dispensaryId: r.dispensary_id,
+          action: r.action,
+          stateCode: r.state_code,
+          storeName: r.store_name,
+          validationErrors: r.validation_errors,
+          fieldChanges: r.field_changes,
+          triggeredBy: r.triggered_by,
+          createdAt: r.created_at,
+          discoveryName: r.discovery_name,
+          dispensaryName: r.dispensary_name,
+        })),
+      });
+    } catch (error: any) {
+      res.status(500).json({ error: error.message });
+    }
+  });
+
  return router;
 }

--- a/backend/src/discovery/types.ts
+++ b/backend/src/discovery/types.ts
@@ -60,6 +60,7 @@ export interface DiscoveryLocation {
  stateCode: string | null;
  postalCode: string | null;
  countryCode: string | null;
+  country: string | null;
  latitude: number | null;
  longitude: number | null;
  timezone: string | null;
@@ -72,6 +73,18 @@ export interface DiscoveryLocation {
  offersPickup: boolean | null;
  isRecreational: boolean | null;
  isMedical: boolean | null;
+  // New Dutchie fields
+  phone: string | null;
+  website: string | null;
+  email: string | null;
+  description: string | null;
+  logoImage: string | null;
+  bannerImage: string | null;
+  chainSlug: string | null;
+  enterpriseId: string | null;
+  cName: string | null;
+  storeStatus: string | null;
+  // Timestamps
  firstSeenAt: Date;
  lastSeenAt: Date;
  lastCheckedAt: Date | null;
@@ -96,6 +109,7 @@ export interface DiscoveryLocationRow {
  state_code: string | null;
  postal_code: string | null;
  country_code: string | null;
+  country: string | null;
  latitude: number | null;
  longitude: number | null;
  timezone: string | null;
@@ -108,6 +122,18 @@ export interface DiscoveryLocationRow {
  offers_pickup: boolean | null;
  is_recreational: boolean | null;
  is_medical: boolean | null;
+  // New Dutchie fields (snake_case for DB row)
+  phone: string | null;
+  website: string | null;
+  email: string | null;
+  description: string | null;
+  logo_image: string | null;
+  banner_image: string | null;
+  chain_slug: string | null;
+  enterprise_id: string | null;
+  c_name: string | null;
+  store_status: string | null;
+  // Timestamps
  first_seen_at: Date;
  last_seen_at: Date;
  last_checked_at: Date | null;
@@ -185,6 +211,8 @@ export interface FullDiscoveryResult {
  totalLocationsFound: number;
  totalLocationsUpserted: number;
  durationMs: number;
+  // Per TASK_WORKFLOW_2024-12-10.md: Track new dispensary IDs for task chaining
+  newDispensaryIds?: number[];
 }

 // ============================================================
@@ -245,6 +273,7 @@ export function mapLocationRowToLocation(row: DiscoveryLocationRow): DiscoveryLo
    stateCode: row.state_code,
    postalCode: row.postal_code,
    countryCode: row.country_code,
+    country: row.country,
    latitude: row.latitude,
    longitude: row.longitude,
    timezone: row.timezone,
@@ -257,6 +286,18 @@ export function mapLocationRowToLocation(row: DiscoveryLocationRow): DiscoveryLo
    offersPickup: row.offers_pickup,
    isRecreational: row.is_recreational,
    isMedical: row.is_medical,
+    // New Dutchie fields
+    phone: row.phone,
+    website: row.website,
+    email: row.email,
+    description: row.description,
+    logoImage: row.logo_image,
+    bannerImage: row.banner_image,
+    chainSlug: row.chain_slug,
+    enterpriseId: row.enterprise_id,
+    cName: row.c_name,
+    storeStatus: row.store_status,
+    // Timestamps
    firstSeenAt: row.first_seen_at,
    lastSeenAt: row.last_seen_at,
    lastCheckedAt: row.last_checked_at,
--- a/backend/src/hydration/canonical-upsert.ts
+++ b/backend/src/hydration/canonical-upsert.ts
@@ -16,6 +16,12 @@ import {
  NormalizedBrand,
  NormalizationResult,
 } from './types';
+import {
+  downloadProductImage,
+  ProductImageContext,
+  isImageStorageReady,
+  LocalImageSizes,
+} from '../utils/image-storage';

 const BATCH_SIZE = 100;

@@ -23,10 +29,21 @@ const BATCH_SIZE = 100;
 // PRODUCT UPSERTS
 // ============================================================

+export interface NewProductInfo {  // One upserted product; despite the name it is also used for updated products
+  id: number;                    // store_products.id
+  externalProductId: string;     // provider_product_id
+  name: string;                  // product name as passed to the upsert
+  brandName: string | null;      // brand name as passed to the upsert
+  primaryImageUrl: string | null; // source image URL; null/empty means there is nothing to download
+  hasLocalImage?: boolean;       // True if local_image_path is already set
+}
+
 export interface UpsertProductsResult {
  upserted: number;
  new: number;
  updated: number;
+  newProducts: NewProductInfo[];           // Details of newly created products
+  productsNeedingImages: NewProductInfo[]; // Products (new or updated) that need image downloads
 }

 /**
@@ -41,12 +58,14 @@ export async function upsertStoreProducts(
  options: { dryRun?: boolean } = {}
 ): Promise<UpsertProductsResult> {
  if (products.length === 0) {
-    return { upserted: 0, new: 0, updated: 0 };
+    return { upserted: 0, new: 0, updated: 0, newProducts: [], productsNeedingImages: [] };
  }

  const { dryRun = false } = options;
  let newCount = 0;
  let updatedCount = 0;
+  const newProducts: NewProductInfo[] = [];
+  const productsNeedingImages: NewProductInfo[] = [];

  // Process in batches
  for (let i = 0; i < products.length; i += BATCH_SIZE) {
@@ -68,10 +87,10 @@ export async function upsertStoreProducts(
        const result = await client.query(
          `INSERT INTO store_products (
            dispensary_id, provider, provider_product_id, provider_brand_id,
-            name, brand_name, category, subcategory,
+            name_raw, brand_name_raw, category_raw, subcategory_raw,
            price_rec, price_med, price_rec_special, price_med_special,
            is_on_special, discount_percent,
-            is_in_stock, stock_status,
+            is_in_stock, stock_status, stock_quantity, total_quantity_available,
            thc_percent, cbd_percent,
            image_url,
            first_seen_at, last_seen_at, updated_at
@@ -80,17 +99,17 @@ export async function upsertStoreProducts(
            $5, $6, $7, $8,
            $9, $10, $11, $12,
            $13, $14,
-            $15, $16,
-            $17, $18,
-            $19,
+            $15, $16, $17, $17,
+            $18, $19,
+            $20,
            NOW(), NOW(), NOW()
          )
          ON CONFLICT (dispensary_id, provider, provider_product_id)
          DO UPDATE SET
-            name = EXCLUDED.name,
-            brand_name = EXCLUDED.brand_name,
-            category = EXCLUDED.category,
-            subcategory = EXCLUDED.subcategory,
+            name_raw = EXCLUDED.name_raw,
+            brand_name_raw = EXCLUDED.brand_name_raw,
+            category_raw = EXCLUDED.category_raw,
+            subcategory_raw = EXCLUDED.subcategory_raw,
            price_rec = EXCLUDED.price_rec,
            price_med = EXCLUDED.price_med,
            price_rec_special = EXCLUDED.price_rec_special,
@@ -99,12 +118,14 @@ export async function upsertStoreProducts(
            discount_percent = EXCLUDED.discount_percent,
            is_in_stock = EXCLUDED.is_in_stock,
            stock_status = EXCLUDED.stock_status,
+            stock_quantity = EXCLUDED.stock_quantity,
+            total_quantity_available = EXCLUDED.total_quantity_available,
            thc_percent = EXCLUDED.thc_percent,
            cbd_percent = EXCLUDED.cbd_percent,
            image_url = EXCLUDED.image_url,
            last_seen_at = NOW(),
            updated_at = NOW()
-          RETURNING (xmax = 0) as is_new`,
+          RETURNING id, (xmax = 0) as is_new, (local_image_path IS NOT NULL) as has_local_image`,
          [
            product.dispensaryId,
            product.platform,
@@ -122,16 +143,38 @@ export async function upsertStoreProducts(
            productPricing?.discountPercent,
            productAvailability?.inStock ?? true,
            productAvailability?.stockStatus || 'unknown',
-            product.thcPercent,
-            product.cbdPercent,
+            productAvailability?.quantity ?? null,  // stock_quantity and total_quantity_available
+            // Clamp THC/CBD to valid percentage range (0-100) - some products report mg as %
+            product.thcPercent !== null && product.thcPercent <= 100 ? product.thcPercent : null,
+            product.cbdPercent !== null && product.cbdPercent <= 100 ? product.cbdPercent : null,
            product.primaryImageUrl,
          ]
        );

-        if (result.rows[0]?.is_new) {
+        const row = result.rows[0];
+        const productInfo: NewProductInfo = {
+          id: row.id,
+          externalProductId: product.externalProductId,
+          name: product.name,
+          brandName: product.brandName,
+          primaryImageUrl: product.primaryImageUrl,
+          hasLocalImage: row.has_local_image,
+        };
+
+        if (row.is_new) {
          newCount++;
+          // Track new products
+          newProducts.push(productInfo);
+          // New products always need images (if they have a source URL)
+          if (product.primaryImageUrl && !row.has_local_image) {
+            productsNeedingImages.push(productInfo);
+          }
        } else {
          updatedCount++;
+          // Updated products need images only if they don't have a local image yet
+          if (product.primaryImageUrl && !row.has_local_image) {
+            productsNeedingImages.push(productInfo);
+          }
        }
      }

@@ -148,6 +191,8 @@ export async function upsertStoreProducts(
    upserted: newCount + updatedCount,
    new: newCount,
    updated: updatedCount,
+    newProducts,
+    productsNeedingImages,
  };
 }

@@ -212,8 +257,9 @@ export async function createStoreProductSnapshots(
        productAvailability?.inStock ?? true,
        productAvailability?.quantity,
        productAvailability?.stockStatus || 'unknown',
-        product.thcPercent,
-        product.cbdPercent,
+        // Clamp THC/CBD to valid percentage range (0-100) - some products report mg as %
+        product.thcPercent !== null && product.thcPercent <= 100 ? product.thcPercent : null,
+        product.cbdPercent !== null && product.cbdPercent <= 100 ? product.cbdPercent : null,
        product.primaryImageUrl,
        JSON.stringify(product.rawProduct),
      ]);
@@ -229,7 +275,7 @@ export async function createStoreProductSnapshots(
      `INSERT INTO store_product_snapshots (
        dispensary_id, provider, provider_product_id, crawl_run_id,
        captured_at,
-        name, brand_name, category, subcategory,
+        name_raw, brand_name_raw, category_raw, subcategory_raw,
        price_rec, price_med, price_rec_special, price_med_special,
        is_on_special, discount_percent,
        is_in_stock, stock_quantity, stock_status,
@@ -245,6 +291,202 @@ export async function createStoreProductSnapshots(
  return { created };
 }

+// ============================================================
+// VARIANT UPSERTS
+// ============================================================
+
+export interface UpsertVariantsResult {  // Counters returned by upsertProductVariants
+  upserted: number;          // new + updated
+  new: number;               // variants inserted for the first time
+  updated: number;           // variants that already existed and were overwritten
+  snapshotsCreated: number;  // product_variant_snapshots rows written (one per upserted variant)
+}
+
+/**
+ * Extract variant data from raw Dutchie product: one entry per item in POSMetaData.children
+ */
+function extractVariantsFromRaw(rawProduct: any): any[] {
+  const children = rawProduct?.POSMetaData?.children || [];  // missing metadata/children -> no variants
+  return children.map((child: any) => ({
+    option: child.option || child.key || '',  // falls back to key, then to an empty string
+    canonicalSku: child.canonicalSKU || null,
+    canonicalId: child.canonicalID || null,
+    canonicalName: child.canonicalName || null,
+    priceRec: child.recPrice || child.price || null,  // '||' means a 0 price also falls through
+    priceMed: child.medPrice || null,
+    priceRecSpecial: child.recSpecialPrice || null,
+    priceMedSpecial: child.medSpecialPrice || null,
+    quantity: child.quantityAvailable ?? child.quantity ?? null,  // '??' keeps a legitimate 0
+    inStock: (child.quantityAvailable ?? child.quantity ?? 0) > 0,  // missing quantity counts as out of stock
+  }));
+}
+
+/**
+ * Parse weight value and unit from option string; unparseable input returns { value: null, unit: null }
+ * e.g., "1g" -> { value: 1, unit: "g" }
+ *       "3.5g" -> { value: 3.5, unit: "g" }
+ *       "1/8oz" -> { value: 0.125, unit: "oz" }   ("1/0oz" and "." -> nulls, never Infinity/NaN)
+ */
+function parseWeight(option: string): { value: number | null; unit: string | null } {
+  if (!option) return { value: null, unit: null };
+
+  // Handle fractions like "1/8oz" (no unit defaults to 'oz'); a zero denominator is treated as unparseable
+  const fractionMatch = option.match(/^(\d+)\/(\d+)\s*(g|oz|mg|ml)?$/i);
+  if (fractionMatch) {
+    const value = parseInt(fractionMatch[1], 10) / parseInt(fractionMatch[2], 10);
+    return Number.isFinite(value) ? { value, unit: fractionMatch[3]?.toLowerCase() || 'oz' } : { value: null, unit: null };
+  }
+
+  // Handle decimals like "3.5g" or "100mg": at least one digit and at most one decimal point
+  const decimalMatch = option.match(/^(\d+\.?\d*|\.\d+)\s*(g|oz|mg|ml|each)?$/i);
+  if (decimalMatch) {
+    return {
+      value: parseFloat(decimalMatch[1]),
+      unit: decimalMatch[2]?.toLowerCase() || null
+    };
+  }
+
+  return { value: null, unit: null };
+}
+
+/**
+ * Upsert variants for products and create variant snapshots (one snapshot per upserted variant)
+ */
+export async function upsertProductVariants(
+  pool: Pool,
+  dispensaryId: number,
+  products: NormalizedProduct[],
+  crawlRunId: number | null,
+  options: { dryRun?: boolean } = {}
+): Promise<UpsertVariantsResult> {
+  if (products.length === 0) {
+    return { upserted: 0, new: 0, updated: 0, snapshotsCreated: 0 };
+  }
+
+  const { dryRun = false } = options;
+  let newCount = 0;
+  let updatedCount = 0;
+  let snapshotsCreated = 0;
+
+  for (const product of products) {
+    // Get the store_product_id for this product
+    const productResult = await pool.query(
+      `SELECT id FROM store_products
+       WHERE dispensary_id = $1 AND provider = $2 AND provider_product_id = $3`,
+      [dispensaryId, product.platform, product.externalProductId]
+    );
+
+    if (productResult.rows.length === 0) {
+      continue; // Product not found, skip variants
+    }
+
+    const storeProductId = productResult.rows[0].id;
+    const variants = extractVariantsFromRaw(product.rawProduct);
+
+    if (variants.length === 0) {
+      continue; // No variants to process
+    }
+
+    if (dryRun) {
+      // Dry run only logs; the product lookup above has still been executed
+      console.log(`[DryRun] Would upsert ${variants.length} variants for product ${product.externalProductId}`);
+      continue;
+    }
+
+    for (const variant of variants) {
+      const { value: weightValue, unit: weightUnit } = parseWeight(variant.option);
+      // On special when either special price is set and lower than its regular price
+      const isOnSpecial = (variant.priceRecSpecial !== null && variant.priceRecSpecial < variant.priceRec) ||
+                          (variant.priceMedSpecial !== null && variant.priceMedSpecial < variant.priceMed);
+
+      // Upsert variant
+      const variantResult = await pool.query(
+        `INSERT INTO product_variants (
+          store_product_id, dispensary_id,
+          option, canonical_sku, canonical_id, canonical_name,
+          price_rec, price_med, price_rec_special, price_med_special,
+          quantity, quantity_available, in_stock, is_on_special,
+          weight_value, weight_unit,
+          first_seen_at, last_seen_at, updated_at
+        ) VALUES (
+          $1, $2,
+          $3, $4, $5, $6,
+          $7, $8, $9, $10,
+          $11, $11, $12, $13,
+          $14, $15,
+          NOW(), NOW(), NOW()
+        )
+        ON CONFLICT (store_product_id, option)
+        DO UPDATE SET
+          canonical_sku = COALESCE(EXCLUDED.canonical_sku, product_variants.canonical_sku),
+          canonical_id = COALESCE(EXCLUDED.canonical_id, product_variants.canonical_id),
+          canonical_name = COALESCE(EXCLUDED.canonical_name, product_variants.canonical_name),
+          price_rec = EXCLUDED.price_rec,
+          price_med = EXCLUDED.price_med,
+          price_rec_special = EXCLUDED.price_rec_special,
+          price_med_special = EXCLUDED.price_med_special,
+          quantity = EXCLUDED.quantity,
+          quantity_available = EXCLUDED.quantity_available,
+          in_stock = EXCLUDED.in_stock,
+          is_on_special = EXCLUDED.is_on_special,
+          weight_value = COALESCE(EXCLUDED.weight_value, product_variants.weight_value),
+          weight_unit = COALESCE(EXCLUDED.weight_unit, product_variants.weight_unit),
+          last_seen_at = NOW(),
+          last_price_change_at = CASE
+            WHEN product_variants.price_rec IS DISTINCT FROM EXCLUDED.price_rec
+              OR product_variants.price_rec_special IS DISTINCT FROM EXCLUDED.price_rec_special
+            THEN NOW()
+            ELSE product_variants.last_price_change_at
+          END,
+          last_stock_change_at = CASE
+            WHEN product_variants.quantity IS DISTINCT FROM EXCLUDED.quantity
+            THEN NOW()
+            ELSE product_variants.last_stock_change_at
+          END,
+          updated_at = NOW()
+        RETURNING id, (xmax = 0) as is_new`,
+        [
+          storeProductId, dispensaryId,
+          variant.option, variant.canonicalSku, variant.canonicalId, variant.canonicalName,
+          variant.priceRec, variant.priceMed, variant.priceRecSpecial, variant.priceMedSpecial,
+          variant.quantity, variant.inStock, isOnSpecial,  // quantity is $11 and fills both quantity columns
+          weightValue, weightUnit,
+        ]
+      );
+
+      const variantId = variantResult.rows[0].id;
+      if (variantResult.rows[0]?.is_new) {  // is_new comes from (xmax = 0) in the RETURNING clause
+        newCount++;
+      } else {
+        updatedCount++;
+      }
+
+      // Create variant snapshot
+      await pool.query(
+        `INSERT INTO product_variant_snapshots (
+          product_variant_id, store_product_id, dispensary_id, crawl_run_id,
+          option,
+          price_rec, price_med, price_rec_special, price_med_special,
+          quantity, in_stock, is_on_special,
+          captured_at
+        ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, NOW())`,
+        [
+          variantId, storeProductId, dispensaryId, crawlRunId,
+          variant.option,
+          variant.priceRec, variant.priceMed, variant.priceRecSpecial, variant.priceMedSpecial,
+          variant.quantity, variant.inStock, isOnSpecial,
+        ]
+      );
+      snapshotsCreated++;
+    }
+  }
+
+  return {
+    upserted: newCount + updatedCount,
+    new: newCount,
+    updated: updatedCount,
+    snapshotsCreated,
+  };
+}
+
 // ============================================================
 // DISCONTINUED PRODUCTS
 // ============================================================
@@ -366,6 +608,19 @@ export async function upsertBrands(
 // FULL HYDRATION
 // ============================================================

+export interface ImageDownloadResult {  // Counters returned by downloadProductImages
+  downloaded: number;
+  skipped: number;     // includes every candidate when storage is not ready or dryRun is set
+  failed: number;
+  bytesTotal: number;  // presumably the total size of downloaded images - TODO confirm
+}
+
+export interface DispensaryContext {  // Store details passed to downloadProductImages
+  stateCode: string;
+  storeSlug: string;
+  hasExistingProducts?: boolean;  // True if store already has products with local images
+}
+
 export interface HydratePayloadResult {
  productsUpserted: number;
  productsNew: number;
@@ -373,6 +628,157 @@ export interface HydratePayloadResult {
  productsDiscontinued: number;
  snapshotsCreated: number;
  brandsCreated: number;
+  variantsUpserted: number;
+  variantsNew: number;
+  variantSnapshotsCreated: number;
+  imagesDownloaded: number;
+  imagesSkipped: number;
+  imagesFailed: number;
+  imagesBytesTotal: number;
+}
+
+/**
+ * Helper to create slug from string.
+ *
+ * Lowercases the input, collapses every run of characters outside [a-z0-9]
+ * into a single '-', strips leading/trailing dashes and caps the result at
+ * 50 characters. Falls back to 'unknown' when nothing usable remains
+ * (empty or all-symbol input).
+ *
+ * @param str - Text to convert.
+ * @returns A non-empty slug of at most 50 characters with no leading or
+ *          trailing '-'.
+ */
+function slugify(str: string): string {
+  return str
+    .toLowerCase()
+    .replace(/[^a-z0-9]+/g, '-')
+    .replace(/^-+|-+$/g, '')
+    .substring(0, 50)
+    // Truncating at 50 chars can cut right after a separator; trim again so
+    // the slug never ends with a dangling '-'.
+    .replace(/-+$/, '') || 'unknown';
+}
+
+/**
+ * Download images for new products and update their local paths.
+ *
+ * Only products with a primaryImageUrl are considered. They are processed in
+ * sequential batches; the downloads inside one batch run in parallel. After a
+ * successful download the product's store_products row is updated with the
+ * full-size URL (local_image_path) and a JSON object holding the
+ * full/medium/thumb URLs (images). A failure for one product is counted and
+ * logged; it does not stop the remaining downloads.
+ *
+ * @param pool - Database pool used for the store_products updates.
+ * @param newProducts - Candidate products; entries without primaryImageUrl are ignored.
+ * @param dispensaryContext - Supplies stateCode and storeSlug for the image context.
+ * @param options - dryRun: only log what would be downloaded (default false).
+ *                  concurrency: batch size (default 5); values below 1 or
+ *                  non-numeric values are treated as 1.
+ * @returns Counts of downloaded, skipped and failed images plus total bytes downloaded.
+ */
+export async function downloadProductImages(
+  pool: Pool,
+  newProducts: NewProductInfo[],
+  dispensaryContext: DispensaryContext,
+  options: { dryRun?: boolean; concurrency?: number } = {}
+): Promise<ImageDownloadResult> {
+  const { dryRun = false, concurrency = 5 } = options;
+
+  // The batch size is the loop step below: 0 or a negative value would never
+  // advance (infinite loop) and NaN would silently process nothing, so clamp
+  // it to a whole number >= 1.
+  const batchSize = Math.max(1, Math.floor(concurrency) || 1);
+
+  // Filter products that have images to download
+  const productsWithImages = newProducts.filter(p => p.primaryImageUrl);
+
+  if (productsWithImages.length === 0) {
+    return { downloaded: 0, skipped: 0, failed: 0, bytesTotal: 0 };
+  }
+
+  // Check if image storage is ready
+  if (!isImageStorageReady()) {
+    console.warn('[ImageDownload] Image storage not initialized, skipping downloads');
+    return { downloaded: 0, skipped: productsWithImages.length, failed: 0, bytesTotal: 0 };
+  }
+
+  if (dryRun) {
+    console.log(`[DryRun] Would download ${productsWithImages.length} images`);
+    return { downloaded: 0, skipped: productsWithImages.length, failed: 0, bytesTotal: 0 };
+  }
+
+  let downloaded = 0;
+  let skipped = 0;
+  let failed = 0;
+  let bytesTotal = 0;
+
+  // Process in batches with concurrency limit
+  for (let i = 0; i < productsWithImages.length; i += batchSize) {
+    const batch = productsWithImages.slice(i, i + batchSize);
+
+    // allSettled (not all) so one rejected download/update cannot abort the batch
+    const results = await Promise.allSettled(
+      batch.map(async (product) => {
+        const ctx: ProductImageContext = {
+          stateCode: dispensaryContext.stateCode,
+          storeSlug: dispensaryContext.storeSlug,
+          brandSlug: slugify(product.brandName || 'unknown'),
+          productId: product.externalProductId,
+        };
+
+        const result = await downloadProductImage(product.primaryImageUrl!, ctx, { skipIfExists: true });
+
+        if (result.success) {
+          // Update the database with local image path
+          const imagesJson = JSON.stringify({
+            full: result.urls!.full,
+            medium: result.urls!.medium,
+            thumb: result.urls!.thumb,
+          });
+
+          await pool.query(
+            `UPDATE store_products
+             SET local_image_path = $1, images = $2
+             WHERE id = $3`,
+            [result.urls!.full, imagesJson, product.id]
+          );
+        }
+
+        return result;
+      })
+    );
+
+    for (const result of results) {
+      if (result.status === 'fulfilled') {
+        const downloadResult = result.value;
+        if (downloadResult.success) {
+          if (downloadResult.skipped) {
+            skipped++;
+          } else {
+            downloaded++;
+            bytesTotal += downloadResult.bytesDownloaded || 0;
+          }
+        } else {
+          failed++;
+          console.warn(`[ImageDownload] Failed: ${downloadResult.error}`);
+        }
+      } else {
+        // Rejected task: the download helper or the UPDATE query threw
+        failed++;
+        console.error(`[ImageDownload] Error:`, result.reason);
+      }
+    }
+  }
+
+  console.log(`[ImageDownload] Downloaded: ${downloaded}, Skipped: ${skipped}, Failed: ${failed}, Bytes: ${bytesTotal}`);
+  return { downloaded, skipped, failed, bytesTotal };
+}
+
+/**
+ * Build the DispensaryContext used for image paths.
+ *
+ * Reads the dispensary's state, slug and name, and asks the database whether
+ * any of its store_products rows already has a local_image_path. Returns
+ * null when the dispensary does not exist or when the query throws (the
+ * error is logged, not propagated).
+ */
+async function getDispensaryContext(pool: Pool, dispensaryId: number): Promise<DispensaryContext | null> {
+  try {
+    const { rows } = await pool.query(
+      `SELECT
+        d.state,
+        d.slug,
+        d.name,
+        EXISTS(
+          SELECT 1 FROM store_products sp
+          WHERE sp.dispensary_id = d.id
+          AND sp.local_image_path IS NOT NULL
+          LIMIT 1
+        ) as has_local_images
+      FROM dispensaries d
+      WHERE d.id = $1`,
+      [dispensaryId]
+    );
+
+    // No matching dispensary
+    if (rows.length < 1) {
+      return null;
+    }
+
+    const dispensary = rows[0];
+    const stateCode = dispensary.state || 'unknown';
+    // Prefer the stored slug; otherwise derive one from the name (or the id)
+    const storeSlug = dispensary.slug || slugify(dispensary.name || `store-${dispensaryId}`);
+
+    return { stateCode, storeSlug, hasExistingProducts: dispensary.has_local_images };
+  } catch (err) {
+    console.error('[getDispensaryContext] Error:', err);
+    return null;
+  }
 }

 /**
@@ -383,9 +789,9 @@ export async function hydrateToCanonical(
  dispensaryId: number,
  normResult: NormalizationResult,
  crawlRunId: number | null,
-  options: { dryRun?: boolean } = {}
+  options: { dryRun?: boolean; downloadImages?: boolean } = {}
 ): Promise<HydratePayloadResult> {
-  const { dryRun = false } = options;
+  const { dryRun = false, downloadImages: shouldDownloadImages = true } = options;

  // 1. Upsert brands
  const brandResult = await upsertBrands(pool, normResult.brands, { dryRun });
@@ -399,7 +805,7 @@ export async function hydrateToCanonical(
    { dryRun }
  );

-  // 3. Create snapshots
+  // 3. Create product snapshots
  const snapshotResult = await createStoreProductSnapshots(
    pool,
    dispensaryId,
@@ -410,7 +816,16 @@ export async function hydrateToCanonical(
    { dryRun }
  );

-  // 4. Mark discontinued products
+  // 4. Upsert variants and create variant snapshots
+  const variantResult = await upsertProductVariants(
+    pool,
+    dispensaryId,
+    normResult.products,
+    crawlRunId,
+    { dryRun }
+  );
+
+  // 5. Mark discontinued products
  const currentProductIds = new Set(
    normResult.products.map((p) => p.externalProductId)
  );
@@ -424,6 +839,36 @@ export async function hydrateToCanonical(
    { dryRun }
  );

+  // 6. Download images for products that need them
+  // This includes:
+  //   - New products (always need images)
+  //   - Updated products that don't have local images yet (backfill)
+  // This avoids:
+  //   - Filesystem checks for products that already have local images
+  //   - Unnecessary HTTP requests for products with existing images
+  let imageResult: ImageDownloadResult = { downloaded: 0, skipped: 0, failed: 0, bytesTotal: 0 };
+
+  if (shouldDownloadImages && productResult.productsNeedingImages.length > 0) {
+    const dispensaryContext = await getDispensaryContext(pool, dispensaryId);
+
+    if (dispensaryContext) {
+      // Split the work list for the log line; both figures come from the same
+      // list so they always add up to the total printed next to them.
+      // NOTE(review): assumes hasLocalImage is what separates new products
+      // from backfill candidates — confirm against NewProductInfo.
+      const newCount = productResult.productsNeedingImages.filter(p => !p.hasLocalImage).length;
+      const backfillCount = productResult.productsNeedingImages.length - newCount;
+      console.log(`[Hydration] Downloading images for ${productResult.productsNeedingImages.length} products (${newCount} new, ${backfillCount} backfill)...`);
+      imageResult = await downloadProductImages(
+        pool,
+        productResult.productsNeedingImages,
+        dispensaryContext,
+        { dryRun }
+      );
+    } else {
+      // Without a context the image context cannot be filled in; leave the zeroed result
+      console.warn(`[Hydration] Could not get dispensary context for ID ${dispensaryId}, skipping image downloads`);
+    }
+  } else if (productResult.productsNeedingImages.length === 0 && productResult.upserted > 0) {
+    // All products already have local images
+    console.log(`[Hydration] All ${productResult.upserted} products already have local images, skipping downloads`);
+  }
+
  return {
    productsUpserted: productResult.upserted,
    productsNew: productResult.new,
@@ -431,5 +876,12 @@ export async function hydrateToCanonical(
    productsDiscontinued: discontinuedCount,
    snapshotsCreated: snapshotResult.created,
    brandsCreated: brandResult.new,
+    variantsUpserted: variantResult.upserted,
+    variantsNew: variantResult.new,
+    variantSnapshotsCreated: variantResult.snapshotsCreated,
+    imagesDownloaded: imageResult.downloaded,
+    imagesSkipped: imageResult.skipped,
+    imagesFailed: imageResult.failed,
+    imagesBytesTotal: imageResult.bytesTotal,
  };
 }
--- a/Show More
+++ b/Show More