Compare commits

...

70 Commits

Author SHA1 Message Date
Kelly
97b1ab23d8 fix(ci): Use YAML list format for docker-buildx build_args
The woodpecker docker-buildx plugin expects build_args as a YAML list,
not a comma-separated string. The previous format resulted in all args
being passed as a single malformed arg with "*=" prefix.

This fix ensures APP_GIT_SHA, APP_BUILD_TIME, etc. are properly passed
to the Dockerfile so the /api/version endpoint returns correct values.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-10 14:56:18 -07:00
kelly
7d3e91b2e6 Merge pull request 'feat(wordpress): Add new Elementor widgets and dynamic selectors v1.6.0' (#17) from feat/wordpress-widgets into master 2025-12-10 20:41:44 +00:00
Kelly
74957a9ec5 feat(wordpress): Add new Elementor widgets and dynamic selectors v1.6.0
New Widgets:
- Brand Grid: Display brands in a grid with product counts
- Category List: Show categories in grid/list/pills layouts
- Specials Grid: Display products on sale with discount badges

Enhanced Product Grid Widget:
- Dynamic category dropdown (fetches from API)
- Dynamic brand dropdown (fetches from API)
- "On Special Only" toggle filter

New Plugin Methods:
- fetch_categories() - Get categories from API
- fetch_brands() - Get brands from API
- fetch_specials() - Get products on sale
- get_category_options() - Cached options for Elementor
- get_brand_options() - Cached options for Elementor

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-10 13:41:17 -07:00
kelly
2d035c46cf Merge pull request 'fix: Findagram brands page crash and PWA icon errors' (#16) from fix/findagram-brands-crash into master 2025-12-10 20:11:40 +00:00
Kelly
53445fe72a fix: Findagram brands page crash and PWA icon errors
- Fix mapBrandForUI to use correct 'brand' field from API response
- Add null check in Brands.jsx filter to prevent crash on undefined names
- Fix BrandPenetrationService sps.brand_name -> sps.brand_name_raw
- Remove missing logo192.png and logo512.png from PWA manifest

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-10 13:06:23 -07:00
kelly
37cc8956c5 Merge pull request 'fix: Join states through dispensaries in BrandPenetrationService' (#15) from feat/ci-auto-merge into master 2025-12-10 19:36:06 +00:00
Kelly
197c82f921 fix: Join states through dispensaries in BrandPenetrationService
The store_products table doesn't have a state_id column - must join
through dispensaries to get state info. Also fixed column references
to use brand_name_raw and category_raw.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-10 12:18:10 -07:00
kelly
2c52493a9c Merge pull request 'fix(docker): Use npm install instead of npm ci for reliability' (#14) from feat/ci-auto-merge into master 2025-12-10 18:44:21 +00:00
Kelly
2ee2ba6b8c fix(docker): Use npm install instead of npm ci for reliability
npm ci can fail when package-lock.json has minor mismatches with
package.json. npm install is more forgiving and appropriate for
Docker builds where determinism is less critical than reliability.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-12-10 11:28:29 -07:00
kelly
bafcf1694a Merge pull request 'feat(analytics): Brand promotional history + specials fix + API key editing' (#13) from feat/ci-auto-merge into master
Reviewed-on: https://code.cannabrands.app/Creationshop/dispensary-scraper/pulls/13
2025-12-10 18:12:59 +00:00
Kelly
95792aab15 feat(analytics): Brand promotional history + specials fix + API key editing
- Add brand promotional history endpoint (GET /api/analytics/v2/brand/:name/promotions)
  - Tracks when products go on special, duration, discounts, quantity sold estimates
  - Aggregates by category with frequency metrics (weekly/monthly)
- Add quantity changes endpoint (GET /api/analytics/v2/store/:id/quantity-changes)
  - Filter by direction (increase/decrease/all) for sales vs restock estimation
- Fix canonical-upsert to populate stock_quantity and total_quantity_available
- Add API key edit functionality in admin UI
  - Edit allowed domains and IPs
  - Display domains in list view

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-12-10 10:59:03 -07:00
kelly
38ae2c3a3e Merge pull request 'feat/ci-auto-merge' (#12) from feat/ci-auto-merge into master
Reviewed-on: https://code.cannabrands.app/Creationshop/dispensary-scraper/pulls/12
2025-12-10 17:26:21 +00:00
Kelly
249d3c1b7f fix: Build args format for version info + schema-tolerant routes
CI/CD:
- Fix build_args format in woodpecker CI (comma-separated, not YAML list)
- This fixes "unknown" SHA/version showing on remote deployments

Backend schema-tolerant fixes (graceful fallbacks when tables missing):
- users.ts: Check which columns exist before querying
- worker-registry.ts: Return empty result if table doesn't exist
- task-service.ts: Add tableExists() helper, handle missing tables/views
- proxies.ts: Return totalProxies in test-all response

Frontend fixes:
- Proxies: Use total from response for accurate progress display
- SEO PagesTab: Dim Generate button when no AI provider active

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-10 09:53:21 -07:00
Kelly
9647f94f89 fix: Copy migrations folder to Docker image + fix SQL FILTER syntax
- Dockerfile: Add COPY migrations ./migrations so auto-migrate works on remote
- intelligence.ts: Fix FILTER clause placement in aggregate functions
  - FILTER must be inside AVG(), not wrapping ROUND()
  - Remove redundant FILTER on MIN (already filtered by WHERE)
  - Remove unsupported FILTER on PERCENTILE_CONT

These fixes resolve:
- "Failed to get task counts" (worker_tasks table missing)
- "FILTER specified but round is not an aggregate function" errors
- /national page "column m.state does not exist" (mv_state_metrics missing)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-10 09:38:05 -07:00
Kelly
afc288d2cf feat(ci): Auto-merge PRs after all type checks pass
Uses Gitea API to merge PR automatically when all typecheck jobs succeed.
Requires gitea_token secret in Woodpecker.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-10 09:27:26 -07:00
kelly
df01ce6aad Merge pull request 'feat: Auto-migrations on startup, worker exit location, proxy improvements' (#11) from feat/auto-migrations into master
Reviewed-on: https://code.cannabrands.app/Creationshop/dispensary-scraper/pulls/11
2025-12-10 16:07:17 +00:00
Kelly
aea93bc96b fix(ci): Revert volume caching - may have broken CI trigger 2025-12-10 08:53:10 -07:00
Kelly
4e84f30f8b feat: Auto-retry tasks, 403 proxy rotation, task deletion
- Fix 403 handler to rotate BOTH proxy and fingerprint (was only fingerprint)
- Add auto-retry logic to task service (retry up to max_retries before failing)
- Add error tooltip on task status badge showing retry count and error message
- Add DELETE /api/tasks/:id endpoint (only for non-running tasks)
- Add delete button to JobQueue task table

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-10 08:41:14 -07:00
Kelly
b20a0a4fa5 fix: Add generic delete method to ApiClient + CI speedups
- Add delete<T>() method to ApiClient for WorkersDashboard cleanup
- Add npm cache volume for faster npm ci
- Add TypeScript incremental builds with tsBuildInfoFile cache
- Should significantly speed up repeated CI runs

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-10 08:27:11 -07:00
Kelly
6eb1babc86 feat: Auto-migrations on startup, worker exit location, proxy improvements
- Add auto-migration system that runs SQL files from migrations/ on server startup
- Track applied migrations in schema_migrations table
- Show proxy exit location in Workers dashboard
- Add "Cleanup Stale" button to remove old workers
- Add remove button for individual workers
- Include proxy location (city, state, country) in worker heartbeats
- Update Proxy interface with location fields
- Re-enable bulk proxy import without ON CONFLICT

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-10 08:05:24 -07:00
kelly
9a9c2f76a2 Merge pull request 'feat: Stealth worker system with mandatory proxy rotation' (#10) from feat/stealth-worker-system into master
Reviewed-on: https://code.cannabrands.app/Creationshop/dispensary-scraper/pulls/10
2025-12-10 08:13:42 +00:00
Kelly
56cc171287 feat: Stealth worker system with mandatory proxy rotation
## Worker System
- Role-agnostic workers that can handle any task type
- Pod-based architecture with StatefulSet (5-15 pods, 5 workers each)
- Custom pod names (Aethelgard, Xylos, Kryll, etc.)
- Worker registry with friendly names and resource monitoring
- Hub-and-spoke visualization on JobQueue page

## Stealth & Anti-Detection (REQUIRED)
- Proxies are MANDATORY - workers fail to start without active proxies
- CrawlRotator initializes on worker startup
- Loads proxies from `proxies` table
- Auto-rotates proxy + fingerprint on 403 errors
- 12 browser fingerprints (Chrome, Firefox, Safari, Edge)
- Locale/timezone matching for geographic consistency

## Task System
- Renamed product_resync → product_refresh
- Task chaining: store_discovery → entry_point → product_discovery
- Priority-based claiming with FOR UPDATE SKIP LOCKED
- Heartbeat and stale task recovery

## UI Updates
- JobQueue: Pod visualization, resource monitoring on hover
- WorkersDashboard: Simplified worker list
- Removed unused filters from task list

## Other
- IP2Location service for visitor analytics
- Findagram consumer features scaffolding
- Documentation updates

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-10 00:44:59 -07:00
Kelly
0295637ed6 fix: Public API column mappings and OOS detection
- Fix store_products column references (name_raw, brand_name_raw, category_raw)
- Fix v_product_snapshots column references (crawled_at, *_cents pricing)
- Fix dispensaries column references (zipcode, logo_image, remove hours/amenities)
- Add services and license_type to dispensary API response
- Add consecutive_misses OOS tracking to product-resync handler
- Add migration 075 for consecutive_misses column
- Add CRAWL_PIPELINE.md documentation

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-09 20:44:53 -07:00
Kelly
9c6dd37316 fix(ci): Use YAML list format for docker-buildx build_args
The woodpecker docker-buildx plugin requires build_args as a YAML list,
not a comma-separated string. This fixes the build version/hash not being
passed to the Docker image.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-09 18:03:50 -07:00
kelly
524d13209a Merge pull request 'fix: Remove legacy imports from task handlers' (#9) from fix/task-handler-typescript-errors into master
Reviewed-on: https://code.cannabrands.app/Creationshop/dispensary-scraper/pulls/9
2025-12-10 00:42:39 +00:00
Kelly
9199db3927 fix: Remove legacy imports from task handlers
- Remove non-existent DutchieClient import from product-resync and entry-point-discovery
- Remove non-existent DiscoveryCrawler import from store-discovery
- Use scrapeStore from scraper-v2 for product resync
- Use discoverState from discovery module for store discovery
- Fix Pool type by using getPool() instead of pool wrapper
- Update FullDiscoveryResult property access to use correct field names

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-09 17:25:19 -07:00
Kelly
a0652c7c73 fix(types): Fix TypeScript errors in TasksDashboard, Layout, and Users
- Fix TaskCounts type in api.ts to match TasksDashboard interface
- Make VersionInfo.version optional in Layout.tsx
- Fix boolean type in Users.tsx disabled prop

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-09 17:02:40 -07:00
Kelly
89c262ee20 feat(tasks): Add unified task-based worker architecture
Replace fragmented job systems (job_schedules, dispensary_crawl_jobs, SyncOrchestrator)
with a single unified task queue:

- Add worker_tasks table with atomic task claiming via SELECT FOR UPDATE SKIP LOCKED
- Add TaskService for CRUD, claiming, and capacity metrics
- Add TaskWorker with role-based handlers (resync, discovery, analytics)
- Add /api/tasks endpoints for management and migration from legacy systems
- Add TasksDashboard UI and integrate task counts into main dashboard
- Add comprehensive documentation

Task roles: store_discovery, entry_point_discovery, product_discovery, product_resync, analytics_refresh

Run workers with: WORKER_ROLE=product_resync npx tsx src/tasks/task-worker.ts

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-09 16:27:03 -07:00
Kelly
7f9cf559cf fix(k8s): Update worker deployment to use v2 hydration worker
The old dutchie-az/services/worker.js no longer exists. Workers now use
the hydration pipeline at dist/scripts/run-hydration.js with --loop mode.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-09 15:01:18 -07:00
Kelly
bbe039c868 feat(api): Add job queue management endpoints and fix SQL type errors
- Add GET /api/job-queue/available - list dispensaries available for crawling
- Add GET /api/job-queue/history - get recent job history with results
- Add POST /api/job-queue/enqueue-batch - queue multiple dispensaries at once
- Add POST /api/job-queue/enqueue-state - queue all crawl-enabled dispensaries for a state
- Add POST /api/job-queue/clear-pending - clear pending jobs with optional filters
- Fix SQL parameter type errors by adding explicit casts ($2::text, $3::integer)
- Fix route ordering to prevent /:id from matching /available and /history

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-09 14:10:55 -07:00
Kelly
4e5c09a2a5 chore(dashboard): Remove DeployStatus block
Version info already shown in Layout sidebar header.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-09 14:10:22 -07:00
Kelly
7f65598332 feat(admin): Show version info at top of sidebar
- Add package.json version to /api/version endpoint
- Move version display from footer to top (next to logo)
- Show format: v1.5.1 (abc1234) - 12/9/2024

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-09 13:58:36 -07:00
Kelly
75315ed91e fix(ci): Use comma-separated build_args for docker-buildx plugin
The docker-buildx plugin expects build_args as a comma-separated string,
not a YAML list. This should fix the build_sha/build_time being null.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-09 13:56:37 -07:00
Kelly
7fe7d17b43 fix(consumer): Use relative API URLs for findadispo/findagram
The consumer frontends were hardcoded to use cannaiq.co as the API
URL, but each domain has its own /api path in the ingress that routes
to the shared backend. Using relative URLs allows each site to make
API calls to its own domain.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-09 13:38:10 -07:00
Kelly
7e517b5801 ci: Use self-hosted base images to avoid Docker Hub rate limits
Cached node:20, node:20-slim, and nginx:alpine to code.cannabrands.app.
No more Docker Hub dependency for builds.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-09 13:07:21 -07:00
Kelly
38ba9021d1 ci: Retry build (Docker Hub rate limit) 2025-12-09 12:58:36 -07:00
Kelly
ddebad48d3 ci: Remove auto-migrations from deploy step
Database was restored from backup - no migrations needed.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-09 12:52:04 -07:00
Kelly
1cebf2e296 fix(health): Add build_sha and build_time to health endpoint
Reads APP_GIT_SHA and APP_BUILD_TIME env vars set during Docker build.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-09 12:22:52 -07:00
Kelly
1d6e67d837 feat(api): Add store metrics endpoints with localhost bypass
New public API v1 endpoints for third-party integrations:
- GET /api/v1/stores/:id/metrics - Store performance metrics
- GET /api/v1/stores/:id/product-metrics - Product-level price changes
- GET /api/v1/stores/:id/competitor-snapshot - Competitive intelligence

Also adds localhost IP bypass for local development testing.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-09 12:14:13 -07:00
Kelly
cfb4b6e4ce fix(cannaiq): Fix TypeScript error in DeployStatus component
Properly destructure api.get response which returns { data: T }

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-09 12:08:29 -07:00
Kelly
f418c403d6 feat(auth): Add *.cannabrands.app to trusted origins whitelist
Adds pattern-based origin matching to support wildcard subdomains.
All *.cannabrands.app origins now bypass API key authentication.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-09 12:06:14 -07:00
Kelly
be4221af46 ci: Retrigger build 2025-12-09 11:54:16 -07:00
Kelly
ca07606b05 feat(k8s): Add Redis deployment for production
- Add k8s/redis.yaml with Redis 7 Alpine deployment
- Add REDIS_HOST and REDIS_PORT to configmap
- Redis configured with 200MB max memory and LRU eviction
- 1GB persistent volume for data persistence

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-09 11:40:11 -07:00
Kelly
baf1bf2eb7 fix(health): Require Redis in production, optional in local
Redis health check now returns error status when not configured in
production/staging environments, but remains optional in local dev.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-09 11:38:49 -07:00
Kelly
4ef3a8d72b fix(build): Fix TypeScript errors breaking CI build
- Add missing 'original' property to LocalImageSizes in brand logo download
- Remove test scripts with type errors (test-image-download.ts, test-stealth-with-db.ts)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-09 11:36:28 -07:00
Kelly
09dd756eff feat(admin): Add deploy status panel to dashboard
Shows running version vs latest git commit, pipeline status with steps,
and how many commits behind if not on latest. Uses Woodpecker and Gitea
APIs to fetch CI/CD information. Auto-refreshes every 30 seconds.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-09 11:26:41 -07:00
Kelly
ec8ef6210c ci: Run migrations inside K8s cluster after deploy
DB is internal to the cluster, so migrations must run via kubectl exec
into the scraper pod after deployment completes.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-09 11:16:21 -07:00
Kelly
a9b7a4d7a9 ci: Add proper SQL migration runner with tracking
- Creates run-migrations.ts that reads migrations/*.sql files
- Tracks applied migrations in schema_migrations table by filename
- Handles existing version-based schema by adding filename column
- CI now runs migrations before deploy

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-09 11:12:50 -07:00
Kelly
5119d5ccf9 ci: Add migration step before deploy
Migrations now run automatically after Docker builds but before K8s deploy.
Requires DATABASE_URL secret to be configured in Woodpecker.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-09 11:09:49 -07:00
Kelly
91efd1d03d feat(images): Add local image storage with on-demand resizing
- Store product images locally with hierarchy: /images/products/<state>/<store>/<brand>/<product>/
- Add /img/* proxy endpoint for on-demand resizing via Sharp
- Implement per-product image checking to skip existing downloads
- Fix pathToUrl() to correctly generate /images/... URLs
- Add frontend getImageUrl() helper with preset sizes (thumb, medium, large)
- Update all product pages to use optimized image URLs
- Add stealth session support for Dutchie GraphQL crawls
- Include test scripts for crawl and image verification

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-09 11:04:50 -07:00
Kelly
aa776226b0 fix(consumer): Wire findagram/findadispo to public API
- Update Dockerfiles to use cannaiq.co as API base URL
- Change findagram API client from /api/az to /api/v1 endpoints
- Add trusted origin bypass in public-api middleware for consumer sites
- Consumer sites (findagram.co, findadispo.com) can now access /api/v1
  endpoints without API key authentication

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-09 11:04:50 -07:00
kelly
e9435150e9 Merge pull request 'feature/wp-plugin-versioning-and-fixes' (#7) from feature/wp-plugin-versioning-and-fixes into master 2025-12-09 17:15:33 +00:00
Kelly
d399b966e6 ci: trigger build 2025-12-09 10:03:29 -07:00
Kelly
f5f0e25384 ci: trigger build 2025-12-09 10:03:06 -07:00
Kelly
04de33e5f7 fix(ci): Use correct container name 'worker' for scraper-worker deployment
Verified via kubectl: container name in cluster is 'worker', not 'scraper-worker'.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-09 09:43:39 -07:00
Kelly
37dfea25e1 feat: WordPress plugin versioning + heatmap fix + dynamic latest download
- Add VERSION file (1.5.4) for tracking WP plugin version
- Update plugin headers to 1.5.4 (cannaiq-menus.php, crawlsy-menus.php)
- Add dynamic /downloads/cannaiq-menus-latest.zip route that auto-redirects
  to highest version (no manual symlinks needed)
- Update frontend download links to use -latest.zip
- Fix StateHeatmap.tsx to parse API values as numbers (fixes string concat bug)
- Document versioning rules in CLAUDE.md

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-09 09:43:39 -07:00
Kelly
e2166bc25f fix(cannaiq): Parse heatmap values as numbers in frontend
Ensures values from the API are parsed as numbers before using them
in calculations. Fixes string concatenation bug in stats summary.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-09 09:43:39 -07:00
kelly
b5e8f039bf Merge pull request 'fix(backend): Parse bigint values in heatmap API response' (#6) from feature/seo-template-library-and-enhancements into master
Reviewed-on: https://code.cannabrands.app/Creationshop/dispensary-scraper/pulls/6
2025-12-09 16:26:19 +00:00
Kelly
346e6d1cd8 perf(ci): Parallelize builds, typechecks on PRs only
- PRs: 4 parallel typechecks (~5 mins)
- Master: 4 parallel Docker builds + deploy (~10-15 mins)
- Total time reduced from ~2 hours to ~15-20 mins

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-09 09:08:12 -07:00
Kelly
be434d25e3 fix(backend): Round heatmap values to 2 decimal places
Prevents long decimal numbers like 37.805740635007325 from displaying
in the UI. Now shows clean values like 37.81.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-09 08:50:53 -07:00
Kelly
ecc201e9d4 fix(backend): Parse bigint values in heatmap API response
PostgreSQL returns bigint columns as strings. The heatmap API was
returning these raw strings, causing string concatenation instead
of numeric addition in the frontend when summing values.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-09 08:45:05 -07:00
kelly
67bfdf47a5 Merge pull request 'fix: Add missing type field and pass build args to CI' (#5) from feature/seo-template-library-and-enhancements into master
Reviewed-on: https://code.cannabrands.app/Creationshop/dispensary-scraper/pulls/5
2025-12-09 15:41:57 +00:00
Kelly
3fa22a6ba1 fix: Add missing type field and pass build args to CI
- Add outOfStockProducts to StateMetrics interface
- Add onSpecialProducts to getStateSummary return
- Pass APP_GIT_SHA and other build args to docker build

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-09 07:44:38 -07:00
kelly
9f898f68db Merge pull request 'feat: SEO template library, discovery pipeline, and orchestrator enhancements' (#4) from feature/seo-template-library-and-enhancements into master
Reviewed-on: https://code.cannabrands.app/Creationshop/dispensary-scraper/pulls/4
2025-12-09 08:13:11 +00:00
Kelly
f78b05360a fix(cannaiq): Fix TypeScript build errors in ApiClient and pages
- Add put() method to ApiClient class
- Update get() method to accept optional params config
- Fix formatDuration to accept undefined type in JobQueue
- Fix DiscoveryLocations API parameter (state -> stateCode)
- Fix stats display path in DiscoveryLocations

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-09 00:44:35 -07:00
Kelly
2f483b3084 feat: SEO template library, discovery pipeline, and orchestrator enhancements
## SEO Template Library
- Add complete template library with 7 page types (state, city, category, brand, product, search, regeneration)
- Add Template Library tab in SEO Orchestrator with accordion-based editors
- Add template preview, validation, and variable injection engine
- Add API endpoints: /api/seo/templates, preview, validate, generate, regenerate

## Discovery Pipeline
- Add promotion.ts for discovery location validation and promotion
- Add discover-all-states.ts script for multi-state discovery
- Add promotion log migration (067)
- Enhance discovery routes and types

## Orchestrator & Admin
- Add crawl_enabled filter to stores page
- Add API permissions page
- Add job queue management
- Add price analytics routes
- Add markets and intelligence routes
- Enhance dashboard and worker monitoring

## Infrastructure
- Add migrations for worker definitions, SEO settings, field alignment
- Add canonical pipeline for scraper v2
- Update hydration and sync orchestrator
- Enhance multi-state query service

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-09 00:05:34 -07:00
Kelly
9711d594db feat(orchestrator): Add crawl_enabled filter to stores page
- Backend: Filter stores by crawl_enabled (default: enabled only)
- API: Support crawl_enabled param in getOrchestratorStores
- UI: Add Enabled/Disabled/All filter toggle buttons
- UI: Show crawl status icon in stores table

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-08 14:18:28 -07:00
Kelly
39aebfcb82 fix: Static file paths and crawl_enabled API filters
- Fix static file paths for local development (./public/* instead of /app/public/*)
- Add crawl_enabled and dutchie_verified filters to /api/stores and /api/dispensaries
- Default API to return only enabled stores (crawl_enabled=true)
- Add ?crawl_enabled=false to show disabled, ?crawl_enabled=all to show all

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-08 14:07:17 -07:00
Kelly
5415cac2f3 feat(seo): Add SEO tables to migration and ingress config
- Add seo_pages and seo_page_contents tables to migrate.ts for
  automatic creation on deployment
- Update Home.tsx with minor formatting
- Add ingress configuration updates

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-08 12:58:38 -07:00
kelly
70d2364a6f Merge pull request 'feat: Rename WordPress plugin to CannaIQ Menus v1.5.3' (#3) from feature/cannaiq-menus-plugin-rename into master
Reviewed-on: https://code.cannabrands.app/Creationshop/dispensary-scraper/pulls/3
2025-12-08 18:47:15 +00:00
191 changed files with 34559 additions and 2764 deletions


@@ -2,37 +2,77 @@ when:
- event: [push, pull_request]
steps:
# Build checks
# ===========================================
# PR VALIDATION: Parallel type checks (PRs only)
# ===========================================
typecheck-backend:
image: node:20
image: code.cannabrands.app/creationshop/node:20
commands:
- cd backend
- npm ci
- npx tsc --noEmit || true
- npm ci --prefer-offline
- npx tsc --noEmit
depends_on: []
when:
event: pull_request
build-cannaiq:
image: node:20
typecheck-cannaiq:
image: code.cannabrands.app/creationshop/node:20
commands:
- cd cannaiq
- npm ci
- npm ci --prefer-offline
- npx tsc --noEmit
- npm run build
depends_on: []
when:
event: pull_request
build-findadispo:
image: node:20
typecheck-findadispo:
image: code.cannabrands.app/creationshop/node:20
commands:
- cd findadispo/frontend
- npm ci
- npm run build
- npm ci --prefer-offline
- npx tsc --noEmit 2>/dev/null || true
depends_on: []
when:
event: pull_request
build-findagram:
image: node:20
typecheck-findagram:
image: code.cannabrands.app/creationshop/node:20
commands:
- cd findagram/frontend
- npm ci
- npm run build
- npm ci --prefer-offline
- npx tsc --noEmit 2>/dev/null || true
depends_on: []
when:
event: pull_request
# Docker builds - only on master
# ===========================================
# AUTO-MERGE: Merge PR after all checks pass
# ===========================================
auto-merge:
image: alpine:latest
environment:
GITEA_TOKEN:
from_secret: gitea_token
commands:
- apk add --no-cache curl
- |
echo "Merging PR #${CI_COMMIT_PULL_REQUEST}..."
curl -s -X POST \
-H "Authorization: token $GITEA_TOKEN" \
-H "Content-Type: application/json" \
-d '{"Do":"merge"}' \
"https://code.cannabrands.app/api/v1/repos/Creationshop/dispensary-scraper/pulls/${CI_COMMIT_PULL_REQUEST}/merge"
depends_on:
- typecheck-backend
- typecheck-cannaiq
- typecheck-findadispo
- typecheck-findagram
when:
event: pull_request
# ===========================================
# MASTER DEPLOY: Parallel Docker builds
# ===========================================
docker-backend:
image: woodpeckerci/plugin-docker-buildx
settings:
@@ -49,6 +89,12 @@ steps:
from_secret: registry_password
platforms: linux/amd64
provenance: false
build_args:
- APP_BUILD_VERSION=${CI_COMMIT_SHA:0:8}
- APP_GIT_SHA=${CI_COMMIT_SHA}
- APP_BUILD_TIME=${CI_PIPELINE_CREATED}
- CONTAINER_IMAGE_TAG=${CI_COMMIT_SHA:0:8}
depends_on: []
when:
branch: master
event: push
@@ -69,6 +115,7 @@ steps:
from_secret: registry_password
platforms: linux/amd64
provenance: false
depends_on: []
when:
branch: master
event: push
@@ -89,6 +136,7 @@ steps:
from_secret: registry_password
platforms: linux/amd64
provenance: false
depends_on: []
when:
branch: master
event: push
@@ -109,32 +157,35 @@ steps:
from_secret: registry_password
platforms: linux/amd64
provenance: false
depends_on: []
when:
branch: master
event: push
# Deploy to Kubernetes
# ===========================================
# STAGE 3: Deploy (after Docker builds)
# ===========================================
deploy:
image: bitnami/kubectl:latest
environment:
KUBECONFIG_CONTENT:
from_secret: kubeconfig_data
commands:
- echo "Deploying to Kubernetes..."
- mkdir -p ~/.kube
- echo "$KUBECONFIG_CONTENT" | tr -d '[:space:]' | base64 -d > ~/.kube/config
- chmod 600 ~/.kube/config
- kubectl set image deployment/scraper scraper=code.cannabrands.app/creationshop/dispensary-scraper:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
- kubectl set image deployment/scraper-worker scraper-worker=code.cannabrands.app/creationshop/dispensary-scraper:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
- kubectl set image deployment/scraper-worker worker=code.cannabrands.app/creationshop/dispensary-scraper:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
- kubectl set image deployment/cannaiq-frontend cannaiq-frontend=code.cannabrands.app/creationshop/cannaiq-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
- kubectl set image deployment/findadispo-frontend findadispo-frontend=code.cannabrands.app/creationshop/findadispo-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
- kubectl set image deployment/findagram-frontend findagram-frontend=code.cannabrands.app/creationshop/findagram-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
- kubectl rollout status deployment/scraper -n dispensary-scraper --timeout=300s
- kubectl rollout status deployment/scraper-worker -n dispensary-scraper --timeout=300s
- kubectl rollout status deployment/cannaiq-frontend -n dispensary-scraper --timeout=120s
- kubectl rollout status deployment/findadispo-frontend -n dispensary-scraper --timeout=120s
- kubectl rollout status deployment/findagram-frontend -n dispensary-scraper --timeout=120s
- echo "All deployments complete!"
depends_on:
- docker-backend
- docker-cannaiq
- docker-findadispo
- docker-findagram
when:
branch: master
event: push

CLAUDE.md

@@ -213,22 +213,23 @@ CannaiQ has **TWO databases** with distinct purposes:
| Table | Purpose | Row Count |
|-------|---------|-----------|
| `dispensaries` | Store/dispensary records | ~188+ rows |
| `dutchie_products` | Product catalog | ~37,000+ rows |
| `dutchie_product_snapshots` | Price/stock history | ~millions |
| `store_products` | Canonical product schema | ~37,000+ rows |
| `store_product_snapshots` | Canonical snapshot schema | growing |
| `store_products` | Product catalog | ~37,000+ rows |
| `store_product_snapshots` | Price/stock history | ~millions |
**LEGACY TABLES (EMPTY - DO NOT USE):**
| Table | Status | Action |
|-------|--------|--------|
| `stores` | EMPTY (0 rows) | Use `dispensaries` instead |
| `products` | EMPTY (0 rows) | Use `dutchie_products` or `store_products` |
| `products` | EMPTY (0 rows) | Use `store_products` instead |
| `dutchie_products` | LEGACY (0 rows) | Use `store_products` instead |
| `dutchie_product_snapshots` | LEGACY (0 rows) | Use `store_product_snapshots` instead |
| `categories` | EMPTY (0 rows) | Categories stored in product records |
**Code must NEVER:**
- Query the `stores` table (use `dispensaries`)
- Query the `products` table (use `dutchie_products` or `store_products`)
- Query the `products` table (use `store_products`)
- Query the `dutchie_products` table (use `store_products`)
- Query the `categories` table (categories are in product records)
**CRITICAL RULES:**
@@ -343,23 +344,23 @@ npx tsx src/scripts/etl/042_legacy_import.ts
- SCHEMA ONLY - no data inserts from legacy tables
**ETL Script 042** (`backend/src/scripts/etl/042_legacy_import.ts`):
- Copies data from `dutchie_products` → `store_products`
- Copies data from `dutchie_product_snapshots` → `store_product_snapshots`
- Copies data from legacy `dutchie_legacy.dutchie_products` → `store_products`
- Copies data from legacy `dutchie_legacy.dutchie_product_snapshots` → `store_product_snapshots`
- Extracts brands from product data into `brands` table
- Links dispensaries to chains and states
- INSERT-ONLY and IDEMPOTENT (uses ON CONFLICT DO NOTHING)
- Run manually: `cd backend && npx tsx src/scripts/etl/042_legacy_import.ts`
**Tables touched by ETL:**
| Source Table | Target Table |
|--------------|--------------|
| Source Table (dutchie_legacy) | Target Table (dutchie_menus) |
|-------------------------------|------------------------------|
| `dutchie_products` | `store_products` |
| `dutchie_product_snapshots` | `store_product_snapshots` |
| (brand names extracted) | `brands` |
| (state codes mapped) | `dispensaries.state_id` |
| (chain names matched) | `dispensaries.chain_id` |
**Legacy tables remain intact** - `dutchie_products` and `dutchie_product_snapshots` are not modified.
**Note:** The legacy `dutchie_products` and `dutchie_product_snapshots` tables in `dutchie_legacy` are read-only sources. All new crawl data goes directly to `store_products` and `store_product_snapshots`.
**Migration 045** (`backend/migrations/045_add_image_columns.sql`):
- Adds `thumbnail_url` to `store_products` and `store_product_snapshots`
@@ -459,15 +460,66 @@ const result = await pool.query(`
### Local Storage Structure
```
/storage/products/{brand}/{state}/{product_id}/
/storage/images/products/{state}/{store}/{brand}/{product}/
image-{hash}.webp
image-{hash}-medium.webp
image-{hash}-thumb.webp
/storage/brands/{brand}/
/storage/images/brands/{brand}/
logo-{hash}.webp
```
### Image Proxy API (On-Demand Resizing)
Images are stored at full resolution and resized on-demand via the `/img` endpoint.
**Endpoint:** `GET /img/<path>?<params>`
**Parameters:**
| Param | Description | Example |
|-------|-------------|---------|
| `w` | Width in pixels (max 4000) | `?w=200` |
| `h` | Height in pixels (max 4000) | `?h=200` |
| `q` | Quality 1-100 (default 80) | `?q=70` |
| `fit` | Resize mode: cover, contain, fill, inside, outside | `?fit=cover` |
| `blur` | Blur sigma 0.3-1000 | `?blur=5` |
| `gray` | Grayscale (1 = enabled) | `?gray=1` |
| `format` | Output: webp, jpeg, png, avif (default webp) | `?format=jpeg` |
**Examples:**
```bash
# Thumbnail (50px)
GET /img/products/az/store/brand/product/image-abc123.webp?w=50
# Card image (200px, cover fit)
GET /img/products/az/store/brand/product/image-abc123.webp?w=200&h=200&fit=cover
# JPEG at 70% quality
GET /img/products/az/store/brand/product/image-abc123.webp?w=400&format=jpeg&q=70
# Grayscale blur
GET /img/products/az/store/brand/product/image-abc123.webp?w=200&gray=1&blur=3
```
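As a rough illustration only, a handler of this kind can be sketched with Express and Sharp as below. The `IMAGE_ROOT` env var, route wiring, and exact parameter handling are assumptions for this sketch, not the actual contents of `backend/src/routes/image-proxy.ts`.
```typescript
// Minimal sketch, assuming Express + Sharp; names like IMAGE_ROOT are hypothetical.
import express from 'express';
import path from 'path';
import sharp from 'sharp';

const router = express.Router();
const IMAGE_ROOT = path.resolve(process.env.IMAGE_ROOT || '/app/public/images'); // assumed

router.get('/img/*', async (req, res) => {
  try {
    // Resolve the requested file under the image root; reject path traversal.
    const rel = req.params[0] || '';
    const filePath = path.resolve(IMAGE_ROOT, rel);
    if (!filePath.startsWith(IMAGE_ROOT)) return res.status(400).send('Bad path');

    const width = Math.min(Number(req.query.w) || 0, 4000) || undefined;
    const height = Math.min(Number(req.query.h) || 0, 4000) || undefined;
    const quality = Math.min(Math.max(Number(req.query.q) || 80, 1), 100);
    const fit = String(req.query.fit || 'cover') as keyof sharp.FitEnum;
    const format = String(req.query.format || 'webp') as 'webp' | 'jpeg' | 'png' | 'avif';

    let img = sharp(filePath);
    if (width || height) img = img.resize({ width, height, fit });
    if (req.query.gray === '1') img = img.grayscale();
    if (req.query.blur) img = img.blur(Number(req.query.blur));

    const buf = await img.toFormat(format, { quality }).toBuffer();
    res.type(`image/${format}`).send(buf);
  } catch {
    res.status(404).send('Not found');
  }
});

export default router;
```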
**Frontend Usage:**
```typescript
import { getImageUrl, ImageSizes } from '../lib/images';
// Returns /img/products/.../image.webp?w=50 for local images
// Returns original URL for remote images (CDN, etc.)
const thumbUrl = getImageUrl(product.image_url, ImageSizes.thumb);
const cardUrl = getImageUrl(product.image_url, ImageSizes.medium);
const detailUrl = getImageUrl(product.image_url, ImageSizes.detail);
```
**Size Presets:**
| Preset | Width | Use Case |
|--------|-------|----------|
| `thumb` | 50px | Table thumbnails |
| `small` | 100px | Small cards |
| `medium` | 200px | Grid cards |
| `large` | 400px | Large cards |
| `detail` | 600px | Product detail |
| `full` | - | No resize |
### Storage Adapter
```typescript
@@ -480,8 +532,9 @@ import { saveImage, getImageUrl } from '../utils/storage-adapter';
| File | Purpose |
|------|---------|
| `backend/src/utils/local-storage.ts` | Local filesystem adapter |
| `backend/src/utils/storage-adapter.ts` | Unified storage abstraction |
| `backend/src/utils/image-storage.ts` | Image download and storage |
| `backend/src/routes/image-proxy.ts` | On-demand image resizing endpoint |
| `cannaiq/src/lib/images.ts` | Frontend image URL helper |
| `docker-compose.local.yml` | Local stack without MinIO |
| `start-local.sh` | Convenience startup script |
@@ -489,12 +542,78 @@ import { saveImage, getImageUrl } from '../utils/storage-adapter';
## UI ANONYMIZATION RULES
- No vendor names in forward-facing URLs: use `/api/az/...`, `/az`, `/az-schedule`
- No vendor names in forward-facing URLs
- No "dutchie", "treez", "jane", "weedmaps", "leafly" visible in consumer UIs
- Internal admin tools may show provider names for debugging
---
## DUTCHIE DISCOVERY PIPELINE (Added 2025-01)
### Overview
Automated discovery of Dutchie-powered dispensaries across all US states.
### Flow
```
1. getAllCitiesByState GraphQL → Get all cities for a state
2. ConsumerDispensaries GraphQL → Get stores for each city
3. Upsert to dutchie_discovery_locations (keyed by platform_location_id)
4. AUTO-VALIDATE: Check required fields
5. AUTO-PROMOTE: Create/update dispensaries with crawl_enabled=true
6. Log all actions to dutchie_promotion_log
```
### Tables
| Table | Purpose |
|-------|---------|
| `dutchie_discovery_cities` | Cities known to have dispensaries |
| `dutchie_discovery_locations` | Raw discovered store data |
| `dispensaries` | Canonical stores (promoted from discovery) |
| `dutchie_promotion_log` | Audit trail for validation/promotion |
### Files
| File | Purpose |
|------|---------|
| `src/discovery/discovery-crawler.ts` | Main orchestrator |
| `src/discovery/location-discovery.ts` | GraphQL fetching |
| `src/discovery/promotion.ts` | Validation & promotion logic |
| `src/scripts/run-discovery.ts` | CLI interface |
| `migrations/067_promotion_log.sql` | Audit log table |
### GraphQL Hashes (in `src/platforms/dutchie/client.ts`)
| Query | Hash |
|-------|------|
| `GetAllCitiesByState` | `ae547a0466ace5a48f91e55bf6699eacd87e3a42841560f0c0eabed5a0a920e6` |
| `ConsumerDispensaries` | `0a5bfa6ca1d64ae47bcccb7c8077c87147cbc4e6982c17ceec97a2a4948b311b` |
### Usage
```bash
# Discover all stores in a state
npx tsx src/scripts/run-discovery.ts discover:state AZ
npx tsx src/scripts/run-discovery.ts discover:state CA
# Check stats
npx tsx src/scripts/run-discovery.ts stats
```
### Validation Rules
A discovery location must have:
- `platform_location_id` (MongoDB ObjectId, 24 hex chars)
- `name`
- `city`
- `state_code`
- `platform_menu_url`
Invalid records are marked `status='rejected'` with errors logged.
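A minimal sketch of that validation check, with an assumed function name and row shape (the real logic lives in `src/discovery/promotion.ts`):
```typescript
// Sketch only: collect validation errors for a discovery location.
interface DiscoveryLocation {
  platform_location_id: string | null;
  name: string | null;
  city: string | null;
  state_code: string | null;
  platform_menu_url: string | null;
}

export function validateDiscoveryLocation(loc: DiscoveryLocation): string[] {
  const errors: string[] = [];
  // platform_location_id must look like a MongoDB ObjectId (24 hex chars), not a slug
  if (!loc.platform_location_id || !/^[0-9a-f]{24}$/i.test(loc.platform_location_id)) {
    errors.push('platform_location_id must be a 24-char MongoDB ObjectId');
  }
  if (!loc.name) errors.push('name is required');
  if (!loc.city) errors.push('city is required');
  if (!loc.state_code) errors.push('state_code is required');
  if (!loc.platform_menu_url) errors.push('platform_menu_url is required');
  return errors; // non-empty → mark status='rejected' and log to dutchie_promotion_log
}
```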
### Key Design Decisions
- `platform_location_id` MUST be MongoDB ObjectId (not slug)
- Old geo-based discovery stored slugs → deleted as garbage data
- Rate limit: 2 seconds between city requests to avoid API throttling
- Promotion is idempotent via `ON CONFLICT (platform_dispensary_id)`
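A hedged sketch of the idempotent promotion upsert; the `dispensaries` column list here is abridged and partly assumed:
```typescript
// Sketch only: promote a validated discovery location into dispensaries.
// Columns other than platform_dispensary_id / crawl_enabled are assumptions.
import { Pool } from 'pg';

export async function promoteLocation(
  pool: Pool,
  loc: { platform_location_id: string; name: string; city: string; state_code: string; platform_menu_url: string }
): Promise<void> {
  await pool.query(
    `INSERT INTO dispensaries (name, city, state, menu_url, platform_dispensary_id, crawl_enabled)
     VALUES ($1, $2, $3, $4, $5, true)
     ON CONFLICT (platform_dispensary_id)
     DO UPDATE SET name = EXCLUDED.name, menu_url = EXCLUDED.menu_url`,
    [loc.name, loc.city, loc.state_code, loc.platform_menu_url, loc.platform_location_id]
  );
}
```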
---
## FUTURE TODO / PENDING FEATURES
- [ ] Orchestrator observability dashboard
@@ -639,16 +758,19 @@ export default defineConfig({
- **DB**: Use the single CannaiQ database via `CANNAIQ_DB_*` env vars. No hardcoded names.
- **Images**: No MinIO. Save to local /images/products/<disp>/<prod>-<hash>.webp (and brands); preserve original URL; serve via backend static.
- **Dutchie GraphQL**: Endpoint https://dutchie.com/api-3/graphql. Variables must use productsFilter.dispensaryId (platform_dispensary_id). Mode A: Status="Active". Mode B: Status=null/activeOnly:false.
- **Dutchie GraphQL**: Endpoint https://dutchie.com/api-3/graphql. Variables must use productsFilter.dispensaryId (platform_dispensary_id). **CRITICAL: Use `Status: 'Active'`, NOT `null`** (null returns 0 products).
- **cName/slug**: Derive cName from each store's menu_url (/embedded-menu/<cName> or /dispensary/<slug>). No hardcoded defaults.
- **Dual-mode always**: useBothModes:true to get pricing (Mode A) + full coverage (Mode B).
- **Batch DB writes**: Chunk products/snapshots/missing (100200) to avoid OOM.
- **OOS/missing**: Include inactive/OOS in Mode B. Union A+B, dedupe by external_product_id+dispensary_id.
- **API/Frontend**: Use /api/az/... endpoints (stores/products/brands/categories/summary/dashboard).
- **API/Frontend**: Use `/api/stores`, `/api/products`, `/api/workers`, `/api/pipeline` endpoints.
- **Scheduling**: Crawl only menu_type='dutchie' AND platform_dispensary_id IS NOT NULL. 4-hour crawl with jitter.
- **Monitor**: /scraper-monitor (and /az-schedule) should show active/recent jobs from job_run_logs/crawl_jobs.
- **THC/CBD values**: Clamp to ≤100 - some products report milligrams as percentages.
- **Column names**: Use `name_raw`, `brand_name_raw`, `category_raw`, `subcategory_raw` (NOT `name`, `brand_name`, etc.)
- **Monitor**: `/api/workers` shows active/recent jobs from job queue.
- **No slug guessing**: Never use defaults. Always derive per store from menu_url and resolve platform IDs per location.
**📖 Full Documentation: See `docs/DUTCHIE_CRAWL_WORKFLOW.md` for complete pipeline documentation.**
---
### Detailed Rules
@@ -691,7 +813,7 @@ export default defineConfig({
- Use dutchie GraphQL pipeline only for `menu_type='dutchie'`.
6) **Frontend**
- Forward-facing URLs: `/api/az`, `/az`, `/az-schedule`; no vendor names.
- Forward-facing URLs should not contain vendor names.
- `/scraper-schedule`: add filters/search, keep as master view for all schedules; reflect platform ID/menu_type status and controls.
7) **No slug guessing**
@@ -740,24 +862,27 @@ export default defineConfig({
16) **API Route Semantics**
**Route Groups:**
- `/api/admin/...` = Admin/operator actions (crawl triggers, health checks)
- `/api/az/...` = Arizona data slice (stores, products, metrics)
**Route Groups (as registered in `src/index.ts`):**
- `/api/stores` = Store/dispensary CRUD and listing
- `/api/products` = Product listing and details
- `/api/workers` = Job queue monitoring (replaces legacy `/api/dutchie-az/...`)
- `/api/pipeline` = Crawl pipeline triggers
- `/api/admin/orchestrator` = Orchestrator admin actions
- `/api/discovery` = Platform discovery (Dutchie, etc.)
- `/api/v1/...` = Public API for external consumers (WordPress, etc.)
**Crawl Trigger (CANONICAL):**
```
POST /api/admin/crawl/:dispensaryId
```
**Crawl Trigger:**
Check `/api/pipeline` or `/api/admin/orchestrator` routes for crawl triggers.
The legacy `POST /api/admin/crawl/:dispensaryId` does NOT exist.
17) **Monitoring and logging**
- /scraper-monitor (and /az-schedule) should show active/recent jobs from job_run_logs/crawl_jobs
- `/api/workers` shows active/recent jobs from job queue
- Auto-refresh every 30 seconds
- System Logs page should show real log data, not just startup messages
18) **Dashboard Architecture**
- **Frontend**: Rebuild the frontend with `VITE_API_URL` pointing to the correct backend and redeploy.
- **Backend**: `/api/dashboard/stats` MUST use the canonical DB pool. Use the correct tables: `dutchie_products`, `dispensaries`, and views like `v_dashboard_stats`, `v_latest_snapshots`.
- **Backend**: `/api/dashboard/stats` MUST use the canonical DB pool. Use the correct tables: `store_products`, `dispensaries`, and views like `v_dashboard_stats`, `v_latest_snapshots`.
19) **Deployment (Gitea + Kubernetes)**
- **Registry**: Gitea at `code.cannabrands.app/creationshop/dispensary-scraper`
@@ -783,8 +908,8 @@ export default defineConfig({
- **Job schedules** (managed in `job_schedules` table):
- `dutchie_az_menu_detection`: Runs daily with 60-min jitter
- `dutchie_az_product_crawl`: Runs every 4 hours with 30-min jitter
- **Trigger schedules**: `curl -X POST /api/az/admin/schedules/{id}/trigger`
- **Check schedule status**: `curl /api/az/admin/schedules`
- **Monitor jobs**: `GET /api/workers`
- **Trigger crawls**: Check `/api/pipeline` routes
21) **Frontend Architecture - AVOID OVER-ENGINEERING**
@@ -1123,3 +1248,32 @@ Every analytics v2 endpoint must:
---
# END Analytics V2 spec extension
---
## WordPress Plugin Versioning
The WordPress plugin version is tracked in `wordpress-plugin/VERSION`.
**Current version:** Check `wordpress-plugin/VERSION` for the latest version.
**Versioning rules:**
- **Minor bumps (x.x.N)**: Bug fixes, small improvements - default for most changes
- **Middle bumps (x.N.0)**: New features, significant improvements
- **Major bumps (N.0.0)**: Breaking changes, major rewrites - only when user explicitly requests
**When making WP plugin changes:**
1. Read `wordpress-plugin/VERSION` to get current version
2. Bump the version number (minor by default)
3. Update both files:
- `wordpress-plugin/VERSION`
- Plugin header `Version:` in `cannaiq-menus.php` and/or `crawlsy-menus.php`
- The `define('..._VERSION', '...')` constant in each plugin file
**Plugin files:**
| File | Brand | API URL |
|------|-------|---------|
| `cannaiq-menus.php` | CannaIQ | `https://cannaiq.co/api/v1` |
| `crawlsy-menus.php` | Crawlsy (legacy) | `https://cannaiq.co/api/v1` |
Both plugins use the same API endpoint. The Crawlsy version exists for backward compatibility with existing installations.

backend/.gitignore (new file)

@@ -0,0 +1,3 @@
# IP2Location database (downloaded separately)
data/ip2location/


@@ -1,17 +1,17 @@
# Build stage
# Image: code.cannabrands.app/creationshop/dispensary-scraper
FROM node:20-slim AS builder
FROM code.cannabrands.app/creationshop/node:20-slim AS builder
WORKDIR /app
COPY package*.json ./
RUN npm ci
RUN npm install
COPY . .
RUN npm run build
# Production stage
FROM node:20-slim
FROM code.cannabrands.app/creationshop/node:20-slim
# Build arguments for version info
ARG APP_BUILD_VERSION=dev
@@ -43,10 +43,13 @@ ENV PUPPETEER_EXECUTABLE_PATH=/usr/bin/chromium
WORKDIR /app
COPY package*.json ./
RUN npm ci --omit=dev
RUN npm install --omit=dev
COPY --from=builder /app/dist ./dist
# Copy migrations for auto-migrate on startup
COPY migrations ./migrations
# Create local images directory for when MinIO is not configured
RUN mkdir -p /app/public/images/products


@@ -0,0 +1,538 @@
# Crawl Pipeline Documentation
## Overview
The crawl pipeline fetches product data from Dutchie dispensary menus and stores it in the canonical database. This document covers the complete flow from task scheduling to data storage.
---
## Pipeline Stages
```
┌───────────────────────┐
│ store_discovery       │  Find new dispensaries
└──────────┬────────────┘
           ▼
┌───────────────────────┐
│ entry_point_discovery │  Resolve slug → platform_dispensary_id
└──────────┬────────────┘
           ▼
┌───────────────────────┐
│ product_discovery     │  Initial product crawl
└──────────┬────────────┘
           ▼
┌───────────────────────┐
│ product_resync        │  Recurring crawl (every 4 hours)
└───────────────────────┘
```
---
## Stage Details
### 1. Store Discovery
**Purpose:** Find new dispensaries to crawl
**Handler:** `src/tasks/handlers/store-discovery.ts`
**Flow:**
1. Query Dutchie `ConsumerDispensaries` GraphQL for cities/states
2. Extract dispensary info (name, address, menu_url)
3. Insert into `dutchie_discovery_locations`
4. Queue `entry_point_discovery` for each new location
---
### 2. Entry Point Discovery
**Purpose:** Resolve menu URL slug to platform_dispensary_id (MongoDB ObjectId)
**Handler:** `src/tasks/handlers/entry-point-discovery.ts`
**Flow:**
1. Load dispensary from database
2. Extract slug from `menu_url`:
- `/embedded-menu/<slug>` or `/dispensary/<slug>`
3. Start stealth session (fingerprint + proxy)
4. Query `resolveDispensaryIdWithDetails(slug)` via GraphQL
5. Update dispensary with `platform_dispensary_id`
6. Queue `product_discovery` task
**Example:**
```
menu_url: https://dutchie.com/embedded-menu/deeply-rooted
slug: deeply-rooted
platform_dispensary_id: 6405ef617056e8014d79101b
```
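A small sketch of the slug extraction in step 2; the helper name `extractMenuSlug` is assumed, not taken from the codebase:
```typescript
// Sketch: derive the menu slug from a stored menu_url, matching
// /embedded-menu/<slug> and /dispensary/<slug> forms.
export function extractMenuSlug(menuUrl: string): string | null {
  try {
    const { pathname } = new URL(menuUrl);
    const match = pathname.match(/\/(?:embedded-menu|dispensary)\/([^/?#]+)/);
    return match ? match[1] : null;
  } catch {
    return null; // not a valid URL
  }
}

// extractMenuSlug('https://dutchie.com/embedded-menu/deeply-rooted') === 'deeply-rooted'
```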
---
### 3. Product Discovery
**Purpose:** Initial crawl of a new dispensary
**Handler:** `src/tasks/handlers/product-discovery.ts`
Same as product_resync but for first-time crawls.
---
### 4. Product Resync
**Purpose:** Recurring crawl to capture price/stock changes
**Handler:** `src/tasks/handlers/product-resync.ts`
**Flow:**
#### Step 1: Load Dispensary Info
```sql
SELECT id, name, platform_dispensary_id, menu_url, state
FROM dispensaries
WHERE id = $1 AND crawl_enabled = true
```
#### Step 2: Start Stealth Session
- Generate random browser fingerprint
- Set locale/timezone matching state
- Optional proxy rotation
#### Step 3: Fetch Products via GraphQL
**Endpoint:** `https://dutchie.com/api-3/graphql`
**Variables:**
```javascript
{
includeEnterpriseSpecials: false,
productsFilter: {
dispensaryId: "<platform_dispensary_id>",
pricingType: "rec",
Status: "All",
types: [],
useCache: false,
isDefaultSort: true,
sortBy: "popularSortIdx",
sortDirection: 1,
bypassOnlineThresholds: true,
isKioskMenu: false,
removeProductsBelowOptionThresholds: false
},
page: 0,
perPage: 100
}
```
**Key Notes:**
- `Status: "All"` returns all products (Active returns same count)
- `Status: null` returns 0 products (broken)
- `pricingType: "rec"` returns BOTH rec and med prices
- Paginate until `products.length < perPage` or `allProducts.length >= totalCount`
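A minimal pagination sketch following those notes; `fetchPage` and its return shape are assumed stand-ins for the real GraphQL call:
```typescript
// Sketch only: page through results until a short page or the reported total.
type RawProduct = { _id: string; Name: string; [key: string]: unknown };

type FetchPage = (page: number, perPage: number) => Promise<{ products: RawProduct[]; totalCount: number }>;

export async function fetchAllProducts(fetchPage: FetchPage): Promise<RawProduct[]> {
  const perPage = 100;
  const all: RawProduct[] = [];
  let page = 0;

  while (true) {
    const { products, totalCount } = await fetchPage(page, perPage);
    all.push(...products);
    // Stop when a page comes back short or the reported total has been reached.
    if (products.length < perPage || all.length >= totalCount) break;
    page += 1;
  }
  return all;
}
```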
#### Step 4: Normalize Data
Transform raw Dutchie payload to canonical format via `DutchieNormalizer`.
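A hedged sketch of that mapping, using raw field names from the payload tables below and `store_products` column names used elsewhere in this document; picking the lowest option price and the exact output shape are assumptions, not the actual `DutchieNormalizer` behavior:
```typescript
// Sketch only: map a raw Dutchie product to an assumed canonical shape.
interface RawDutchieProduct {
  _id: string;
  Name: string;
  brandName?: string;
  type?: string;
  subcategory?: string;
  Prices?: number[];
  medicalPrices?: number[];
  Image?: string;
}

interface CanonicalProduct {
  provider: 'dutchie';
  provider_product_id: string;
  name_raw: string;
  brand_name_raw: string | null;
  category_raw: string | null;
  subcategory_raw: string | null;
  price_rec: number | null;
  price_med: number | null;
  image_url: string | null;
}

function normalizeProduct(raw: RawDutchieProduct): CanonicalProduct {
  // Assumption for this sketch: use the lowest option price as the headline price.
  const lowest = (prices?: number[]) =>
    prices && prices.length ? Math.min(...prices) : null;

  return {
    provider: 'dutchie',
    provider_product_id: raw._id,
    name_raw: raw.Name,
    brand_name_raw: raw.brandName ?? null,
    category_raw: raw.type ?? null,
    subcategory_raw: raw.subcategory ?? null,
    price_rec: lowest(raw.Prices),
    price_med: lowest(raw.medicalPrices),
    image_url: raw.Image ?? null,
  };
}
```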
#### Step 5: Upsert Products
Insert/update `store_products` table with normalized data.
#### Step 6: Create Snapshots
Insert point-in-time record to `store_product_snapshots`.
#### Step 7: Track Missing Products (OOS Detection)
```sql
-- Reset consecutive_misses for products IN the feed
UPDATE store_products
SET consecutive_misses = 0, last_seen_at = NOW()
WHERE dispensary_id = $1
AND provider = 'dutchie'
AND provider_product_id = ANY($2)
-- Increment for products NOT in feed
UPDATE store_products
SET consecutive_misses = consecutive_misses + 1
WHERE dispensary_id = $1
AND provider = 'dutchie'
AND provider_product_id NOT IN (...)
AND consecutive_misses < 3
-- Mark OOS at 3 consecutive misses
UPDATE store_products
SET stock_status = 'oos', is_in_stock = false
WHERE dispensary_id = $1
AND consecutive_misses >= 3
AND stock_status != 'oos'
```
#### Step 8: Download Images
For new products, download and store images locally.
#### Step 9: Update Dispensary
```sql
UPDATE dispensaries SET last_crawl_at = NOW() WHERE id = $1
```
---
## GraphQL Payload Structure
### Product Fields (from filteredProducts.products[])
| Field | Type | Description |
|-------|------|-------------|
| `_id` / `id` | string | MongoDB ObjectId (24 hex chars) |
| `Name` | string | Product display name |
| `brandName` | string | Brand name |
| `brand.name` | string | Brand name (nested) |
| `brand.description` | string | Brand description |
| `type` | string | Category (Flower, Edible, Concentrate, etc.) |
| `subcategory` | string | Subcategory |
| `strainType` | string | Hybrid, Indica, Sativa, N/A |
| `Status` | string | Always "Active" in feed |
| `Image` | string | Primary image URL |
| `images[]` | array | All product images |
### Pricing Fields
| Field | Type | Description |
|-------|------|-------------|
| `Prices[]` | number[] | Rec prices per option |
| `recPrices[]` | number[] | Rec prices |
| `medicalPrices[]` | number[] | Medical prices |
| `recSpecialPrices[]` | number[] | Rec sale prices |
| `medicalSpecialPrices[]` | number[] | Medical sale prices |
| `Options[]` | string[] | Size options ("1/8oz", "1g", etc.) |
| `rawOptions[]` | string[] | Raw weight options ("3.5g") |
### Inventory Fields (POSMetaData.children[])
| Field | Type | Description |
|-------|------|-------------|
| `quantity` | number | Total inventory count |
| `quantityAvailable` | number | Available for online orders |
| `kioskQuantityAvailable` | number | Available for kiosk orders |
| `option` | string | Which size option this is for |
### Potency Fields
| Field | Type | Description |
|-------|------|-------------|
| `THCContent.range[]` | number[] | THC percentage |
| `CBDContent.range[]` | number[] | CBD percentage |
| `cannabinoidsV2[]` | array | Detailed cannabinoid breakdown |
### Specials (specialData.bogoSpecials[])
| Field | Type | Description |
|-------|------|-------------|
| `specialName` | string | Deal name |
| `specialType` | string | "bogo", "sale", etc. |
| `itemsForAPrice.value` | string | Bundle price |
| `bogoRewards[].totalQuantity.quantity` | number | Required quantity |
---
## OOS Detection Logic
Products disappear from the Dutchie feed when they go out of stock. We track this via `consecutive_misses`:
| Scenario | Action |
|----------|--------|
| Product in feed | `consecutive_misses = 0` |
| Product missing 1st time | `consecutive_misses = 1` |
| Product missing 2nd time | `consecutive_misses = 2` |
| Product missing 3rd time | `consecutive_misses = 3`, mark `stock_status = 'oos'` |
| Product returns to feed | `consecutive_misses = 0`, update stock_status |
**Why 3 misses?**
- Protects against false positives from crawl failures
- Single bad crawl doesn't trigger mass OOS alerts
- Balances detection speed vs accuracy
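A quick way to see which products are one or two crawls away from being marked OOS, using the same `store_products` columns this logic writes to:
```sql
-- Products missing from recent feeds but not yet marked out of stock
SELECT id, name_raw, brand_name_raw, consecutive_misses, last_seen_at
FROM store_products
WHERE dispensary_id = $1
  AND provider = 'dutchie'
  AND consecutive_misses BETWEEN 1 AND 2
ORDER BY consecutive_misses DESC, last_seen_at ASC;
```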
---
## Database Tables
### store_products
Current state of each product:
- `provider_product_id` - Dutchie's MongoDB ObjectId
- `name_raw`, `brand_name_raw` - Raw values from feed
- `price_rec`, `price_med` - Current prices
- `is_in_stock`, `stock_status` - Availability
- `consecutive_misses` - OOS detection counter
- `last_seen_at` - Last time product was in feed
### store_product_snapshots
Point-in-time records for historical analysis:
- One row per product per crawl
- Captures price, stock, potency at that moment
- Used for price history, analytics
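For example, a 30-day price history for a single product can be pulled straight from the snapshots (a sketch; it assumes the `captured_at`, `price_rec`, and `price_rec_special` columns used elsewhere in this doc):
```sql
-- Price history for one product over the last 30 days
SELECT captured_at, price_rec, price_rec_special
FROM store_product_snapshots
WHERE store_product_id = $1
  AND captured_at >= NOW() - INTERVAL '30 days'
ORDER BY captured_at;
```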
### dispensaries
Store metadata:
- `platform_dispensary_id` - MongoDB ObjectId for GraphQL
- `menu_url` - Source URL
- `last_crawl_at` - Last successful crawl
- `crawl_enabled` - Whether to crawl
---
## Worker Roles
Workers pull tasks from the `worker_tasks` queue based on their assigned role.
| Role | Name | Description | Handler |
|------|------|-------------|---------|
| `product_resync` | Product Resync | Re-crawl dispensary products for price/stock changes | `handleProductResync` |
| `product_discovery` | Product Discovery | Initial product discovery for new dispensaries | `handleProductDiscovery` |
| `store_discovery` | Store Discovery | Discover new dispensary locations | `handleStoreDiscovery` |
| `entry_point_discovery` | Entry Point Discovery | Resolve platform IDs from menu URLs | `handleEntryPointDiscovery` |
| `analytics_refresh` | Analytics Refresh | Refresh materialized views and analytics | `handleAnalyticsRefresh` |
**API Endpoint:** `GET /api/worker-registry/roles`
---
## Scheduling
Crawls are scheduled via `worker_tasks` table:
| Role | Frequency | Description |
|------|-----------|-------------|
| `product_resync` | Every 4 hours | Regular product refresh |
| `product_discovery` | On-demand | First crawl for new stores |
| `entry_point_discovery` | On-demand | New store setup |
| `store_discovery` | Daily | Find new stores |
| `analytics_refresh` | Daily | Refresh analytics materialized views |
---
## Priority & On-Demand Tasks
Tasks are claimed by workers in order of **priority DESC, created_at ASC**.
### Priority Levels
| Priority | Use Case | Example |
|----------|----------|---------|
| 0 | Scheduled/batch tasks | Daily product_resync generation |
| 10 | On-demand/chained tasks | entry_point → product_discovery |
| Higher | Urgent/manual triggers | Admin-triggered immediate crawl |
### Task Chaining
When a task completes, the system automatically creates follow-up tasks:
```
store_discovery (completed)
└─► entry_point_discovery (priority: 10) for each new store
entry_point_discovery (completed, success)
└─► product_discovery (priority: 10) for that store
product_discovery (completed)
└─► [no chain] Store enters regular resync schedule
```
### On-Demand Task Creation
Use the task service to create high-priority tasks:
```typescript
// Create immediate product resync for a store
await taskService.createTask({
role: 'product_resync',
dispensary_id: 123,
platform: 'dutchie',
priority: 20, // Higher than batch tasks
});
// Convenience methods with default high priority (10)
await taskService.createEntryPointTask(dispensaryId, 'dutchie');
await taskService.createProductDiscoveryTask(dispensaryId, 'dutchie');
await taskService.createStoreDiscoveryTask('dutchie', 'AZ');
```
### Claim Function
The `claim_task()` SQL function atomically claims tasks:
- Respects priority ordering (higher = first)
- Uses `FOR UPDATE SKIP LOCKED` for concurrency
- Prevents multiple active tasks per store
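The function body isn't reproduced here, but the claiming pattern it relies on looks roughly like this sketch (not the actual `claim_task()` source; per-store exclusivity via the partial unique index is omitted):
```sql
-- Sketch of atomic claiming with SKIP LOCKED
WITH next_task AS (
  SELECT id
  FROM worker_tasks
  WHERE status = 'pending'
    AND role = $1::task_role
    AND (scheduled_for IS NULL OR scheduled_for <= NOW())
  ORDER BY priority DESC, created_at ASC
  LIMIT 1
  FOR UPDATE SKIP LOCKED
)
UPDATE worker_tasks t
SET status = 'claimed', worker_id = $2, claimed_at = NOW()
FROM next_task
WHERE t.id = next_task.id
RETURNING t.*;
```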
---
## Image Storage
Images are downloaded from Dutchie's AWS S3 and stored locally with on-demand resizing.
### Storage Path
```
/storage/images/products/<state>/<store>/<brand>/<product_id>/image-<hash>.webp
/storage/images/brands/<brand>/logo-<hash>.webp
```
**Example:**
```
/storage/images/products/az/az-deeply-rooted/bud-bros/6913e3cd444eac3935e928b9/image-ae38b1f9.webp
```
### Image Proxy API
Served via `/img/*` with on-demand resizing using **sharp**:
```
GET /img/products/az/az-deeply-rooted/bud-bros/6913e3cd444eac3935e928b9/image-ae38b1f9.webp?w=200
```
| Param | Description |
|-------|-------------|
| `w` | Width in pixels (max 4000) |
| `h` | Height in pixels (max 4000) |
| `q` | Quality 1-100 (default 80) |
| `fit` | cover, contain, fill, inside, outside |
| `blur` | Blur sigma (0.3-1000) |
| `gray` | Grayscale (1 = enabled) |
| `format` | webp, jpeg, png, avif (default webp) |
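Parameters can be combined; for example, a square 400px AVIF thumbnail at quality 70:
```
GET /img/products/az/az-deeply-rooted/bud-bros/6913e3cd444eac3935e928b9/image-ae38b1f9.webp?w=400&h=400&fit=cover&q=70&format=avif
```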
### Key Files
| File | Purpose |
|------|---------|
| `src/utils/image-storage.ts` | Download & save images to local filesystem |
| `src/routes/image-proxy.ts` | On-demand resize/transform at `/img/*` |
### Download Rules
| Scenario | Image Action |
|----------|--------------|
| **New product (first crawl)** | Download if `primaryImageUrl` exists |
| **Existing product (refresh)** | Download only if `local_image_path` is NULL (backfill) |
| **Product already has local image** | Skip download entirely |
**Logic:**
- Images are downloaded **once** and never re-downloaded on subsequent crawls
- `skipIfExists: true` - filesystem check prevents re-download even if queued
- First crawl: all products get images
- Refresh crawl: only new products or products missing local images
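A minimal sketch of that decision follows; the helper name and product shape are illustrative, and the real implementation lives in `src/utils/image-storage.ts`.
```typescript
// Hypothetical helper signature - actual download code lives in src/utils/image-storage.ts
declare function downloadAndStoreImage(
  url: string,
  opts: { skipIfExists: boolean }
): Promise<void>;

async function maybeDownloadImage(product: {
  primaryImageUrl: string | null;
  localImagePath: string | null;
}): Promise<void> {
  // Product already has a local image -> skip entirely
  if (product.localImagePath) return;

  // New product or backfill: download once if the feed provides an image URL
  if (product.primaryImageUrl) {
    await downloadAndStoreImage(product.primaryImageUrl, { skipIfExists: true });
  }
}
```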
### Storage Rules
- **NO MinIO** - local filesystem only (`STORAGE_DRIVER=local`)
- Store full resolution, resize on-demand via `/img` proxy
- Convert to webp for consistency using **sharp**
- Preserve original Dutchie URL as fallback in `image_url` column
- Local path stored in `local_image_path` column
---
## Stealth & Anti-Detection
**PROXIES ARE REQUIRED** - Workers will fail to start if no active proxies are available in the database. All HTTP requests to Dutchie go through a proxy.
Workers automatically initialize anti-detection systems on startup.
### Components
| Component | Purpose | Source |
|-----------|---------|--------|
| **CrawlRotator** | Coordinates proxy + UA rotation | `src/services/crawl-rotator.ts` |
| **ProxyRotator** | Round-robin proxy selection, health tracking | `src/services/crawl-rotator.ts` |
| **UserAgentRotator** | Cycles through realistic browser fingerprints | `src/services/crawl-rotator.ts` |
| **Dutchie Client** | Curl-based HTTP with auto-retry on 403 | `src/platforms/dutchie/client.ts` |
### Initialization Flow
```
Worker Start
├─► initializeStealth()
│ │
│ ├─► CrawlRotator.initialize()
│ │ └─► Load proxies from `proxies` table
│ │
│ └─► setCrawlRotator(rotator)
│ └─► Wire to Dutchie client
└─► Process tasks...
```
### Stealth Session (per task)
Each crawl task starts a stealth session:
```typescript
// In product-refresh.ts, entry-point-discovery.ts
const session = startSession(dispensary.state || 'AZ', 'America/Phoenix');
```
This creates a new identity with:
- **Random fingerprint:** Chrome/Firefox/Safari/Edge on Win/Mac/Linux
- **Accept-Language:** Matches timezone (e.g., `America/Phoenix` → `en-US,en;q=0.9`)
- **sec-ch-ua headers:** Proper Client Hints for the browser profile
### On 403 Block
When Dutchie returns 403, the client automatically:
1. Records failure on current proxy (increments `failure_count`)
2. If proxy has 5+ failures, deactivates it
3. Rotates to next healthy proxy
4. Rotates fingerprint
5. Retries the request
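In pseudocode, the retry loop looks roughly like this (names and signatures are illustrative; the real client is `src/platforms/dutchie/client.ts`):
```typescript
// Hypothetical shapes for illustration only
declare const rotator: {
  recordFailure(): void;      // bumps failure_count; deactivates the proxy after 5 failures
  rotateProxy(): void;
  rotateFingerprint(): void;
};
declare function doRequest(body: string): Promise<{ status: number; data: unknown }>;

async function requestWithRetry(body: string, maxRetries = 3): Promise<unknown> {
  for (let attempt = 0; attempt <= maxRetries; attempt++) {
    const res = await doRequest(body);
    if (res.status !== 403) return res.data;

    // Blocked: mark the proxy unhealthy, switch identity, and try again
    rotator.recordFailure();
    rotator.rotateProxy();
    rotator.rotateFingerprint();
  }
  throw new Error('Still blocked after retries; task is marked failed and retried later');
}
```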
### Proxy Table Schema
```sql
CREATE TABLE proxies (
id SERIAL PRIMARY KEY,
host VARCHAR(255) NOT NULL,
port INTEGER NOT NULL,
username VARCHAR(100),
password VARCHAR(100),
protocol VARCHAR(10) DEFAULT 'http', -- http, https, socks5
is_active BOOLEAN DEFAULT true,
last_used_at TIMESTAMPTZ,
failure_count INTEGER DEFAULT 0,
success_count INTEGER DEFAULT 0,
avg_response_time_ms INTEGER,
last_failure_at TIMESTAMPTZ,
last_error TEXT
);
```
### Configuration
Proxies are mandatory. There is no environment variable to disable them. Workers will refuse to start without active proxies in the database.
### Fingerprints Available
The client includes 6 browser fingerprints:
- Chrome 131 on Windows
- Chrome 131 on macOS
- Chrome 120 on Windows
- Firefox 133 on Windows
- Safari 17.2 on macOS
- Edge 131 on Windows
Each includes proper `sec-ch-ua`, `sec-ch-ua-platform`, and `sec-ch-ua-mobile` headers.
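For example, the Chrome 131 on Windows profile would send Client Hints roughly like the following (illustrative; exact GREASE brand values vary between builds):
```
sec-ch-ua: "Google Chrome";v="131", "Chromium";v="131", "Not_A Brand";v="24"
sec-ch-ua-mobile: ?0
sec-ch-ua-platform: "Windows"
```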
---
## Error Handling
- **GraphQL errors:** Logged, task marked failed, retried later
- **Normalization errors:** Logged as warnings, continue with valid products
- **Image download errors:** Non-fatal, logged, continue
- **Database errors:** Task fails, will be retried
- **403 blocks:** Auto-rotate proxy + fingerprint, retry (up to 3 retries)
---
## Files
| File | Purpose |
|------|---------|
| `src/tasks/handlers/product-resync.ts` | Main crawl handler |
| `src/tasks/handlers/entry-point-discovery.ts` | Slug → ID resolution |
| `src/platforms/dutchie/index.ts` | GraphQL client, session management |
| `src/hydration/normalizers/dutchie.ts` | Payload normalization |
| `src/hydration/canonical-upsert.ts` | Database upsert logic |
| `src/utils/image-storage.ts` | Image download and local storage |
| `src/routes/image-proxy.ts` | On-demand image resizing |
| `migrations/075_consecutive_misses.sql` | OOS tracking column |

View File

@@ -0,0 +1,400 @@
# Worker Task Architecture
This document describes the unified task-based worker system that replaces the legacy fragmented job systems.
## Overview
The task worker architecture provides a single, unified system for managing all background work in CannaiQ:
- **Store discovery** - Find new dispensaries on platforms
- **Entry point discovery** - Resolve platform IDs from menu URLs
- **Product discovery** - Initial product fetch for new stores
- **Product resync** - Regular price/stock updates for existing stores
- **Analytics refresh** - Refresh materialized views and analytics
## Architecture
### Database Tables
**`worker_tasks`** - Central task queue
```sql
CREATE TABLE worker_tasks (
id SERIAL PRIMARY KEY,
role task_role NOT NULL, -- What type of work
dispensary_id INTEGER, -- Which store (if applicable)
platform VARCHAR(50), -- Which platform (dutchie, etc.)
status task_status DEFAULT 'pending',
priority INTEGER DEFAULT 0, -- Higher = process first
scheduled_for TIMESTAMP, -- Don't process before this time
worker_id VARCHAR(100), -- Which worker claimed it
claimed_at TIMESTAMP,
started_at TIMESTAMP,
completed_at TIMESTAMP,
last_heartbeat_at TIMESTAMP, -- For stale detection
result JSONB, -- Output from handler
error_message TEXT,
retry_count INTEGER DEFAULT 0,
max_retries INTEGER DEFAULT 3,
created_at TIMESTAMP DEFAULT NOW(),
updated_at TIMESTAMP DEFAULT NOW()
);
```
**Key indexes:**
- `idx_worker_tasks_pending_priority` - For efficient task claiming
- `idx_worker_tasks_active_dispensary` - Prevents concurrent tasks per store (partial unique index)
### Task Roles
| Role | Purpose | Per-Store | Scheduled |
|------|---------|-----------|-----------|
| `store_discovery` | Find new stores on a platform | No | Daily |
| `entry_point_discovery` | Resolve platform IDs | Yes | On-demand |
| `product_discovery` | Initial product fetch | Yes | After entry_point |
| `product_resync` | Price/stock updates | Yes | Every 4 hours |
| `analytics_refresh` | Refresh MVs | No | Daily |
### Task Lifecycle
```
pending → claimed → running → completed
                            └─► failed
```
1. **pending** - Task is waiting to be picked up
2. **claimed** - Worker has claimed it (atomic via SELECT FOR UPDATE SKIP LOCKED)
3. **running** - Worker is actively processing
4. **completed** - Task finished successfully
5. **failed** - Task encountered an error
6. **stale** - Task lost its worker (recovered automatically)
## Files
### Core Files
| File | Purpose |
|------|---------|
| `src/tasks/task-service.ts` | TaskService - CRUD, claiming, capacity metrics |
| `src/tasks/task-worker.ts` | TaskWorker - Main worker loop |
| `src/tasks/index.ts` | Module exports |
| `src/routes/tasks.ts` | API endpoints |
| `migrations/074_worker_task_queue.sql` | Database schema |
### Task Handlers
| File | Role |
|------|------|
| `src/tasks/handlers/store-discovery.ts` | `store_discovery` |
| `src/tasks/handlers/entry-point-discovery.ts` | `entry_point_discovery` |
| `src/tasks/handlers/product-discovery.ts` | `product_discovery` |
| `src/tasks/handlers/product-resync.ts` | `product_resync` |
| `src/tasks/handlers/analytics-refresh.ts` | `analytics_refresh` |
## Running Workers
### Environment Variables
| Variable | Default | Description |
|----------|---------|-------------|
| `WORKER_ROLE` | (required) | Which task role to process |
| `WORKER_ID` | auto-generated | Custom worker identifier |
| `POLL_INTERVAL_MS` | 5000 | How often to check for tasks |
| `HEARTBEAT_INTERVAL_MS` | 30000 | How often to update heartbeat |
### Starting a Worker
```bash
# Start a product resync worker
WORKER_ROLE=product_resync npx tsx src/tasks/task-worker.ts
# Start with custom ID
WORKER_ROLE=product_resync WORKER_ID=resync-1 npx tsx src/tasks/task-worker.ts
# Start multiple workers for different roles
WORKER_ROLE=store_discovery npx tsx src/tasks/task-worker.ts &
WORKER_ROLE=product_resync npx tsx src/tasks/task-worker.ts &
```
### Kubernetes Deployment
```yaml
apiVersion: apps/v1
kind: Deployment
metadata:
name: task-worker-resync
spec:
replicas: 3
template:
spec:
containers:
- name: worker
image: code.cannabrands.app/creationshop/dispensary-scraper:latest
command: ["npx", "tsx", "src/tasks/task-worker.ts"]
env:
- name: WORKER_ROLE
value: "product_resync"
```
## API Endpoints
### Task Management
| Endpoint | Method | Description |
|----------|--------|-------------|
| `/api/tasks` | GET | List tasks with filters |
| `/api/tasks` | POST | Create a new task |
| `/api/tasks/:id` | GET | Get task by ID |
| `/api/tasks/counts` | GET | Get counts by status |
| `/api/tasks/capacity` | GET | Get capacity metrics |
| `/api/tasks/capacity/:role` | GET | Get role-specific capacity |
| `/api/tasks/recover-stale` | POST | Recover tasks from dead workers |
### Task Generation
| Endpoint | Method | Description |
|----------|--------|-------------|
| `/api/tasks/generate/resync` | POST | Generate daily resync tasks |
| `/api/tasks/generate/discovery` | POST | Create store discovery task |
### Migration (from legacy systems)
| Endpoint | Method | Description |
|----------|--------|-------------|
| `/api/tasks/migration/status` | GET | Compare old vs new systems |
| `/api/tasks/migration/disable-old-schedules` | POST | Disable job_schedules |
| `/api/tasks/migration/cancel-pending-crawl-jobs` | POST | Cancel old crawl jobs |
| `/api/tasks/migration/create-resync-tasks` | POST | Create tasks for all stores |
| `/api/tasks/migration/full-migrate` | POST | One-click migration |
### Role-Specific Endpoints
| Endpoint | Method | Description |
|----------|--------|-------------|
| `/api/tasks/role/:role/last-completion` | GET | Last completion time |
| `/api/tasks/role/:role/recent` | GET | Recent completions |
| `/api/tasks/store/:id/active` | GET | Check if store has active task |
## Capacity Planning
The `v_worker_capacity` view provides real-time metrics:
```sql
SELECT * FROM v_worker_capacity;
```
Returns:
- `pending_tasks` - Tasks waiting to be claimed
- `ready_tasks` - Tasks ready now (scheduled_for is null or past)
- `claimed_tasks` - Tasks claimed but not started
- `running_tasks` - Tasks actively processing
- `completed_last_hour` - Recent completions
- `failed_last_hour` - Recent failures
- `active_workers` - Workers with recent heartbeats
- `avg_duration_sec` - Average task duration
- `tasks_per_worker_hour` - Throughput estimate
- `estimated_hours_to_drain` - Time to clear queue
### Scaling Recommendations
```javascript
// API: GET /api/tasks/capacity/:role
{
"role": "product_resync",
"pending_tasks": 500,
"active_workers": 3,
"workers_needed": {
"for_1_hour": 10,
"for_4_hours": 3,
"for_8_hours": 2
}
}
```
## Task Chaining
Tasks can automatically create follow-up tasks:
```
store_discovery ─► entry_point_discovery ─► product_discovery ─► daily resync tasks
                   (once the store has a platform_dispensary_id)
```
The `chainNextTask()` method handles this automatically.
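A simplified sketch of what `chainNextTask()` decides (signatures here are illustrative, not the actual TaskService API):
```typescript
// Illustrative only - see src/tasks/task-service.ts for the real implementation
type CompletedTask = { role: string; dispensary_id?: number; platform?: string };
declare function createTask(t: Record<string, unknown>): Promise<void>;

async function chainNextTask(task: CompletedTask): Promise<void> {
  switch (task.role) {
    case 'entry_point_discovery':
      // Store now has a platform_dispensary_id -> fetch its products
      await createTask({
        role: 'product_discovery',
        dispensary_id: task.dispensary_id,
        platform: task.platform,
        priority: 10,
      });
      break;
    case 'store_discovery':
      // One entry_point_discovery task is created per newly discovered store
      break;
    case 'product_discovery':
      // No chain: the store enters the daily resync schedule
      break;
  }
}
```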
## Stale Task Recovery
Tasks are considered stale if `last_heartbeat_at` is older than the threshold (default 10 minutes).
```sql
SELECT recover_stale_tasks(10); -- 10 minute threshold
```
Or via API:
```bash
curl -X POST /api/tasks/recover-stale \
-H 'Content-Type: application/json' \
-d '{"threshold_minutes": 10}'
```
## Migration from Legacy Systems
### Legacy Systems Replaced
1. **job_schedules + job_run_logs** - Scheduled job definitions
2. **dispensary_crawl_jobs** - Per-dispensary crawl queue
3. **SyncOrchestrator + HydrationWorker** - Raw payload processing
### Migration Steps
**Option 1: One-Click Migration**
```bash
curl -X POST /api/tasks/migration/full-migrate
```
This will:
1. Disable all job_schedules
2. Cancel pending dispensary_crawl_jobs
3. Generate resync tasks for all stores
4. Create discovery and analytics tasks
**Option 2: Manual Migration**
```bash
# 1. Check current status
curl /api/tasks/migration/status
# 2. Disable old schedules
curl -X POST /api/tasks/migration/disable-old-schedules
# 3. Cancel pending crawl jobs
curl -X POST /api/tasks/migration/cancel-pending-crawl-jobs
# 4. Create resync tasks
curl -X POST /api/tasks/migration/create-resync-tasks \
-H 'Content-Type: application/json' \
-d '{"state_code": "AZ"}'
# 5. Generate daily resync schedule
curl -X POST /api/tasks/generate/resync \
-H 'Content-Type: application/json' \
-d '{"batches_per_day": 6}'
```
## Per-Store Locking
The system prevents concurrent tasks for the same store using a partial unique index:
```sql
CREATE UNIQUE INDEX idx_worker_tasks_active_dispensary
ON worker_tasks (dispensary_id)
WHERE dispensary_id IS NOT NULL
AND status IN ('claimed', 'running');
```
This ensures only one task can be active per store at any time.
## Task Priority
Tasks are claimed in priority order (higher first), then by creation time:
```sql
ORDER BY priority DESC, created_at ASC
```
Default priorities:
- `store_discovery`: 0
- `entry_point_discovery`: 10 (high - new stores)
- `product_discovery`: 10 (high - new stores)
- `product_resync`: 0
- `analytics_refresh`: 0
## Scheduled Tasks
Tasks can be scheduled for future execution:
```javascript
await taskService.createTask({
role: 'product_resync',
dispensary_id: 123,
scheduled_for: new Date('2025-01-10T06:00:00Z'),
});
```
The `generate_resync_tasks()` function creates staggered tasks throughout the day:
```sql
SELECT generate_resync_tasks(6, '2025-01-10'); -- 6 batches = every 4 hours
```
## Dashboard Integration
The admin dashboard shows task queue status in the main overview:
```
Task Queue Summary
------------------
Pending: 45
Running: 3
Completed: 1,234
Failed: 12
```
Full task management is available at `/admin/tasks`.
## Error Handling
Failed tasks include the error message in `error_message` and can be retried:
```sql
-- View failed tasks
SELECT id, role, dispensary_id, error_message, retry_count
FROM worker_tasks
WHERE status = 'failed'
ORDER BY completed_at DESC
LIMIT 20;
-- Retry failed tasks
UPDATE worker_tasks
SET status = 'pending', retry_count = retry_count + 1
WHERE status = 'failed' AND retry_count < max_retries;
```
## Monitoring
### Logs
Workers log to stdout:
```
[TaskWorker] Starting worker worker-product_resync-a1b2c3d4 for role: product_resync
[TaskWorker] Claimed task 123 (product_resync) for dispensary 456
[TaskWorker] Task 123 completed successfully
```
### Health Check
Check if workers are active:
```sql
SELECT worker_id, role, COUNT(*), MAX(last_heartbeat_at)
FROM worker_tasks
WHERE last_heartbeat_at > NOW() - INTERVAL '5 minutes'
GROUP BY worker_id, role;
```
### Metrics
```sql
-- Tasks by status
SELECT status, COUNT(*) FROM worker_tasks GROUP BY status;
-- Tasks by role
SELECT role, status, COUNT(*) FROM worker_tasks GROUP BY role, status;
-- Average duration by role
SELECT role, AVG(EXTRACT(EPOCH FROM (completed_at - started_at))) as avg_seconds
FROM worker_tasks
WHERE status = 'completed' AND completed_at > NOW() - INTERVAL '24 hours'
GROUP BY role;
```

View File

@@ -0,0 +1,69 @@
apiVersion: batch/v1
kind: CronJob
metadata:
name: ip2location-update
namespace: default
spec:
# Run on the 1st of every month at 3am UTC
schedule: "0 3 1 * *"
concurrencyPolicy: Forbid
successfulJobsHistoryLimit: 3
failedJobsHistoryLimit: 3
jobTemplate:
spec:
template:
spec:
containers:
- name: ip2location-updater
image: curlimages/curl:latest
command:
- /bin/sh
- -c
- |
set -e
echo "Downloading IP2Location LITE DB5..."
# Download to temp
cd /tmp
curl -L -o ip2location.zip "https://www.ip2location.com/download/?token=${IP2LOCATION_TOKEN}&file=DB5LITEBIN"
# Extract
unzip -o ip2location.zip
# Find and copy the BIN file
BIN_FILE=$(ls *.BIN 2>/dev/null | head -1)
if [ -z "$BIN_FILE" ]; then
echo "ERROR: No BIN file found"
exit 1
fi
# Copy to shared volume
cp "$BIN_FILE" /data/IP2LOCATION-LITE-DB5.BIN
echo "Done! Database updated: /data/IP2LOCATION-LITE-DB5.BIN"
env:
- name: IP2LOCATION_TOKEN
valueFrom:
secretKeyRef:
name: dutchie-backend-secret
key: IP2LOCATION_TOKEN
volumeMounts:
- name: ip2location-data
mountPath: /data
restartPolicy: OnFailure
volumes:
- name: ip2location-data
persistentVolumeClaim:
claimName: ip2location-pvc
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: ip2location-pvc
namespace: default
spec:
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 100Mi

View File

@@ -26,6 +26,12 @@ spec:
name: dutchie-backend-config
- secretRef:
name: dutchie-backend-secret
env:
- name: IP2LOCATION_DB_PATH
value: /data/ip2location/IP2LOCATION-LITE-DB5.BIN
volumeMounts:
- name: ip2location-data
mountPath: /data/ip2location
resources:
requests:
memory: "256Mi"
@@ -45,3 +51,7 @@ spec:
port: 3010
initialDelaySeconds: 5
periodSeconds: 5
volumes:
- name: ip2location-data
persistentVolumeClaim:
claimName: ip2location-pvc

View File

@@ -0,0 +1,119 @@
-- Migration 051: Worker Definitions
-- Creates a dedicated workers table for named workers with roles and assignments
-- Workers table - defines named workers with roles
CREATE TABLE IF NOT EXISTS workers (
id SERIAL PRIMARY KEY,
name VARCHAR(100) NOT NULL UNIQUE,
role VARCHAR(100) NOT NULL,
description TEXT,
enabled BOOLEAN DEFAULT TRUE,
-- Schedule configuration (for dedicated crawl workers)
schedule_type VARCHAR(50) DEFAULT 'interval', -- 'interval', 'cron', 'manual'
interval_minutes INTEGER DEFAULT 240,
cron_expression VARCHAR(100), -- e.g., '0 */4 * * *'
jitter_minutes INTEGER DEFAULT 30,
-- Assignment scope
assignment_type VARCHAR(50) DEFAULT 'all', -- 'all', 'state', 'dispensary', 'chain'
assigned_state_codes TEXT[], -- e.g., ['AZ', 'CA']
assigned_dispensary_ids INTEGER[],
assigned_chain_ids INTEGER[],
-- Job configuration
job_type VARCHAR(50) NOT NULL DEFAULT 'dutchie_product_crawl',
job_config JSONB DEFAULT '{}',
priority INTEGER DEFAULT 0,
max_concurrent INTEGER DEFAULT 1,
-- Status tracking
status VARCHAR(50) DEFAULT 'idle', -- 'idle', 'running', 'paused', 'error'
last_run_at TIMESTAMPTZ,
last_status VARCHAR(50),
last_error TEXT,
last_duration_ms INTEGER,
next_run_at TIMESTAMPTZ,
current_job_id INTEGER,
-- Metrics
total_runs INTEGER DEFAULT 0,
successful_runs INTEGER DEFAULT 0,
failed_runs INTEGER DEFAULT 0,
avg_duration_ms INTEGER,
created_at TIMESTAMPTZ DEFAULT NOW(),
updated_at TIMESTAMPTZ DEFAULT NOW()
);
-- Worker run history
CREATE TABLE IF NOT EXISTS worker_runs (
id SERIAL PRIMARY KEY,
worker_id INTEGER NOT NULL REFERENCES workers(id) ON DELETE CASCADE,
started_at TIMESTAMPTZ DEFAULT NOW(),
completed_at TIMESTAMPTZ,
status VARCHAR(50) DEFAULT 'running', -- 'running', 'success', 'error', 'cancelled'
duration_ms INTEGER,
-- What was processed
jobs_created INTEGER DEFAULT 0,
jobs_completed INTEGER DEFAULT 0,
jobs_failed INTEGER DEFAULT 0,
dispensaries_crawled INTEGER DEFAULT 0,
products_found INTEGER DEFAULT 0,
error_message TEXT,
metadata JSONB DEFAULT '{}',
created_at TIMESTAMPTZ DEFAULT NOW()
);
-- Index for efficient lookups
CREATE INDEX IF NOT EXISTS idx_workers_enabled ON workers(enabled) WHERE enabled = TRUE;
CREATE INDEX IF NOT EXISTS idx_workers_next_run ON workers(next_run_at) WHERE enabled = TRUE;
CREATE INDEX IF NOT EXISTS idx_workers_status ON workers(status);
CREATE INDEX IF NOT EXISTS idx_worker_runs_worker_id ON worker_runs(worker_id);
CREATE INDEX IF NOT EXISTS idx_worker_runs_started_at ON worker_runs(started_at DESC);
-- Add assigned_worker_id to dispensary_crawl_jobs if not exists
DO $$
BEGIN
IF NOT EXISTS (
SELECT 1 FROM information_schema.columns
WHERE table_name = 'dispensary_crawl_jobs' AND column_name = 'assigned_worker_id'
) THEN
ALTER TABLE dispensary_crawl_jobs ADD COLUMN assigned_worker_id INTEGER REFERENCES workers(id);
END IF;
END $$;
-- Migrate existing job_schedules workers to new workers table
INSERT INTO workers (name, role, description, enabled, interval_minutes, jitter_minutes, job_type, job_config, last_run_at, last_status, last_error, last_duration_ms, next_run_at)
SELECT
worker_name,
worker_role,
description,
enabled,
base_interval_minutes,
jitter_minutes,
job_name,
job_config,
last_run_at,
last_status,
last_error_message,
last_duration_ms,
next_run_at
FROM job_schedules
WHERE worker_name IS NOT NULL
ON CONFLICT (name) DO UPDATE SET
updated_at = NOW();
-- Available worker roles (reference)
COMMENT ON TABLE workers IS 'Named workers with specific roles and assignments. Roles include:
- product_sync: Crawls products from dispensary menus
- store_discovery: Discovers new dispensary locations
- entry_point_finder: Detects menu providers and resolves platform IDs
- analytics_refresh: Refreshes materialized views and analytics
- price_monitor: Monitors price changes and triggers alerts
- inventory_sync: Syncs inventory levels
- image_processor: Downloads and processes product images
- data_validator: Validates data integrity';

View File

@@ -0,0 +1,49 @@
-- Migration 052: SEO Settings Table
-- Key/value store for SEO Orchestrator configuration
CREATE TABLE IF NOT EXISTS seo_settings (
id SERIAL PRIMARY KEY,
key TEXT UNIQUE NOT NULL,
value JSONB NOT NULL,
created_at TIMESTAMP DEFAULT NOW(),
updated_at TIMESTAMP DEFAULT NOW()
);
-- Create index on key for fast lookups
CREATE INDEX IF NOT EXISTS idx_seo_settings_key ON seo_settings(key);
-- Seed with default settings
INSERT INTO seo_settings (key, value) VALUES
-- Section 1: Global Content Generation Settings
('primary_prompt_template', '"You are a cannabis industry content expert. Generate SEO-optimized content for {{page_type}} pages about {{subject}}. Focus on: {{focus_areas}}. Maintain a {{tone}} tone and keep content {{length}}."'),
('regeneration_prompt_template', '"Regenerate the following SEO content with fresh perspectives. Original topic: {{subject}}. Improve upon: {{improvement_areas}}. Maintain compliance with cannabis industry standards."'),
('default_content_length', '"medium"'),
('tone_voice', '"informational"'),
-- Section 2: Automatic Refresh Rules
('auto_refresh_interval', '"weekly"'),
('trigger_pct_product_change', 'true'),
('trigger_pct_brand_change', 'true'),
('trigger_new_stores', 'true'),
('trigger_market_shift', 'false'),
('webhook_url', '""'),
('notify_on_trigger', 'false'),
-- Section 3: Page-Level Defaults
('default_title_template', '"{{state_name}} Dispensaries | Find Cannabis Near You | CannaiQ"'),
('default_meta_description_template', '"Discover the best dispensaries in {{state_name}}. Browse {{dispensary_count}}+ licensed retailers, compare prices, and find cannabis products near you."'),
('default_slug_template', '"dispensaries-{{state_code_lower}}"'),
('default_og_image_template', '"/images/seo/og-{{state_code_lower}}.jpg"'),
('enable_ai_images', 'false'),
-- Section 4: Crawl / Dataset Configuration
('primary_data_provider', '"cannaiq"'),
('fallback_data_provider', '"dutchie"'),
('min_data_freshness_hours', '24'),
('stale_data_behavior', '"allow_with_warning"')
ON CONFLICT (key) DO NOTHING;
-- Record migration
INSERT INTO schema_migrations (version, name, applied_at)
VALUES ('052', 'seo_settings', NOW())
ON CONFLICT (version) DO NOTHING;

View File

@@ -0,0 +1,140 @@
-- Migration 066: Align dispensaries and discovery_locations tables with Dutchie field names
-- Uses snake_case convention (Postgres standard) mapped from Dutchie's camelCase
--
-- Changes:
-- 1. dispensaries: rename address→address1, zip→zipcode, remove company_name
-- 2. dispensaries: add missing Dutchie fields
-- 3. dutchie_discovery_locations: add missing Dutchie fields
-- ============================================================================
-- DISPENSARIES TABLE
-- ============================================================================
-- Rename address to address1 (matches Dutchie's address1)
ALTER TABLE dispensaries RENAME COLUMN address TO address1;
-- Rename zip to zipcode (matches Dutchie's zip, but we use zipcode for clarity)
ALTER TABLE dispensaries RENAME COLUMN zip TO zipcode;
-- Drop company_name (redundant with name)
ALTER TABLE dispensaries DROP COLUMN IF EXISTS company_name;
-- Add address2
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS address2 VARCHAR(255);
-- Add country
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS country VARCHAR(100) DEFAULT 'United States';
-- Add timezone
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS timezone VARCHAR(50);
-- Add email
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS email VARCHAR(255);
-- Add description
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS description TEXT;
-- Add logo_image (Dutchie: logoImage)
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS logo_image TEXT;
-- Add banner_image (Dutchie: bannerImage)
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS banner_image TEXT;
-- Add offer_pickup (Dutchie: offerPickup)
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS offer_pickup BOOLEAN DEFAULT TRUE;
-- Add offer_delivery (Dutchie: offerDelivery)
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS offer_delivery BOOLEAN DEFAULT FALSE;
-- Add offer_curbside_pickup (Dutchie: offerCurbsidePickup)
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS offer_curbside_pickup BOOLEAN DEFAULT FALSE;
-- Add is_medical (Dutchie: isMedical)
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS is_medical BOOLEAN DEFAULT FALSE;
-- Add is_recreational (Dutchie: isRecreational)
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS is_recreational BOOLEAN DEFAULT FALSE;
-- Add chain_slug (Dutchie: chain)
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS chain_slug VARCHAR(255);
-- Add enterprise_id (Dutchie: retailer.enterpriseId)
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS enterprise_id VARCHAR(100);
-- Add status (Dutchie: status - open/closed)
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS status VARCHAR(50);
-- Add c_name (Dutchie: cName - the URL slug used in embedded menus)
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS c_name VARCHAR(255);
-- ============================================================================
-- DUTCHIE_DISCOVERY_LOCATIONS TABLE
-- ============================================================================
-- Add phone
ALTER TABLE dutchie_discovery_locations ADD COLUMN IF NOT EXISTS phone VARCHAR(50);
-- Add website (Dutchie: embedBackUrl)
ALTER TABLE dutchie_discovery_locations ADD COLUMN IF NOT EXISTS website TEXT;
-- Add email
ALTER TABLE dutchie_discovery_locations ADD COLUMN IF NOT EXISTS email VARCHAR(255);
-- Add description
ALTER TABLE dutchie_discovery_locations ADD COLUMN IF NOT EXISTS description TEXT;
-- Add logo_image
ALTER TABLE dutchie_discovery_locations ADD COLUMN IF NOT EXISTS logo_image TEXT;
-- Add banner_image
ALTER TABLE dutchie_discovery_locations ADD COLUMN IF NOT EXISTS banner_image TEXT;
-- Add chain_slug
ALTER TABLE dutchie_discovery_locations ADD COLUMN IF NOT EXISTS chain_slug VARCHAR(255);
-- Add enterprise_id
ALTER TABLE dutchie_discovery_locations ADD COLUMN IF NOT EXISTS enterprise_id VARCHAR(100);
-- Add c_name
ALTER TABLE dutchie_discovery_locations ADD COLUMN IF NOT EXISTS c_name VARCHAR(255);
-- Add country
ALTER TABLE dutchie_discovery_locations ADD COLUMN IF NOT EXISTS country VARCHAR(100) DEFAULT 'United States';
-- Add store status
ALTER TABLE dutchie_discovery_locations ADD COLUMN IF NOT EXISTS store_status VARCHAR(50);
-- ============================================================================
-- INDEXES
-- ============================================================================
-- Index for chain lookups
CREATE INDEX IF NOT EXISTS idx_dispensaries_chain_slug ON dispensaries(chain_slug) WHERE chain_slug IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_discovery_locations_chain_slug ON dutchie_discovery_locations(chain_slug) WHERE chain_slug IS NOT NULL;
-- Index for enterprise lookups (for multi-location chains)
CREATE INDEX IF NOT EXISTS idx_dispensaries_enterprise_id ON dispensaries(enterprise_id) WHERE enterprise_id IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_discovery_locations_enterprise_id ON dutchie_discovery_locations(enterprise_id) WHERE enterprise_id IS NOT NULL;
-- Index for c_name lookups
CREATE INDEX IF NOT EXISTS idx_dispensaries_c_name ON dispensaries(c_name) WHERE c_name IS NOT NULL;
-- ============================================================================
-- COMMENTS
-- ============================================================================
COMMENT ON COLUMN dispensaries.address1 IS 'Street address line 1 (Dutchie: address1)';
COMMENT ON COLUMN dispensaries.address2 IS 'Street address line 2 (Dutchie: address2)';
COMMENT ON COLUMN dispensaries.zipcode IS 'ZIP/postal code (Dutchie: zip)';
COMMENT ON COLUMN dispensaries.c_name IS 'Dutchie URL slug for embedded menus (Dutchie: cName)';
COMMENT ON COLUMN dispensaries.chain_slug IS 'Chain identifier slug (Dutchie: chain)';
COMMENT ON COLUMN dispensaries.enterprise_id IS 'Parent enterprise UUID (Dutchie: retailer.enterpriseId)';
COMMENT ON COLUMN dispensaries.logo_image IS 'Logo image URL (Dutchie: logoImage)';
COMMENT ON COLUMN dispensaries.banner_image IS 'Banner image URL (Dutchie: bannerImage)';
COMMENT ON COLUMN dispensaries.offer_pickup IS 'Offers in-store pickup (Dutchie: offerPickup)';
COMMENT ON COLUMN dispensaries.offer_delivery IS 'Offers delivery (Dutchie: offerDelivery)';
COMMENT ON COLUMN dispensaries.offer_curbside_pickup IS 'Offers curbside pickup (Dutchie: offerCurbsidePickup)';
COMMENT ON COLUMN dispensaries.is_medical IS 'Licensed for medical sales (Dutchie: isMedical)';
COMMENT ON COLUMN dispensaries.is_recreational IS 'Licensed for recreational sales (Dutchie: isRecreational)';
SELECT 'Migration 066 completed: Dutchie field alignment' as status;

View File

@@ -0,0 +1,24 @@
-- Promotion log table for tracking discovery → dispensary promotions
-- Tracks validation and promotion actions for audit/review
CREATE TABLE IF NOT EXISTS dutchie_promotion_log (
id SERIAL PRIMARY KEY,
discovery_id INTEGER REFERENCES dutchie_discovery_locations(id) ON DELETE SET NULL,
dispensary_id INTEGER REFERENCES dispensaries(id) ON DELETE SET NULL,
action VARCHAR(50) NOT NULL, -- 'validated', 'rejected', 'promoted_create', 'promoted_update', 'skipped'
state_code VARCHAR(10),
store_name VARCHAR(255),
validation_errors TEXT[], -- Array of error messages if rejected
field_changes JSONB, -- Before/after snapshot of changed fields
triggered_by VARCHAR(100) DEFAULT 'auto', -- 'auto', 'manual', 'api'
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP
);
-- Indexes for efficient querying
CREATE INDEX IF NOT EXISTS idx_promotion_log_discovery_id ON dutchie_promotion_log(discovery_id);
CREATE INDEX IF NOT EXISTS idx_promotion_log_dispensary_id ON dutchie_promotion_log(dispensary_id);
CREATE INDEX IF NOT EXISTS idx_promotion_log_action ON dutchie_promotion_log(action);
CREATE INDEX IF NOT EXISTS idx_promotion_log_state_code ON dutchie_promotion_log(state_code);
CREATE INDEX IF NOT EXISTS idx_promotion_log_created_at ON dutchie_promotion_log(created_at DESC);
COMMENT ON TABLE dutchie_promotion_log IS 'Audit log for discovery location validation and promotion to dispensaries';

View File

@@ -0,0 +1,95 @@
-- Migration 068: Crawler Status Alerts
-- Creates status_alerts table for dashboard notifications and status change logging
-- ============================================================
-- STATUS ALERTS TABLE
-- ============================================================
CREATE TABLE IF NOT EXISTS crawler_status_alerts (
id SERIAL PRIMARY KEY,
-- References
dispensary_id INTEGER REFERENCES dispensaries(id),
profile_id INTEGER REFERENCES dispensary_crawler_profiles(id),
-- Alert info
alert_type VARCHAR(50) NOT NULL, -- 'status_change', 'crawl_error', 'validation_failed', 'promoted', 'demoted'
severity VARCHAR(20) DEFAULT 'info', -- 'info', 'warning', 'error', 'critical'
-- Status transition
previous_status VARCHAR(50),
new_status VARCHAR(50),
-- Context
message TEXT,
error_details JSONB,
metadata JSONB, -- Additional context (product counts, error codes, etc.)
-- Tracking
acknowledged BOOLEAN DEFAULT FALSE,
acknowledged_at TIMESTAMP WITH TIME ZONE,
acknowledged_by VARCHAR(100),
-- Timestamps
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP
);
-- Indexes for common queries
CREATE INDEX IF NOT EXISTS idx_crawler_status_alerts_dispensary ON crawler_status_alerts(dispensary_id);
CREATE INDEX IF NOT EXISTS idx_crawler_status_alerts_type ON crawler_status_alerts(alert_type);
CREATE INDEX IF NOT EXISTS idx_crawler_status_alerts_severity ON crawler_status_alerts(severity);
CREATE INDEX IF NOT EXISTS idx_crawler_status_alerts_unack ON crawler_status_alerts(acknowledged) WHERE acknowledged = FALSE;
CREATE INDEX IF NOT EXISTS idx_crawler_status_alerts_created ON crawler_status_alerts(created_at DESC);
-- ============================================================
-- STATUS DEFINITIONS (for reference/validation)
-- ============================================================
COMMENT ON TABLE crawler_status_alerts IS 'Crawler status change notifications for dashboard alerting';
COMMENT ON COLUMN crawler_status_alerts.alert_type IS 'Type: status_change, crawl_error, validation_failed, promoted, demoted';
COMMENT ON COLUMN crawler_status_alerts.severity IS 'Severity: info, warning, error, critical';
COMMENT ON COLUMN crawler_status_alerts.previous_status IS 'Previous crawler status before change';
COMMENT ON COLUMN crawler_status_alerts.new_status IS 'New crawler status after change';
-- ============================================================
-- STATUS TRACKING ON PROFILES
-- ============================================================
-- Add columns for status tracking if not exists
DO $$
BEGIN
-- Consecutive success count for auto-promotion
IF NOT EXISTS (SELECT 1 FROM information_schema.columns
WHERE table_name = 'dispensary_crawler_profiles' AND column_name = 'consecutive_successes') THEN
ALTER TABLE dispensary_crawler_profiles ADD COLUMN consecutive_successes INTEGER DEFAULT 0;
END IF;
-- Consecutive failure count for auto-demotion
IF NOT EXISTS (SELECT 1 FROM information_schema.columns
WHERE table_name = 'dispensary_crawler_profiles' AND column_name = 'consecutive_failures') THEN
ALTER TABLE dispensary_crawler_profiles ADD COLUMN consecutive_failures INTEGER DEFAULT 0;
END IF;
-- Last status change timestamp
IF NOT EXISTS (SELECT 1 FROM information_schema.columns
WHERE table_name = 'dispensary_crawler_profiles' AND column_name = 'status_changed_at') THEN
ALTER TABLE dispensary_crawler_profiles ADD COLUMN status_changed_at TIMESTAMP WITH TIME ZONE;
END IF;
-- Status change reason
IF NOT EXISTS (SELECT 1 FROM information_schema.columns
WHERE table_name = 'dispensary_crawler_profiles' AND column_name = 'status_reason') THEN
ALTER TABLE dispensary_crawler_profiles ADD COLUMN status_reason TEXT;
END IF;
END $$;
-- ============================================================
-- VALID STATUS VALUES
-- ============================================================
-- Status values for dispensary_crawler_profiles.status:
-- 'sandbox' - Newly created, being validated
-- 'production' - Healthy, actively crawled
-- 'needs_manual' - Requires human intervention
-- 'failing' - Multiple consecutive failures
-- 'disabled' - Manually disabled
-- 'legacy' - No profile, uses default method (virtual status)

View File

@@ -0,0 +1,163 @@
-- Migration 069: Seven-Stage Status System
--
-- Implements explicit 7-stage pipeline for store lifecycle:
-- 1. discovered - Found via Dutchie API, raw data
-- 2. validated - Passed field checks, ready for promotion
-- 3. promoted - In dispensaries table, has crawler profile
-- 4. sandbox - First crawl attempted, testing
-- 5. hydrating - Products are being loaded/updated
-- 6. production - Healthy, scheduled crawls via Horizon
-- 7. failing - Crawl errors, needs attention
-- ============================================================
-- STAGE ENUM TYPE
-- ============================================================
DO $$
BEGIN
-- Create enum if not exists
IF NOT EXISTS (SELECT 1 FROM pg_type WHERE typname = 'store_stage') THEN
CREATE TYPE store_stage AS ENUM (
'discovered',
'validated',
'promoted',
'sandbox',
'hydrating',
'production',
'failing'
);
END IF;
END $$;
-- ============================================================
-- UPDATE DISCOVERY LOCATIONS TABLE
-- ============================================================
-- Add stage column to discovery locations (replaces status)
DO $$
BEGIN
IF NOT EXISTS (SELECT 1 FROM information_schema.columns
WHERE table_name = 'dutchie_discovery_locations' AND column_name = 'stage') THEN
ALTER TABLE dutchie_discovery_locations ADD COLUMN stage VARCHAR(20) DEFAULT 'discovered';
END IF;
END $$;
-- Migrate existing status values to stage
UPDATE dutchie_discovery_locations
SET stage = CASE
WHEN status = 'discovered' THEN 'discovered'
WHEN status = 'verified' THEN 'validated'
WHEN status = 'rejected' THEN 'failing'
WHEN status = 'merged' THEN 'validated'
ELSE 'discovered'
END
WHERE stage IS NULL OR stage = '';
-- ============================================================
-- UPDATE CRAWLER PROFILES TABLE
-- ============================================================
-- Ensure status column exists and update to new values
UPDATE dispensary_crawler_profiles
SET status = CASE
WHEN status = 'sandbox' THEN 'sandbox'
WHEN status = 'production' THEN 'production'
WHEN status = 'needs_manual' THEN 'failing'
WHEN status = 'failing' THEN 'failing'
WHEN status = 'disabled' THEN 'failing'
WHEN status IS NULL THEN 'promoted'
ELSE 'promoted'
END;
-- ============================================================
-- ADD STAGE TRACKING TO DISPENSARIES
-- ============================================================
DO $$
BEGIN
-- Add stage column to dispensaries for quick filtering
IF NOT EXISTS (SELECT 1 FROM information_schema.columns
WHERE table_name = 'dispensaries' AND column_name = 'stage') THEN
ALTER TABLE dispensaries ADD COLUMN stage VARCHAR(20) DEFAULT 'promoted';
END IF;
-- Add stage_changed_at for tracking
IF NOT EXISTS (SELECT 1 FROM information_schema.columns
WHERE table_name = 'dispensaries' AND column_name = 'stage_changed_at') THEN
ALTER TABLE dispensaries ADD COLUMN stage_changed_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP;
END IF;
-- Add first_crawl_at to track sandbox → production transition
IF NOT EXISTS (SELECT 1 FROM information_schema.columns
WHERE table_name = 'dispensaries' AND column_name = 'first_crawl_at') THEN
ALTER TABLE dispensaries ADD COLUMN first_crawl_at TIMESTAMP WITH TIME ZONE;
END IF;
-- Add last_successful_crawl_at
IF NOT EXISTS (SELECT 1 FROM information_schema.columns
WHERE table_name = 'dispensaries' AND column_name = 'last_successful_crawl_at') THEN
ALTER TABLE dispensaries ADD COLUMN last_successful_crawl_at TIMESTAMP WITH TIME ZONE;
END IF;
END $$;
-- Set initial stage for existing dispensaries based on their crawler profile status
UPDATE dispensaries d
SET stage = COALESCE(
(SELECT dcp.status FROM dispensary_crawler_profiles dcp
WHERE dcp.dispensary_id = d.id AND dcp.enabled = true
ORDER BY dcp.updated_at DESC LIMIT 1),
'promoted'
)
WHERE d.stage IS NULL OR d.stage = '';
-- ============================================================
-- INDEXES FOR STAGE-BASED QUERIES
-- ============================================================
CREATE INDEX IF NOT EXISTS idx_dispensaries_stage ON dispensaries(stage);
CREATE INDEX IF NOT EXISTS idx_dispensaries_stage_state ON dispensaries(stage, state);
CREATE INDEX IF NOT EXISTS idx_discovery_locations_stage ON dutchie_discovery_locations(stage);
CREATE INDEX IF NOT EXISTS idx_crawler_profiles_status ON dispensary_crawler_profiles(status);
-- ============================================================
-- STAGE TRANSITION LOG
-- ============================================================
CREATE TABLE IF NOT EXISTS stage_transitions (
id SERIAL PRIMARY KEY,
-- What changed
entity_type VARCHAR(20) NOT NULL, -- 'discovery_location' or 'dispensary'
entity_id INTEGER NOT NULL,
-- Stage change
from_stage VARCHAR(20),
to_stage VARCHAR(20) NOT NULL,
-- Context
trigger_type VARCHAR(50) NOT NULL, -- 'api', 'scheduler', 'manual', 'auto'
trigger_endpoint VARCHAR(200),
-- Outcome
success BOOLEAN DEFAULT TRUE,
error_message TEXT,
metadata JSONB,
-- Timing
duration_ms INTEGER,
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP
);
CREATE INDEX IF NOT EXISTS idx_stage_transitions_entity ON stage_transitions(entity_type, entity_id);
CREATE INDEX IF NOT EXISTS idx_stage_transitions_to_stage ON stage_transitions(to_stage);
CREATE INDEX IF NOT EXISTS idx_stage_transitions_created ON stage_transitions(created_at DESC);
-- ============================================================
-- COMMENTS
-- ============================================================
COMMENT ON TABLE stage_transitions IS 'Audit log for all stage transitions in the pipeline';
COMMENT ON COLUMN dispensaries.stage IS 'Current pipeline stage: discovered, validated, promoted, sandbox, production, failing';
COMMENT ON COLUMN dispensaries.stage_changed_at IS 'When the stage was last changed';
COMMENT ON COLUMN dispensaries.first_crawl_at IS 'When the first crawl was attempted (sandbox stage)';
COMMENT ON COLUMN dispensaries.last_successful_crawl_at IS 'When the last successful crawl completed';

View File

@@ -0,0 +1,239 @@
-- ============================================================================
-- Migration 070: Product Variants Tables
-- ============================================================================
--
-- Purpose: Store variant-level pricing and inventory as first-class entities
-- to enable time-series analytics, price comparisons, and sale tracking.
--
-- Enables queries like:
-- - Price history for a specific variant (1g Blue Dream over time)
-- - Sale frequency analysis (how often is this on special?)
-- - Cross-store price comparison (who has cheapest 1g flower?)
-- - Current specials across all stores
--
-- RULES:
-- - STRICTLY ADDITIVE (no DROP, DELETE, TRUNCATE)
-- - All new tables use IF NOT EXISTS
-- - All indexes use IF NOT EXISTS
--
-- ============================================================================
-- ============================================================================
-- SECTION 1: PRODUCT_VARIANTS TABLE (Current State)
-- ============================================================================
-- One row per product+option combination. Tracks current pricing/inventory.
CREATE TABLE IF NOT EXISTS product_variants (
id SERIAL PRIMARY KEY,
store_product_id INTEGER NOT NULL REFERENCES store_products(id) ON DELETE CASCADE,
dispensary_id INTEGER NOT NULL REFERENCES dispensaries(id) ON DELETE CASCADE,
-- Variant identity (from Dutchie POSMetaData.children)
option VARCHAR(100) NOT NULL, -- "1g", "3.5g", "1/8oz", "100mg"
canonical_sku VARCHAR(100), -- Dutchie canonicalSKU
canonical_id VARCHAR(100), -- Dutchie canonicalID
canonical_name VARCHAR(500), -- Dutchie canonicalName
-- Current pricing (in dollars, not cents)
price_rec NUMERIC(10,2),
price_med NUMERIC(10,2),
price_rec_special NUMERIC(10,2),
price_med_special NUMERIC(10,2),
-- Current inventory
quantity INTEGER,
quantity_available INTEGER,
in_stock BOOLEAN DEFAULT TRUE,
-- Special/sale status
is_on_special BOOLEAN DEFAULT FALSE,
-- Weight/size parsing (for analytics)
weight_value NUMERIC(10,2), -- 1, 3.5, 28, etc.
weight_unit VARCHAR(20), -- g, oz, mg, ml, etc.
-- Timestamps
first_seen_at TIMESTAMPTZ DEFAULT NOW(),
last_seen_at TIMESTAMPTZ DEFAULT NOW(),
last_price_change_at TIMESTAMPTZ,
last_stock_change_at TIMESTAMPTZ,
created_at TIMESTAMPTZ DEFAULT NOW(),
updated_at TIMESTAMPTZ DEFAULT NOW(),
UNIQUE(store_product_id, option)
);
-- Indexes for common queries
CREATE INDEX IF NOT EXISTS idx_variants_store_product ON product_variants(store_product_id);
CREATE INDEX IF NOT EXISTS idx_variants_dispensary ON product_variants(dispensary_id);
CREATE INDEX IF NOT EXISTS idx_variants_option ON product_variants(option);
CREATE INDEX IF NOT EXISTS idx_variants_in_stock ON product_variants(dispensary_id, in_stock) WHERE in_stock = TRUE;
CREATE INDEX IF NOT EXISTS idx_variants_on_special ON product_variants(dispensary_id, is_on_special) WHERE is_on_special = TRUE;
CREATE INDEX IF NOT EXISTS idx_variants_canonical_sku ON product_variants(canonical_sku) WHERE canonical_sku IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_variants_price_rec ON product_variants(price_rec) WHERE price_rec IS NOT NULL;
COMMENT ON TABLE product_variants IS 'Current state of each product variant (weight/size option). One row per product+option.';
COMMENT ON COLUMN product_variants.option IS 'Weight/size option string from Dutchie (e.g., "1g", "3.5g", "1/8oz")';
COMMENT ON COLUMN product_variants.canonical_sku IS 'Dutchie POS SKU for cross-store matching';
-- ============================================================================
-- SECTION 2: PRODUCT_VARIANT_SNAPSHOTS TABLE (Historical Data)
-- ============================================================================
-- Time-series data for variant pricing. One row per variant per crawl.
-- CRITICAL: NEVER DELETE from this table.
CREATE TABLE IF NOT EXISTS product_variant_snapshots (
id SERIAL PRIMARY KEY,
product_variant_id INTEGER NOT NULL REFERENCES product_variants(id) ON DELETE CASCADE,
store_product_id INTEGER REFERENCES store_products(id) ON DELETE SET NULL,
dispensary_id INTEGER NOT NULL REFERENCES dispensaries(id) ON DELETE CASCADE,
crawl_run_id INTEGER REFERENCES crawl_runs(id) ON DELETE SET NULL,
-- Variant identity (denormalized for query performance)
option VARCHAR(100) NOT NULL,
-- Pricing at time of capture
price_rec NUMERIC(10,2),
price_med NUMERIC(10,2),
price_rec_special NUMERIC(10,2),
price_med_special NUMERIC(10,2),
-- Inventory at time of capture
quantity INTEGER,
in_stock BOOLEAN DEFAULT TRUE,
-- Special status at time of capture
is_on_special BOOLEAN DEFAULT FALSE,
-- Feed presence (FALSE = variant missing from crawl)
is_present_in_feed BOOLEAN DEFAULT TRUE,
-- Capture timestamp
captured_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
created_at TIMESTAMPTZ DEFAULT NOW()
);
-- Indexes for time-series queries
CREATE INDEX IF NOT EXISTS idx_variant_snapshots_variant ON product_variant_snapshots(product_variant_id, captured_at DESC);
CREATE INDEX IF NOT EXISTS idx_variant_snapshots_dispensary ON product_variant_snapshots(dispensary_id, captured_at DESC);
CREATE INDEX IF NOT EXISTS idx_variant_snapshots_crawl ON product_variant_snapshots(crawl_run_id) WHERE crawl_run_id IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_variant_snapshots_captured ON product_variant_snapshots(captured_at DESC);
CREATE INDEX IF NOT EXISTS idx_variant_snapshots_special ON product_variant_snapshots(is_on_special, captured_at DESC) WHERE is_on_special = TRUE;
CREATE INDEX IF NOT EXISTS idx_variant_snapshots_option ON product_variant_snapshots(option, captured_at DESC);
COMMENT ON TABLE product_variant_snapshots IS 'Historical variant pricing/inventory. One row per variant per crawl. NEVER DELETE.';
-- ============================================================================
-- SECTION 3: USEFUL VIEWS
-- ============================================================================
-- View: Current specials across all stores
CREATE OR REPLACE VIEW v_current_specials AS
SELECT
pv.id as variant_id,
sp.id as product_id,
sp.name_raw as product_name,
sp.brand_name_raw as brand_name,
sp.category_raw as category,
d.id as dispensary_id,
d.name as dispensary_name,
d.city,
d.state,
pv.option,
pv.price_rec,
pv.price_rec_special,
ROUND(((pv.price_rec - pv.price_rec_special) / NULLIF(pv.price_rec, 0)) * 100, 1) as discount_percent,
pv.quantity,
pv.in_stock,
pv.last_seen_at
FROM product_variants pv
JOIN store_products sp ON sp.id = pv.store_product_id
JOIN dispensaries d ON d.id = pv.dispensary_id
WHERE pv.is_on_special = TRUE
AND pv.in_stock = TRUE
AND pv.price_rec_special IS NOT NULL
AND pv.price_rec_special < pv.price_rec;
COMMENT ON VIEW v_current_specials IS 'All products currently on special across all stores';
-- View: Price comparison for a product across stores
CREATE OR REPLACE VIEW v_price_comparison AS
SELECT
sp.name_raw as product_name,
sp.brand_name_raw as brand_name,
sp.category_raw as category,
pv.option,
d.id as dispensary_id,
d.name as dispensary_name,
d.city,
pv.price_rec,
pv.price_rec_special,
pv.is_on_special,
pv.in_stock,
pv.quantity,
RANK() OVER (PARTITION BY sp.name_raw, pv.option ORDER BY COALESCE(pv.price_rec_special, pv.price_rec) ASC) as price_rank
FROM product_variants pv
JOIN store_products sp ON sp.id = pv.store_product_id
JOIN dispensaries d ON d.id = pv.dispensary_id
WHERE pv.in_stock = TRUE
AND (pv.price_rec IS NOT NULL OR pv.price_rec_special IS NOT NULL);
COMMENT ON VIEW v_price_comparison IS 'Compare prices for same product across stores, ranked by price';
-- View: Latest snapshot per variant
CREATE OR REPLACE VIEW v_latest_variant_snapshots AS
SELECT DISTINCT ON (product_variant_id)
pvs.*
FROM product_variant_snapshots pvs
ORDER BY product_variant_id, captured_at DESC;
-- ============================================================================
-- SECTION 4: HELPER FUNCTION FOR SALE FREQUENCY
-- ============================================================================
-- Function to calculate sale frequency for a variant
CREATE OR REPLACE FUNCTION get_variant_sale_stats(p_variant_id INTEGER, p_days INTEGER DEFAULT 30)
RETURNS TABLE (
total_snapshots BIGINT,
times_on_special BIGINT,
special_frequency_pct NUMERIC,
avg_discount_pct NUMERIC,
min_price NUMERIC,
max_price NUMERIC,
avg_price NUMERIC
) AS $$
BEGIN
RETURN QUERY
SELECT
COUNT(*)::BIGINT as total_snapshots,
COUNT(*) FILTER (WHERE is_on_special)::BIGINT as times_on_special,
ROUND((COUNT(*) FILTER (WHERE is_on_special)::NUMERIC / NULLIF(COUNT(*), 0)) * 100, 1) as special_frequency_pct,
ROUND(AVG(
CASE WHEN is_on_special AND price_rec_special IS NOT NULL AND price_rec IS NOT NULL
THEN ((price_rec - price_rec_special) / NULLIF(price_rec, 0)) * 100
END
), 1) as avg_discount_pct,
MIN(COALESCE(price_rec_special, price_rec)) as min_price,
MAX(price_rec) as max_price,
ROUND(AVG(COALESCE(price_rec_special, price_rec)), 2) as avg_price
FROM product_variant_snapshots
WHERE product_variant_id = p_variant_id
AND captured_at >= NOW() - (p_days || ' days')::INTERVAL;
END;
$$ LANGUAGE plpgsql;
COMMENT ON FUNCTION get_variant_sale_stats IS 'Get sale frequency and price stats for a variant over N days';
-- ============================================================================
-- DONE
-- ============================================================================
SELECT 'Migration 070 completed. Product variants tables ready for time-series analytics.' AS status;

View File

@@ -0,0 +1,53 @@
-- Migration 071: Harmonize store_products with dutchie_products
-- Adds missing columns to store_products to consolidate on a single canonical table
-- Product details
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS description TEXT;
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS weight VARCHAR(50);
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS weights JSONB;
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS measurements JSONB;
-- Cannabinoid/terpene data
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS effects JSONB;
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS terpenes JSONB;
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS cannabinoids_v2 JSONB;
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS thc_content NUMERIC(10,4);
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS cbd_content NUMERIC(10,4);
-- Images
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS images JSONB;
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS primary_image_url TEXT;
-- Inventory
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS total_quantity_available INTEGER DEFAULT 0;
-- Status/flags
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS status VARCHAR(50);
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS featured BOOLEAN DEFAULT FALSE;
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS coming_soon BOOLEAN DEFAULT FALSE;
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS visibility_lost BOOLEAN DEFAULT FALSE;
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS visibility_lost_at TIMESTAMP WITH TIME ZONE;
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS visibility_restored_at TIMESTAMP WITH TIME ZONE;
-- Threshold flags (Dutchie-specific)
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS is_below_threshold BOOLEAN DEFAULT FALSE;
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS is_below_kiosk_threshold BOOLEAN DEFAULT FALSE;
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS options_below_threshold BOOLEAN DEFAULT FALSE;
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS options_below_kiosk_threshold BOOLEAN DEFAULT FALSE;
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS certificate_of_analysis_enabled BOOLEAN DEFAULT FALSE;
-- Platform metadata
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS external_product_id VARCHAR(100);
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS c_name VARCHAR(500);
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS past_c_names TEXT[];
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS latest_raw_payload JSONB;
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS created_at_platform TIMESTAMP WITH TIME ZONE;
ALTER TABLE store_products ADD COLUMN IF NOT EXISTS updated_at_platform TIMESTAMP WITH TIME ZONE;
-- Indexes for common queries
CREATE INDEX IF NOT EXISTS idx_store_products_external_id ON store_products(external_product_id);
CREATE INDEX IF NOT EXISTS idx_store_products_visibility_lost ON store_products(visibility_lost) WHERE visibility_lost = TRUE;
CREATE INDEX IF NOT EXISTS idx_store_products_status ON store_products(status);
-- Add comment
COMMENT ON TABLE store_products IS 'Canonical product table - consolidated from dutchie_products';

View File

@@ -0,0 +1,74 @@
-- Migration 072: Create compatibility views for store_products and store_product_snapshots
-- These views provide backward-compatible column names for API routes
-- v_products view - aliases store_products columns to match legacy dutchie_products naming
CREATE OR REPLACE VIEW v_products AS
SELECT
id,
dispensary_id,
provider_product_id as external_product_id,
provider_product_id as dutchie_id,
name_raw as name,
brand_name_raw as brand_name,
category_raw as type,
subcategory_raw as subcategory,
strain_type,
thc_percent as thc,
cbd_percent as cbd,
stock_status,
is_in_stock,
stock_quantity,
image_url,
primary_image_url,
images,
effects,
description,
is_on_special,
featured,
medical_only,
rec_only,
external_product_id as external_id,
provider,
created_at,
updated_at
FROM store_products;
-- v_product_snapshots view - aliases store_product_snapshots columns to match legacy naming
CREATE OR REPLACE VIEW v_product_snapshots AS
SELECT
id,
store_product_id,
dispensary_id,
provider,
provider_product_id,
crawl_run_id,
captured_at as crawled_at,
name_raw,
brand_name_raw,
category_raw,
subcategory_raw,
-- Convert price_rec (dollars) to rec_min_price_cents (cents)
CASE WHEN price_rec IS NOT NULL THEN (price_rec * 100)::integer END as rec_min_price_cents,
CASE WHEN price_rec IS NOT NULL THEN (price_rec * 100)::integer END as rec_max_price_cents,
CASE WHEN price_rec_special IS NOT NULL THEN (price_rec_special * 100)::integer END as rec_min_special_price_cents,
CASE WHEN price_med IS NOT NULL THEN (price_med * 100)::integer END as med_min_price_cents,
CASE WHEN price_med IS NOT NULL THEN (price_med * 100)::integer END as med_max_price_cents,
CASE WHEN price_med_special IS NOT NULL THEN (price_med_special * 100)::integer END as med_min_special_price_cents,
is_on_special as special,
discount_percent,
is_in_stock,
stock_quantity,
stock_status,
stock_quantity as total_quantity_available,
thc_percent,
cbd_percent,
image_url,
raw_data as options,
created_at
FROM store_product_snapshots;
-- Add indexes for the views' underlying tables
CREATE INDEX IF NOT EXISTS idx_store_products_dispensary ON store_products(dispensary_id);
CREATE INDEX IF NOT EXISTS idx_store_products_stock ON store_products(stock_status);
CREATE INDEX IF NOT EXISTS idx_store_snapshots_product ON store_product_snapshots(store_product_id);
CREATE INDEX IF NOT EXISTS idx_store_snapshots_captured ON store_product_snapshots(captured_at DESC);
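-- Usage sketch (illustrative; dispensary id 42 is a placeholder): legacy-named
-- reads keep working against the consolidated tables through these views.
SELECT id, name, brand_name, type, thc, cbd
FROM v_products
WHERE dispensary_id = 42 AND is_in_stock = TRUE;
SELECT store_product_id, crawled_at, rec_min_price_cents, special
FROM v_product_snapshots
WHERE dispensary_id = 42
ORDER BY crawled_at DESC
LIMIT 100;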

View File

@@ -0,0 +1,12 @@
-- Add timezone column to proxies table for geo-consistent fingerprinting
-- This allows matching Accept-Language and other headers to proxy location
ALTER TABLE proxies
ADD COLUMN IF NOT EXISTS timezone VARCHAR(50);
-- Add timezone to failed_proxies as well
ALTER TABLE failed_proxies
ADD COLUMN IF NOT EXISTS timezone VARCHAR(50);
-- Comment explaining usage
COMMENT ON COLUMN proxies.timezone IS 'IANA timezone (e.g., America/Phoenix) for geo-consistent fingerprinting';

View File

@@ -0,0 +1,322 @@
-- Migration 074: Worker Task Queue System
-- Implements role-based task queue with per-store locking and capacity tracking
-- Task queue table
CREATE TABLE IF NOT EXISTS worker_tasks (
id SERIAL PRIMARY KEY,
-- Task identification
role VARCHAR(50) NOT NULL, -- store_discovery, entry_point_discovery, product_discovery, product_resync, analytics_refresh
dispensary_id INTEGER REFERENCES dispensaries(id) ON DELETE CASCADE,
platform VARCHAR(20), -- dutchie, jane, treez, etc.
-- Task state
status VARCHAR(20) NOT NULL DEFAULT 'pending',
priority INTEGER DEFAULT 0, -- Higher = more urgent
-- Scheduling
scheduled_for TIMESTAMPTZ, -- For batch scheduling (e.g., every 4 hours)
-- Ownership
worker_id VARCHAR(100), -- Pod name or worker ID
claimed_at TIMESTAMPTZ,
started_at TIMESTAMPTZ,
completed_at TIMESTAMPTZ,
last_heartbeat_at TIMESTAMPTZ,
-- Results
result JSONB, -- Task output data
error_message TEXT,
retry_count INTEGER DEFAULT 0,
max_retries INTEGER DEFAULT 3,
-- Metadata
created_at TIMESTAMPTZ DEFAULT NOW(),
updated_at TIMESTAMPTZ DEFAULT NOW(),
-- Constraints
CONSTRAINT valid_status CHECK (status IN ('pending', 'claimed', 'running', 'completed', 'failed', 'stale'))
);
-- Indexes for efficient task claiming
CREATE INDEX IF NOT EXISTS idx_worker_tasks_pending
ON worker_tasks(role, priority DESC, created_at ASC)
WHERE status = 'pending';
CREATE INDEX IF NOT EXISTS idx_worker_tasks_claimed
ON worker_tasks(worker_id, claimed_at)
WHERE status = 'claimed';
CREATE INDEX IF NOT EXISTS idx_worker_tasks_running
ON worker_tasks(worker_id, last_heartbeat_at)
WHERE status = 'running';
CREATE INDEX IF NOT EXISTS idx_worker_tasks_dispensary
ON worker_tasks(dispensary_id)
WHERE dispensary_id IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_worker_tasks_scheduled
ON worker_tasks(scheduled_for)
WHERE status = 'pending' AND scheduled_for IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_worker_tasks_history
ON worker_tasks(role, completed_at DESC)
WHERE status IN ('completed', 'failed');
-- Partial unique index to prevent duplicate active tasks per store
-- Only one task can be claimed/running for a given dispensary at a time
CREATE UNIQUE INDEX IF NOT EXISTS idx_worker_tasks_unique_active_store
ON worker_tasks(dispensary_id)
WHERE status IN ('claimed', 'running') AND dispensary_id IS NOT NULL;
-- Worker registration table (tracks active workers)
CREATE TABLE IF NOT EXISTS worker_registry (
id SERIAL PRIMARY KEY,
worker_id VARCHAR(100) UNIQUE NOT NULL,
role VARCHAR(50) NOT NULL,
pod_name VARCHAR(100),
hostname VARCHAR(100),
started_at TIMESTAMPTZ DEFAULT NOW(),
last_heartbeat_at TIMESTAMPTZ DEFAULT NOW(),
tasks_completed INTEGER DEFAULT 0,
tasks_failed INTEGER DEFAULT 0,
status VARCHAR(20) DEFAULT 'active',
CONSTRAINT valid_worker_status CHECK (status IN ('active', 'idle', 'offline'))
);
CREATE INDEX IF NOT EXISTS idx_worker_registry_role
ON worker_registry(role, status);
CREATE INDEX IF NOT EXISTS idx_worker_registry_heartbeat
ON worker_registry(last_heartbeat_at)
WHERE status = 'active';
-- Task completion tracking (summarized history)
CREATE TABLE IF NOT EXISTS task_completion_log (
id SERIAL PRIMARY KEY,
role VARCHAR(50) NOT NULL,
date DATE NOT NULL DEFAULT CURRENT_DATE,
hour INTEGER NOT NULL DEFAULT EXTRACT(HOUR FROM NOW()),
tasks_created INTEGER DEFAULT 0,
tasks_completed INTEGER DEFAULT 0,
tasks_failed INTEGER DEFAULT 0,
avg_duration_sec NUMERIC(10,2),
min_duration_sec NUMERIC(10,2),
max_duration_sec NUMERIC(10,2),
updated_at TIMESTAMPTZ DEFAULT NOW(),
UNIQUE(role, date, hour)
);
-- Capacity planning view
CREATE OR REPLACE VIEW v_worker_capacity AS
SELECT
role,
COUNT(*) FILTER (WHERE status = 'pending') as pending_tasks,
COUNT(*) FILTER (WHERE status = 'pending' AND (scheduled_for IS NULL OR scheduled_for <= NOW())) as ready_tasks,
COUNT(*) FILTER (WHERE status = 'claimed') as claimed_tasks,
COUNT(*) FILTER (WHERE status = 'running') as running_tasks,
COUNT(*) FILTER (WHERE status = 'completed' AND completed_at > NOW() - INTERVAL '1 hour') as completed_last_hour,
COUNT(*) FILTER (WHERE status = 'failed' AND completed_at > NOW() - INTERVAL '1 hour') as failed_last_hour,
COUNT(DISTINCT worker_id) FILTER (WHERE status IN ('claimed', 'running')) as active_workers,
AVG(EXTRACT(EPOCH FROM (completed_at - started_at)))
FILTER (WHERE status = 'completed' AND completed_at > NOW() - INTERVAL '1 hour') as avg_duration_sec,
-- Capacity planning metrics
CASE
WHEN COUNT(*) FILTER (WHERE status = 'completed' AND completed_at > NOW() - INTERVAL '1 hour') > 0
THEN 3600.0 / NULLIF(AVG(EXTRACT(EPOCH FROM (completed_at - started_at)))
FILTER (WHERE status = 'completed' AND completed_at > NOW() - INTERVAL '1 hour'), 0)
ELSE NULL
END as tasks_per_worker_hour,
-- Estimated time to drain queue
CASE
WHEN COUNT(DISTINCT worker_id) FILTER (WHERE status IN ('claimed', 'running')) > 0
AND COUNT(*) FILTER (WHERE status = 'completed' AND completed_at > NOW() - INTERVAL '1 hour') > 0
THEN COUNT(*) FILTER (WHERE status = 'pending') / NULLIF(
COUNT(DISTINCT worker_id) FILTER (WHERE status IN ('claimed', 'running')) *
(3600.0 / NULLIF(AVG(EXTRACT(EPOCH FROM (completed_at - started_at)))
FILTER (WHERE status = 'completed' AND completed_at > NOW() - INTERVAL '1 hour'), 0)),
0
)
ELSE NULL
END as estimated_hours_to_drain
FROM worker_tasks
GROUP BY role;
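-- Worked example of the math above: if completed tasks in the last hour averaged
-- 120 seconds, tasks_per_worker_hour = 3600 / 120 = 30; with 4 active workers and
-- 600 pending tasks, estimated_hours_to_drain = 600 / (4 * 30) = 5.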
-- Task history view (for UI)
CREATE OR REPLACE VIEW v_task_history AS
SELECT
t.id,
t.role,
t.dispensary_id,
d.name as dispensary_name,
t.platform,
t.status,
t.priority,
t.worker_id,
t.scheduled_for,
t.claimed_at,
t.started_at,
t.completed_at,
t.error_message,
t.retry_count,
t.created_at,
EXTRACT(EPOCH FROM (t.completed_at - t.started_at)) as duration_sec
FROM worker_tasks t
LEFT JOIN dispensaries d ON d.id = t.dispensary_id
ORDER BY t.created_at DESC;
-- Function to claim a task atomically
CREATE OR REPLACE FUNCTION claim_task(
p_role VARCHAR(50),
p_worker_id VARCHAR(100)
) RETURNS worker_tasks AS $$
DECLARE
claimed_task worker_tasks;
BEGIN
UPDATE worker_tasks
SET
status = 'claimed',
worker_id = p_worker_id,
claimed_at = NOW(),
updated_at = NOW()
WHERE id = (
SELECT id FROM worker_tasks
WHERE role = p_role
AND status = 'pending'
AND (scheduled_for IS NULL OR scheduled_for <= NOW())
-- Exclude stores that already have an active task
AND (dispensary_id IS NULL OR dispensary_id NOT IN (
SELECT dispensary_id FROM worker_tasks
WHERE status IN ('claimed', 'running')
AND dispensary_id IS NOT NULL
))
ORDER BY priority DESC, created_at ASC
LIMIT 1
FOR UPDATE SKIP LOCKED
)
RETURNING * INTO claimed_task;
RETURN claimed_task;
END;
$$ LANGUAGE plpgsql;
-- Function to mark stale tasks (workers that died)
CREATE OR REPLACE FUNCTION recover_stale_tasks(
stale_threshold_minutes INTEGER DEFAULT 10
) RETURNS INTEGER AS $$
DECLARE
recovered_count INTEGER;
BEGIN
WITH stale AS (
UPDATE worker_tasks
SET
status = 'pending',
worker_id = NULL,
claimed_at = NULL,
started_at = NULL,
retry_count = retry_count + 1,
updated_at = NOW()
WHERE status IN ('claimed', 'running')
AND last_heartbeat_at < NOW() - (stale_threshold_minutes || ' minutes')::INTERVAL
AND retry_count < max_retries
RETURNING id
)
SELECT COUNT(*) INTO recovered_count FROM stale;
-- Mark tasks that exceeded retries as failed
UPDATE worker_tasks
SET
status = 'failed',
error_message = 'Exceeded max retries after worker failures',
completed_at = NOW(),
updated_at = NOW()
WHERE status IN ('claimed', 'running')
AND last_heartbeat_at < NOW() - (stale_threshold_minutes || ' minutes')::INTERVAL
AND retry_count >= max_retries;
RETURN recovered_count;
END;
$$ LANGUAGE plpgsql;
-- Function to generate daily resync tasks
CREATE OR REPLACE FUNCTION generate_resync_tasks(
p_batches_per_day INTEGER DEFAULT 6, -- Every 4 hours
p_date DATE DEFAULT CURRENT_DATE
) RETURNS INTEGER AS $$
DECLARE
store_count INTEGER;
stores_per_batch INTEGER;
batch_num INTEGER;
scheduled_time TIMESTAMPTZ;
created_count INTEGER := 0;
batch_created INTEGER := 0;
BEGIN
-- Count active stores that need resync
SELECT COUNT(*) INTO store_count
FROM dispensaries
WHERE crawl_enabled = true
AND menu_type = 'dutchie'
AND platform_dispensary_id IS NOT NULL;
IF store_count = 0 THEN
RETURN 0;
END IF;
stores_per_batch := CEIL(store_count::NUMERIC / p_batches_per_day);
FOR batch_num IN 0..(p_batches_per_day - 1) LOOP
scheduled_time := p_date + (batch_num * 4 || ' hours')::INTERVAL;
INSERT INTO worker_tasks (role, dispensary_id, platform, scheduled_for, priority)
SELECT
'product_resync',
d.id,
'dutchie',
scheduled_time,
0
FROM (
SELECT id, ROW_NUMBER() OVER (ORDER BY id) as rn
FROM dispensaries
WHERE crawl_enabled = true
AND menu_type = 'dutchie'
AND platform_dispensary_id IS NOT NULL
) d
WHERE d.rn > (batch_num * stores_per_batch)
AND d.rn <= ((batch_num + 1) * stores_per_batch)
ON CONFLICT DO NOTHING;
GET DIAGNOSTICS batch_created = ROW_COUNT;
created_count := created_count + batch_created;
END LOOP;
RETURN created_count;
END;
$$ LANGUAGE plpgsql;
-- Trigger to update timestamp
CREATE OR REPLACE FUNCTION update_worker_tasks_timestamp()
RETURNS TRIGGER AS $$
BEGIN
NEW.updated_at = NOW();
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
DROP TRIGGER IF EXISTS worker_tasks_updated_at ON worker_tasks;
CREATE TRIGGER worker_tasks_updated_at
BEFORE UPDATE ON worker_tasks
FOR EACH ROW
EXECUTE FUNCTION update_worker_tasks_timestamp();
-- Comments
COMMENT ON TABLE worker_tasks IS 'Central task queue for all worker roles';
COMMENT ON TABLE worker_registry IS 'Registry of active workers and their stats';
COMMENT ON TABLE task_completion_log IS 'Hourly aggregated task completion metrics';
COMMENT ON VIEW v_worker_capacity IS 'Real-time capacity planning metrics per role';
COMMENT ON VIEW v_task_history IS 'Task history with dispensary details for UI';
COMMENT ON FUNCTION claim_task IS 'Atomically claim a task for a worker, respecting per-store locking';
COMMENT ON FUNCTION recover_stale_tasks IS 'Release tasks from dead workers back to pending';
COMMENT ON FUNCTION generate_resync_tasks IS 'Generate daily product resync tasks in batches';
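-- Usage sketch (illustrative; the worker id 'worker-abc123' and task id 101 are
-- placeholders): the intended worker lifecycle against this queue.
SELECT * FROM claim_task('product_resync', 'worker-abc123');
UPDATE worker_tasks
SET status = 'running', started_at = NOW(), last_heartbeat_at = NOW()
WHERE id = 101 AND worker_id = 'worker-abc123';
UPDATE worker_tasks SET last_heartbeat_at = NOW() WHERE id = 101;  -- periodic heartbeat
UPDATE worker_tasks
SET status = 'completed', completed_at = NOW(), result = '{"products_synced": 250}'
WHERE id = 101;
SELECT recover_stale_tasks(10);                -- cron: requeue tasks from dead workers
SELECT generate_resync_tasks(6, CURRENT_DATE); -- cron: schedule the day's resync batches
SELECT * FROM v_worker_capacity;               -- capacity dashboard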

View File

@@ -0,0 +1,13 @@
-- Migration 075: Add consecutive_misses column to store_products
-- Used to track how many consecutive crawls a product has been missing from the feed
-- After 3 consecutive misses, the product is marked as out of stock (OOS)
ALTER TABLE store_products
ADD COLUMN IF NOT EXISTS consecutive_misses INTEGER NOT NULL DEFAULT 0;
-- Index for finding products that need OOS check
CREATE INDEX IF NOT EXISTS idx_store_products_consecutive_misses
ON store_products (dispensary_id, consecutive_misses)
WHERE consecutive_misses > 0;
COMMENT ON COLUMN store_products.consecutive_misses IS 'Number of consecutive crawls where product was not in feed. Reset to 0 when seen. At 3, mark OOS.';
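-- Usage sketch (illustrative application logic, not part of the migration;
-- dispensary 42 and $1, the array of product ids seen in the current crawl, are
-- placeholders): reset seen products, bump missing ones, mark 3+ misses OOS.
UPDATE store_products SET consecutive_misses = 0
WHERE dispensary_id = 42 AND id = ANY($1::int[]);
UPDATE store_products SET consecutive_misses = consecutive_misses + 1
WHERE dispensary_id = 42 AND NOT (id = ANY($1::int[]));
UPDATE store_products SET is_in_stock = FALSE
WHERE dispensary_id = 42 AND consecutive_misses >= 3;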

View File

@@ -0,0 +1,71 @@
-- Visitor location analytics for Findagram
-- Tracks visitor locations to understand popular areas
CREATE TABLE IF NOT EXISTS visitor_locations (
id SERIAL PRIMARY KEY,
-- Location data (from IP lookup)
ip_hash VARCHAR(64), -- Hashed IP for privacy (SHA256)
city VARCHAR(100),
state VARCHAR(100),
state_code VARCHAR(10),
country VARCHAR(100),
country_code VARCHAR(10),
latitude DECIMAL(10, 7),
longitude DECIMAL(10, 7),
-- Visit metadata
domain VARCHAR(50) NOT NULL, -- 'findagram.co', 'findadispo.com', etc.
page_path VARCHAR(255), -- '/products', '/dispensaries/123', etc.
referrer VARCHAR(500),
user_agent VARCHAR(500),
-- Session tracking
session_id VARCHAR(64), -- For grouping page views in a session
-- Timestamps
created_at TIMESTAMPTZ DEFAULT NOW()
);
-- Indexes for analytics queries
CREATE INDEX IF NOT EXISTS idx_visitor_locations_domain ON visitor_locations(domain);
CREATE INDEX IF NOT EXISTS idx_visitor_locations_city_state ON visitor_locations(city, state_code);
CREATE INDEX IF NOT EXISTS idx_visitor_locations_created_at ON visitor_locations(created_at);
CREATE INDEX IF NOT EXISTS idx_visitor_locations_session ON visitor_locations(session_id);
-- Aggregated daily stats (materialized for performance)
CREATE TABLE IF NOT EXISTS visitor_location_stats (
id SERIAL PRIMARY KEY,
date DATE NOT NULL,
domain VARCHAR(50) NOT NULL,
city VARCHAR(100),
state VARCHAR(100),
state_code VARCHAR(10),
country_code VARCHAR(10),
-- Metrics
visit_count INTEGER DEFAULT 0,
unique_sessions INTEGER DEFAULT 0,
UNIQUE(date, domain, city, state_code, country_code)
);
CREATE INDEX IF NOT EXISTS idx_visitor_stats_date ON visitor_location_stats(date);
CREATE INDEX IF NOT EXISTS idx_visitor_stats_domain ON visitor_location_stats(domain);
CREATE INDEX IF NOT EXISTS idx_visitor_stats_state ON visitor_location_stats(state_code);
-- View for easy querying of top locations
CREATE OR REPLACE VIEW v_top_visitor_locations AS
SELECT
domain,
city,
state,
state_code,
country_code,
COUNT(*) as total_visits,
COUNT(DISTINCT session_id) as unique_sessions,
MAX(created_at) as last_visit
FROM visitor_locations
WHERE created_at > NOW() - INTERVAL '30 days'
GROUP BY domain, city, state, state_code, country_code
ORDER BY total_visits DESC;
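-- Usage sketch (illustrative): top Arizona cities for findagram.co, plus one way a
-- nightly job might roll yesterday's raw rows into visitor_location_stats.
SELECT city, total_visits, unique_sessions
FROM v_top_visitor_locations
WHERE domain = 'findagram.co' AND state_code = 'AZ'
LIMIT 10;
INSERT INTO visitor_location_stats
  (date, domain, city, state, state_code, country_code, visit_count, unique_sessions)
SELECT created_at::date, domain, city, state, state_code, country_code,
       COUNT(*), COUNT(DISTINCT session_id)
FROM visitor_locations
WHERE created_at >= CURRENT_DATE - 1 AND created_at < CURRENT_DATE
GROUP BY 1, 2, 3, 4, 5, 6
ON CONFLICT (date, domain, city, state_code, country_code)
DO UPDATE SET visit_count = EXCLUDED.visit_count, unique_sessions = EXCLUDED.unique_sessions;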

View File

@@ -0,0 +1,141 @@
-- Migration 076: Worker Registry for Dynamic Workers
-- Workers register on startup, receive a friendly name, and report heartbeats
-- Name pool for workers (expandable, no hardcoding)
CREATE TABLE IF NOT EXISTS worker_name_pool (
id SERIAL PRIMARY KEY,
name VARCHAR(50) UNIQUE NOT NULL,
in_use BOOLEAN DEFAULT FALSE,
assigned_to VARCHAR(100), -- worker_id
assigned_at TIMESTAMPTZ,
created_at TIMESTAMPTZ DEFAULT NOW()
);
-- Seed with initial names (can add more via API)
INSERT INTO worker_name_pool (name) VALUES
('Alice'), ('Bella'), ('Clara'), ('Diana'), ('Elena'),
('Fiona'), ('Grace'), ('Hazel'), ('Iris'), ('Julia'),
('Katie'), ('Luna'), ('Mia'), ('Nora'), ('Olive'),
('Pearl'), ('Quinn'), ('Rosa'), ('Sara'), ('Tara'),
('Uma'), ('Vera'), ('Wendy'), ('Xena'), ('Yuki'), ('Zara'),
('Amber'), ('Blake'), ('Coral'), ('Dawn'), ('Echo'),
('Fleur'), ('Gem'), ('Haven'), ('Ivy'), ('Jade'),
('Kira'), ('Lotus'), ('Maple'), ('Nova'), ('Onyx'),
('Pixel'), ('Quest'), ('Raven'), ('Sage'), ('Terra'),
('Unity'), ('Violet'), ('Willow'), ('Xylo'), ('Yara'), ('Zen')
ON CONFLICT (name) DO NOTHING;
-- Worker registry - tracks active workers
CREATE TABLE IF NOT EXISTS worker_registry (
id SERIAL PRIMARY KEY,
worker_id VARCHAR(100) UNIQUE NOT NULL, -- e.g., "pod-abc123" or uuid
friendly_name VARCHAR(50), -- assigned from pool
role VARCHAR(50) NOT NULL, -- task role
pod_name VARCHAR(100), -- k8s pod name
hostname VARCHAR(100), -- machine hostname
ip_address VARCHAR(50), -- worker IP
status VARCHAR(20) DEFAULT 'starting', -- starting, active, idle, offline, terminated
started_at TIMESTAMPTZ DEFAULT NOW(),
last_heartbeat_at TIMESTAMPTZ DEFAULT NOW(),
last_task_at TIMESTAMPTZ,
tasks_completed INTEGER DEFAULT 0,
tasks_failed INTEGER DEFAULT 0,
current_task_id INTEGER,
metadata JSONB DEFAULT '{}',
created_at TIMESTAMPTZ DEFAULT NOW(),
updated_at TIMESTAMPTZ DEFAULT NOW()
);
-- Indexes for worker registry
CREATE INDEX IF NOT EXISTS idx_worker_registry_status ON worker_registry(status);
CREATE INDEX IF NOT EXISTS idx_worker_registry_role ON worker_registry(role);
CREATE INDEX IF NOT EXISTS idx_worker_registry_heartbeat ON worker_registry(last_heartbeat_at);
-- Function to assign a name to a new worker
CREATE OR REPLACE FUNCTION assign_worker_name(p_worker_id VARCHAR(100))
RETURNS VARCHAR(50) AS $$
DECLARE
v_name VARCHAR(50);
BEGIN
-- Try to get an unused name
UPDATE worker_name_pool
SET in_use = TRUE, assigned_to = p_worker_id, assigned_at = NOW()
WHERE id = (
SELECT id FROM worker_name_pool
WHERE in_use = FALSE
ORDER BY RANDOM()
LIMIT 1
FOR UPDATE SKIP LOCKED
)
RETURNING name INTO v_name;
-- If no names available, generate one
IF v_name IS NULL THEN
v_name := 'Worker-' || SUBSTRING(p_worker_id FROM 1 FOR 8);
END IF;
RETURN v_name;
END;
$$ LANGUAGE plpgsql;
-- Function to release a worker's name back to the pool
CREATE OR REPLACE FUNCTION release_worker_name(p_worker_id VARCHAR(100))
RETURNS VOID AS $$
BEGIN
UPDATE worker_name_pool
SET in_use = FALSE, assigned_to = NULL, assigned_at = NULL
WHERE assigned_to = p_worker_id;
END;
$$ LANGUAGE plpgsql;
-- Function to mark stale workers as offline
CREATE OR REPLACE FUNCTION mark_stale_workers(stale_threshold_minutes INTEGER DEFAULT 5)
RETURNS INTEGER AS $$
DECLARE
v_count INTEGER;
BEGIN
UPDATE worker_registry
SET status = 'offline', updated_at = NOW()
WHERE status IN ('active', 'idle', 'starting')
AND last_heartbeat_at < NOW() - (stale_threshold_minutes || ' minutes')::INTERVAL;
GET DIAGNOSTICS v_count = ROW_COUNT;
-- Release names from offline workers
PERFORM release_worker_name(worker_id)
FROM worker_registry
WHERE status = 'offline'
AND last_heartbeat_at < NOW() - INTERVAL '30 minutes';
RETURN COALESCE(v_count, 0);
END;
$$ LANGUAGE plpgsql;
-- View for dashboard
CREATE OR REPLACE VIEW v_active_workers AS
SELECT
wr.id,
wr.worker_id,
wr.friendly_name,
wr.role,
wr.status,
wr.pod_name,
wr.hostname,
wr.started_at,
wr.last_heartbeat_at,
wr.last_task_at,
wr.tasks_completed,
wr.tasks_failed,
wr.current_task_id,
EXTRACT(EPOCH FROM (NOW() - wr.last_heartbeat_at)) as seconds_since_heartbeat,
CASE
WHEN wr.status = 'offline' THEN 'offline'
WHEN wr.last_heartbeat_at < NOW() - INTERVAL '2 minutes' THEN 'stale'
WHEN wr.current_task_id IS NOT NULL THEN 'busy'
ELSE 'ready'
END as health_status
FROM worker_registry wr
WHERE wr.status != 'terminated'
ORDER BY wr.status = 'active' DESC, wr.last_heartbeat_at DESC;
COMMENT ON TABLE worker_registry IS 'Tracks all workers that have registered with the system';
COMMENT ON TABLE worker_name_pool IS 'Pool of friendly names for workers - expandable via API';
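-- Usage sketch (illustrative; 'pod-7f3a' is a placeholder worker id): register on
-- startup, heartbeat while running, and let a cron sweep flag stale workers.
INSERT INTO worker_registry (worker_id, friendly_name, role, pod_name, hostname)
VALUES ('pod-7f3a', assign_worker_name('pod-7f3a'), 'product_resync', 'worker-7f3a', 'node-1');
UPDATE worker_registry
SET status = 'active', last_heartbeat_at = NOW()
WHERE worker_id = 'pod-7f3a';
SELECT mark_stale_workers(5);    -- cron: mark workers silent for 5+ minutes offline
SELECT * FROM v_active_workers;  -- dashboard query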

View File

@@ -0,0 +1,35 @@
-- Migration: Add visitor location and dispensary name to click events
-- Captures where visitors are clicking from and which dispensary
-- Add visitor location columns
ALTER TABLE product_click_events
ADD COLUMN IF NOT EXISTS visitor_city VARCHAR(100);
ALTER TABLE product_click_events
ADD COLUMN IF NOT EXISTS visitor_state VARCHAR(10);
ALTER TABLE product_click_events
ADD COLUMN IF NOT EXISTS visitor_lat DECIMAL(10, 7);
ALTER TABLE product_click_events
ADD COLUMN IF NOT EXISTS visitor_lng DECIMAL(10, 7);
-- Add dispensary name for easier reporting
ALTER TABLE product_click_events
ADD COLUMN IF NOT EXISTS dispensary_name VARCHAR(255);
-- Create index for location-based analytics
CREATE INDEX IF NOT EXISTS idx_product_click_events_visitor_state
ON product_click_events(visitor_state)
WHERE visitor_state IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_product_click_events_visitor_city
ON product_click_events(visitor_city)
WHERE visitor_city IS NOT NULL;
-- Add comments
COMMENT ON COLUMN product_click_events.visitor_city IS 'City where the visitor is located (from IP geolocation)';
COMMENT ON COLUMN product_click_events.visitor_state IS 'State where the visitor is located (from IP geolocation)';
COMMENT ON COLUMN product_click_events.visitor_lat IS 'Visitor latitude (from IP geolocation)';
COMMENT ON COLUMN product_click_events.visitor_lng IS 'Visitor longitude (from IP geolocation)';
COMMENT ON COLUMN product_click_events.dispensary_name IS 'Name of the dispensary (denormalized for easier reporting)';
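-- Usage sketch (illustrative): click volume by visitor location, using only the
-- columns added above.
SELECT visitor_state, visitor_city, COUNT(*) AS clicks
FROM product_click_events
WHERE visitor_state IS NOT NULL
GROUP BY visitor_state, visitor_city
ORDER BY clicks DESC
LIMIT 20;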

View File

@@ -1026,6 +1026,17 @@
"url": "https://github.com/sponsors/fb55"
}
},
"node_modules/csv-parser": {
"version": "3.2.0",
"resolved": "https://registry.npmjs.org/csv-parser/-/csv-parser-3.2.0.tgz",
"integrity": "sha512-fgKbp+AJbn1h2dcAHKIdKNSSjfp43BZZykXsCjzALjKy80VXQNHPFJ6T9Afwdzoj24aMkq8GwDS7KGcDPpejrA==",
"bin": {
"csv-parser": "bin/csv-parser"
},
"engines": {
"node": ">= 10"
}
},
"node_modules/data-uri-to-buffer": {
"version": "6.0.2",
"resolved": "https://registry.npmjs.org/data-uri-to-buffer/-/data-uri-to-buffer-6.0.2.tgz",
@@ -2235,6 +2246,14 @@
"node": ">= 12"
}
},
"node_modules/ip2location-nodejs": {
"version": "9.7.0",
"resolved": "https://registry.npmjs.org/ip2location-nodejs/-/ip2location-nodejs-9.7.0.tgz",
"integrity": "sha512-eQ4T5TXm1cx0+pQcRycPiuaiRuoDEMd9O89Be7Ugk555qi9UY9enXSznkkqr3kQRyUaXx7zj5dORC5LGTPOttA==",
"dependencies": {
"csv-parser": "^3.0.0"
}
},
"node_modules/ipaddr.js": {
"version": "2.2.0",
"resolved": "https://registry.npmjs.org/ipaddr.js/-/ipaddr.js-2.2.0.tgz",

View File

@@ -21,6 +21,7 @@
"helmet": "^7.1.0",
"https-proxy-agent": "^7.0.2",
"ioredis": "^5.8.2",
"ip2location-nodejs": "^9.7.0",
"ipaddr.js": "^2.2.0",
"jsonwebtoken": "^9.0.2",
"minio": "^7.1.3",
@@ -1531,6 +1532,17 @@
"url": "https://github.com/sponsors/fb55"
}
},
"node_modules/csv-parser": {
"version": "3.2.0",
"resolved": "https://registry.npmjs.org/csv-parser/-/csv-parser-3.2.0.tgz",
"integrity": "sha512-fgKbp+AJbn1h2dcAHKIdKNSSjfp43BZZykXsCjzALjKy80VXQNHPFJ6T9Afwdzoj24aMkq8GwDS7KGcDPpejrA==",
"bin": {
"csv-parser": "bin/csv-parser"
},
"engines": {
"node": ">= 10"
}
},
"node_modules/data-uri-to-buffer": {
"version": "6.0.2",
"resolved": "https://registry.npmjs.org/data-uri-to-buffer/-/data-uri-to-buffer-6.0.2.tgz",
@@ -2754,6 +2766,14 @@
"node": ">= 12"
}
},
"node_modules/ip2location-nodejs": {
"version": "9.7.0",
"resolved": "https://registry.npmjs.org/ip2location-nodejs/-/ip2location-nodejs-9.7.0.tgz",
"integrity": "sha512-eQ4T5TXm1cx0+pQcRycPiuaiRuoDEMd9O89Be7Ugk555qi9UY9enXSznkkqr3kQRyUaXx7zj5dORC5LGTPOttA==",
"dependencies": {
"csv-parser": "^3.0.0"
}
},
"node_modules/ipaddr.js": {
"version": "2.2.0",
"resolved": "https://registry.npmjs.org/ipaddr.js/-/ipaddr.js-2.2.0.tgz",

View File

@@ -35,6 +35,7 @@
"helmet": "^7.1.0",
"https-proxy-agent": "^7.0.2",
"ioredis": "^5.8.2",
"ip2location-nodejs": "^9.7.0",
"ipaddr.js": "^2.2.0",
"jsonwebtoken": "^9.0.2",
"minio": "^7.1.3",

Binary file not shown.

View File

@@ -0,0 +1,65 @@
#!/bin/bash
# Download IP2Location LITE DB3 (City-level) database
# Free for commercial use with attribution
# https://lite.ip2location.com/database/db3-ip-country-region-city
set -e
DATA_DIR="${1:-./data/ip2location}"
DB_FILE="IP2LOCATION-LITE-DB3.BIN"
mkdir -p "$DATA_DIR"
cd "$DATA_DIR"
echo "Downloading IP2Location LITE DB3 database..."
# IP2Location LITE DB3 - includes city, region, country, lat/lng
# You need to register at https://lite.ip2location.com/ to get a download token
# Then set IP2LOCATION_TOKEN environment variable
if [ -z "$IP2LOCATION_TOKEN" ]; then
echo ""
echo "ERROR: IP2LOCATION_TOKEN not set"
echo ""
echo "To download the database:"
echo "1. Register free at https://lite.ip2location.com/"
echo "2. Get your download token from the dashboard"
echo "3. Run: IP2LOCATION_TOKEN=your_token ./scripts/download-ip2location.sh"
echo ""
exit 1
fi
# Download DB3.LITE (IPv4 + City)
DOWNLOAD_URL="https://www.ip2location.com/download/?token=${IP2LOCATION_TOKEN}&file=DB3LITEBIN"
echo "Downloading from IP2Location..."
curl -L -o ip2location.zip "$DOWNLOAD_URL"
echo "Extracting..."
unzip -o ip2location.zip
# Rename to standard name
if [ -f "IP2LOCATION-LITE-DB3.BIN" ]; then
echo "Database ready: $DATA_DIR/IP2LOCATION-LITE-DB3.BIN"
elif [ -f "IP-COUNTRY-REGION-CITY.BIN" ]; then
mv "IP-COUNTRY-REGION-CITY.BIN" "$DB_FILE"
echo "Database ready: $DATA_DIR/$DB_FILE"
else
# Find whatever BIN file was extracted
BIN_FILE=$(ls *.BIN 2>/dev/null | head -1)
if [ -n "$BIN_FILE" ]; then
mv "$BIN_FILE" "$DB_FILE"
echo "Database ready: $DATA_DIR/$DB_FILE"
else
echo "ERROR: No BIN file found in archive"
ls -la
exit 1
fi
fi
# Cleanup
rm -f ip2location.zip *.txt LICENSE* README*
echo ""
echo "Done! Database saved to: $DATA_DIR/$DB_FILE"
echo "Update monthly by re-running this script."

View File

@@ -29,6 +29,11 @@ const TRUSTED_ORIGINS = [
'http://localhost:5173',
];
// Pattern-based trusted origins (wildcards)
const TRUSTED_ORIGIN_PATTERNS = [
/^https:\/\/.*\.cannabrands\.app$/, // *.cannabrands.app
];
// Trusted IPs for internal pod-to-pod communication
const TRUSTED_IPS = [
'127.0.0.1',
@@ -42,9 +47,17 @@ const TRUSTED_IPS = [
function isTrustedRequest(req: Request): boolean {
// Check origin header
const origin = req.headers.origin;
if (origin && TRUSTED_ORIGINS.includes(origin)) {
if (origin) {
if (TRUSTED_ORIGINS.includes(origin)) {
return true;
}
// Check pattern-based origins (wildcards like *.cannabrands.app)
for (const pattern of TRUSTED_ORIGIN_PATTERNS) {
if (pattern.test(origin)) {
return true;
}
}
}
// Check referer header (for same-origin requests without CORS)
const referer = req.headers.referer;
@@ -54,6 +67,18 @@ function isTrustedRequest(req: Request): boolean {
return true;
}
}
// Check pattern-based referers
try {
const refererUrl = new URL(referer);
const refererOrigin = refererUrl.origin;
for (const pattern of TRUSTED_ORIGIN_PATTERNS) {
if (pattern.test(refererOrigin)) {
return true;
}
}
} catch {
// Invalid referer URL, skip
}
}
// Check IP for internal requests (pod-to-pod, localhost)

View File

@@ -0,0 +1,141 @@
/**
* Auto-Migration System
*
* Runs SQL migration files from the migrations/ folder automatically on server startup.
* Uses a schema_migrations table to track which migrations have been applied.
*
* Safe to run multiple times - only applies new migrations.
*/
import { Pool } from 'pg';
import fs from 'fs';
import path from 'path';
const MIGRATIONS_DIR = path.join(__dirname, '../../migrations');
/**
* Ensure schema_migrations table exists
*/
async function ensureMigrationsTable(pool: Pool): Promise<void> {
await pool.query(`
CREATE TABLE IF NOT EXISTS schema_migrations (
id SERIAL PRIMARY KEY,
name VARCHAR(255) UNIQUE NOT NULL,
applied_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
)
`);
}
/**
* Get list of already-applied migrations
*/
async function getAppliedMigrations(pool: Pool): Promise<Set<string>> {
const result = await pool.query('SELECT name FROM schema_migrations');
return new Set(result.rows.map(row => row.name));
}
/**
* Get list of migration files from disk
*/
function getMigrationFiles(): string[] {
if (!fs.existsSync(MIGRATIONS_DIR)) {
console.log('[AutoMigrate] No migrations directory found');
return [];
}
return fs.readdirSync(MIGRATIONS_DIR)
.filter(f => f.endsWith('.sql'))
.sort(); // Sort alphabetically (001_, 002_, etc.)
}
/**
* Run a single migration file
*/
async function runMigration(pool: Pool, filename: string): Promise<void> {
const filepath = path.join(MIGRATIONS_DIR, filename);
const sql = fs.readFileSync(filepath, 'utf8');
const client = await pool.connect();
try {
await client.query('BEGIN');
// Run the migration SQL
await client.query(sql);
// Record that this migration was applied
await client.query(
'INSERT INTO schema_migrations (name) VALUES ($1) ON CONFLICT (name) DO NOTHING',
[filename]
);
await client.query('COMMIT');
console.log(`[AutoMigrate] ✓ Applied: ${filename}`);
} catch (error: any) {
await client.query('ROLLBACK');
console.error(`[AutoMigrate] ✗ Failed: ${filename}`);
throw error;
} finally {
client.release();
}
}
/**
* Run all pending migrations
*
* @param pool - Database connection pool
* @returns Number of migrations applied
*/
export async function runAutoMigrations(pool: Pool): Promise<number> {
console.log('[AutoMigrate] Checking for pending migrations...');
try {
// Ensure migrations table exists
await ensureMigrationsTable(pool);
// Get applied and available migrations
const applied = await getAppliedMigrations(pool);
const available = getMigrationFiles();
// Find pending migrations
const pending = available.filter(f => !applied.has(f));
if (pending.length === 0) {
console.log('[AutoMigrate] No pending migrations');
return 0;
}
console.log(`[AutoMigrate] Found ${pending.length} pending migrations`);
// Run each pending migration in order
for (const filename of pending) {
await runMigration(pool, filename);
}
console.log(`[AutoMigrate] Successfully applied ${pending.length} migrations`);
return pending.length;
} catch (error: any) {
console.error('[AutoMigrate] Migration failed:', error.message);
// Don't crash the server - log and continue
// The specific failing migration will have been rolled back
return -1;
}
}
/**
* Check migration status without running anything
*/
export async function checkMigrationStatus(pool: Pool): Promise<{
applied: string[];
pending: string[];
}> {
await ensureMigrationsTable(pool);
const applied = await getAppliedMigrations(pool);
const available = getMigrationFiles();
return {
applied: available.filter(f => applied.has(f)),
pending: available.filter(f => !applied.has(f)),
};
}

View File

@@ -372,6 +372,51 @@ async function runMigrations() {
ON CONFLICT (key) DO NOTHING;
`);
// SEO Pages table
await client.query(`
CREATE TABLE IF NOT EXISTS seo_pages (
id SERIAL PRIMARY KEY,
type VARCHAR(50) NOT NULL,
slug VARCHAR(255) NOT NULL UNIQUE,
page_key VARCHAR(255) NOT NULL,
primary_keyword VARCHAR(255),
status VARCHAR(50) DEFAULT 'pending_generation',
data_source VARCHAR(100),
meta_title VARCHAR(255),
meta_description TEXT,
last_generated_at TIMESTAMPTZ,
last_reviewed_at TIMESTAMPTZ,
created_at TIMESTAMPTZ DEFAULT NOW(),
updated_at TIMESTAMPTZ DEFAULT NOW()
);
CREATE INDEX IF NOT EXISTS idx_seo_pages_type ON seo_pages(type);
CREATE INDEX IF NOT EXISTS idx_seo_pages_status ON seo_pages(status);
CREATE INDEX IF NOT EXISTS idx_seo_pages_slug ON seo_pages(slug);
`);
// SEO Page Contents table
await client.query(`
CREATE TABLE IF NOT EXISTS seo_page_contents (
id SERIAL PRIMARY KEY,
page_id INTEGER NOT NULL REFERENCES seo_pages(id) ON DELETE CASCADE,
version INTEGER DEFAULT 1,
blocks JSONB NOT NULL DEFAULT '[]',
meta JSONB NOT NULL DEFAULT '{}',
meta_title VARCHAR(255),
meta_description TEXT,
h1 VARCHAR(255),
canonical_url TEXT,
og_title VARCHAR(255),
og_description TEXT,
og_image_url TEXT,
generated_by VARCHAR(50) DEFAULT 'claude',
created_at TIMESTAMPTZ DEFAULT NOW(),
updated_at TIMESTAMPTZ DEFAULT NOW(),
UNIQUE(page_id, version)
);
CREATE INDEX IF NOT EXISTS idx_seo_page_contents_page ON seo_page_contents(page_id);
`);
await client.query('COMMIT');
console.log('✅ Migrations completed successfully');
} catch (error) {

View File

@@ -0,0 +1,200 @@
#!/usr/bin/env npx tsx
/**
* Database Migration Runner
*
* Runs SQL migrations from backend/migrations/*.sql in order.
* Tracks applied migrations in schema_migrations table.
*
* Usage:
* npx tsx src/db/run-migrations.ts
*
* Environment:
* DATABASE_URL or CANNAIQ_DB_* variables
*/
import { Pool } from 'pg';
import * as fs from 'fs/promises';
import * as path from 'path';
import dotenv from 'dotenv';
dotenv.config();
function getConnectionString(): string {
if (process.env.DATABASE_URL) {
return process.env.DATABASE_URL;
}
if (process.env.CANNAIQ_DB_URL) {
return process.env.CANNAIQ_DB_URL;
}
const host = process.env.CANNAIQ_DB_HOST || 'localhost';
const port = process.env.CANNAIQ_DB_PORT || '54320';
const name = process.env.CANNAIQ_DB_NAME || 'dutchie_menus';
const user = process.env.CANNAIQ_DB_USER || 'dutchie';
const pass = process.env.CANNAIQ_DB_PASS || 'dutchie_local_pass';
return `postgresql://${user}:${pass}@${host}:${port}/${name}`;
}
interface MigrationFile {
filename: string;
number: number;
path: string;
}
async function getMigrationFiles(migrationsDir: string): Promise<MigrationFile[]> {
const files = await fs.readdir(migrationsDir);
const migrations: MigrationFile[] = files
.filter(f => f.endsWith('.sql'))
.map(filename => {
// Extract number from filename like "005_api_tokens.sql" or "073_proxy_timezone.sql"
const match = filename.match(/^(\d+)_/);
if (!match) return null;
return {
filename,
number: parseInt(match[1], 10),
path: path.join(migrationsDir, filename),
};
})
.filter((m): m is MigrationFile => m !== null)
.sort((a, b) => a.number - b.number);
return migrations;
}
async function ensureMigrationsTable(pool: Pool): Promise<void> {
// Migrate to filename-based tracking (handles duplicate version numbers)
// Check if old version-based PK exists
const pkCheck = await pool.query(`
SELECT constraint_name FROM information_schema.table_constraints
WHERE table_name = 'schema_migrations' AND constraint_type = 'PRIMARY KEY'
`);
if (pkCheck.rows.length === 0) {
// Table doesn't exist, create with filename as PK
await pool.query(`
CREATE TABLE IF NOT EXISTS schema_migrations (
filename VARCHAR(255) NOT NULL PRIMARY KEY,
version VARCHAR(10),
name VARCHAR(255),
applied_at TIMESTAMPTZ DEFAULT NOW()
)
`);
} else {
// Table exists - add filename column if missing
await pool.query(`
ALTER TABLE schema_migrations ADD COLUMN IF NOT EXISTS filename VARCHAR(255)
`);
// Populate filename from version+name for existing rows
await pool.query(`
UPDATE schema_migrations SET filename = version || '_' || name || '.sql'
WHERE filename IS NULL
`);
}
}
async function getAppliedMigrations(pool: Pool): Promise<Set<string>> {
// Try filename first, fall back to version_name combo
const result = await pool.query(`
SELECT COALESCE(filename, version || '_' || name || '.sql') as filename
FROM schema_migrations
`);
return new Set(result.rows.map(r => r.filename));
}
async function applyMigration(pool: Pool, migration: MigrationFile): Promise<void> {
const sql = await fs.readFile(migration.path, 'utf-8');
// Extract version and name from filename like "005_api_tokens.sql"
const version = String(migration.number).padStart(3, '0');
const name = migration.filename.replace(/^\d+_/, '').replace(/\.sql$/, '');
const client = await pool.connect();
try {
await client.query('BEGIN');
// Run the migration SQL
await client.query(sql);
// Record that it was applied - use INSERT with ON CONFLICT for safety
await client.query(`
INSERT INTO schema_migrations (filename, version, name)
VALUES ($1, $2, $3)
ON CONFLICT DO NOTHING
`, [migration.filename, version, name]);
await client.query('COMMIT');
} catch (error) {
await client.query('ROLLBACK');
throw error;
} finally {
client.release();
}
}
async function main() {
const pool = new Pool({ connectionString: getConnectionString() });
// Migrations directory relative to this file
const migrationsDir = path.resolve(__dirname, '../../migrations');
console.log('╔════════════════════════════════════════════════════════════╗');
console.log('║ DATABASE MIGRATION RUNNER ║');
console.log('╚════════════════════════════════════════════════════════════╝');
console.log(`Migrations dir: ${migrationsDir}`);
console.log('');
try {
// Ensure tracking table exists
await ensureMigrationsTable(pool);
// Get all migration files
const allMigrations = await getMigrationFiles(migrationsDir);
console.log(`Found ${allMigrations.length} migration files`);
// Get already-applied migrations
const applied = await getAppliedMigrations(pool);
console.log(`Already applied: ${applied.size} migrations`);
console.log('');
// Find pending migrations (compare by filename)
const pending = allMigrations.filter(m => !applied.has(m.filename));
if (pending.length === 0) {
console.log('✅ No pending migrations. Database is up to date.');
await pool.end();
return;
}
console.log(`Pending migrations: ${pending.length}`);
console.log('─'.repeat(60));
// Apply each pending migration
for (const migration of pending) {
process.stdout.write(` ${migration.filename}... `);
try {
await applyMigration(pool, migration);
console.log('✅');
} catch (error: any) {
console.log('❌');
console.error(`\nError applying ${migration.filename}:`);
console.error(error.message);
process.exit(1);
}
}
console.log('');
console.log('═'.repeat(60));
console.log(`✅ Applied ${pending.length} migrations successfully`);
} catch (error: any) {
console.error('Migration runner failed:', error.message);
process.exit(1);
} finally {
await pool.end();
}
}
main();

View File

@@ -3,14 +3,23 @@
*
* Main orchestrator for the Dutchie store discovery pipeline.
*
* Flow:
* 1. Discover cities from Dutchie (or use seeded cities)
* 2. For each city, discover store locations
* 3. Upsert all data to discovery tables
* 4. Admin verifies locations manually
* 5. Verified locations are promoted to canonical dispensaries
* AUTOMATED FLOW (as of 2025-01):
* 1. Fetch cities dynamically from Dutchie GraphQL (getAllCitiesByState)
* 2. For each city, discover store locations via ConsumerDispensaries query
* 3. Upsert locations to dutchie_discovery_locations (keyed by platform_location_id)
* 4. AUTO-VALIDATE: Check required fields (name, city, state, platform_menu_url, platform_location_id)
* 5. AUTO-PROMOTE: Valid locations are upserted to dispensaries table with crawl_enabled=true
* 6. All actions logged to dutchie_promotion_log for audit
*
* This module does NOT create canonical dispensaries automatically.
* Tables involved:
* - dutchie_discovery_cities: Known cities for each state
* - dutchie_discovery_locations: Raw discovered store data
* - dispensaries: Canonical store records (promoted from discovery)
* - dutchie_promotion_log: Audit trail for validation/promotion
*
* Usage:
* npx tsx src/scripts/run-discovery.ts discover:state AZ
* npx tsx src/scripts/run-discovery.ts discover:state CA
*/
import { Pool } from 'pg';
@@ -24,11 +33,12 @@ import {
getCitiesToCrawl,
getCityBySlug,
seedKnownCities,
ARIZONA_CITIES,
} from './city-discovery';
import {
discoverLocationsForCity,
getCitiesForState,
} from './location-discovery';
import { promoteDiscoveredLocations } from './promotion';
// ============================================================
// FULL DISCOVERY
@@ -162,6 +172,42 @@ export async function runFullDiscovery(
console.log(`Errors: ${totalErrors}`);
}
// Step 4: Auto-validate and promote discovered locations
if (!dryRun && totalLocationsUpserted > 0) {
console.log('\n[Discovery] Step 4: Auto-promoting discovered locations...');
const promotionResult = await promoteDiscoveredLocations(stateCode, false);
console.log(`[Discovery] Promotion complete:`);
console.log(` Created: ${promotionResult.created} new dispensaries`);
console.log(` Updated: ${promotionResult.updated} existing dispensaries`);
console.log(` Rejected: ${promotionResult.rejected} (validation failed)`);
if (promotionResult.rejectedRecords.length > 0) {
console.log(` Rejection reasons:`);
promotionResult.rejectedRecords.slice(0, 5).forEach(r => {
console.log(` - ${r.name}: ${r.errors.join(', ')}`);
});
if (promotionResult.rejectedRecords.length > 5) {
console.log(` ... and ${promotionResult.rejectedRecords.length - 5} more`);
}
}
}
// Step 5: Detect dropped stores (in DB but not in discovery results)
if (!dryRun) {
console.log('\n[Discovery] Step 5: Detecting dropped stores...');
const droppedResult = await detectDroppedStores(pool, stateCode);
if (droppedResult.droppedCount > 0) {
console.log(`[Discovery] Found ${droppedResult.droppedCount} dropped stores:`);
droppedResult.droppedStores.slice(0, 10).forEach(s => {
console.log(` - ${s.name} (${s.city}, ${s.state}) - last seen: ${s.lastSeenAt}`);
});
if (droppedResult.droppedCount > 10) {
console.log(` ... and ${droppedResult.droppedCount - 10} more`);
}
} else {
console.log(`[Discovery] No dropped stores detected`);
}
}
return {
cities: cityResult,
locations: locationResults,
@@ -171,6 +217,107 @@ export async function runFullDiscovery(
};
}
// ============================================================
// DROPPED STORE DETECTION
// ============================================================
export interface DroppedStoreResult {
droppedCount: number;
droppedStores: Array<{
id: number;
name: string;
city: string;
state: string;
platformDispensaryId: string;
lastSeenAt: string;
}>;
}
/**
* Detect stores that exist in dispensaries but were not found in discovery.
* Marks them as status='dropped' for manual review.
*
* A store is considered "dropped" if:
* 1. It has a platform_dispensary_id (was verified via Dutchie)
* 2. It was NOT seen in the latest discovery crawl (last_seen_at in discovery < 24h ago)
* 3. It's currently marked as 'open' status
*/
export async function detectDroppedStores(
pool: Pool,
stateCode?: string
): Promise<DroppedStoreResult> {
// Find dispensaries that:
// 1. Have platform_dispensary_id (verified Dutchie stores)
// 2. Are currently 'open' status
// 3. Have a linked discovery record that wasn't seen in the last discovery run
// (last_seen_at in dutchie_discovery_locations is older than 24 hours)
const params: any[] = [];
let stateFilter = '';
if (stateCode) {
stateFilter = ` AND d.state = $1`;
params.push(stateCode);
}
const query = `
WITH recently_seen AS (
SELECT DISTINCT platform_location_id
FROM dutchie_discovery_locations
WHERE last_seen_at > NOW() - INTERVAL '24 hours'
AND active = true
)
SELECT
d.id,
d.name,
d.city,
d.state,
d.platform_dispensary_id,
d.updated_at as last_seen_at
FROM dispensaries d
WHERE d.platform_dispensary_id IS NOT NULL
AND d.platform = 'dutchie'
AND (d.status = 'open' OR d.status IS NULL)
AND d.crawl_enabled = true
AND d.platform_dispensary_id NOT IN (SELECT platform_location_id FROM recently_seen)
${stateFilter}
ORDER BY d.name
`;
const result = await pool.query(query, params);
const droppedStores = result.rows;
// Mark these stores as 'dropped' status
if (droppedStores.length > 0) {
const ids = droppedStores.map(s => s.id);
await pool.query(`
UPDATE dispensaries
SET status = 'dropped', updated_at = NOW()
WHERE id = ANY($1::int[])
`, [ids]);
// Log to promotion log for audit
for (const store of droppedStores) {
await pool.query(`
INSERT INTO dutchie_promotion_log
(dispensary_id, action, state_code, store_name, triggered_by)
VALUES ($1, 'dropped', $2, $3, 'discovery_detection')
`, [store.id, store.state, store.name]);
}
}
return {
droppedCount: droppedStores.length,
droppedStores: droppedStores.map(s => ({
id: s.id,
name: s.name,
city: s.city,
state: s.state,
platformDispensaryId: s.platform_dispensary_id,
lastSeenAt: s.last_seen_at,
})),
};
}
// ============================================================
// SINGLE CITY DISCOVERY
// ============================================================
@@ -235,11 +382,19 @@ export async function discoverState(
console.log(`[Discovery] Discovering state: ${stateCode}`);
// Seed known cities for this state
if (stateCode === 'AZ') {
console.log('[Discovery] Seeding Arizona cities...');
const seeded = await seedKnownCities(pool, ARIZONA_CITIES);
console.log(`[Discovery] Seeded ${seeded.created} new cities, ${seeded.updated} updated`);
// Dynamically fetch and seed cities for this state
console.log(`[Discovery] Fetching cities for ${stateCode} from Dutchie...`);
const cityNames = await getCitiesForState(stateCode);
if (cityNames.length > 0) {
const cities = cityNames.map(name => ({
name,
slug: name.toLowerCase().replace(/\s+/g, '-').replace(/[^a-z0-9-]/g, ''),
stateCode,
}));
const seeded = await seedKnownCities(pool, cities);
console.log(`[Discovery] Seeded ${seeded.created} new cities, ${seeded.updated} updated for ${stateCode}`);
} else {
console.log(`[Discovery] No cities found for ${stateCode}`);
}
// Run full discovery for this state

View File

@@ -13,7 +13,6 @@ export {
getCitiesToCrawl,
getCityBySlug,
seedKnownCities,
ARIZONA_CITIES,
} from './city-discovery';
// Location Discovery
@@ -33,5 +32,17 @@ export {
DiscoveryStats,
} from './discovery-crawler';
// Promotion
export {
validateForPromotion,
validateDiscoveredLocations,
promoteDiscoveredLocations,
promoteSingleLocation,
ValidationResult,
ValidationSummary,
PromotionResult,
PromotionSummary,
} from './promotion';
// Routes
export { createDiscoveryRoutes } from './routes';

View File

@@ -134,10 +134,10 @@ export interface StateWithCities {
}
/**
* Fetch all states with their cities from Dutchie's __NEXT_DATA__
* Fetch all states with their cities via direct GraphQL query
*
* This fetches a city page and extracts the statesWithDispensaries data
* which contains all states and their cities where Dutchie has dispensaries.
* Uses the getAllCitiesByState persisted query which returns all states
* and cities where Dutchie has dispensaries.
*/
export async function fetchStatesWithDispensaries(
options: { verbose?: boolean } = {}
@@ -147,71 +147,36 @@ export async function fetchStatesWithDispensaries(
// Initialize proxy if USE_PROXY=true
await initDiscoveryProxy();
console.log('[LocationDiscovery] Fetching statesWithDispensaries from Dutchie...');
console.log('[LocationDiscovery] Fetching statesWithDispensaries via GraphQL...');
// Fetch any city page to get the __NEXT_DATA__ with statesWithDispensaries
// Using a known city that's likely to exist
const result = await fetchPage('/dispensaries/az/phoenix', { maxRetries: 3 });
if (!result || result.status !== 200) {
console.error('[LocationDiscovery] Failed to fetch city page');
return [];
}
const nextData = extractNextData(result.html);
if (!nextData) {
console.error('[LocationDiscovery] No __NEXT_DATA__ found');
return [];
}
// Extract statesWithDispensaries from Apollo state
const apolloState = nextData.props?.pageProps?.initialApolloState;
if (!apolloState) {
console.error('[LocationDiscovery] No initialApolloState found');
return [];
}
// Find ROOT_QUERY.statesWithDispensaries
const rootQuery = apolloState['ROOT_QUERY'];
if (!rootQuery) {
console.error('[LocationDiscovery] No ROOT_QUERY found');
return [];
}
// The statesWithDispensaries is at ROOT_QUERY.statesWithDispensaries
const statesRefs = rootQuery.statesWithDispensaries;
if (!Array.isArray(statesRefs)) {
console.error('[LocationDiscovery] statesWithDispensaries not found or not an array');
return [];
}
// Resolve the references to actual state data
const states: StateWithCities[] = [];
for (const ref of statesRefs) {
// ref might be { __ref: "StateWithDispensaries:0" } or direct object
let stateData: any;
if (ref && ref.__ref) {
stateData = apolloState[ref.__ref];
} else {
stateData = ref;
}
if (stateData && stateData.name) {
// Parse cities JSON array if it's a string
let cities = stateData.cities;
if (typeof cities === 'string') {
try {
cities = JSON.parse(cities);
} catch {
cities = [];
}
// Use direct GraphQL query - much cleaner than scraping __NEXT_DATA__
const result = await executeGraphQL(
'getAllCitiesByState',
{}, // No variables needed
GRAPHQL_HASHES.GetAllCitiesByState,
{ maxRetries: 3, retryOn403: true }
);
const statesData = result?.data?.statesWithDispensaries;
if (!Array.isArray(statesData)) {
console.error('[LocationDiscovery] statesWithDispensaries not found in response');
return [];
}
// Map to our StateWithCities format
const states: StateWithCities[] = [];
for (const state of statesData) {
if (state && state.name) {
// Filter out null cities
const cities = Array.isArray(state.cities)
? state.cities.filter((c: string | null) => c !== null)
: [];
states.push({
name: stateData.name,
country: stateData.country || 'US',
cities: Array.isArray(cities) ? cities : [],
name: state.name,
country: state.country || 'US',
cities,
});
}
}
@@ -225,6 +190,10 @@ export async function fetchStatesWithDispensaries(
console.log(`[LocationDiscovery] Loaded ${states.length} states with cities`);
return states;
} catch (error: any) {
console.error(`[LocationDiscovery] Failed to fetch states: ${error.message}`);
return [];
}
}
/**
@@ -751,31 +720,57 @@ async function scrapeLocationCards(
/**
* Normalize a raw location response to a consistent format.
* Maps Dutchie camelCase fields to our snake_case equivalents.
*/
function normalizeLocationResponse(raw: any): DutchieLocationResponse {
const slug = raw.slug || raw.cName || raw.urlSlug || '';
const id = raw.id || raw._id || raw.dispensaryId || '';
// Extract location data - GraphQL response nests address info in .location
const loc = raw.location || {};
// Extract coordinates from geometry.coordinates [longitude, latitude]
const coords = loc.geometry?.coordinates || [];
const longitude = coords[0] || raw.longitude || raw.lng || loc.longitude || loc.lng;
const latitude = coords[1] || raw.latitude || raw.lat || loc.latitude || loc.lat;
return {
id,
name: raw.name || raw.dispensaryName || '',
slug,
address: raw.address || raw.fullAddress || '',
address1: raw.address1 || raw.addressLine1 || raw.streetAddress || '',
address2: raw.address2 || raw.addressLine2 || '',
city: raw.city || '',
state: raw.state || raw.stateCode || '',
zip: raw.zip || raw.zipCode || raw.postalCode || '',
country: raw.country || raw.countryCode || 'US',
latitude: raw.latitude || raw.lat || raw.location?.latitude,
longitude: raw.longitude || raw.lng || raw.location?.longitude,
cName: raw.cName || raw.slug || '',
address: raw.address || raw.fullAddress || loc.ln1 || '',
address1: raw.address1 || raw.addressLine1 || raw.streetAddress || loc.ln1 || '',
address2: raw.address2 || raw.addressLine2 || loc.ln2 || '',
city: raw.city || loc.city || '',
state: raw.state || raw.stateCode || loc.state || '',
zip: raw.zip || raw.zipCode || raw.postalCode || loc.zipcode || loc.zip || '',
country: raw.country || raw.countryCode || loc.country || 'United States',
latitude,
longitude,
timezone: raw.timezone || raw.tz || '',
menuUrl: raw.menuUrl || (slug ? `https://dutchie.com/dispensary/${slug}` : ''),
retailType: raw.retailType || raw.type || '',
// Service offerings
offerPickup: raw.offerPickup ?? raw.storeSettings?.offerPickup ?? true,
offerDelivery: raw.offerDelivery ?? raw.storeSettings?.offerDelivery ?? false,
isRecreational: raw.isRecreational ?? raw.retailType?.includes('Recreational') ?? true,
isMedical: raw.isMedical ?? raw.retailType?.includes('Medical') ?? true,
offerCurbsidePickup: raw.offerCurbsidePickup ?? false,
// License types
isRecreational: raw.isRecreational ?? raw.recDispensary ?? raw.retailType?.includes('Recreational') ?? true,
isMedical: raw.isMedical ?? raw.medicalDispensary ?? raw.retailType?.includes('Medical') ?? true,
// Contact info
phone: raw.phone || '',
email: raw.email || '',
website: raw.embedBackUrl || '',
// Branding
description: raw.description || '',
logoImage: raw.logoImage || '',
bannerImage: raw.bannerImage || '',
// Chain/enterprise info
chainSlug: raw.chain || '',
enterpriseId: raw.retailer?.enterpriseId || '',
// Status
status: raw.status || '',
// Preserve raw data
...raw,
};
@@ -826,15 +821,27 @@ export async function upsertLocation(
offers_pickup,
is_recreational,
is_medical,
phone,
website,
email,
description,
logo_image,
banner_image,
chain_slug,
enterprise_id,
c_name,
country,
store_status,
last_seen_at,
updated_at
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20, $21, NOW(), NOW())
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20, $21, $22, $23, $24, $25, $26, $27, $28, $29, $30, $31, $32, NOW(), NOW())
ON CONFLICT (platform, platform_location_id)
DO UPDATE SET
name = EXCLUDED.name,
platform_menu_url = EXCLUDED.platform_menu_url,
raw_address = COALESCE(EXCLUDED.raw_address, dutchie_discovery_locations.raw_address),
address_line1 = COALESCE(EXCLUDED.address_line1, dutchie_discovery_locations.address_line1),
address_line2 = COALESCE(EXCLUDED.address_line2, dutchie_discovery_locations.address_line2),
city = COALESCE(EXCLUDED.city, dutchie_discovery_locations.city),
state_code = COALESCE(EXCLUDED.state_code, dutchie_discovery_locations.state_code),
postal_code = COALESCE(EXCLUDED.postal_code, dutchie_discovery_locations.postal_code),
@@ -846,6 +853,17 @@ export async function upsertLocation(
offers_pickup = COALESCE(EXCLUDED.offers_pickup, dutchie_discovery_locations.offers_pickup),
is_recreational = COALESCE(EXCLUDED.is_recreational, dutchie_discovery_locations.is_recreational),
is_medical = COALESCE(EXCLUDED.is_medical, dutchie_discovery_locations.is_medical),
phone = COALESCE(EXCLUDED.phone, dutchie_discovery_locations.phone),
website = COALESCE(EXCLUDED.website, dutchie_discovery_locations.website),
email = COALESCE(EXCLUDED.email, dutchie_discovery_locations.email),
description = COALESCE(EXCLUDED.description, dutchie_discovery_locations.description),
logo_image = COALESCE(EXCLUDED.logo_image, dutchie_discovery_locations.logo_image),
banner_image = COALESCE(EXCLUDED.banner_image, dutchie_discovery_locations.banner_image),
chain_slug = COALESCE(EXCLUDED.chain_slug, dutchie_discovery_locations.chain_slug),
enterprise_id = COALESCE(EXCLUDED.enterprise_id, dutchie_discovery_locations.enterprise_id),
c_name = COALESCE(EXCLUDED.c_name, dutchie_discovery_locations.c_name),
country = COALESCE(EXCLUDED.country, dutchie_discovery_locations.country),
store_status = COALESCE(EXCLUDED.store_status, dutchie_discovery_locations.store_status),
last_seen_at = NOW(),
updated_at = NOW()
RETURNING id, (xmax = 0) as is_new`,
@@ -861,7 +879,7 @@ export async function upsertLocation(
location.city || null,
location.state || null,
location.zip || null,
location.country || 'US',
location.country || 'United States',
location.latitude || null,
location.longitude || null,
location.timezone || null,
@@ -871,6 +889,17 @@ export async function upsertLocation(
location.offerPickup ?? null,
location.isRecreational ?? null,
location.isMedical ?? null,
location.phone || null,
location.website || null,
location.email || null,
location.description || null,
location.logoImage || null,
location.bannerImage || null,
location.chainSlug || null,
location.enterpriseId || null,
location.cName || null,
location.country || 'United States',
location.status || null,
]
);

View File

@@ -0,0 +1,579 @@
/**
* Discovery Promotion Service
*
* Handles the promotion of discovery locations to dispensaries:
* 1. Discovery → Raw data in dutchie_discovery_locations (status='discovered')
* 2. Validation → Check required fields, reject incomplete records
* 3. Promotion → Idempotent upsert to dispensaries, link back via dispensary_id
*/
import { pool } from '../db/pool';
import { DiscoveryLocationRow, DiscoveryStatus } from './types';
// ============================================================
// VALIDATION
// ============================================================
export interface ValidationResult {
valid: boolean;
errors: string[];
}
export interface ValidationSummary {
totalChecked: number;
validCount: number;
invalidCount: number;
invalidRecords: Array<{
id: number;
name: string;
errors: string[];
}>;
}
/**
* Validate a single discovery location has all required fields for promotion
*/
export function validateForPromotion(loc: DiscoveryLocationRow): ValidationResult {
const errors: string[] = [];
// Required fields
if (!loc.platform_location_id) {
errors.push('Missing platform_location_id');
}
if (!loc.name || loc.name.trim() === '') {
errors.push('Missing name');
}
if (!loc.city || loc.city.trim() === '') {
errors.push('Missing city');
}
if (!loc.state_code || loc.state_code.trim() === '') {
errors.push('Missing state_code');
}
if (!loc.platform_menu_url) {
errors.push('Missing platform_menu_url');
}
return {
valid: errors.length === 0,
errors,
};
}
/**
* Validate all discovered locations and return summary
*/
export async function validateDiscoveredLocations(
stateCode?: string
): Promise<ValidationSummary> {
let query = `
SELECT * FROM dutchie_discovery_locations
WHERE status = 'discovered'
`;
const params: string[] = [];
if (stateCode) {
query += ` AND state_code = $1`;
params.push(stateCode);
}
const result = await pool.query(query, params);
const locations = result.rows as DiscoveryLocationRow[];
const invalidRecords: ValidationSummary['invalidRecords'] = [];
let validCount = 0;
for (const loc of locations) {
const validation = validateForPromotion(loc);
if (validation.valid) {
validCount++;
} else {
invalidRecords.push({
id: loc.id,
name: loc.name,
errors: validation.errors,
});
}
}
return {
totalChecked: locations.length,
validCount,
invalidCount: invalidRecords.length,
invalidRecords,
};
}
// ============================================================
// PROMOTION
// ============================================================
export interface PromotionResult {
discoveryId: number;
dispensaryId: number;
action: 'created' | 'updated' | 'skipped';
name: string;
}
export interface PromotionSummary {
totalProcessed: number;
created: number;
updated: number;
skipped: number;
rejected: number;
results: PromotionResult[];
rejectedRecords: Array<{
id: number;
name: string;
errors: string[];
}>;
durationMs: number;
}
/**
* Generate a URL-safe slug from name and city
*/
function generateSlug(name: string, city: string, state: string): string {
const base = `${name}-${city}-${state}`
.toLowerCase()
.replace(/[^a-z0-9]+/g, '-')
.replace(/^-|-$/g, '')
.substring(0, 100);
return base;
}
/**
* Log a promotion action to dutchie_promotion_log
*/
async function logPromotionAction(
action: string,
discoveryId: number | null,
dispensaryId: number | null,
stateCode: string | null,
storeName: string | null,
validationErrors: string[] | null = null,
fieldChanges: Record<string, any> | null = null,
triggeredBy: string = 'auto'
): Promise<void> {
await pool.query(`
INSERT INTO dutchie_promotion_log
(discovery_id, dispensary_id, action, state_code, store_name, validation_errors, field_changes, triggered_by)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
`, [
discoveryId,
dispensaryId,
action,
stateCode,
storeName,
validationErrors,
fieldChanges ? JSON.stringify(fieldChanges) : null,
triggeredBy,
]);
}
/**
* Create a status alert for the dashboard
*/
export async function createStatusAlert(
dispensaryId: number,
profileId: number | null,
alertType: string,
severity: 'info' | 'warning' | 'error' | 'critical',
message: string,
previousStatus?: string | null,
newStatus?: string | null,
metadata?: Record<string, any>
): Promise<number> {
const result = await pool.query(`
INSERT INTO crawler_status_alerts
(dispensary_id, profile_id, alert_type, severity, message, previous_status, new_status, metadata)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
RETURNING id
`, [
dispensaryId,
profileId,
alertType,
severity,
message,
previousStatus || null,
newStatus || null,
metadata ? JSON.stringify(metadata) : null,
]);
return result.rows[0].id;
}
/**
* Create or update crawler profile for a dispensary with initial sandbox status
*/
async function ensureCrawlerProfile(
dispensaryId: number,
dispensaryName: string,
platformDispensaryId: string
): Promise<{ profileId: number; created: boolean }> {
// Check if profile already exists
const existingResult = await pool.query(`
SELECT id FROM dispensary_crawler_profiles
WHERE dispensary_id = $1 AND enabled = true
LIMIT 1
`, [dispensaryId]);
if (existingResult.rows.length > 0) {
return { profileId: existingResult.rows[0].id, created: false };
}
// Create new profile with sandbox status
const profileKey = dispensaryName
.toLowerCase()
.replace(/[^a-z0-9]+/g, '-')
.replace(/^-|-$/g, '')
.substring(0, 50);
const insertResult = await pool.query(`
INSERT INTO dispensary_crawler_profiles (
dispensary_id,
profile_name,
profile_key,
crawler_type,
status,
status_reason,
status_changed_at,
config,
enabled,
consecutive_successes,
consecutive_failures,
created_at,
updated_at
) VALUES (
$1, $2, $3, 'dutchie', 'sandbox', 'Newly promoted from discovery', CURRENT_TIMESTAMP,
$4::jsonb, true, 0, 0, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP
)
RETURNING id
`, [
dispensaryId,
dispensaryName,
profileKey,
JSON.stringify({
platformDispensaryId,
useBothModes: true,
downloadImages: true,
trackStock: true,
}),
]);
const profileId = insertResult.rows[0].id;
// Create status alert for new sandbox store
await createStatusAlert(
dispensaryId,
profileId,
'promoted',
'info',
`${dispensaryName} promoted to sandbox - awaiting first successful crawl`,
null,
'sandbox',
{ source: 'discovery_promotion', platformDispensaryId }
);
return { profileId, created: true };
}
/**
* Promote a single discovery location to dispensaries table
* Idempotent: uses ON CONFLICT on platform_dispensary_id
*/
async function promoteLocation(
loc: DiscoveryLocationRow
): Promise<PromotionResult> {
const slug = loc.platform_slug || generateSlug(loc.name, loc.city || '', loc.state_code || '');
// Upsert into dispensaries
// ON CONFLICT by platform_dispensary_id ensures idempotency
const upsertResult = await pool.query(`
INSERT INTO dispensaries (
platform,
name,
slug,
city,
state,
address1,
address2,
zipcode,
postal_code,
phone,
website,
email,
latitude,
longitude,
timezone,
platform_dispensary_id,
menu_url,
menu_type,
description,
logo_image,
banner_image,
offer_pickup,
offer_delivery,
is_medical,
is_recreational,
chain_slug,
enterprise_id,
c_name,
country,
status,
crawl_enabled,
dutchie_verified,
dutchie_verified_at,
dutchie_discovery_id,
created_at,
updated_at
) VALUES (
$1, $2, $3, $4, $5, $6, $7, $8, $9, $10,
$11, $12, $13, $14, $15, $16, $17, $18, $19, $20,
$21, $22, $23, $24, $25, $26, $27, $28, $29, $30,
$31, $32, $33, $34, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP
)
ON CONFLICT (platform_dispensary_id) WHERE platform_dispensary_id IS NOT NULL
DO UPDATE SET
name = EXCLUDED.name,
city = EXCLUDED.city,
state = EXCLUDED.state,
address1 = EXCLUDED.address1,
address2 = EXCLUDED.address2,
zipcode = EXCLUDED.zipcode,
postal_code = EXCLUDED.postal_code,
phone = EXCLUDED.phone,
website = EXCLUDED.website,
email = EXCLUDED.email,
latitude = EXCLUDED.latitude,
longitude = EXCLUDED.longitude,
timezone = EXCLUDED.timezone,
menu_url = EXCLUDED.menu_url,
description = EXCLUDED.description,
logo_image = EXCLUDED.logo_image,
banner_image = EXCLUDED.banner_image,
offer_pickup = EXCLUDED.offer_pickup,
offer_delivery = EXCLUDED.offer_delivery,
is_medical = EXCLUDED.is_medical,
is_recreational = EXCLUDED.is_recreational,
chain_slug = EXCLUDED.chain_slug,
enterprise_id = EXCLUDED.enterprise_id,
c_name = EXCLUDED.c_name,
country = EXCLUDED.country,
status = EXCLUDED.status,
dutchie_discovery_id = EXCLUDED.dutchie_discovery_id,
updated_at = CURRENT_TIMESTAMP
RETURNING id, (xmax = 0) AS inserted
`, [
loc.platform || 'dutchie', // $1 platform
loc.name, // $2 name
slug, // $3 slug
loc.city, // $4 city
loc.state_code, // $5 state
loc.address_line1, // $6 address1
loc.address_line2, // $7 address2
loc.postal_code, // $8 zipcode
loc.postal_code, // $9 postal_code
loc.phone, // $10 phone
loc.website, // $11 website
loc.email, // $12 email
loc.latitude, // $13 latitude
loc.longitude, // $14 longitude
loc.timezone, // $15 timezone
loc.platform_location_id, // $16 platform_dispensary_id
loc.platform_menu_url, // $17 menu_url
'dutchie', // $18 menu_type
loc.description, // $19 description
loc.logo_image, // $20 logo_image
loc.banner_image, // $21 banner_image
loc.offers_pickup ?? true, // $22 offer_pickup
loc.offers_delivery ?? false, // $23 offer_delivery
loc.is_medical ?? false, // $24 is_medical
loc.is_recreational ?? true, // $25 is_recreational
loc.chain_slug, // $26 chain_slug
loc.enterprise_id, // $27 enterprise_id
loc.c_name, // $28 c_name
loc.country || 'United States', // $29 country
loc.store_status || 'open', // $30 status
true, // $31 crawl_enabled
true, // $32 dutchie_verified
new Date(), // $33 dutchie_verified_at
loc.id, // $34 dutchie_discovery_id
]);
const dispensaryId = upsertResult.rows[0].id;
const wasInserted = upsertResult.rows[0].inserted;
// Link discovery location back to dispensary and update status
await pool.query(`
UPDATE dutchie_discovery_locations
SET
dispensary_id = $1,
status = 'verified',
verified_at = CURRENT_TIMESTAMP,
verified_by = 'auto-promotion'
WHERE id = $2
`, [dispensaryId, loc.id]);
// Create crawler profile with sandbox status for new dispensaries
if (wasInserted && loc.platform_location_id) {
await ensureCrawlerProfile(dispensaryId, loc.name, loc.platform_location_id);
}
const action = wasInserted ? 'promoted_create' : 'promoted_update';
// Log the promotion
await logPromotionAction(
action,
loc.id,
dispensaryId,
loc.state_code,
loc.name,
null,
{ slug, city: loc.city, platform_location_id: loc.platform_location_id }
);
return {
discoveryId: loc.id,
dispensaryId,
action: wasInserted ? 'created' : 'updated',
name: loc.name,
};
}
/**
* Promote all valid discovered locations to dispensaries
*
* @param stateCode Optional filter by state (e.g., 'CA', 'AZ')
* @param dryRun If true, only validate without making changes
*/
export async function promoteDiscoveredLocations(
stateCode?: string,
dryRun = false
): Promise<PromotionSummary> {
const startTime = Date.now();
let query = `
SELECT * FROM dutchie_discovery_locations
WHERE status = 'discovered'
`;
const params: string[] = [];
if (stateCode) {
query += ` AND state_code = $1`;
params.push(stateCode);
}
query += ` ORDER BY id`;
const result = await pool.query(query, params);
const locations = result.rows as DiscoveryLocationRow[];
const results: PromotionResult[] = [];
const rejectedRecords: PromotionSummary['rejectedRecords'] = [];
let created = 0;
let updated = 0;
let skipped = 0;
let rejected = 0;
for (const loc of locations) {
// Step 2: Validation
const validation = validateForPromotion(loc);
if (!validation.valid) {
rejected++;
rejectedRecords.push({
id: loc.id,
name: loc.name,
errors: validation.errors,
});
// Mark as rejected if not dry run
if (!dryRun) {
await pool.query(`
UPDATE dutchie_discovery_locations
SET status = 'rejected', notes = $1
WHERE id = $2
`, [validation.errors.join('; '), loc.id]);
// Log the rejection
await logPromotionAction(
'rejected',
loc.id,
null,
loc.state_code,
loc.name,
validation.errors
);
}
continue;
}
// Step 3: Promotion (skip if dry run)
if (dryRun) {
skipped++;
results.push({
discoveryId: loc.id,
dispensaryId: 0,
action: 'skipped',
name: loc.name,
});
continue;
}
try {
const promotionResult = await promoteLocation(loc);
results.push(promotionResult);
if (promotionResult.action === 'created') {
created++;
} else {
updated++;
}
} catch (error: any) {
console.error(`Failed to promote location ${loc.id} (${loc.name}):`, error.message);
rejected++;
rejectedRecords.push({
id: loc.id,
name: loc.name,
errors: [`Promotion error: ${error.message}`],
});
}
}
return {
totalProcessed: locations.length,
created,
updated,
skipped,
rejected,
results,
rejectedRecords,
durationMs: Date.now() - startTime,
};
}
/**
* Promote a single discovery location by ID
*/
export async function promoteSingleLocation(
discoveryId: number
): Promise<PromotionResult> {
const result = await pool.query(
`SELECT * FROM dutchie_discovery_locations WHERE id = $1`,
[discoveryId]
);
if (result.rows.length === 0) {
throw new Error(`Discovery location ${discoveryId} not found`);
}
const loc = result.rows[0] as DiscoveryLocationRow;
// Validate
const validation = validateForPromotion(loc);
if (!validation.valid) {
throw new Error(`Validation failed: ${validation.errors.join(', ')}`);
}
// Promote
return promoteLocation(loc);
}
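As a hedged usage sketch of the promotion service above (the function names and return fields come from this file; the wrapper, state code, and logging are illustrative only):

// sketch: validate, dry-run, then promote one state's discovered locations
import {
  validateDiscoveredLocations,
  promoteDiscoveredLocations,
} from './promotion';

async function promoteState(stateCode: string): Promise<void> {
  // 1. Validation only - reports which records would be rejected and why
  const validation = await validateDiscoveredLocations(stateCode);
  console.log(`valid=${validation.validCount} invalid=${validation.invalidCount}`);

  // 2. Dry run - walks the same records but writes nothing
  const preview = await promoteDiscoveredLocations(stateCode, true);
  console.log(`would process ${preview.totalProcessed} locations`);

  // 3. Real promotion - idempotent upsert, safe to re-run
  const summary = await promoteDiscoveredLocations(stateCode, false);
  console.log(`created=${summary.created} updated=${summary.updated} rejected=${summary.rejected}`);
}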

View File

@@ -18,8 +18,8 @@ import {
getCitiesToCrawl,
getCityBySlug,
seedKnownCities,
ARIZONA_CITIES,
} from './city-discovery';
import { getCitiesForState } from './location-discovery';
import {
DiscoveryLocation,
DiscoveryCity,
@@ -27,6 +27,11 @@ import {
mapLocationRowToLocation,
mapCityRowToCity,
} from './types';
import {
validateDiscoveredLocations,
promoteDiscoveredLocations,
promoteSingleLocation,
} from './promotion';
export function createDiscoveryRoutes(pool: Pool): Router {
const router = Router();
@@ -53,44 +58,44 @@ export function createDiscoveryRoutes(pool: Pool): Router {
offset = '0',
} = req.query;
let whereClause = 'WHERE platform = $1 AND active = TRUE';
let whereClause = 'WHERE dl.platform = $1 AND dl.active = TRUE';
const params: any[] = [platform];
let paramIndex = 2;
if (status) {
whereClause += ` AND status = $${paramIndex}`;
whereClause += ` AND dl.status = $${paramIndex}`;
params.push(status);
paramIndex++;
}
if (stateCode) {
whereClause += ` AND state_code = $${paramIndex}`;
whereClause += ` AND dl.state_code = $${paramIndex}`;
params.push(stateCode);
paramIndex++;
}
if (countryCode) {
whereClause += ` AND country_code = $${paramIndex}`;
whereClause += ` AND dl.country_code = $${paramIndex}`;
params.push(countryCode);
paramIndex++;
}
if (city) {
whereClause += ` AND city ILIKE $${paramIndex}`;
whereClause += ` AND dl.city ILIKE $${paramIndex}`;
params.push(`%${city}%`);
paramIndex++;
}
if (search) {
whereClause += ` AND (name ILIKE $${paramIndex} OR platform_slug ILIKE $${paramIndex})`;
whereClause += ` AND (dl.name ILIKE $${paramIndex} OR dl.platform_slug ILIKE $${paramIndex})`;
params.push(`%${search}%`);
paramIndex++;
}
if (hasDispensary === 'true') {
whereClause += ' AND dispensary_id IS NOT NULL';
whereClause += ' AND dl.dispensary_id IS NOT NULL';
} else if (hasDispensary === 'false') {
whereClause += ' AND dispensary_id IS NULL';
whereClause += ' AND dl.dispensary_id IS NULL';
}
params.push(parseInt(limit as string, 10), parseInt(offset as string, 10));
@@ -705,15 +710,22 @@ export function createDiscoveryRoutes(pool: Pool): Router {
return res.status(400).json({ error: 'stateCode is required' });
}
let cities: any[] = [];
if (stateCode === 'AZ') {
cities = ARIZONA_CITIES;
} else {
// Dynamically fetch cities from Dutchie for any state
const cityNames = await getCitiesForState(stateCode as string);
if (cityNames.length === 0) {
return res.status(400).json({
error: `No predefined cities for state: ${stateCode}. Add cities to city-discovery.ts`,
error: `No cities found for state: ${stateCode}`,
});
}
// Convert to seed format
const cities = cityNames.map(name => ({
name,
slug: name.toLowerCase().replace(/\s+/g, '-').replace(/[^a-z0-9-]/g, ''),
stateCode: stateCode as string,
}));
const result = await seedKnownCities(pool, cities);
res.json({
@@ -834,6 +846,136 @@ export function createDiscoveryRoutes(pool: Pool): Router {
}
});
// ============================================================
// PROMOTION ENDPOINTS
// ============================================================
/**
* GET /api/discovery/admin/validate
* Validate discovered locations before promotion
*/
router.get('/admin/validate', async (req: Request, res: Response) => {
try {
const { stateCode } = req.query;
const summary = await validateDiscoveredLocations(stateCode as string | undefined);
res.json({
success: true,
...summary,
});
} catch (error: any) {
res.status(500).json({ error: error.message });
}
});
/**
* POST /api/discovery/admin/promote
* Promote all valid discovered locations to dispensaries (idempotent)
*
 * Body params (JSON):
* - stateCode: Filter by state (e.g., 'CA', 'AZ')
* - dryRun: If true, only validate without making changes
*/
router.post('/admin/promote', async (req: Request, res: Response) => {
try {
const { stateCode, dryRun = false } = req.body;
console.log(`[Discovery API] Starting promotion for ${stateCode || 'all states'} (dryRun=${dryRun})`);
const summary = await promoteDiscoveredLocations(stateCode, dryRun);
res.json({
success: true,
...summary,
});
} catch (error: any) {
res.status(500).json({ error: error.message });
}
});
/**
* POST /api/discovery/admin/promote/:id
* Promote a single discovery location by ID
*/
router.post('/admin/promote/:id', async (req: Request, res: Response) => {
try {
const { id } = req.params;
console.log(`[Discovery API] Promoting single location ${id}`);
const result = await promoteSingleLocation(parseInt(id, 10));
res.json({
success: true,
...result,
});
} catch (error: any) {
res.status(500).json({ error: error.message });
}
});
// ============================================================
// PROMOTION LOG
// ============================================================
/**
* GET /api/discovery/promotion-log
* Get promotion audit log
*/
router.get('/promotion-log', async (req: Request, res: Response) => {
try {
const { state, dispensary_id, limit = '100' } = req.query;
let whereClause = 'WHERE 1=1';
const params: any[] = [];
let paramIndex = 1;
if (state) {
whereClause += ` AND pl.state_code = $${paramIndex}`;
params.push(state);
paramIndex++;
}
if (dispensary_id) {
whereClause += ` AND pl.dispensary_id = $${paramIndex}`;
params.push(parseInt(dispensary_id as string, 10));
paramIndex++;
}
params.push(parseInt(limit as string, 10));
const { rows } = await pool.query(`
SELECT
pl.*,
dl.name as discovery_name,
d.name as dispensary_name
FROM dutchie_promotion_log pl
LEFT JOIN dutchie_discovery_locations dl ON pl.discovery_id = dl.id
LEFT JOIN dispensaries d ON pl.dispensary_id = d.id
${whereClause}
ORDER BY pl.created_at DESC
LIMIT $${paramIndex}
`, params);
res.json({
logs: rows.map((r: any) => ({
id: r.id,
discoveryId: r.discovery_id,
dispensaryId: r.dispensary_id,
action: r.action,
stateCode: r.state_code,
storeName: r.store_name,
validationErrors: r.validation_errors,
fieldChanges: r.field_changes,
triggeredBy: r.triggered_by,
createdAt: r.created_at,
discoveryName: r.discovery_name,
dispensaryName: r.dispensary_name,
})),
});
} catch (error: any) {
res.status(500).json({ error: error.message });
}
});
return router;
}
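For completeness, a hedged sketch of driving the new admin endpoints over HTTP from a script; the paths and body shape come from the routes above, while the base URL, port fallback, env var names, and API-key header value are assumptions:

// sketch: call the promotion endpoints (run inside an async context / ES module)
const base = process.env.CANNAIQ_API_URL ?? 'http://localhost:3010';
const headers = {
  'Content-Type': 'application/json',
  'x-api-key': process.env.CANNAIQ_API_KEY ?? '', // auth scheme assumed
};

// validate without side effects
const validation = await fetch(`${base}/api/discovery/admin/validate?stateCode=AZ`, { headers })
  .then((r) => r.json());

// promote everything valid in one state (set dryRun: true to preview)
const summary = await fetch(`${base}/api/discovery/admin/promote`, {
  method: 'POST',
  headers,
  body: JSON.stringify({ stateCode: 'AZ', dryRun: false }),
}).then((r) => r.json());

console.log(validation.validCount, summary.created, summary.updated);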

View File

@@ -60,6 +60,7 @@ export interface DiscoveryLocation {
stateCode: string | null;
postalCode: string | null;
countryCode: string | null;
country: string | null;
latitude: number | null;
longitude: number | null;
timezone: string | null;
@@ -72,6 +73,18 @@ export interface DiscoveryLocation {
offersPickup: boolean | null;
isRecreational: boolean | null;
isMedical: boolean | null;
// New Dutchie fields
phone: string | null;
website: string | null;
email: string | null;
description: string | null;
logoImage: string | null;
bannerImage: string | null;
chainSlug: string | null;
enterpriseId: string | null;
cName: string | null;
storeStatus: string | null;
// Timestamps
firstSeenAt: Date;
lastSeenAt: Date;
lastCheckedAt: Date | null;
@@ -96,6 +109,7 @@ export interface DiscoveryLocationRow {
state_code: string | null;
postal_code: string | null;
country_code: string | null;
country: string | null;
latitude: number | null;
longitude: number | null;
timezone: string | null;
@@ -108,6 +122,18 @@ export interface DiscoveryLocationRow {
offers_pickup: boolean | null;
is_recreational: boolean | null;
is_medical: boolean | null;
// New Dutchie fields (snake_case for DB row)
phone: string | null;
website: string | null;
email: string | null;
description: string | null;
logo_image: string | null;
banner_image: string | null;
chain_slug: string | null;
enterprise_id: string | null;
c_name: string | null;
store_status: string | null;
// Timestamps
first_seen_at: Date;
last_seen_at: Date;
last_checked_at: Date | null;
@@ -245,6 +271,7 @@ export function mapLocationRowToLocation(row: DiscoveryLocationRow): DiscoveryLo
stateCode: row.state_code,
postalCode: row.postal_code,
countryCode: row.country_code,
country: row.country,
latitude: row.latitude,
longitude: row.longitude,
timezone: row.timezone,
@@ -257,6 +284,18 @@ export function mapLocationRowToLocation(row: DiscoveryLocationRow): DiscoveryLo
offersPickup: row.offers_pickup,
isRecreational: row.is_recreational,
isMedical: row.is_medical,
// New Dutchie fields
phone: row.phone,
website: row.website,
email: row.email,
description: row.description,
logoImage: row.logo_image,
bannerImage: row.banner_image,
chainSlug: row.chain_slug,
enterpriseId: row.enterprise_id,
cName: row.c_name,
storeStatus: row.store_status,
// Timestamps
firstSeenAt: row.first_seen_at,
lastSeenAt: row.last_seen_at,
lastCheckedAt: row.last_checked_at,

View File

@@ -16,6 +16,12 @@ import {
NormalizedBrand,
NormalizationResult,
} from './types';
import {
downloadProductImage,
ProductImageContext,
isImageStorageReady,
LocalImageSizes,
} from '../utils/image-storage';
const BATCH_SIZE = 100;
@@ -23,10 +29,21 @@ const BATCH_SIZE = 100;
// PRODUCT UPSERTS
// ============================================================
export interface NewProductInfo {
id: number; // store_products.id
externalProductId: string; // provider_product_id
name: string;
brandName: string | null;
primaryImageUrl: string | null;
hasLocalImage?: boolean; // True if local_image_path is already set
}
export interface UpsertProductsResult {
upserted: number;
new: number;
updated: number;
newProducts: NewProductInfo[]; // Details of newly created products
productsNeedingImages: NewProductInfo[]; // Products (new or updated) that need image downloads
}
/**
@@ -41,12 +58,14 @@ export async function upsertStoreProducts(
options: { dryRun?: boolean } = {}
): Promise<UpsertProductsResult> {
if (products.length === 0) {
return { upserted: 0, new: 0, updated: 0 };
return { upserted: 0, new: 0, updated: 0, newProducts: [], productsNeedingImages: [] };
}
const { dryRun = false } = options;
let newCount = 0;
let updatedCount = 0;
const newProducts: NewProductInfo[] = [];
const productsNeedingImages: NewProductInfo[] = [];
// Process in batches
for (let i = 0; i < products.length; i += BATCH_SIZE) {
@@ -68,10 +87,10 @@ export async function upsertStoreProducts(
const result = await client.query(
`INSERT INTO store_products (
dispensary_id, provider, provider_product_id, provider_brand_id,
name, brand_name, category, subcategory,
name_raw, brand_name_raw, category_raw, subcategory_raw,
price_rec, price_med, price_rec_special, price_med_special,
is_on_special, discount_percent,
is_in_stock, stock_status,
is_in_stock, stock_status, stock_quantity, total_quantity_available,
thc_percent, cbd_percent,
image_url,
first_seen_at, last_seen_at, updated_at
@@ -80,17 +99,17 @@ export async function upsertStoreProducts(
$5, $6, $7, $8,
$9, $10, $11, $12,
$13, $14,
$15, $16,
$17, $18,
$19,
$15, $16, $17, $17,
$18, $19,
$20,
NOW(), NOW(), NOW()
)
ON CONFLICT (dispensary_id, provider, provider_product_id)
DO UPDATE SET
name = EXCLUDED.name,
brand_name = EXCLUDED.brand_name,
category = EXCLUDED.category,
subcategory = EXCLUDED.subcategory,
name_raw = EXCLUDED.name_raw,
brand_name_raw = EXCLUDED.brand_name_raw,
category_raw = EXCLUDED.category_raw,
subcategory_raw = EXCLUDED.subcategory_raw,
price_rec = EXCLUDED.price_rec,
price_med = EXCLUDED.price_med,
price_rec_special = EXCLUDED.price_rec_special,
@@ -99,12 +118,14 @@ export async function upsertStoreProducts(
discount_percent = EXCLUDED.discount_percent,
is_in_stock = EXCLUDED.is_in_stock,
stock_status = EXCLUDED.stock_status,
stock_quantity = EXCLUDED.stock_quantity,
total_quantity_available = EXCLUDED.total_quantity_available,
thc_percent = EXCLUDED.thc_percent,
cbd_percent = EXCLUDED.cbd_percent,
image_url = EXCLUDED.image_url,
last_seen_at = NOW(),
updated_at = NOW()
RETURNING (xmax = 0) as is_new`,
RETURNING id, (xmax = 0) as is_new, (local_image_path IS NOT NULL) as has_local_image`,
[
product.dispensaryId,
product.platform,
@@ -122,16 +143,38 @@ export async function upsertStoreProducts(
productPricing?.discountPercent,
productAvailability?.inStock ?? true,
productAvailability?.stockStatus || 'unknown',
product.thcPercent,
product.cbdPercent,
productAvailability?.quantity ?? null, // stock_quantity and total_quantity_available
// Null out THC/CBD values above 100 - some products report mg as %, which is not a valid percentage
product.thcPercent !== null && product.thcPercent <= 100 ? product.thcPercent : null,
product.cbdPercent !== null && product.cbdPercent <= 100 ? product.cbdPercent : null,
product.primaryImageUrl,
]
);
if (result.rows[0]?.is_new) {
const row = result.rows[0];
const productInfo: NewProductInfo = {
id: row.id,
externalProductId: product.externalProductId,
name: product.name,
brandName: product.brandName,
primaryImageUrl: product.primaryImageUrl,
hasLocalImage: row.has_local_image,
};
if (row.is_new) {
newCount++;
// Track new products
newProducts.push(productInfo);
// New products always need images (if they have a source URL)
if (product.primaryImageUrl && !row.has_local_image) {
productsNeedingImages.push(productInfo);
}
} else {
updatedCount++;
// Updated products need images only if they don't have a local image yet
if (product.primaryImageUrl && !row.has_local_image) {
productsNeedingImages.push(productInfo);
}
}
}
@@ -148,6 +191,8 @@ export async function upsertStoreProducts(
upserted: newCount + updatedCount,
new: newCount,
updated: updatedCount,
newProducts,
productsNeedingImages,
};
}
@@ -212,8 +257,9 @@ export async function createStoreProductSnapshots(
productAvailability?.inStock ?? true,
productAvailability?.quantity,
productAvailability?.stockStatus || 'unknown',
product.thcPercent,
product.cbdPercent,
// Null out THC/CBD values above 100 - some products report mg as %, which is not a valid percentage
product.thcPercent !== null && product.thcPercent <= 100 ? product.thcPercent : null,
product.cbdPercent !== null && product.cbdPercent <= 100 ? product.cbdPercent : null,
product.primaryImageUrl,
JSON.stringify(product.rawProduct),
]);
@@ -229,7 +275,7 @@ export async function createStoreProductSnapshots(
`INSERT INTO store_product_snapshots (
dispensary_id, provider, provider_product_id, crawl_run_id,
captured_at,
name, brand_name, category, subcategory,
name_raw, brand_name_raw, category_raw, subcategory_raw,
price_rec, price_med, price_rec_special, price_med_special,
is_on_special, discount_percent,
is_in_stock, stock_quantity, stock_status,
@@ -245,6 +291,202 @@ export async function createStoreProductSnapshots(
return { created };
}
// ============================================================
// VARIANT UPSERTS
// ============================================================
export interface UpsertVariantsResult {
upserted: number;
new: number;
updated: number;
snapshotsCreated: number;
}
/**
* Extract variant data from raw Dutchie product
*/
function extractVariantsFromRaw(rawProduct: any): any[] {
const children = rawProduct?.POSMetaData?.children || [];
return children.map((child: any) => ({
option: child.option || child.key || '',
canonicalSku: child.canonicalSKU || null,
canonicalId: child.canonicalID || null,
canonicalName: child.canonicalName || null,
priceRec: child.recPrice || child.price || null,
priceMed: child.medPrice || null,
priceRecSpecial: child.recSpecialPrice || null,
priceMedSpecial: child.medSpecialPrice || null,
quantity: child.quantityAvailable ?? child.quantity ?? null,
inStock: (child.quantityAvailable ?? child.quantity ?? 0) > 0,
}));
}
/**
* Parse weight value and unit from option string
* e.g., "1g" -> { value: 1, unit: "g" }
* "3.5g" -> { value: 3.5, unit: "g" }
* "1/8oz" -> { value: 0.125, unit: "oz" }
*/
function parseWeight(option: string): { value: number | null; unit: string | null } {
if (!option) return { value: null, unit: null };
// Handle fractions like "1/8oz"
const fractionMatch = option.match(/^(\d+)\/(\d+)\s*(g|oz|mg|ml)?$/i);
if (fractionMatch) {
const value = parseInt(fractionMatch[1]) / parseInt(fractionMatch[2]);
return { value, unit: fractionMatch[3]?.toLowerCase() || 'oz' };
}
// Handle decimals like "3.5g" or "100mg"
const decimalMatch = option.match(/^([\d.]+)\s*(g|oz|mg|ml|each)?$/i);
if (decimalMatch) {
return {
value: parseFloat(decimalMatch[1]),
unit: decimalMatch[2]?.toLowerCase() || null
};
}
return { value: null, unit: null };
}
/**
* Upsert variants for products and create variant snapshots
*/
export async function upsertProductVariants(
pool: Pool,
dispensaryId: number,
products: NormalizedProduct[],
crawlRunId: number | null,
options: { dryRun?: boolean } = {}
): Promise<UpsertVariantsResult> {
if (products.length === 0) {
return { upserted: 0, new: 0, updated: 0, snapshotsCreated: 0 };
}
const { dryRun = false } = options;
let newCount = 0;
let updatedCount = 0;
let snapshotsCreated = 0;
for (const product of products) {
// Get the store_product_id for this product
const productResult = await pool.query(
`SELECT id FROM store_products
WHERE dispensary_id = $1 AND provider = $2 AND provider_product_id = $3`,
[dispensaryId, product.platform, product.externalProductId]
);
if (productResult.rows.length === 0) {
continue; // Product not found, skip variants
}
const storeProductId = productResult.rows[0].id;
const variants = extractVariantsFromRaw(product.rawProduct);
if (variants.length === 0) {
continue; // No variants to process
}
if (dryRun) {
console.log(`[DryRun] Would upsert ${variants.length} variants for product ${product.externalProductId}`);
continue;
}
for (const variant of variants) {
const { value: weightValue, unit: weightUnit } = parseWeight(variant.option);
const isOnSpecial = (variant.priceRecSpecial !== null && variant.priceRecSpecial < variant.priceRec) ||
(variant.priceMedSpecial !== null && variant.priceMedSpecial < variant.priceMed);
// Upsert variant
const variantResult = await pool.query(
`INSERT INTO product_variants (
store_product_id, dispensary_id,
option, canonical_sku, canonical_id, canonical_name,
price_rec, price_med, price_rec_special, price_med_special,
quantity, quantity_available, in_stock, is_on_special,
weight_value, weight_unit,
first_seen_at, last_seen_at, updated_at
) VALUES (
$1, $2,
$3, $4, $5, $6,
$7, $8, $9, $10,
$11, $11, $12, $13,
$14, $15,
NOW(), NOW(), NOW()
)
ON CONFLICT (store_product_id, option)
DO UPDATE SET
canonical_sku = COALESCE(EXCLUDED.canonical_sku, product_variants.canonical_sku),
canonical_id = COALESCE(EXCLUDED.canonical_id, product_variants.canonical_id),
canonical_name = COALESCE(EXCLUDED.canonical_name, product_variants.canonical_name),
price_rec = EXCLUDED.price_rec,
price_med = EXCLUDED.price_med,
price_rec_special = EXCLUDED.price_rec_special,
price_med_special = EXCLUDED.price_med_special,
quantity = EXCLUDED.quantity,
quantity_available = EXCLUDED.quantity_available,
in_stock = EXCLUDED.in_stock,
is_on_special = EXCLUDED.is_on_special,
weight_value = COALESCE(EXCLUDED.weight_value, product_variants.weight_value),
weight_unit = COALESCE(EXCLUDED.weight_unit, product_variants.weight_unit),
last_seen_at = NOW(),
last_price_change_at = CASE
WHEN product_variants.price_rec IS DISTINCT FROM EXCLUDED.price_rec
OR product_variants.price_rec_special IS DISTINCT FROM EXCLUDED.price_rec_special
THEN NOW()
ELSE product_variants.last_price_change_at
END,
last_stock_change_at = CASE
WHEN product_variants.quantity IS DISTINCT FROM EXCLUDED.quantity
THEN NOW()
ELSE product_variants.last_stock_change_at
END,
updated_at = NOW()
RETURNING id, (xmax = 0) as is_new`,
[
storeProductId, dispensaryId,
variant.option, variant.canonicalSku, variant.canonicalId, variant.canonicalName,
variant.priceRec, variant.priceMed, variant.priceRecSpecial, variant.priceMedSpecial,
variant.quantity, variant.inStock, isOnSpecial,
weightValue, weightUnit,
]
);
const variantId = variantResult.rows[0].id;
if (variantResult.rows[0]?.is_new) {
newCount++;
} else {
updatedCount++;
}
// Create variant snapshot
await pool.query(
`INSERT INTO product_variant_snapshots (
product_variant_id, store_product_id, dispensary_id, crawl_run_id,
option,
price_rec, price_med, price_rec_special, price_med_special,
quantity, in_stock, is_on_special,
captured_at
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, NOW())`,
[
variantId, storeProductId, dispensaryId, crawlRunId,
variant.option,
variant.priceRec, variant.priceMed, variant.priceRecSpecial, variant.priceMedSpecial,
variant.quantity, variant.inStock, isOnSpecial,
]
);
snapshotsCreated++;
}
}
return {
upserted: newCount + updatedCount,
new: newCount,
updated: updatedCount,
snapshotsCreated,
};
}
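To make the weight parsing above concrete, a few spot-check expectations; this assumes parseWeight were exported for testing (it is module-private in the diff) and the module path is hypothetical:

// sketch: expected parseWeight results for common Dutchie option strings
import assert from 'node:assert';
import { parseWeight } from './product-upserts'; // hypothetical export and path

assert.deepStrictEqual(parseWeight('1g'),    { value: 1,     unit: 'g' });
assert.deepStrictEqual(parseWeight('3.5g'),  { value: 3.5,   unit: 'g' });
assert.deepStrictEqual(parseWeight('1/8oz'), { value: 0.125, unit: 'oz' });
assert.deepStrictEqual(parseWeight('100mg'), { value: 100,   unit: 'mg' });
assert.deepStrictEqual(parseWeight('Each'),  { value: null,  unit: null });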
// ============================================================
// DISCONTINUED PRODUCTS
// ============================================================
@@ -366,6 +608,19 @@ export async function upsertBrands(
// FULL HYDRATION
// ============================================================
export interface ImageDownloadResult {
downloaded: number;
skipped: number;
failed: number;
bytesTotal: number;
}
export interface DispensaryContext {
stateCode: string;
storeSlug: string;
hasExistingProducts?: boolean; // True if store already has products with local images
}
export interface HydratePayloadResult {
productsUpserted: number;
productsNew: number;
@@ -373,6 +628,157 @@ export interface HydratePayloadResult {
productsDiscontinued: number;
snapshotsCreated: number;
brandsCreated: number;
variantsUpserted: number;
variantsNew: number;
variantSnapshotsCreated: number;
imagesDownloaded: number;
imagesSkipped: number;
imagesFailed: number;
imagesBytesTotal: number;
}
/**
* Helper to create slug from string
*/
function slugify(str: string): string {
return str
.toLowerCase()
.replace(/[^a-z0-9]+/g, '-')
.replace(/^-+|-+$/g, '')
.substring(0, 50) || 'unknown';
}
/**
* Download images for new products and update their local paths
*/
export async function downloadProductImages(
pool: Pool,
newProducts: NewProductInfo[],
dispensaryContext: DispensaryContext,
options: { dryRun?: boolean; concurrency?: number } = {}
): Promise<ImageDownloadResult> {
const { dryRun = false, concurrency = 5 } = options;
// Filter products that have images to download
const productsWithImages = newProducts.filter(p => p.primaryImageUrl);
if (productsWithImages.length === 0) {
return { downloaded: 0, skipped: 0, failed: 0, bytesTotal: 0 };
}
// Check if image storage is ready
if (!isImageStorageReady()) {
console.warn('[ImageDownload] Image storage not initialized, skipping downloads');
return { downloaded: 0, skipped: productsWithImages.length, failed: 0, bytesTotal: 0 };
}
if (dryRun) {
console.log(`[DryRun] Would download ${productsWithImages.length} images`);
return { downloaded: 0, skipped: productsWithImages.length, failed: 0, bytesTotal: 0 };
}
let downloaded = 0;
let skipped = 0;
let failed = 0;
let bytesTotal = 0;
// Process in batches with concurrency limit
for (let i = 0; i < productsWithImages.length; i += concurrency) {
const batch = productsWithImages.slice(i, i + concurrency);
const results = await Promise.allSettled(
batch.map(async (product) => {
const ctx: ProductImageContext = {
stateCode: dispensaryContext.stateCode,
storeSlug: dispensaryContext.storeSlug,
brandSlug: slugify(product.brandName || 'unknown'),
productId: product.externalProductId,
};
const result = await downloadProductImage(product.primaryImageUrl!, ctx, { skipIfExists: true });
if (result.success) {
// Update the database with local image path
const imagesJson = JSON.stringify({
full: result.urls!.full,
medium: result.urls!.medium,
thumb: result.urls!.thumb,
});
await pool.query(
`UPDATE store_products
SET local_image_path = $1, images = $2
WHERE id = $3`,
[result.urls!.full, imagesJson, product.id]
);
}
return result;
})
);
for (const result of results) {
if (result.status === 'fulfilled') {
const downloadResult = result.value;
if (downloadResult.success) {
if (downloadResult.skipped) {
skipped++;
} else {
downloaded++;
bytesTotal += downloadResult.bytesDownloaded || 0;
}
} else {
failed++;
console.warn(`[ImageDownload] Failed: ${downloadResult.error}`);
}
} else {
failed++;
console.error(`[ImageDownload] Error:`, result.reason);
}
}
}
console.log(`[ImageDownload] Downloaded: ${downloaded}, Skipped: ${skipped}, Failed: ${failed}, Bytes: ${bytesTotal}`);
return { downloaded, skipped, failed, bytesTotal };
}
/**
* Get dispensary context for image paths
* Also checks if this dispensary already has products with local images
* to skip unnecessary filesystem checks for existing stores
*/
async function getDispensaryContext(pool: Pool, dispensaryId: number): Promise<DispensaryContext | null> {
try {
const result = await pool.query(
`SELECT
d.state,
d.slug,
d.name,
EXISTS(
SELECT 1 FROM store_products sp
WHERE sp.dispensary_id = d.id
AND sp.local_image_path IS NOT NULL
LIMIT 1
) as has_local_images
FROM dispensaries d
WHERE d.id = $1`,
[dispensaryId]
);
if (result.rows.length === 0) {
return null;
}
const row = result.rows[0];
return {
stateCode: row.state || 'unknown',
storeSlug: row.slug || slugify(row.name || `store-${dispensaryId}`),
hasExistingProducts: row.has_local_images,
};
} catch (error) {
console.error('[getDispensaryContext] Error:', error);
return null;
}
}
/**
@@ -383,9 +789,9 @@ export async function hydrateToCanonical(
dispensaryId: number,
normResult: NormalizationResult,
crawlRunId: number | null,
options: { dryRun?: boolean } = {}
options: { dryRun?: boolean; downloadImages?: boolean } = {}
): Promise<HydratePayloadResult> {
const { dryRun = false } = options;
const { dryRun = false, downloadImages: shouldDownloadImages = true } = options;
// 1. Upsert brands
const brandResult = await upsertBrands(pool, normResult.brands, { dryRun });
@@ -399,7 +805,7 @@ export async function hydrateToCanonical(
{ dryRun }
);
// 3. Create snapshots
// 3. Create product snapshots
const snapshotResult = await createStoreProductSnapshots(
pool,
dispensaryId,
@@ -410,7 +816,16 @@ export async function hydrateToCanonical(
{ dryRun }
);
// 4. Mark discontinued products
// 4. Upsert variants and create variant snapshots
const variantResult = await upsertProductVariants(
pool,
dispensaryId,
normResult.products,
crawlRunId,
{ dryRun }
);
// 5. Mark discontinued products
const currentProductIds = new Set(
normResult.products.map((p) => p.externalProductId)
);
@@ -424,6 +839,36 @@ export async function hydrateToCanonical(
{ dryRun }
);
// 6. Download images for products that need them
// This includes:
// - New products (always need images)
// - Updated products that don't have local images yet (backfill)
// This avoids:
// - Filesystem checks for products that already have local images
// - Unnecessary HTTP requests for products with existing images
let imageResult: ImageDownloadResult = { downloaded: 0, skipped: 0, failed: 0, bytesTotal: 0 };
if (shouldDownloadImages && productResult.productsNeedingImages.length > 0) {
const dispensaryContext = await getDispensaryContext(pool, dispensaryId);
if (dispensaryContext) {
const newIds = new Set(productResult.newProducts.map(p => p.id));
const newCount = productResult.productsNeedingImages.filter(p => newIds.has(p.id)).length;
const backfillCount = productResult.productsNeedingImages.length - newCount;
console.log(`[Hydration] Downloading images for ${productResult.productsNeedingImages.length} products (${newCount} new, ${backfillCount} backfill)...`);
imageResult = await downloadProductImages(
pool,
productResult.productsNeedingImages,
dispensaryContext,
{ dryRun }
);
} else {
console.warn(`[Hydration] Could not get dispensary context for ID ${dispensaryId}, skipping image downloads`);
}
} else if (productResult.productsNeedingImages.length === 0 && productResult.upserted > 0) {
// All products already have local images
console.log(`[Hydration] All ${productResult.upserted} products already have local images, skipping downloads`);
}
return {
productsUpserted: productResult.upserted,
productsNew: productResult.new,
@@ -431,5 +876,12 @@ export async function hydrateToCanonical(
productsDiscontinued: discontinuedCount,
snapshotsCreated: snapshotResult.created,
brandsCreated: brandResult.new,
variantsUpserted: variantResult.upserted,
variantsNew: variantResult.new,
variantSnapshotsCreated: variantResult.snapshotsCreated,
imagesDownloaded: imageResult.downloaded,
imagesSkipped: imageResult.skipped,
imagesFailed: imageResult.failed,
imagesBytesTotal: imageResult.bytesTotal,
};
}
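A hedged sketch of invoking the extended hydration entry point above; the option flags and result fields come from this diff, while the import paths and surrounding wiring are assumptions:

// sketch: hydrate one store's normalized payload with image downloads enabled
import { Pool } from 'pg';
import { hydrateToCanonical } from './canonical-upserts'; // path is an assumption
import { NormalizationResult } from './types';            // path is an assumption

async function hydrateStore(
  pool: Pool,
  dispensaryId: number,
  normResult: NormalizationResult,
  crawlRunId: number | null
): Promise<void> {
  const result = await hydrateToCanonical(pool, dispensaryId, normResult, crawlRunId, {
    dryRun: false,
    downloadImages: true, // set false to skip the image step entirely
  });
  console.log(
    `${result.productsNew} new, ${result.variantsUpserted} variants, ` +
    `${result.imagesDownloaded} images (${result.imagesBytesTotal} bytes)`
  );
}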

View File

@@ -234,99 +234,94 @@ export async function syncProductsToCanonical(
const result = await pool.query(
`INSERT INTO store_products (
dispensary_id, state_id, provider, provider_product_id,
provider_brand_id, provider_dispensary_id, enterprise_product_id,
legacy_dutchie_product_id,
name, brand_name, category, subcategory, product_type, strain_type,
description, effects, cannabinoids,
thc_percent, cbd_percent, thc_content_text, cbd_content_text,
is_in_stock, stock_status, stock_quantity,
total_quantity_available, total_kiosk_quantity_available,
image_url, local_image_url, local_image_thumb_url, local_image_medium_url,
original_image_url, additional_images,
is_on_special, is_featured, medical_only, rec_only,
dispensary_id, provider, provider_product_id, provider_brand_id,
platform_dispensary_id, external_product_id,
name_raw, brand_name_raw, category_raw, subcategory_raw, strain_type,
description, effects, cannabinoids_v2,
thc_percent, cbd_percent, thc_content, cbd_content,
is_in_stock, stock_status, stock_quantity, total_quantity_available,
image_url, primary_image_url, images,
is_on_special, featured, medical_only, rec_only,
is_below_threshold, is_below_kiosk_threshold,
platform_status, c_name, weight, options, measurements,
first_seen_at, last_seen_at, updated_at
status, c_name, weight, measurements,
first_seen_at, last_seen_at, created_at, updated_at
) VALUES (
$1, $2, 'dutchie', $3,
$4, $5, $6,
$7,
$8, $9, $10, $11, $12, $13,
$14, $15, $16,
$17, $18, $19, $20,
$21, $22, $23,
$24, $25,
$26, $27, $28, $29,
$30, $31,
$32, $33, $34, $35,
$36, $37,
$38, $39, $40, $41, $42,
$43, $44, NOW()
$1, 'dutchie', $2, $3,
$4, $5,
$6, $7, $8, $9, $10,
$11, $12, $13,
$14, $15, $16, $17,
$18, $19, $20, $21,
$22, $23, $24,
$25, $26, $27, $28,
$29, $30,
$31, $32, $33, $34,
$35, $36, NOW(), NOW()
)
ON CONFLICT (dispensary_id, provider, provider_product_id)
DO UPDATE SET
legacy_dutchie_product_id = EXCLUDED.legacy_dutchie_product_id,
name = EXCLUDED.name,
brand_name = EXCLUDED.brand_name,
category = EXCLUDED.category,
subcategory = EXCLUDED.subcategory,
name_raw = EXCLUDED.name_raw,
brand_name_raw = EXCLUDED.brand_name_raw,
category_raw = EXCLUDED.category_raw,
subcategory_raw = EXCLUDED.subcategory_raw,
strain_type = EXCLUDED.strain_type,
is_in_stock = EXCLUDED.is_in_stock,
stock_status = EXCLUDED.stock_status,
stock_quantity = EXCLUDED.stock_quantity,
total_quantity_available = EXCLUDED.total_quantity_available,
thc_percent = EXCLUDED.thc_percent,
cbd_percent = EXCLUDED.cbd_percent,
thc_content = EXCLUDED.thc_content,
cbd_content = EXCLUDED.cbd_content,
image_url = EXCLUDED.image_url,
local_image_url = EXCLUDED.local_image_url,
primary_image_url = EXCLUDED.primary_image_url,
is_on_special = EXCLUDED.is_on_special,
platform_status = EXCLUDED.platform_status,
status = EXCLUDED.status,
description = COALESCE(EXCLUDED.description, store_products.description),
effects = COALESCE(EXCLUDED.effects, store_products.effects),
cannabinoids_v2 = COALESCE(EXCLUDED.cannabinoids_v2, store_products.cannabinoids_v2),
weight = EXCLUDED.weight,
measurements = EXCLUDED.measurements,
last_seen_at = NOW(),
updated_at = NOW()
RETURNING (xmax = 0) as is_new`,
[
dispensaryId,
stateId,
p.external_product_id,
p.brand_id,
p.platform_dispensary_id,
p.enterprise_product_id,
p.id,
p.name,
p.brand_name,
p.category || p.type,
p.subcategory,
p.type,
p.strain_type,
p.description,
p.effects,
p.cannabinoids_v2,
thcPercent,
cbdPercent,
p.thc_content,
p.cbd_content,
isInStock,
stockStatus,
p.total_quantity_available,
p.total_quantity_available,
p.total_kiosk_quantity_available,
p.primary_image_url,
p.local_image_url,
p.local_image_thumb_url,
p.local_image_medium_url,
p.original_image_url,
p.additional_images,
p.special || false,
p.featured || false,
p.medical_only || false,
p.rec_only || false,
p.is_below_threshold || false,
p.is_below_kiosk_threshold || false,
p.status,
p.c_name,
p.weight,
p.options,
p.measurements,
p.first_seen_at || p.updated_at,
p.last_seen_at || p.updated_at,
dispensaryId, // $1
p.external_product_id, // $2
p.brand_id, // $3
p.platform_dispensary_id, // $4
p.external_product_id, // $5 external_product_id
p.name, // $6
p.brand_name, // $7
p.type || p.category, // $8 category_raw
p.subcategory, // $9
p.strain_type, // $10
p.description, // $11
p.effects, // $12
p.cannabinoids_v2, // $13
thcPercent, // $14
cbdPercent, // $15
p.thc_content, // $16
p.cbd_content, // $17
isInStock, // $18
stockStatus, // $19
p.total_quantity_available || 0, // $20 stock_quantity
p.total_quantity_available || 0, // $21
p.primary_image_url, // $22 image_url
p.primary_image_url, // $23
p.additional_images, // $24 images
p.special || false, // $25
p.featured || false, // $26
p.medical_only || false, // $27
p.rec_only || false, // $28
p.is_below_threshold || false, // $29
p.is_below_kiosk_threshold || false, // $30
p.status, // $31
p.c_name, // $32
p.weight, // $33
p.measurements, // $34
p.first_seen_at || p.updated_at, // $35
p.last_seen_at || p.updated_at, // $36
]
);

View File

@@ -107,7 +107,8 @@ export class HydrationWorker {
console.log(
`[HydrationWorker] ${this.options.dryRun ? '[DryRun] ' : ''}Processed payload ${payload.id}: ` +
`${hydrateResult.productsNew} new, ${hydrateResult.productsUpdated} updated, ` +
`${hydrateResult.productsDiscontinued} discontinued, ${hydrateResult.snapshotsCreated} snapshots`
`${hydrateResult.productsDiscontinued} discontinued, ${hydrateResult.snapshotsCreated} snapshots, ` +
`${hydrateResult.variantsUpserted} variants (${hydrateResult.variantSnapshotsCreated} variant snapshots)`
);
return {

View File

@@ -6,22 +6,70 @@ import { initializeMinio, isMinioEnabled } from './utils/minio';
import { initializeImageStorage } from './utils/image-storage';
import { logger } from './services/logger';
import { cleanupOrphanedJobs } from './services/proxyTestQueue';
import { runAutoMigrations } from './db/auto-migrate';
import { getPool } from './db/pool';
import healthRoutes from './routes/health';
import imageProxyRoutes from './routes/image-proxy';
dotenv.config();
const app = express();
const PORT = process.env.PORT || 3010;
app.use(cors());
// CORS configuration - allow requests from any origin with API key auth
// WordPress plugins need to make requests from their own domains
app.use(cors({
origin: true, // Reflect the request origin
credentials: true,
methods: ['GET', 'POST', 'PUT', 'DELETE', 'OPTIONS'],
allowedHeaders: ['Content-Type', 'Authorization', 'x-api-key', 'X-API-Key'],
exposedHeaders: ['Content-Length', 'X-Request-Id'],
}));
app.use(express.json());
// Serve static images when MinIO is not configured
const LOCAL_IMAGES_PATH = process.env.LOCAL_IMAGES_PATH || '/app/public/images';
// Uses ./public/images relative to working directory (works for both Docker and local dev)
const LOCAL_IMAGES_PATH = process.env.LOCAL_IMAGES_PATH || './public/images';
app.use('/images', express.static(LOCAL_IMAGES_PATH));
// Image proxy with on-demand resizing
// Usage: /img/products/az/store/brand/product/image.webp?w=200&h=200
app.use('/img', imageProxyRoutes);
// Serve static downloads (plugin files, etc.)
const LOCAL_DOWNLOADS_PATH = process.env.LOCAL_DOWNLOADS_PATH || '/app/public/downloads';
// Uses ./public/downloads relative to working directory (works for both Docker and local dev)
const LOCAL_DOWNLOADS_PATH = process.env.LOCAL_DOWNLOADS_PATH || './public/downloads';
// Dynamic "latest" redirect for WordPress plugin - finds highest version automatically
app.get('/downloads/cannaiq-menus-latest.zip', (req, res) => {
const fs = require('fs');
const path = require('path');
try {
const files = fs.readdirSync(LOCAL_DOWNLOADS_PATH);
const pluginFiles = files
.filter((f: string) => f.match(/^cannaiq-menus-\d+\.\d+\.\d+\.zip$/))
.sort((a: string, b: string) => {
const vA = a.match(/(\d+)\.(\d+)\.(\d+)/);
const vB = b.match(/(\d+)\.(\d+)\.(\d+)/);
if (!vA || !vB) return 0;
for (let i = 1; i <= 3; i++) {
const diff = parseInt(vB[i]) - parseInt(vA[i]);
if (diff !== 0) return diff;
}
return 0;
});
if (pluginFiles.length > 0) {
const latestFile = pluginFiles[0];
res.redirect(302, `/downloads/${latestFile}`);
} else {
res.status(404).json({ error: 'No plugin versions found' });
}
} catch (err) {
res.status(500).json({ error: 'Failed to find latest plugin' });
}
});
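A small runnable illustration of the descending version sort the redirect handler relies on, using hypothetical filenames:

// sketch: same semver-descending sort applied to a hypothetical downloads listing
const files = ['cannaiq-menus-1.4.0.zip', 'cannaiq-menus-1.6.0.zip', 'readme.txt'];
const latest = files
  .filter((f) => /^cannaiq-menus-\d+\.\d+\.\d+\.zip$/.test(f))
  .sort((a, b) => {
    const vA = a.match(/(\d+)\.(\d+)\.(\d+)/)!;
    const vB = b.match(/(\d+)\.(\d+)\.(\d+)/)!;
    for (let i = 1; i <= 3; i++) {
      const diff = parseInt(vB[i], 10) - parseInt(vA[i], 10);
      if (diff !== 0) return diff;
    }
    return 0;
  })[0];
console.log(latest); // 'cannaiq-menus-1.6.0.zip' -> handler 302s to /downloads/cannaiq-menus-1.6.0.zip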
app.use('/downloads', express.static(LOCAL_DOWNLOADS_PATH));
// Simple health check for load balancers/K8s probes
@@ -61,11 +109,16 @@ import apiPermissionsRoutes from './routes/api-permissions';
import parallelScrapeRoutes from './routes/parallel-scrape';
import crawlerSandboxRoutes from './routes/crawler-sandbox';
import versionRoutes from './routes/version';
import deployStatusRoutes from './routes/deploy-status';
import publicApiRoutes from './routes/public-api';
import usersRoutes from './routes/users';
import staleProcessesRoutes from './routes/stale-processes';
import orchestratorAdminRoutes from './routes/orchestrator-admin';
import adminDebugRoutes from './routes/admin-debug';
import intelligenceRoutes from './routes/intelligence';
import marketsRoutes from './routes/markets';
import workersRoutes from './routes/workers';
import jobQueueRoutes from './routes/job-queue';
import { createMultiStateRoutes } from './multi-state';
import { trackApiUsage, checkRateLimit } from './middleware/apiTokenTracker';
import { validateWordPressPermissions } from './middleware/wordpressPermissions';
@@ -75,7 +128,7 @@ import { createPortalRoutes } from './portals';
import { createStatesRouter } from './routes/states';
import { createAnalyticsV2Router } from './routes/analytics-v2';
import { createDiscoveryRoutes } from './discovery';
import { getPool } from './db/pool';
import pipelineRoutes from './routes/pipeline';
// Consumer API routes (findadispo.com, findagram.co)
import consumerAuthRoutes from './routes/consumer-auth';
@@ -86,6 +139,9 @@ import consumerDealsRoutes from './routes/consumer-deals';
import eventsRoutes from './routes/events';
import clickAnalyticsRoutes from './routes/click-analytics';
import seoRoutes from './routes/seo';
import priceAnalyticsRoutes from './routes/price-analytics';
import tasksRoutes from './routes/tasks';
import workerRegistryRoutes from './routes/worker-registry';
// Mark requests from trusted domains (cannaiq.co, findagram.co, findadispo.com)
// These domains can access the API without authentication
@@ -128,11 +184,25 @@ app.use('/api/api-permissions', apiPermissionsRoutes);
app.use('/api/parallel-scrape', parallelScrapeRoutes);
app.use('/api/crawler-sandbox', crawlerSandboxRoutes);
app.use('/api/version', versionRoutes);
app.use('/api/admin/deploy-status', deployStatusRoutes);
console.log('[DeployStatus] Routes registered at /api/admin/deploy-status');
app.use('/api/users', usersRoutes);
app.use('/api/stale-processes', staleProcessesRoutes);
// Admin routes - orchestrator actions
app.use('/api/admin/orchestrator', orchestratorAdminRoutes);
// Admin routes - debug endpoints (snapshot inspection)
app.use('/api/admin/debug', adminDebugRoutes);
console.log('[AdminDebug] Routes registered at /api/admin/debug');
// Admin routes - intelligence (brands, pricing analytics)
app.use('/api/admin/intelligence', intelligenceRoutes);
console.log('[Intelligence] Routes registered at /api/admin/intelligence');
// Markets routes - store and product data for admin dashboard
app.use('/api/markets', marketsRoutes);
console.log('[Markets] Routes registered at /api/markets');
// SEO orchestrator routes
app.use('/api/seo', seoRoutes);
@@ -140,7 +210,17 @@ app.use('/api/seo', seoRoutes);
app.use('/api/workers', workersRoutes);
// Monitor routes - aliased from workers for convenience
app.use('/api/monitor', workersRoutes);
console.log('[Workers] Routes registered at /api/workers and /api/monitor');
// Job queue management
app.use('/api/job-queue', jobQueueRoutes);
console.log('[Workers] Routes registered at /api/workers, /api/monitor, and /api/job-queue');
// Task queue management - worker tasks with capacity planning
app.use('/api/tasks', tasksRoutes);
console.log('[Tasks] Routes registered at /api/tasks');
// Worker registry - dynamic worker registration, heartbeats, and name management
app.use('/api/worker-registry', workerRegistryRoutes);
console.log('[WorkerRegistry] Routes registered at /api/worker-registry');
// Phase 3: Analytics V2 - Enhanced analytics with rec/med state segmentation
try {
@@ -174,6 +254,10 @@ console.log('[Events] Routes registered at /api/events');
app.use('/api/analytics/clicks', clickAnalyticsRoutes);
console.log('[ClickAnalytics] Routes registered at /api/analytics/clicks');
// Price Analytics API - price history, specials, and market comparisons
app.use('/api/analytics/price', priceAnalyticsRoutes);
console.log('[PriceAnalytics] Routes registered at /api/analytics/price');
// States API routes - cannabis legalization status and targeting
try {
const statesRouter = createStatesRouter(getPool());
@@ -213,6 +297,10 @@ try {
console.warn('[Discovery] Failed to register routes:', error);
}
// Pipeline Stage Transitions - Explicit API for moving stores through 6-stage pipeline
app.use('/api/pipeline', pipelineRoutes);
console.log('[Pipeline] Routes registered at /api/pipeline');
// Platform-specific Discovery Routes
// TODO: Rebuild with /platforms/dutchie/ module
@@ -220,6 +308,17 @@ async function startServer() {
try {
logger.info('system', 'Starting server...');
// Run auto-migrations before anything else
const pool = getPool();
const migrationsApplied = await runAutoMigrations(pool);
if (migrationsApplied > 0) {
logger.info('system', `Applied ${migrationsApplied} database migrations`);
} else if (migrationsApplied === 0) {
logger.info('system', 'Database schema up to date');
} else {
logger.warn('system', 'Some migrations failed - check logs');
}
await initializeMinio();
await initializeImageStorage();
logger.info('system', isMinioEnabled() ? 'MinIO storage initialized' : 'Local filesystem storage initialized');

View File

@@ -319,12 +319,13 @@ export function createMultiStateRoutes(pool: Pool): Router {
// =========================================================================
/**
* GET /api/analytics/compare/brand/:brandId
* GET /api/analytics/compare/brand/:brandIdOrName
* Compare a brand across multiple states
* Accepts either numeric brand ID or brand name (URL encoded)
*/
router.get('/analytics/compare/brand/:brandId', async (req: Request, res: Response) => {
router.get('/analytics/compare/brand/:brandIdOrName', async (req: Request, res: Response) => {
try {
const brandId = parseInt(req.params.brandId);
const { brandIdOrName } = req.params;
const statesParam = req.query.states as string;
// Parse states - either comma-separated or get all active states
@@ -336,7 +337,22 @@ export function createMultiStateRoutes(pool: Pool): Router {
states = activeStates.map(s => s.code);
}
const comparison = await stateService.compareBrandAcrossStates(brandId, states);
// Check if it's a numeric ID or a brand name
const brandId = parseInt(brandIdOrName);
let comparison;
if (!isNaN(brandId)) {
// Try by ID first
try {
comparison = await stateService.compareBrandAcrossStates(brandId, states);
} catch (idErr: any) {
// If brand ID not found, try as name
comparison = await stateService.compareBrandByNameAcrossStates(brandIdOrName, states);
}
} else {
// Use brand name directly
comparison = await stateService.compareBrandByNameAcrossStates(decodeURIComponent(brandIdOrName), states);
}
res.json({
success: true,

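For orientation, a minimal hypothetical client-side sketch of the two identifier forms this route now accepts; the base URL, brand name, and state list are placeholder assumptions, not values from this changeset:

// Hypothetical usage sketch - base URL, brand, and states are placeholders
const base = 'http://localhost:3000';
// Numeric brand ID
const byId = await fetch(`${base}/api/analytics/compare/brand/42?states=AZ,CA`).then(r => r.json());
// Brand name, URL encoded (falls through to compareBrandByNameAcrossStates)
const byName = await fetch(
  `${base}/api/analytics/compare/brand/${encodeURIComponent('Example Brand')}?states=AZ,CA`
).then(r => r.json());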
View File

@@ -67,18 +67,19 @@ export class StateQueryService {
*/
async getStateSummary(state: string): Promise<StateSummary | null> {
// Get base metrics from materialized view
// Migration 051 uses dispensary_count column (not store_count)
const metricsResult = await this.pool.query(`
SELECT
state,
state_name AS "stateName",
dispensary_count AS "storeCount",
dispensary_count AS "dutchieStores",
dispensary_count AS "activeStores",
total_products AS "totalProducts",
in_stock_products AS "inStockProducts",
out_of_stock_products AS "outOfStockProducts",
unique_brands AS "uniqueBrands",
unique_categories AS "uniqueCategories",
COALESCE(dispensary_count, 0) AS "storeCount",
COALESCE(dispensary_count, 0) AS "dutchieStores",
COALESCE(dispensary_count, 0) AS "activeStores",
COALESCE(total_products, 0) AS "totalProducts",
COALESCE(in_stock_products, 0) AS "inStockProducts",
COALESCE(out_of_stock_products, 0) AS "outOfStockProducts",
COALESCE(unique_brands, 0) AS "uniqueBrands",
COALESCE(unique_categories, 0) AS "uniqueCategories",
avg_price_rec AS "avgPriceRec",
min_price_rec AS "minPriceRec",
max_price_rec AS "maxPriceRec",
@@ -110,10 +111,25 @@ export class StateQueryService {
// Get top categories
const topCategories = await this.getCategoriesByState(state, { limit: 5 });
// Parse numeric values from strings (PostgreSQL returns bigint as string)
return {
...metrics,
recentCrawls: parseInt(crawlResult.rows[0]?.recent_crawls || '0'),
failedCrawls: parseInt(crawlResult.rows[0]?.failed_crawls || '0'),
state: metrics.state,
stateName: metrics.stateName,
storeCount: parseInt(metrics.storeCount || '0', 10),
dutchieStores: parseInt(metrics.dutchieStores || '0', 10),
activeStores: parseInt(metrics.activeStores || '0', 10),
totalProducts: parseInt(metrics.totalProducts || '0', 10),
inStockProducts: parseInt(metrics.inStockProducts || '0', 10),
outOfStockProducts: parseInt(metrics.outOfStockProducts || '0', 10),
onSpecialProducts: parseInt(metrics.onSpecialProducts || '0', 10),
uniqueBrands: parseInt(metrics.uniqueBrands || '0', 10),
uniqueCategories: parseInt(metrics.uniqueCategories || '0', 10),
avgPriceRec: metrics.avgPriceRec ? parseFloat(metrics.avgPriceRec) : null,
minPriceRec: metrics.minPriceRec ? parseFloat(metrics.minPriceRec) : null,
maxPriceRec: metrics.maxPriceRec ? parseFloat(metrics.maxPriceRec) : null,
refreshedAt: metrics.refreshedAt,
recentCrawls: parseInt(crawlResult.rows[0]?.recent_crawls || '0', 10),
failedCrawls: parseInt(crawlResult.rows[0]?.failed_crawls || '0', 10),
lastCrawlAt: crawlResult.rows[0]?.last_crawl_at || null,
topBrands,
topCategories,
@@ -121,29 +137,49 @@ export class StateQueryService {
}
/**
* Get metrics for all states
* Get metrics for all states (including states with no data)
*/
async getAllStateMetrics(): Promise<StateMetrics[]> {
// Migration 051 uses dispensary_count column (not store_count)
const result = await this.pool.query(`
SELECT
state,
state_name AS "stateName",
dispensary_count AS "storeCount",
dispensary_count AS "dutchieStores",
dispensary_count AS "activeStores",
total_products AS "totalProducts",
in_stock_products AS "inStockProducts",
out_of_stock_products AS "outOfStockProducts",
unique_brands AS "uniqueBrands",
unique_categories AS "uniqueCategories",
avg_price_rec AS "avgPriceRec",
min_price_rec AS "minPriceRec",
max_price_rec AS "maxPriceRec",
refreshed_at AS "refreshedAt"
FROM mv_state_metrics
ORDER BY dispensary_count DESC
s.code AS state,
s.name AS "stateName",
COALESCE(m.dispensary_count, 0) AS "storeCount",
COALESCE(m.dispensary_count, 0) AS "dutchieStores",
COALESCE(m.dispensary_count, 0) AS "activeStores",
COALESCE(m.total_products, 0) AS "totalProducts",
COALESCE(m.in_stock_products, 0) AS "inStockProducts",
COALESCE(m.out_of_stock_products, 0) AS "outOfStockProducts",
COALESCE(m.unique_brands, 0) AS "uniqueBrands",
COALESCE(m.unique_categories, 0) AS "uniqueCategories",
m.avg_price_rec AS "avgPriceRec",
m.min_price_rec AS "minPriceRec",
m.max_price_rec AS "maxPriceRec",
m.refreshed_at AS "refreshedAt",
0 AS "onSpecialProducts"
FROM states s
LEFT JOIN mv_state_metrics m ON s.code = m.state
ORDER BY COALESCE(m.dispensary_count, 0) DESC, s.name ASC
`);
return result.rows;
// Parse numeric values from strings (PostgreSQL returns bigint as string)
return result.rows.map((row: any) => ({
state: row.state,
stateName: row.stateName,
storeCount: parseInt(row.storeCount || '0', 10),
dutchieStores: parseInt(row.dutchieStores || '0', 10),
activeStores: parseInt(row.activeStores || '0', 10),
totalProducts: parseInt(row.totalProducts || '0', 10),
inStockProducts: parseInt(row.inStockProducts || '0', 10),
outOfStockProducts: parseInt(row.outOfStockProducts || '0', 10),
uniqueBrands: parseInt(row.uniqueBrands || '0', 10),
uniqueCategories: parseInt(row.uniqueCategories || '0', 10),
avgPriceRec: row.avgPriceRec ? parseFloat(row.avgPriceRec) : null,
minPriceRec: row.minPriceRec ? parseFloat(row.minPriceRec) : null,
maxPriceRec: row.maxPriceRec ? parseFloat(row.maxPriceRec) : null,
refreshedAt: row.refreshedAt,
onSpecialProducts: parseInt(row.onSpecialProducts || '0', 10),
}));
}
// =========================================================================
@@ -152,29 +188,37 @@ export class StateQueryService {
/**
* Get brands present in a specific state
* Uses inline query instead of v_brand_state_presence view for compatibility
*/
async getBrandsByState(state: string, options: StateQueryOptions = {}): Promise<BrandInState[]> {
const { limit = 50, offset = 0, sortBy = 'productCount', sortDir = 'desc' } = options;
// Sort columns must reference the aliased output names with quotes
const sortColumn = {
productCount: 'product_count',
storeCount: 'store_count',
avgPrice: 'avg_price',
name: 'brand_name',
}[sortBy] || 'product_count';
productCount: '"productCount"',
storeCount: '"storeCount"',
avgPrice: '"avgPrice"',
name: '"brandName"',
}[sortBy] || '"productCount"';
// Inline query that aggregates brand data from store_products and dispensaries
// Works whether or not v_brand_state_presence view exists
const result = await this.pool.query(`
SELECT
brand_id AS "brandId",
brand_name AS "brandName",
brand_slug AS "brandSlug",
store_count AS "storeCount",
product_count AS "productCount",
avg_price AS "avgPrice",
first_seen_in_state AS "firstSeenInState",
last_seen_in_state AS "lastSeenInState"
FROM v_brand_state_presence
WHERE state = $1
COALESCE(sp.brand_id, 0) AS "brandId",
sp.brand_name_raw AS "brandName",
LOWER(REPLACE(sp.brand_name_raw, ' ', '-')) AS "brandSlug",
COUNT(DISTINCT d.id) AS "storeCount",
COUNT(DISTINCT sp.id) AS "productCount",
ROUND(AVG(sp.price_rec)::numeric, 2) AS "avgPrice",
MIN(sp.first_seen_at) AS "firstSeenInState",
MAX(sp.last_seen_at) AS "lastSeenInState"
FROM store_products sp
JOIN dispensaries d ON sp.dispensary_id = d.id
WHERE d.state = $1
AND sp.brand_name_raw IS NOT NULL
AND sp.brand_name_raw != ''
GROUP BY sp.brand_id, sp.brand_name_raw
ORDER BY ${sortColumn} ${sortDir === 'asc' ? 'ASC' : 'DESC'}
LIMIT $2 OFFSET $3
`, [state, limit, offset]);
@@ -184,18 +228,48 @@ export class StateQueryService {
/**
* Get brand penetration across all states
* Uses inline query instead of fn_brand_state_penetration function for compatibility
*/
async getBrandStatePenetration(brandId: number): Promise<BrandStatePenetration[]> {
// Inline query that calculates brand penetration by state
const result = await this.pool.query(`
WITH state_totals AS (
SELECT
state,
state_name AS "stateName",
total_stores AS "totalStores",
stores_with_brand AS "storesWithBrand",
penetration_pct AS "penetrationPct",
product_count AS "productCount",
avg_price AS "avgPrice"
FROM fn_brand_state_penetration($1)
d.state,
s.name AS state_name,
COUNT(DISTINCT d.id) AS total_stores
FROM dispensaries d
JOIN states s ON d.state = s.code
WHERE d.state IS NOT NULL
GROUP BY d.state, s.name
),
brand_presence AS (
SELECT
d.state,
COUNT(DISTINCT d.id) AS stores_with_brand,
COUNT(DISTINCT sp.id) AS product_count,
ROUND(AVG(sp.price_rec)::numeric, 2) AS avg_price
FROM store_products sp
JOIN dispensaries d ON sp.dispensary_id = d.id
WHERE (sp.brand_id = $1 OR sp.brand_name_raw = (SELECT name FROM brands WHERE id = $1))
AND d.state IS NOT NULL
GROUP BY d.state
)
SELECT
st.state,
st.state_name AS "stateName",
st.total_stores AS "totalStores",
COALESCE(bp.stores_with_brand, 0) AS "storesWithBrand",
CASE
WHEN st.total_stores > 0
THEN ROUND((COALESCE(bp.stores_with_brand, 0)::numeric / st.total_stores) * 100, 2)
ELSE 0
END AS "penetrationPct",
COALESCE(bp.product_count, 0) AS "productCount",
bp.avg_price AS "avgPrice"
FROM state_totals st
LEFT JOIN brand_presence bp ON st.state = bp.state
ORDER BY COALESCE(bp.stores_with_brand, 0) DESC
`, [brandId]);
return result.rows;
@@ -257,33 +331,128 @@ export class StateQueryService {
};
}
/**
* Compare a brand by name across multiple states
* Used when we only have a brand name (not an ID from the brands table)
*/
async compareBrandByNameAcrossStates(
brandName: string,
states: string[]
): Promise<BrandCrossStateComparison> {
// Get penetration data by brand name
const penetrationResult = await this.pool.query(`
WITH state_totals AS (
SELECT
d.state,
s.name AS state_name,
COUNT(DISTINCT d.id) AS total_stores
FROM dispensaries d
JOIN states s ON d.state = s.code
WHERE d.state IS NOT NULL
GROUP BY d.state, s.name
),
brand_presence AS (
SELECT
d.state,
COUNT(DISTINCT d.id) AS stores_with_brand,
COUNT(DISTINCT sp.id) AS product_count,
ROUND(AVG(sp.price_rec)::numeric, 2) AS avg_price
FROM store_products sp
JOIN dispensaries d ON sp.dispensary_id = d.id
WHERE sp.brand_name_raw ILIKE $1
AND d.state IS NOT NULL
GROUP BY d.state
)
SELECT
st.state,
st.state_name AS "stateName",
st.total_stores AS "totalStores",
COALESCE(bp.stores_with_brand, 0) AS "storesWithBrand",
CASE
WHEN st.total_stores > 0
THEN ROUND((COALESCE(bp.stores_with_brand, 0)::numeric / st.total_stores) * 100, 2)
ELSE 0
END AS "penetrationPct",
COALESCE(bp.product_count, 0) AS "productCount",
bp.avg_price AS "avgPrice"
FROM state_totals st
LEFT JOIN brand_presence bp ON st.state = bp.state
ORDER BY COALESCE(bp.stores_with_brand, 0) DESC
`, [brandName]);
// Filter by requested states
const filteredStates = penetrationResult.rows.filter((p: any) =>
states.includes(p.state)
);
// Calculate national metrics
const nationalResult = await this.pool.query(`
SELECT
COUNT(DISTINCT d.id) AS total_stores,
COUNT(DISTINCT CASE WHEN sp.brand_name_raw ILIKE $1 THEN d.id END) AS stores_with_brand,
AVG(sp.price_rec) FILTER (WHERE sp.brand_name_raw ILIKE $1) AS avg_price
FROM dispensaries d
LEFT JOIN store_products sp ON d.id = sp.dispensary_id
WHERE d.state IS NOT NULL
`, [brandName]);
const nationalData = nationalResult.rows[0];
const nationalPenetration = nationalData.total_stores > 0
? (nationalData.stores_with_brand / nationalData.total_stores) * 100
: 0;
// Find best/worst states
const sortedByPenetration = [...filteredStates].sort(
(a: any, b: any) => parseFloat(b.penetrationPct) - parseFloat(a.penetrationPct)
);
return {
brandId: 0, // No ID when using brand name
brandName,
states: filteredStates,
nationalPenetration: Math.round(nationalPenetration * 100) / 100,
nationalAvgPrice: nationalData.avg_price
? Math.round(parseFloat(nationalData.avg_price) * 100) / 100
: null,
bestPerformingState: sortedByPenetration[0]?.state || null,
worstPerformingState: sortedByPenetration[sortedByPenetration.length - 1]?.state || null,
};
}
// =========================================================================
// Category Queries
// =========================================================================
/**
* Get categories in a specific state
* Uses inline query instead of v_category_state_distribution view for compatibility
*/
async getCategoriesByState(state: string, options: StateQueryOptions = {}): Promise<CategoryInState[]> {
const { limit = 50, offset = 0, sortBy = 'productCount', sortDir = 'desc' } = options;
// Sort columns must reference the aliased output names with quotes
const sortColumn = {
productCount: 'product_count',
storeCount: 'store_count',
avgPrice: 'avg_price',
productCount: '"productCount"',
storeCount: '"storeCount"',
avgPrice: '"avgPrice"',
category: 'category',
}[sortBy] || 'product_count';
}[sortBy] || '"productCount"';
// Inline query that aggregates category data from store_products and dispensaries
const result = await this.pool.query(`
SELECT
category,
product_count AS "productCount",
store_count AS "storeCount",
avg_price AS "avgPrice",
in_stock_count AS "inStockCount",
on_special_count AS "onSpecialCount"
FROM v_category_state_distribution
WHERE state = $1
sp.category_raw AS category,
COUNT(DISTINCT sp.id) AS "productCount",
COUNT(DISTINCT d.id) AS "storeCount",
ROUND(AVG(sp.price_rec)::numeric, 2) AS "avgPrice",
COUNT(DISTINCT CASE WHEN sp.is_in_stock THEN sp.id END) AS "inStockCount",
0 AS "onSpecialCount"
FROM store_products sp
JOIN dispensaries d ON sp.dispensary_id = d.id
WHERE d.state = $1
AND sp.category_raw IS NOT NULL
AND sp.category_raw != ''
GROUP BY sp.category_raw
ORDER BY ${sortColumn} ${sortDir === 'asc' ? 'ASC' : 'DESC'}
LIMIT $2 OFFSET $3
`, [state, limit, offset]);
@@ -293,25 +462,38 @@ export class StateQueryService {
/**
* Compare a category across multiple states
* Uses inline query instead of v_category_state_distribution view for compatibility
*/
async compareCategoryAcrossStates(
category: string,
states: string[]
): Promise<CategoryCrossStateComparison> {
// Inline query for category distribution by state
const result = await this.pool.query(`
WITH category_stats AS (
SELECT
v.state,
d.state,
sp.category_raw AS category,
COUNT(DISTINCT sp.id) AS product_count,
COUNT(DISTINCT d.id) AS store_count,
ROUND(AVG(sp.price_rec)::numeric, 2) AS avg_price
FROM store_products sp
JOIN dispensaries d ON sp.dispensary_id = d.id
WHERE sp.category_raw = $1
AND d.state = ANY($2)
GROUP BY d.state, sp.category_raw
)
SELECT
cs.state,
s.name AS "stateName",
v.category,
v.product_count AS "productCount",
v.store_count AS "storeCount",
v.avg_price AS "avgPrice",
ROUND(v.product_count::NUMERIC / SUM(v.product_count) OVER () * 100, 2) AS "marketShare"
FROM v_category_state_distribution v
JOIN states s ON v.state = s.code
WHERE v.category = $1
AND v.state = ANY($2)
ORDER BY v.product_count DESC
cs.category,
cs.product_count AS "productCount",
cs.store_count AS "storeCount",
cs.avg_price AS "avgPrice",
ROUND(cs.product_count::NUMERIC / NULLIF(SUM(cs.product_count) OVER (), 0) * 100, 2) AS "marketShare"
FROM category_stats cs
JOIN states s ON cs.state = s.code
ORDER BY cs.product_count DESC
`, [category, states]);
// Get national totals
@@ -345,41 +527,49 @@ export class StateQueryService {
/**
* Get stores in a specific state
* Uses inline query for compatibility - does not depend on v_store_state_summary view
*/
async getStoresByState(state: string, options: StateQueryOptions = {}): Promise<StoreInState[]> {
const { limit = 100, offset = 0, includeInactive = false, sortBy = 'productCount', sortDir = 'desc' } = options;
// Sort columns must reference the aliased output names with quotes
const sortColumn = {
productCount: 'product_count',
brandCount: 'brand_count',
avgPrice: 'avg_price',
name: 'dispensary_name',
productCount: '"productCount"',
brandCount: '"brandCount"',
avgPrice: '"avgPrice"',
name: '"dispensaryName"',
city: 'city',
lastCrawl: 'last_crawl_at',
}[sortBy] || 'product_count';
lastCrawl: '"lastCrawlAt"',
}[sortBy] || '"productCount"';
let whereClause = 'WHERE state = $1';
let whereClause = 'WHERE d.state = $1';
if (!includeInactive) {
whereClause += ` AND crawl_status != 'disabled'`;
// Use stage column instead of crawl_status (which doesn't exist)
whereClause += ` AND (d.stage IS NULL OR d.stage NOT IN ('disabled', 'failing'))`;
}
// Inline query that aggregates store data from dispensaries and store_products
// Works whether or not v_store_state_summary view exists
// Uses 'stage' column instead of 'crawl_status' which doesn't exist in this schema
const result = await this.pool.query(`
SELECT
dispensary_id AS "dispensaryId",
dispensary_name AS "dispensaryName",
dispensary_slug AS "dispensarySlug",
state,
city,
menu_type AS "menuType",
crawl_status AS "crawlStatus",
last_crawl_at AS "lastCrawlAt",
product_count AS "productCount",
in_stock_count AS "inStockCount",
brand_count AS "brandCount",
avg_price AS "avgPrice",
special_count AS "specialCount"
FROM v_store_state_summary
d.id AS "dispensaryId",
d.name AS "dispensaryName",
d.slug AS "dispensarySlug",
d.state,
d.city,
d.menu_type AS "menuType",
d.stage AS "crawlStatus",
d.last_crawl_at AS "lastCrawlAt",
COUNT(DISTINCT sp.id) AS "productCount",
COUNT(DISTINCT CASE WHEN sp.is_in_stock THEN sp.id END) AS "inStockCount",
COUNT(DISTINCT sp.brand_id) AS "brandCount",
ROUND(AVG(sp.price_rec)::numeric, 2) AS "avgPrice",
COUNT(DISTINCT CASE WHEN sp.is_on_special THEN sp.id END) AS "specialCount"
FROM dispensaries d
LEFT JOIN store_products sp ON d.id = sp.dispensary_id
${whereClause}
GROUP BY d.id, d.name, d.slug, d.state, d.city, d.menu_type, d.stage, d.last_crawl_at
ORDER BY ${sortColumn} ${sortDir === 'asc' ? 'ASC' : 'DESC'} NULLS LAST
LIMIT $2 OFFSET $3
`, [state, limit, offset]);
@@ -393,6 +583,7 @@ export class StateQueryService {
/**
* Get price distribution by state
* Uses inline query instead of fn_national_price_comparison for compatibility
*/
async getStorePriceDistribution(
state: string,
@@ -400,44 +591,104 @@ export class StateQueryService {
): Promise<StatePriceDistribution[]> {
const { category, brandId } = options;
// Build WHERE conditions dynamically
const conditions = ['d.state = $1', 'sp.price_rec IS NOT NULL', 'sp.price_rec > 0'];
const params: any[] = [state];
let paramIndex = 2;
if (category) {
conditions.push(`sp.category_raw = $${paramIndex}`);
params.push(category);
paramIndex++;
}
if (brandId) {
conditions.push(`sp.brand_id = $${paramIndex}`);
params.push(brandId);
paramIndex++;
}
const result = await this.pool.query(`
SELECT * FROM fn_national_price_comparison($1, $2)
WHERE state = $3
`, [category || null, brandId || null, state]);
SELECT
d.state,
s.name AS state_name,
COUNT(DISTINCT sp.id) AS product_count,
ROUND(AVG(sp.price_rec)::numeric, 2) AS avg_price,
MIN(sp.price_rec) AS min_price,
MAX(sp.price_rec) AS max_price,
ROUND(PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY sp.price_rec)::numeric, 2) AS median_price,
ROUND(STDDEV(sp.price_rec)::numeric, 2) AS price_stddev
FROM dispensaries d
JOIN states s ON d.state = s.code
JOIN store_products sp ON d.id = sp.dispensary_id
WHERE ${conditions.join(' AND ')}
GROUP BY d.state, s.name
ORDER BY avg_price DESC
`, params);
return result.rows.map(row => ({
state: row.state,
stateName: row.state_name,
productCount: parseInt(row.product_count),
avgPrice: parseFloat(row.avg_price),
minPrice: parseFloat(row.min_price),
maxPrice: parseFloat(row.max_price),
medianPrice: parseFloat(row.median_price),
priceStddev: parseFloat(row.price_stddev),
productCount: parseInt(row.product_count || '0'),
avgPrice: parseFloat(row.avg_price || '0'),
minPrice: parseFloat(row.min_price || '0'),
maxPrice: parseFloat(row.max_price || '0'),
medianPrice: parseFloat(row.median_price || '0'),
priceStddev: parseFloat(row.price_stddev || '0'),
}));
}
/**
* Get national price comparison across all states
* Uses inline query instead of fn_national_price_comparison for compatibility
*/
async getNationalPriceComparison(
options: { category?: string; brandId?: number } = {}
): Promise<StatePriceDistribution[]> {
const { category, brandId } = options;
// Build WHERE conditions dynamically
const conditions = ['d.state IS NOT NULL', 'sp.price_rec IS NOT NULL', 'sp.price_rec > 0'];
const params: any[] = [];
let paramIndex = 1;
if (category) {
conditions.push(`sp.category_raw = $${paramIndex}`);
params.push(category);
paramIndex++;
}
if (brandId) {
conditions.push(`sp.brand_id = $${paramIndex}`);
params.push(brandId);
paramIndex++;
}
const result = await this.pool.query(`
SELECT * FROM fn_national_price_comparison($1, $2)
`, [category || null, brandId || null]);
SELECT
d.state,
s.name AS state_name,
COUNT(DISTINCT sp.id) AS product_count,
ROUND(AVG(sp.price_rec)::numeric, 2) AS avg_price,
MIN(sp.price_rec) AS min_price,
MAX(sp.price_rec) AS max_price,
ROUND(PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY sp.price_rec)::numeric, 2) AS median_price,
ROUND(STDDEV(sp.price_rec)::numeric, 2) AS price_stddev
FROM dispensaries d
JOIN states s ON d.state = s.code
JOIN store_products sp ON d.id = sp.dispensary_id
WHERE ${conditions.join(' AND ')}
GROUP BY d.state, s.name
ORDER BY avg_price DESC
`, params);
return result.rows.map(row => ({
state: row.state,
stateName: row.state_name,
productCount: parseInt(row.product_count),
avgPrice: parseFloat(row.avg_price),
minPrice: parseFloat(row.min_price),
maxPrice: parseFloat(row.max_price),
medianPrice: parseFloat(row.median_price),
priceStddev: parseFloat(row.price_stddev),
productCount: parseInt(row.product_count || '0'),
avgPrice: parseFloat(row.avg_price || '0'),
minPrice: parseFloat(row.min_price || '0'),
maxPrice: parseFloat(row.max_price || '0'),
medianPrice: parseFloat(row.median_price || '0'),
priceStddev: parseFloat(row.price_stddev || '0'),
}));
}
@@ -498,7 +749,7 @@ export class StateQueryService {
switch (metric) {
case 'stores':
query = `
SELECT state, state_name AS "stateName", dispensary_count AS value, 'stores' AS label
SELECT state, state_name AS "stateName", COALESCE(dispensary_count, 0) AS value, 'stores' AS label
FROM mv_state_metrics
WHERE state IS NOT NULL
ORDER BY state
@@ -507,7 +758,7 @@ export class StateQueryService {
case 'products':
query = `
SELECT state, state_name AS "stateName", total_products AS value, 'products' AS label
SELECT state, state_name AS "stateName", COALESCE(total_products, 0) AS value, 'products' AS label
FROM mv_state_metrics
WHERE state IS NOT NULL
ORDER BY state
@@ -516,7 +767,7 @@ export class StateQueryService {
case 'brands':
query = `
SELECT state, state_name AS "stateName", unique_brands AS value, 'brands' AS label
SELECT state, state_name AS "stateName", COALESCE(unique_brands, 0) AS value, 'brands' AS label
FROM mv_state_metrics
WHERE state IS NOT NULL
ORDER BY state
@@ -536,10 +787,33 @@ export class StateQueryService {
if (!options.brandId) {
throw new Error('brandId required for penetration heatmap');
}
// Inline query instead of fn_brand_state_penetration function
query = `
SELECT state, state_name AS "stateName", penetration_pct AS value, 'penetration %' AS label
FROM fn_brand_state_penetration($1)
ORDER BY state
WITH state_totals AS (
SELECT d.state, s.name AS state_name, COUNT(DISTINCT d.id) AS total_stores
FROM dispensaries d
JOIN states s ON d.state = s.code
WHERE d.state IS NOT NULL
GROUP BY d.state, s.name
),
brand_presence AS (
SELECT d.state, COUNT(DISTINCT d.id) AS stores_with_brand
FROM store_products sp
JOIN dispensaries d ON sp.dispensary_id = d.id
WHERE (sp.brand_id = $1 OR sp.brand_name_raw = (SELECT name FROM brands WHERE id = $1))
AND d.state IS NOT NULL
GROUP BY d.state
)
SELECT
st.state,
st.state_name AS "stateName",
CASE WHEN st.total_stores > 0
THEN ROUND((COALESCE(bp.stores_with_brand, 0)::numeric / st.total_stores) * 100, 2)
ELSE 0 END AS value,
'penetration %' AS label
FROM state_totals st
LEFT JOIN brand_presence bp ON st.state = bp.state
ORDER BY st.state
`;
params = [options.brandId];
break;
@@ -549,7 +823,14 @@ export class StateQueryService {
}
const result = await this.pool.query(query, params);
return result.rows;
// Parse numeric values from strings (PostgreSQL returns bigint as string)
// Round to 2 decimal places for display
return result.rows.map((row: any) => ({
state: row.state,
stateName: row.stateName,
value: row.value !== null ? Math.round(parseFloat(row.value) * 100) / 100 : 0,
label: row.label,
}));
}
/**

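An aside on the parseInt/parseFloat wrapping added throughout this service: node-postgres returns bigint (e.g. COUNT) and numeric columns as strings by default, so raw rows cannot be used for arithmetic directly. A minimal illustration of the pattern (the query is a placeholder):

// COUNT(*) arrives as a string such as '42' unless a custom type parser is installed
const { rows } = await pool.query(`SELECT COUNT(*) AS total FROM store_products`);
const total = parseInt(rows[0].total || '0', 10); // coerce before doing math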
View File

@@ -17,6 +17,7 @@ export interface StateMetrics {
activeStores: number;
totalProducts: number;
inStockProducts: number;
outOfStockProducts: number;
onSpecialProducts: number;
uniqueBrands: number;
uniqueCategories: number;

View File

@@ -159,6 +159,7 @@ export const GRAPHQL_HASHES = {
GetAddressBasedDispensaryData: '13461f73abf7268770dfd05fe7e10c523084b2bb916a929c08efe3d87531977b',
ConsumerDispensaries: '0a5bfa6ca1d64ae47bcccb7c8077c87147cbc4e6982c17ceec97a2a4948b311b',
DispensaryInfo: '13461f73abf7268770dfd05fe7e10c523084b2bb916a929c08efe3d87531977b',
GetAllCitiesByState: 'ae547a0466ace5a48f91e55bf6699eacd87e3a42841560f0c0eabed5a0a920e6',
};
// ============================================================
@@ -212,7 +213,24 @@ const FINGERPRINTS: Fingerprint[] = [
let currentFingerprintIndex = 0;
// Forward declaration for session (actual CrawlSession interface defined later)
let currentSession: {
sessionId: string;
fingerprint: Fingerprint;
proxyUrl: string | null;
stateCode?: string;
timezone?: string;
startedAt: Date;
} | null = null;
/**
* Get current fingerprint - returns session fingerprint if active, otherwise default
*/
export function getFingerprint(): Fingerprint {
// Use session fingerprint if a session is active
if (currentSession) {
return currentSession.fingerprint;
}
return FINGERPRINTS[currentFingerprintIndex];
}
@@ -227,6 +245,103 @@ export function resetFingerprint(): void {
currentFingerprintIndex = 0;
}
/**
* Get a random fingerprint from the pool
*/
export function getRandomFingerprint(): Fingerprint {
const index = Math.floor(Math.random() * FINGERPRINTS.length);
return FINGERPRINTS[index];
}
// ============================================================
// SESSION MANAGEMENT
// Per-session fingerprint rotation for stealth
// ============================================================
export interface CrawlSession {
sessionId: string;
fingerprint: Fingerprint;
proxyUrl: string | null;
stateCode?: string;
timezone?: string;
startedAt: Date;
}
// Note: currentSession variable declared earlier in file for proper scoping
/**
* Timezone to Accept-Language mapping
* US timezones all use en-US but this can be extended for international
*/
const TIMEZONE_TO_LOCALE: Record<string, string> = {
'America/Phoenix': 'en-US,en;q=0.9',
'America/Los_Angeles': 'en-US,en;q=0.9',
'America/Denver': 'en-US,en;q=0.9',
'America/Chicago': 'en-US,en;q=0.9',
'America/New_York': 'en-US,en;q=0.9',
'America/Detroit': 'en-US,en;q=0.9',
'America/Anchorage': 'en-US,en;q=0.9',
'Pacific/Honolulu': 'en-US,en;q=0.9',
};
/**
* Get Accept-Language header for a given timezone
*/
export function getLocaleForTimezone(timezone?: string): string {
if (!timezone) return 'en-US,en;q=0.9';
return TIMEZONE_TO_LOCALE[timezone] || 'en-US,en;q=0.9';
}
/**
* Start a new crawl session with a random fingerprint
* Call this before crawling a store to get a fresh identity
*/
export function startSession(stateCode?: string, timezone?: string): CrawlSession {
const baseFp = getRandomFingerprint();
// Override Accept-Language based on timezone for geographic consistency
const fingerprint: Fingerprint = {
...baseFp,
acceptLanguage: getLocaleForTimezone(timezone),
};
currentSession = {
sessionId: `session_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`,
fingerprint,
proxyUrl: currentProxy,
stateCode,
timezone,
startedAt: new Date(),
};
console.log(`[Dutchie Client] Started session ${currentSession.sessionId}`);
console.log(`[Dutchie Client] Fingerprint: ${fingerprint.userAgent.slice(0, 50)}...`);
console.log(`[Dutchie Client] Accept-Language: ${fingerprint.acceptLanguage}`);
if (timezone) {
console.log(`[Dutchie Client] Timezone: ${timezone}`);
}
return currentSession;
}
/**
* End the current crawl session
*/
export function endSession(): void {
if (currentSession) {
const duration = Math.round((Date.now() - currentSession.startedAt.getTime()) / 1000);
console.log(`[Dutchie Client] Ended session ${currentSession.sessionId} (${duration}s)`);
currentSession = null;
}
}
/**
* Get current active session
*/
export function getCurrentSession(): CrawlSession | null {
return currentSession;
}
// ============================================================
// CURL HTTP CLIENT
// ============================================================
@@ -366,7 +481,7 @@ export function curlGet(url: string, headers: Record<string, string>, timeout =
export interface ExecuteGraphQLOptions {
maxRetries?: number;
retryOn403?: boolean;
cName: string;
cName?: string; // Optional - used for Referer header, defaults to 'cities'
}
/**
@@ -378,7 +493,7 @@ export async function executeGraphQL(
hash: string,
options: ExecuteGraphQLOptions
): Promise<any> {
const { maxRetries = 3, retryOn403 = true, cName } = options;
const { maxRetries = 3, retryOn403 = true, cName = 'cities' } = options;
const body = {
operationName,
@@ -419,7 +534,8 @@ export async function executeGraphQL(
}
if (response.status === 403 && retryOn403) {
console.warn(`[Dutchie Client] 403 blocked - rotating fingerprint...`);
console.warn(`[Dutchie Client] 403 blocked - rotating proxy and fingerprint...`);
await rotateProxyOn403('403 Forbidden on GraphQL');
rotateFingerprint();
attempt++;
await sleep(1000 * attempt);
@@ -502,7 +618,8 @@ export async function fetchPage(
}
if (response.status === 403 && retryOn403) {
console.warn(`[Dutchie Client] 403 blocked - rotating fingerprint...`);
console.warn(`[Dutchie Client] 403 blocked - rotating proxy and fingerprint...`);
await rotateProxyOn403('403 Forbidden on page fetch');
rotateFingerprint();
attempt++;
await sleep(1000 * attempt);

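A rough sketch of how the new per-store session helpers are meant to wrap a single crawl; the import path, state code, timezone, and crawl body are assumptions for illustration:

import { startSession, endSession, getCurrentSession } from './client'; // path assumed

// Fresh random fingerprint with a geo-consistent Accept-Language for this store
const session = startSession('AZ', 'America/Phoenix');
try {
  // ... executeGraphQL / fetchPage calls for this store go here ...
  console.log('Active session:', getCurrentSession()?.sessionId);
} finally {
  endSession(); // logs duration and clears the session fingerprint
}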
View File

@@ -18,6 +18,13 @@ export {
getFingerprint,
rotateFingerprint,
resetFingerprint,
getRandomFingerprint,
getLocaleForTimezone,
// Session Management (per-store fingerprint rotation)
startSession,
endSession,
getCurrentSession,
// Proxy
setProxy,
@@ -32,6 +39,7 @@ export {
// Types
type CurlResponse,
type Fingerprint,
type CrawlSession,
type ExecuteGraphQLOptions,
type FetchPageOptions,
} from './client';

View File

@@ -0,0 +1,168 @@
/**
* Admin Debug Routes
*
* Debug endpoints for inspecting crawl snapshots and raw payloads.
* Uses canonical store_* tables (not legacy dutchie_* tables).
*/
import { Router, Request, Response } from 'express';
import { authMiddleware } from '../auth/middleware';
import { pool } from '../db/pool';
const router = Router();
router.use(authMiddleware);
/**
* GET /api/admin/debug/stores/:id/snapshots
* List recent snapshots for a store's products
*/
router.get('/stores/:id/snapshots', async (req: Request, res: Response) => {
try {
const { id } = req.params;
const { limit = '50', offset = '0' } = req.query;
const dispensaryId = parseInt(id, 10);
const limitNum = Math.min(parseInt(limit as string, 10), 200);
const offsetNum = parseInt(offset as string, 10);
// Get snapshots with product info
const { rows } = await pool.query(`
SELECT
sps.id,
sps.store_product_id as product_id,
COALESCE(sps.name_raw, sp.name_raw, 'Unknown Product') as product_name,
COALESCE(sps.brand_name_raw, sp.brand_name_raw) as brand_name,
sps.captured_at as crawled_at,
COALESCE(sps.stock_status, 'unknown') as stock_status,
sps.price_rec as regular_price,
sps.price_rec_special as sale_price,
sps.raw_data as raw_payload
FROM store_product_snapshots sps
LEFT JOIN store_products sp ON sp.id = sps.store_product_id
WHERE sps.dispensary_id = $1
ORDER BY sps.captured_at DESC
LIMIT $2 OFFSET $3
`, [dispensaryId, limitNum, offsetNum]);
// Get total count
const { rows: countRows } = await pool.query(
`SELECT COUNT(*) as total FROM store_product_snapshots WHERE dispensary_id = $1`,
[dispensaryId]
);
res.json({
snapshots: rows.map((r: any) => ({
id: r.id,
productId: r.product_id,
productName: r.product_name,
brandName: r.brand_name,
crawledAt: r.crawled_at,
stockStatus: r.stock_status,
regularPrice: r.regular_price ? parseFloat(r.regular_price) : null,
salePrice: r.sale_price ? parseFloat(r.sale_price) : null,
rawPayload: r.raw_payload,
})),
total: parseInt(countRows[0]?.total || '0', 10),
limit: limitNum,
offset: offsetNum,
});
} catch (error: any) {
console.error('[AdminDebug] Error fetching store snapshots:', error.message);
res.status(500).json({ error: error.message });
}
});
/**
* GET /api/admin/debug/snapshots/:id/raw-payload
* Get the raw payload for a specific snapshot
*/
router.get('/snapshots/:id/raw-payload', async (req: Request, res: Response) => {
try {
const { id } = req.params;
const snapshotId = parseInt(id, 10);
const { rows } = await pool.query(`
SELECT
sps.id,
sps.store_product_id as product_id,
COALESCE(sps.name_raw, sp.name_raw, 'Unknown Product') as product_name,
sps.dispensary_id,
d.name as dispensary_name,
sps.captured_at as crawled_at,
sps.raw_data as raw_payload
FROM store_product_snapshots sps
LEFT JOIN store_products sp ON sp.id = sps.store_product_id
LEFT JOIN dispensaries d ON d.id = sps.dispensary_id
WHERE sps.id = $1
`, [snapshotId]);
if (rows.length === 0) {
return res.status(404).json({ error: 'Snapshot not found' });
}
const r = rows[0];
res.json({
snapshot: {
id: r.id,
productId: r.product_id,
productName: r.product_name,
dispensaryId: r.dispensary_id,
dispensaryName: r.dispensary_name,
crawledAt: r.crawled_at,
rawPayload: r.raw_payload,
},
});
} catch (error: any) {
console.error('[AdminDebug] Error fetching snapshot raw payload:', error.message);
res.status(500).json({ error: error.message });
}
});
/**
* GET /api/admin/debug/products/:id/raw-payload
* Get raw payload and metadata for a specific product
*/
router.get('/products/:id/raw-payload', async (req: Request, res: Response) => {
try {
const { id } = req.params;
const productId = parseInt(id, 10);
// Query store_products for the product and any raw_payload/metadata
const { rows } = await pool.query(`
SELECT
sp.id,
sp.name_raw as name,
sp.dispensary_id,
d.name as dispensary_name,
sp.raw_payload,
sp.provider_metadata as metadata,
sp.created_at,
sp.updated_at
FROM store_products sp
LEFT JOIN dispensaries d ON d.id = sp.dispensary_id
WHERE sp.id = $1
`, [productId]);
if (rows.length === 0) {
return res.status(404).json({ error: 'Product not found' });
}
const r = rows[0];
res.json({
product: {
id: r.id,
name: r.name,
dispensaryId: r.dispensary_id,
dispensaryName: r.dispensary_name,
rawPayload: r.raw_payload,
metadata: r.metadata,
createdAt: r.created_at,
updatedAt: r.updated_at,
},
});
} catch (error: any) {
console.error('[AdminDebug] Error fetching product raw payload:', error.message);
res.status(500).json({ error: error.message });
}
});
export default router;

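Assuming this router is mounted under /api/admin/debug (the mount point is not part of this diff) and that authMiddleware accepts a bearer token (also an assumption), calls would look roughly like:

// Hypothetical usage - base URL, IDs, and auth header format are placeholders
const base = 'http://localhost:3000';
const headers = { Authorization: `Bearer ${process.env.ADMIN_TOKEN ?? ''}` }; // exact format depends on authMiddleware
// List the 50 most recent snapshots captured for store 123
const snapshots = await fetch(`${base}/api/admin/debug/stores/123/snapshots?limit=50`, { headers })
  .then(r => r.json());
// Fetch the raw payload stored for snapshot 456
const rawPayload = await fetch(`${base}/api/admin/debug/snapshots/456/raw-payload`, { headers })
  .then(r => r.json());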
View File

@@ -231,6 +231,34 @@ export function createAnalyticsV2Router(pool: Pool): Router {
}
});
/**
* GET /brand/:name/promotions
* Get brand promotional history - tracks specials, discounts, duration, and sales estimates
*
* Query params:
* - window: 7d|30d|90d (default: 90d)
* - state: state code filter (e.g., AZ)
* - category: category filter (e.g., Flower)
*/
router.get('/brand/:name/promotions', async (req: Request, res: Response) => {
try {
const brandName = decodeURIComponent(req.params.name);
const window = parseTimeWindow(req.query.window as string) || '90d';
const stateCode = req.query.state as string | undefined;
const category = req.query.category as string | undefined;
const result = await brandService.getBrandPromotionalHistory(brandName, {
window,
stateCode,
category,
});
res.json(result);
} catch (error) {
console.error('[AnalyticsV2] Brand promotions error:', error);
res.status(500).json({ error: 'Failed to fetch brand promotional history' });
}
});
// ============================================================
// CATEGORY ANALYTICS
// ============================================================
@@ -400,6 +428,31 @@ export function createAnalyticsV2Router(pool: Pool): Router {
}
});
/**
* GET /store/:id/quantity-changes
* Get quantity changes for a store (increases/decreases)
* Useful for estimating sales (decreases) or restocks (increases)
*
* Query params:
* - window: 7d|30d|90d (default: 7d)
* - direction: increase|decrease|all (default: all)
* - limit: number (default: 100)
*/
router.get('/store/:id/quantity-changes', async (req: Request, res: Response) => {
try {
const dispensaryId = parseInt(req.params.id);
const window = parseTimeWindow(req.query.window as string);
const direction = (req.query.direction as 'increase' | 'decrease' | 'all') || 'all';
const limit = req.query.limit ? parseInt(req.query.limit as string) : 100;
const result = await storeService.getQuantityChanges(dispensaryId, { window, direction, limit });
res.json(result);
} catch (error) {
console.error('[AnalyticsV2] Store quantity changes error:', error);
res.status(500).json({ error: 'Failed to fetch store quantity changes' });
}
});
/**
* GET /store/:id/inventory
* Get store inventory composition

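For the two endpoints added above, an illustrative pair of requests; the mount prefix, brand name, store ID, and filter values are assumptions, not part of this diff:

// Assumed mount prefix for the Analytics V2 router
const base = 'http://localhost:3000/api/analytics/v2';
// Brand promotional history, scoped to Arizona flower over the last 90 days
const promos = await fetch(
  `${base}/brand/${encodeURIComponent('Example Brand')}/promotions?window=90d&state=AZ&category=Flower`
).then(r => r.json());
// Quantity decreases at store 123 over the last 7 days (rough proxy for sales)
const changes = await fetch(
  `${base}/store/123/quantity-changes?window=7d&direction=decrease&limit=50`
).then(r => r.json());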
View File

@@ -35,11 +35,11 @@ router.get('/overview', async (req, res) => {
// Top products
const topProductsResult = await pool.query(`
SELECT p.id, p.name, p.price, COUNT(c.id) as click_count
SELECT p.id, p.name_raw as name, p.price_rec as price, COUNT(c.id) as click_count
FROM clicks c
JOIN products p ON c.product_id = p.id
JOIN store_products p ON c.product_id = p.id
WHERE c.clicked_at >= NOW() - INTERVAL '${parseInt(days as string)} days'
GROUP BY p.id, p.name, p.price
GROUP BY p.id, p.name_raw, p.price_rec
ORDER BY click_count DESC
LIMIT 10
`);
@@ -109,12 +109,12 @@ router.get('/campaigns/:id', async (req, res) => {
// Clicks by product in this campaign
const byProductResult = await pool.query(`
SELECT p.id, p.name, COUNT(c.id) as clicks
SELECT p.id, p.name_raw as name, COUNT(c.id) as clicks
FROM clicks c
JOIN products p ON c.product_id = p.id
JOIN store_products p ON c.product_id = p.id
WHERE c.campaign_id = $1
AND c.clicked_at >= NOW() - INTERVAL '${parseInt(days as string)} days'
GROUP BY p.id, p.name
GROUP BY p.id, p.name_raw
ORDER BY clicks DESC
`, [id]);

View File

@@ -154,7 +154,7 @@ router.patch('/:id/toggle', requireRole('superadmin', 'admin'), async (req, res)
const result = await pool.query(`
UPDATE wp_dutchie_api_permissions
SET is_active = NOT is_active
SET is_active = CASE WHEN is_active = 1 THEN 0 ELSE 1 END
WHERE id = $1
RETURNING *
`, [id]);

View File

@@ -37,8 +37,22 @@ router.get('/:id', async (req, res) => {
}
const productsResult = await pool.query(`
SELECT p.*, cp.display_order
FROM products p
SELECT
p.id,
p.dispensary_id,
p.name_raw as name,
p.brand_name_raw as brand,
p.category_raw as category,
p.subcategory_raw as subcategory,
p.price_rec as price,
p.thc_percent,
p.cbd_percent,
p.strain_type,
p.primary_image_url as image_url,
p.stock_status,
p.is_in_stock as in_stock,
cp.display_order
FROM store_products p
JOIN campaign_products cp ON p.id = cp.product_id
WHERE cp.campaign_id = $1
ORDER BY cp.display_order

View File

@@ -5,31 +5,35 @@ import { pool } from '../db/pool';
const router = Router();
router.use(authMiddleware);
// Get categories (flat list)
// Get categories (flat list) - derived from actual product data
router.get('/', async (req, res) => {
try {
const { store_id } = req.query;
const { store_id, in_stock_only } = req.query;
let query = `
SELECT
c.*,
COUNT(DISTINCT p.id) as product_count,
pc.name as parent_name
FROM categories c
LEFT JOIN products p ON c.id = p.category_id
LEFT JOIN categories pc ON c.parent_id = pc.id
category_raw as name,
category_raw as slug,
COUNT(*) as product_count,
COUNT(*) FILTER (WHERE is_in_stock = true) as in_stock_count
FROM store_products
WHERE category_raw IS NOT NULL
`;
const params: any[] = [];
if (store_id) {
query += ' WHERE c.store_id = $1';
params.push(store_id);
query += ` AND dispensary_id = $${params.length}`;
}
if (in_stock_only === 'true') {
query += ` AND is_in_stock = true`;
}
query += `
GROUP BY c.id, pc.name
ORDER BY c.display_order, c.name
GROUP BY category_raw
ORDER BY category_raw
`;
const result = await pool.query(query, params);
@@ -40,49 +44,85 @@ router.get('/', async (req, res) => {
}
});
// Get category tree (hierarchical)
// Get category tree (hierarchical) - category -> subcategory structure from product data
router.get('/tree', async (req, res) => {
try {
const { store_id } = req.query;
const { store_id, in_stock_only } = req.query;
if (!store_id) {
return res.status(400).json({ error: 'store_id is required' });
}
// Get all categories for the store
const result = await pool.query(`
// Get category + subcategory combinations with counts
let query = `
SELECT
c.*,
COUNT(DISTINCT p.id) as product_count
FROM categories c
LEFT JOIN products p ON c.id = p.category_id AND p.in_stock = true
WHERE c.store_id = $1
GROUP BY c.id
ORDER BY c.display_order, c.name
`, [store_id]);
category_raw as category,
subcategory_raw as subcategory,
COUNT(*) as product_count,
COUNT(*) FILTER (WHERE is_in_stock = true) as in_stock_count
FROM store_products
WHERE category_raw IS NOT NULL
`;
// Build tree structure
const categories = result.rows;
const categoryMap = new Map();
const tree: any[] = [];
const params: any[] = [];
// First pass: create map
categories.forEach((cat: { id: number; parent_id?: number }) => {
categoryMap.set(cat.id, { ...cat, children: [] });
});
// Second pass: build tree
categories.forEach((cat: { id: number; parent_id?: number }) => {
const node = categoryMap.get(cat.id);
if (cat.parent_id) {
const parent = categoryMap.get(cat.parent_id);
if (parent) {
parent.children.push(node);
if (store_id) {
params.push(store_id);
query += ` AND dispensary_id = $${params.length}`;
}
} else {
tree.push(node);
if (in_stock_only === 'true') {
query += ` AND is_in_stock = true`;
}
query += `
GROUP BY category_raw, subcategory_raw
ORDER BY category_raw, subcategory_raw
`;
const result = await pool.query(query, params);
// Build tree structure: category -> subcategories
const categoryMap = new Map<string, {
name: string;
slug: string;
product_count: number;
in_stock_count: number;
subcategories: Array<{
name: string;
slug: string;
product_count: number;
in_stock_count: number;
}>;
}>();
for (const row of result.rows) {
const category = row.category;
const subcategory = row.subcategory;
const count = parseInt(row.product_count);
const inStockCount = parseInt(row.in_stock_count);
if (!categoryMap.has(category)) {
categoryMap.set(category, {
name: category,
slug: category.toLowerCase().replace(/\s+/g, '-'),
product_count: 0,
in_stock_count: 0,
subcategories: []
});
}
const cat = categoryMap.get(category)!;
cat.product_count += count;
cat.in_stock_count += inStockCount;
if (subcategory) {
cat.subcategories.push({
name: subcategory,
slug: subcategory.toLowerCase().replace(/\s+/g, '-'),
product_count: count,
in_stock_count: inStockCount
});
}
}
const tree = Array.from(categoryMap.values());
res.json({ tree });
} catch (error) {
@@ -91,4 +131,91 @@ router.get('/tree', async (req, res) => {
}
});
// Get all unique subcategories for a category
router.get('/:category/subcategories', async (req, res) => {
try {
const { category } = req.params;
const { store_id, in_stock_only } = req.query;
let query = `
SELECT
subcategory_raw as name,
subcategory_raw as slug,
COUNT(*) as product_count,
COUNT(*) FILTER (WHERE is_in_stock = true) as in_stock_count
FROM store_products
WHERE category_raw = $1
AND subcategory_raw IS NOT NULL
`;
const params: any[] = [category];
if (store_id) {
params.push(store_id);
query += ` AND dispensary_id = $${params.length}`;
}
if (in_stock_only === 'true') {
query += ` AND is_in_stock = true`;
}
query += `
GROUP BY subcategory_raw
ORDER BY subcategory_raw
`;
const result = await pool.query(query, params);
res.json({
category,
subcategories: result.rows
});
} catch (error) {
console.error('Error fetching subcategories:', error);
res.status(500).json({ error: 'Failed to fetch subcategories' });
}
});
// Get global category summary (across all stores)
router.get('/summary', async (req, res) => {
try {
const { state } = req.query;
let query = `
SELECT
sp.category_raw as category,
COUNT(DISTINCT sp.id) as product_count,
COUNT(DISTINCT sp.dispensary_id) as store_count,
COUNT(*) FILTER (WHERE sp.is_in_stock = true) as in_stock_count
FROM store_products sp
`;
const params: any[] = [];
if (state) {
query += `
JOIN dispensaries d ON sp.dispensary_id = d.id
WHERE sp.category_raw IS NOT NULL
AND d.state = $1
`;
params.push(state);
} else {
query += ` WHERE sp.category_raw IS NOT NULL`;
}
query += `
GROUP BY sp.category_raw
ORDER BY product_count DESC
`;
const result = await pool.query(query, params);
res.json({
categories: result.rows,
total_categories: result.rows.length
});
} catch (error) {
console.error('Error fetching category summary:', error);
res.status(500).json({ error: 'Failed to fetch category summary' });
}
});
export default router;

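The /tree handler now builds a flat category-to-subcategory map from store_products instead of walking a parent_id hierarchy. As a reading aid, the shape of one node in the { tree: [...] } response, derived from the categoryMap structure above (the interface name is ours, not from the codebase):

// Shape of each entry in the tree response, as built by categoryMap
interface CategoryTreeNode {
  name: string;            // category_raw value, e.g. 'Flower'
  slug: string;            // lowercased, spaces replaced with '-'
  product_count: number;
  in_stock_count: number;
  subcategories: Array<{
    name: string;
    slug: string;
    product_count: number;
    in_stock_count: number;
  }>;
}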
View File

@@ -92,9 +92,9 @@ router.get('/brands', async (req: Request, res: Response) => {
if (brandIds.length > 0) {
const brandNamesResult = await pool.query(`
SELECT DISTINCT brand_name
FROM dutchie_products
WHERE brand_name = ANY($1)
SELECT DISTINCT brand_name_raw as brand_name
FROM store_products
WHERE brand_name_raw = ANY($1)
`, [brandIds]);
brandNamesResult.rows.forEach(r => {
@@ -201,14 +201,14 @@ router.get('/products', async (req: Request, res: Response) => {
// Try to match by external_id or id
const productDetailsResult = await pool.query(`
SELECT
external_id,
provider_product_id as external_id,
id::text as product_id,
name,
brand_name,
type,
subcategory
FROM dutchie_products
WHERE external_id = ANY($1) OR id::text = ANY($1)
name_raw as name,
brand_name_raw as brand_name,
category_raw as type,
subcategory_raw as subcategory
FROM store_products
WHERE provider_product_id = ANY($1) OR id::text = ANY($1)
`, [productIds]);
productDetailsResult.rows.forEach(r => {

View File

@@ -26,10 +26,10 @@ router.get('/stats', async (req, res) => {
COUNT(*) as total,
COUNT(*) FILTER (WHERE stock_status = 'in_stock') as in_stock,
COUNT(*) FILTER (WHERE primary_image_url IS NOT NULL) as with_images,
COUNT(DISTINCT brand_name) FILTER (WHERE brand_name IS NOT NULL AND brand_name != '') as unique_brands,
COUNT(DISTINCT brand_name_raw) FILTER (WHERE brand_name_raw IS NOT NULL AND brand_name_raw != '') as unique_brands,
COUNT(DISTINCT dispensary_id) as dispensaries_with_products,
COUNT(*) FILTER (WHERE created_at >= NOW() - INTERVAL '24 hours') as new_products_24h
FROM dutchie_products
FROM store_products
)
SELECT
ds.total as store_total, ds.active as store_active,
@@ -96,25 +96,25 @@ router.get('/activity', async (req, res) => {
const scrapesResult = await pool.query(`
SELECT
d.name,
d.last_crawled_at as last_scraped_at,
d.last_crawl_at as last_scraped_at,
d.product_count
FROM dispensaries d
WHERE d.last_crawled_at IS NOT NULL
ORDER BY d.last_crawled_at DESC
WHERE d.last_crawl_at IS NOT NULL
ORDER BY d.last_crawl_at DESC
LIMIT $1
`, [limit]);
// Recent products from dutchie_products
// Recent products from store_products (canonical)
const productsResult = await pool.query(`
SELECT
p.name,
0 as price,
p.brand_name as brand,
p.thc as thc_percentage,
p.cbd as cbd_percentage,
p.name_raw as name,
p.price_rec as price,
p.brand_name_raw as brand,
p.thc_percent as thc_percentage,
p.cbd_percent as cbd_percentage,
d.name as store_name,
p.created_at as first_seen_at
FROM dutchie_products p
FROM store_products p
JOIN dispensaries d ON p.dispensary_id = d.id
ORDER BY p.created_at DESC
LIMIT $1

View File

@@ -0,0 +1,269 @@
import { Router, Request, Response } from 'express';
import axios from 'axios';
const router = Router();
// Woodpecker API config - uses env vars, falling back to the defaults below
const WOODPECKER_SERVER = process.env.WOODPECKER_SERVER || 'https://ci.cannabrands.app';
const WOODPECKER_TOKEN = process.env.WOODPECKER_TOKEN;
const GITEA_SERVER = process.env.GITEA_SERVER || 'https://code.cannabrands.app';
const GITEA_TOKEN = process.env.GITEA_TOKEN;
const REPO_OWNER = 'Creationshop';
const REPO_NAME = 'dispensary-scraper';
interface PipelineStep {
name: string;
state: 'pending' | 'running' | 'success' | 'failure' | 'skipped';
started?: number;
stopped?: number;
}
interface PipelineInfo {
number: number;
status: string;
event: string;
branch: string;
message: string;
commit: string;
author: string;
created: number;
started?: number;
finished?: number;
steps?: PipelineStep[];
}
interface DeployStatusResponse {
running: {
sha: string;
sha_full: string;
build_time: string;
image_tag: string;
};
latest: {
sha: string;
sha_full: string;
message: string;
author: string;
timestamp: string;
} | null;
is_latest: boolean;
commits_behind: number;
pipeline: PipelineInfo | null;
error?: string;
}
/**
* Fetch latest commit from Gitea
*/
async function getLatestCommit(): Promise<{
sha: string;
message: string;
author: string;
timestamp: string;
} | null> {
if (!GITEA_TOKEN) {
console.warn('[DeployStatus] GITEA_TOKEN not set, skipping latest commit fetch');
return null;
}
try {
const response = await axios.get(
`${GITEA_SERVER}/api/v1/repos/${REPO_OWNER}/${REPO_NAME}/commits?limit=1`,
{
headers: { Authorization: `token ${GITEA_TOKEN}` },
timeout: 5000,
}
);
if (response.data && response.data.length > 0) {
const commit = response.data[0];
return {
sha: commit.sha,
message: commit.commit?.message?.split('\n')[0] || '',
author: commit.commit?.author?.name || commit.author?.login || 'unknown',
timestamp: commit.commit?.author?.date || commit.created,
};
}
} catch (error: any) {
console.error('[DeployStatus] Failed to fetch latest commit:', error.message);
}
return null;
}
/**
* Fetch latest pipeline from Woodpecker
*/
async function getLatestPipeline(): Promise<PipelineInfo | null> {
if (!WOODPECKER_TOKEN) {
console.warn('[DeployStatus] WOODPECKER_TOKEN not set, skipping pipeline fetch');
return null;
}
try {
// Get latest pipeline
const listResponse = await axios.get(
`${WOODPECKER_SERVER}/api/repos/${REPO_OWNER}/${REPO_NAME}/pipelines?page=1&per_page=1`,
{
headers: { Authorization: `Bearer ${WOODPECKER_TOKEN}` },
timeout: 5000,
}
);
if (!listResponse.data || listResponse.data.length === 0) {
return null;
}
const pipeline = listResponse.data[0];
// Get pipeline steps
let steps: PipelineStep[] = [];
try {
const stepsResponse = await axios.get(
`${WOODPECKER_SERVER}/api/repos/${REPO_OWNER}/${REPO_NAME}/pipelines/${pipeline.number}`,
{
headers: { Authorization: `Bearer ${WOODPECKER_TOKEN}` },
timeout: 5000,
}
);
if (stepsResponse.data?.workflows) {
for (const workflow of stepsResponse.data.workflows) {
if (workflow.children) {
for (const step of workflow.children) {
steps.push({
name: step.name,
state: step.state,
started: step.start_time,
stopped: step.end_time,
});
}
}
}
}
} catch (stepError) {
// Steps fetch failed, continue without them
}
return {
number: pipeline.number,
status: pipeline.status,
event: pipeline.event,
branch: pipeline.branch,
message: pipeline.message?.split('\n')[0] || '',
commit: pipeline.commit?.slice(0, 8) || '',
author: pipeline.author || 'unknown',
created: pipeline.created_at,
started: pipeline.started_at,
finished: pipeline.finished_at,
steps,
};
} catch (error: any) {
console.error('[DeployStatus] Failed to fetch pipeline:', error.message);
}
return null;
}
/**
* Count commits between two SHAs
*/
async function countCommitsBetween(fromSha: string, toSha: string): Promise<number> {
if (!GITEA_TOKEN || !fromSha || !toSha) return 0;
if (fromSha === toSha) return 0;
try {
const response = await axios.get(
`${GITEA_SERVER}/api/v1/repos/${REPO_OWNER}/${REPO_NAME}/commits?sha=${toSha}&limit=50`,
{
headers: { Authorization: `token ${GITEA_TOKEN}` },
timeout: 5000,
}
);
if (response.data) {
const commits = response.data;
for (let i = 0; i < commits.length; i++) {
if (commits[i].sha.startsWith(fromSha)) {
return i;
}
}
// If not found in the first 50, report the fetched count (i.e. at least that many behind)
return commits.length;
}
} catch (error: any) {
console.error('[DeployStatus] Failed to count commits:', error.message);
}
return 0;
}
/**
* GET /api/admin/deploy-status
* Returns deployment status with version comparison and CI info
*/
router.get('/', async (req: Request, res: Response) => {
try {
// Get running version from env vars (set during Docker build)
const runningSha = process.env.APP_GIT_SHA || 'unknown';
const running = {
sha: runningSha.slice(0, 8),
sha_full: runningSha,
build_time: process.env.APP_BUILD_TIME || new Date().toISOString(),
image_tag: process.env.CONTAINER_IMAGE_TAG?.slice(0, 8) || 'local',
};
// Fetch latest commit and pipeline in parallel
const [latestCommit, pipeline] = await Promise.all([
getLatestCommit(),
getLatestPipeline(),
]);
// Build latest info
const latest = latestCommit ? {
sha: latestCommit.sha.slice(0, 8),
sha_full: latestCommit.sha,
message: latestCommit.message,
author: latestCommit.author,
timestamp: latestCommit.timestamp,
} : null;
// Determine if running latest
const isLatest = latest
? runningSha.startsWith(latest.sha_full.slice(0, 8)) ||
latest.sha_full.startsWith(runningSha.slice(0, 8))
: true;
// Count commits behind
const commitsBehind = isLatest
? 0
: await countCommitsBetween(runningSha, latest?.sha_full || '');
const response: DeployStatusResponse = {
running,
latest,
is_latest: isLatest,
commits_behind: commitsBehind,
pipeline,
};
res.json(response);
} catch (error: any) {
console.error('[DeployStatus] Error:', error);
res.status(500).json({
error: error.message,
running: {
sha: process.env.APP_GIT_SHA?.slice(0, 8) || 'unknown',
sha_full: process.env.APP_GIT_SHA || 'unknown',
build_time: process.env.APP_BUILD_TIME || 'unknown',
image_tag: process.env.CONTAINER_IMAGE_TAG?.slice(0, 8) || 'local',
},
latest: null,
is_latest: true,
commits_behind: 0,
pipeline: null,
});
}
});
export default router;

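An invented example of what GET /api/admin/deploy-status could return when the running image is one commit behind; every value below is a placeholder and the pipeline object is omitted for brevity:

// Illustrative DeployStatusResponse value - all fields are made up
const example = {
  running: {
    sha: 'aaaa1111',
    sha_full: 'aaaa1111bbbb2222cccc3333dddd4444eeee5555',
    build_time: '2025-01-01T00:00:00Z',
    image_tag: 'aaaa1111',
  },
  latest: {
    sha: 'ffff9999',
    sha_full: 'ffff9999aaaa0000bbbb1111cccc2222dddd3333',
    message: 'fix: example commit',
    author: 'someone',
    timestamp: '2025-01-01T01:00:00Z',
  },
  is_latest: false,
  commits_behind: 1,
  pipeline: null,
};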
View File

@@ -8,32 +8,51 @@ router.use(authMiddleware);
// Valid menu_type values
const VALID_MENU_TYPES = ['dutchie', 'treez', 'jane', 'weedmaps', 'leafly', 'meadow', 'blaze', 'flowhub', 'dispense', 'cova', 'other', 'unknown'];
// Get all dispensaries
// Get all dispensaries (with pagination)
router.get('/', async (req, res) => {
try {
const { menu_type, city, state } = req.query;
const { menu_type, city, state, crawl_enabled, dutchie_verified, status, limit, offset, search } = req.query;
const pageLimit = Math.min(parseInt(limit as string) || 50, 500);
const pageOffset = parseInt(offset as string) || 0;
let query = `
SELECT
id,
name,
company_name,
slug,
address,
address1,
address2,
city,
state,
zip,
zipcode,
phone,
website,
email,
dba_name,
latitude,
longitude,
timezone,
menu_url,
menu_type,
platform,
platform_dispensary_id,
c_name,
chain_slug,
enterprise_id,
description,
logo_image,
banner_image,
offer_pickup,
offer_delivery,
offer_curbside_pickup,
is_medical,
is_recreational,
status,
country,
product_count,
last_crawl_at,
crawl_enabled,
dutchie_verified,
created_at,
updated_at
FROM dispensaries
@@ -48,10 +67,10 @@ router.get('/', async (req, res) => {
params.push(menu_type);
}
// Filter by city if provided
// Filter by city if provided (supports partial match)
if (city) {
conditions.push(`city ILIKE $${params.length + 1}`);
params.push(city);
params.push(`%${city}%`);
}
// Filter by state if provided
@@ -60,15 +79,61 @@ router.get('/', async (req, res) => {
params.push(state);
}
if (conditions.length > 0) {
query += ` WHERE ${conditions.join(' AND ')}`;
// Filter by crawl_enabled - defaults to showing only enabled
if (crawl_enabled === 'false' || crawl_enabled === '0') {
// Explicitly show disabled only
conditions.push(`(crawl_enabled = false OR crawl_enabled IS NULL)`);
} else if (crawl_enabled === 'all') {
// Show all (no filter)
} else {
// Default: show only enabled
conditions.push(`crawl_enabled = true`);
}
// Filter by dutchie_verified if provided
if (dutchie_verified !== undefined) {
const verified = dutchie_verified === 'true' || dutchie_verified === '1';
if (verified) {
conditions.push(`dutchie_verified = true`);
} else {
conditions.push(`(dutchie_verified = false OR dutchie_verified IS NULL)`);
}
}
// Filter by status (e.g., 'dropped', 'open', 'closed')
if (status) {
conditions.push(`status = $${params.length + 1}`);
params.push(status);
}
// Search filter (name, dba_name, city, company_name)
if (search) {
conditions.push(`(name ILIKE $${params.length + 1} OR dba_name ILIKE $${params.length + 1} OR city ILIKE $${params.length + 1})`);
params.push(`%${search}%`);
}
// Build WHERE clause
const whereClause = conditions.length > 0 ? ` WHERE ${conditions.join(' AND ')}` : '';
// Get total count first
const countResult = await pool.query(`SELECT COUNT(*) FROM dispensaries${whereClause}`, params);
const total = parseInt(countResult.rows[0].count);
// Add pagination
query += whereClause;
query += ` ORDER BY name`;
query += ` LIMIT $${params.length + 1} OFFSET $${params.length + 2}`;
params.push(pageLimit, pageOffset);
const result = await pool.query(query, params);
res.json({ dispensaries: result.rows });
res.json({
dispensaries: result.rows,
total,
limit: pageLimit,
offset: pageOffset,
hasMore: pageOffset + result.rows.length < total
});
} catch (error) {
console.error('Error fetching dispensaries:', error);
res.status(500).json({ error: 'Failed to fetch dispensaries' });
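A minimal client-side sketch of consuming the paginated response above, assuming the router is mounted at /api/dispensaries and a global fetch (Node 18+ or browser); the base URL and helper name are illustrative, the field names come from the response built above.

interface DispensaryListResponse {
  dispensaries: Array<{ id: number; name: string; city: string; state: string }>;
  total: number;
  limit: number;
  offset: number;
  hasMore: boolean;
}

// Hypothetical helper: walks the paginated endpoint until hasMore is false.
async function fetchAllDispensaries(baseUrl: string, state: string) {
  const all: DispensaryListResponse['dispensaries'] = [];
  let offset = 0;
  const limit = 100; // the server caps limit at 500
  while (true) {
    const qs = new URLSearchParams({ state, limit: String(limit), offset: String(offset) });
    const res = await fetch(`${baseUrl}/api/dispensaries?${qs}`);
    const page = (await res.json()) as DispensaryListResponse;
    all.push(...page.dispensaries);
    if (!page.hasMore) break;
    offset += page.limit;
  }
  return all;
}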
@@ -91,6 +156,75 @@ router.get('/stats/menu-types', async (req, res) => {
}
});
// Get crawl status stats
router.get('/stats/crawl-status', async (req, res) => {
try {
const { state, city } = req.query;
let query = `
SELECT
COUNT(*) FILTER (WHERE crawl_enabled = true) as enabled_count,
COUNT(*) FILTER (WHERE crawl_enabled = false OR crawl_enabled IS NULL) as disabled_count,
COUNT(*) FILTER (WHERE dutchie_verified = true) as verified_count,
COUNT(*) FILTER (WHERE dutchie_verified = false OR dutchie_verified IS NULL) as unverified_count,
COUNT(*) FILTER (WHERE status = 'dropped') as dropped_count,
COUNT(*) as total_count
FROM dispensaries
`;
const params: any[] = [];
const conditions: string[] = [];
if (state) {
conditions.push(`state = $${params.length + 1}`);
params.push(state);
}
if (city) {
conditions.push(`city ILIKE $${params.length + 1}`);
params.push(`%${city}%`);
}
if (conditions.length > 0) {
query += ` WHERE ${conditions.join(' AND ')}`;
}
const result = await pool.query(query, params);
res.json(result.rows[0]);
} catch (error) {
console.error('Error fetching crawl status stats:', error);
res.status(500).json({ error: 'Failed to fetch crawl status stats' });
}
});
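For reference, a hedged sketch of the shape this stats endpoint returns; node-postgres returns COUNT(*) values as strings, and the /api/dispensaries mount point is an assumption.

interface CrawlStatusStats {
  enabled_count: string;   // COUNT(*) comes back as a string from node-postgres
  disabled_count: string;
  verified_count: string;
  unverified_count: string;
  dropped_count: string;
  total_count: string;
}

// Assumed mount point: /api/dispensaries/stats/crawl-status
async function getCrawlStatusStats(baseUrl: string, state?: string): Promise<CrawlStatusStats> {
  const qs = state ? `?state=${encodeURIComponent(state)}` : '';
  const res = await fetch(`${baseUrl}/api/dispensaries/stats/crawl-status${qs}`);
  return res.json() as Promise<CrawlStatusStats>;
}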
// Get dropped stores count (for dashboard alert)
router.get('/stats/dropped', async (req, res) => {
try {
const result = await pool.query(`
SELECT
COUNT(*) as dropped_count,
json_agg(json_build_object(
'id', id,
'name', name,
'city', city,
'state', state,
'dropped_at', updated_at
) ORDER BY updated_at DESC) FILTER (WHERE status = 'dropped') as dropped_stores
FROM dispensaries
WHERE status = 'dropped'
`);
const row = result.rows[0];
res.json({
dropped_count: parseInt(row.dropped_count) || 0,
dropped_stores: row.dropped_stores || []
});
} catch (error) {
console.error('Error fetching dropped stores:', error);
res.status(500).json({ error: 'Failed to fetch dropped stores' });
}
});
// Get single dispensary by slug or ID
router.get('/:slugOrId', async (req, res) => {
try {
@@ -101,21 +235,36 @@ router.get('/:slugOrId', async (req, res) => {
SELECT
id,
name,
company_name,
slug,
address,
address1,
address2,
city,
state,
zip,
zipcode,
phone,
website,
email,
dba_name,
latitude,
longitude,
timezone,
menu_url,
menu_type,
platform,
platform_dispensary_id,
c_name,
chain_slug,
enterprise_id,
description,
logo_image,
banner_image,
offer_pickup,
offer_delivery,
offer_curbside_pickup,
is_medical,
is_recreational,
status,
country,
product_count,
last_crawl_at,
raw_metadata,
@@ -143,19 +292,34 @@ router.put('/:id', async (req, res) => {
const {
name,
dba_name,
company_name,
website,
phone,
address,
email,
address1,
address2,
city,
state,
zip,
zipcode,
latitude,
longitude,
timezone,
menu_url,
menu_type,
platform,
platform_dispensary_id,
c_name,
chain_slug,
enterprise_id,
description,
logo_image,
banner_image,
offer_pickup,
offer_delivery,
offer_curbside_pickup,
is_medical,
is_recreational,
status,
country,
slug,
} = req.body;
@@ -171,39 +335,69 @@ router.put('/:id', async (req, res) => {
SET
name = COALESCE($1, name),
dba_name = COALESCE($2, dba_name),
company_name = COALESCE($3, company_name),
website = COALESCE($4, website),
phone = COALESCE($5, phone),
address = COALESCE($6, address),
city = COALESCE($7, city),
state = COALESCE($8, state),
zip = COALESCE($9, zip),
latitude = COALESCE($10, latitude),
longitude = COALESCE($11, longitude),
menu_url = COALESCE($12, menu_url),
menu_type = COALESCE($13, menu_type),
platform = COALESCE($14, platform),
platform_dispensary_id = COALESCE($15, platform_dispensary_id),
slug = COALESCE($16, slug),
website = COALESCE($3, website),
phone = COALESCE($4, phone),
email = COALESCE($5, email),
address1 = COALESCE($6, address1),
address2 = COALESCE($7, address2),
city = COALESCE($8, city),
state = COALESCE($9, state),
zipcode = COALESCE($10, zipcode),
latitude = COALESCE($11, latitude),
longitude = COALESCE($12, longitude),
timezone = COALESCE($13, timezone),
menu_url = COALESCE($14, menu_url),
menu_type = COALESCE($15, menu_type),
platform = COALESCE($16, platform),
platform_dispensary_id = COALESCE($17, platform_dispensary_id),
c_name = COALESCE($18, c_name),
chain_slug = COALESCE($19, chain_slug),
enterprise_id = COALESCE($20, enterprise_id),
description = COALESCE($21, description),
logo_image = COALESCE($22, logo_image),
banner_image = COALESCE($23, banner_image),
offer_pickup = COALESCE($24, offer_pickup),
offer_delivery = COALESCE($25, offer_delivery),
offer_curbside_pickup = COALESCE($26, offer_curbside_pickup),
is_medical = COALESCE($27, is_medical),
is_recreational = COALESCE($28, is_recreational),
status = COALESCE($29, status),
country = COALESCE($30, country),
slug = COALESCE($31, slug),
updated_at = CURRENT_TIMESTAMP
WHERE id = $17
WHERE id = $32
RETURNING *
`, [
name,
dba_name,
company_name,
website,
phone,
address,
email,
address1,
address2,
city,
state,
zip,
zipcode,
latitude,
longitude,
timezone,
menu_url,
menu_type,
platform,
platform_dispensary_id,
c_name,
chain_slug,
enterprise_id,
description,
logo_image,
banner_image,
offer_pickup,
offer_delivery,
offer_curbside_pickup,
is_medical,
is_recreational,
status,
country,
slug,
id
]);
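Because every column is wrapped in COALESCE($n, column), omitted or null fields keep their current value, so callers can send partial updates. A minimal sketch of such a call, with the /api/dispensaries base path assumed:

// Only the provided keys are changed; everything else is preserved by COALESCE.
async function updateDispensary(baseUrl: string, id: number, patch: Record<string, unknown>) {
  const res = await fetch(`${baseUrl}/api/dispensaries/${id}`, {
    method: 'PUT',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(patch),
  });
  if (!res.ok) throw new Error(`Update failed: ${res.status}`);
  return res.json();
}

// Example: toggle delivery and fix the phone number without touching other fields.
// await updateDispensary('https://api.example.com', 123, { offer_delivery: true, phone: '555-0100' });

One consequence of this pattern is that a field cannot be cleared back to NULL through this endpoint.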
@@ -236,40 +430,42 @@ router.get('/:slug/products', async (req, res) => {
const dispensaryId = dispensaryResult.rows[0].id;
// Build query for products
// Build query for products using canonical store_products table
let query = `
SELECT
p.id,
p.name,
p.brand,
p.variant,
p.slug,
p.description,
p.regular_price,
p.sale_price,
p.thc_percentage,
p.cbd_percentage,
p.strain_type,
p.terpenes,
p.effects,
p.flavors,
p.image_url,
p.dutchie_url,
p.in_stock,
p.created_at,
p.updated_at
FROM products p
WHERE p.dispensary_id = $1
sp.id,
sp.name_raw as name,
sp.brand_name_raw as brand,
sp.description,
COALESCE(sp.stock_quantity, sp.total_quantity_available, 0) as quantity,
sp.price_rec as regular_price,
CASE WHEN sp.price_rec_special IS NOT NULL AND sp.price_rec_special > 0
THEN sp.price_rec_special
ELSE NULL END as sale_price,
sp.thc_percent as thc_percentage,
sp.cbd_percent as cbd_percentage,
sp.strain_type,
sp.effects,
sp.primary_image_url as image_url,
sp.stock_status,
sp.stock_status = 'in_stock' as in_stock,
sp.is_on_special as on_special,
sp.category_raw as category,
sp.subcategory_raw as subcategory,
sp.created_at,
sp.updated_at
FROM store_products sp
WHERE sp.dispensary_id = $1
`;
const params: any[] = [dispensaryId];
if (category) {
query += ` AND p.category = $2`;
query += ` AND sp.category_raw = $2`;
params.push(category);
}
query += ` ORDER BY p.created_at DESC`;
query += ` ORDER BY sp.name_raw ASC`;
const result = await pool.query(query, params);
@@ -297,23 +493,23 @@ router.get('/:slug/brands', async (req, res) => {
const dispensaryId = dispensaryResult.rows[0].id;
// Build query with optional search filter
// Build query with optional search filter using canonical tables
let query = `
SELECT DISTINCT
brand,
brand_name as brand,
COUNT(*) as product_count
FROM products
WHERE dispensary_id = $1 AND brand IS NOT NULL
FROM v_products
WHERE dispensary_id = $1 AND brand_name IS NOT NULL
`;
const params: any[] = [dispensaryId];
// Add search filter if provided
if (search) {
query += ` AND brand ILIKE $2`;
query += ` AND brand_name ILIKE $2`;
params.push(`%${search}%`);
}
query += ` GROUP BY brand ORDER BY product_count DESC, brand ASC`;
query += ` GROUP BY brand_name ORDER BY product_count DESC, brand_name ASC`;
const result = await pool.query(query, params);
@@ -341,44 +537,48 @@ router.get('/:slug/specials', async (req, res) => {
const dispensaryId = dispensaryResult.rows[0].id;
// Build query to get products with discounts
// Build query to get products with specials/discounts using canonical tables
let query = `
SELECT
p.id,
p.name,
p.brand,
p.variant,
p.slug,
p.description,
p.regular_price,
p.sale_price,
p.discount_type,
p.discount_value,
p.thc_percentage,
p.cbd_percentage,
p.brand_name as brand,
p.subcategory as variant,
sp.description,
COALESCE(snap.rec_min_price_cents, 0)::numeric / 100.0 as regular_price,
snap.rec_min_special_price_cents::numeric / 100.0 as sale_price,
snap.discount_percent,
p.thc as thc_percentage,
p.cbd as cbd_percentage,
p.strain_type,
p.terpenes,
p.effects,
p.flavors,
p.image_url,
p.dutchie_url,
p.in_stock,
sp.effects,
p.primary_image_url as image_url,
p.stock_status = 'in_stock' as in_stock,
p.stock_status,
true as on_special,
p.created_at,
p.updated_at
FROM products p
FROM v_products p
JOIN store_products sp ON sp.id = p.id
INNER JOIN LATERAL (
SELECT rec_min_price_cents, rec_min_special_price_cents, discount_percent, special
FROM v_product_snapshots vps
WHERE vps.store_product_id = p.id
AND (vps.special = true OR vps.rec_min_special_price_cents > 0)
ORDER BY vps.crawled_at DESC
LIMIT 1
) snap ON true
WHERE p.dispensary_id = $1
AND p.discount_type IS NOT NULL
AND p.discount_value IS NOT NULL
`;
const params: any[] = [dispensaryId];
// Add search filter if provided
if (search) {
query += ` AND (p.name ILIKE $2 OR p.brand ILIKE $2 OR p.description ILIKE $2)`;
query += ` AND (p.name ILIKE $2 OR p.brand_name ILIKE $2 OR sp.description ILIKE $2)`;
params.push(`%${search}%`);
}
query += ` ORDER BY p.created_at DESC`;
query += ` ORDER BY p.updated_at DESC`;
const result = await pool.query(query, params);

View File

@@ -22,11 +22,17 @@ interface ProductClickEventPayload {
store_id?: string;
brand_id?: string;
campaign_id?: string;
dispensary_name?: string;
action: 'view' | 'open_store' | 'open_product' | 'compare' | 'other';
source: string;
page_type?: string; // Page where event occurred (e.g., StoreDetailPage, BrandsIntelligence)
url_path?: string; // URL path for debugging
occurred_at?: string;
// Visitor location (from frontend IP geolocation)
visitor_city?: string;
visitor_state?: string;
visitor_lat?: number;
visitor_lng?: number;
}
/**
@@ -77,13 +83,14 @@ router.post('/product-click', optionalAuthMiddleware, async (req: Request, res:
// Insert the event with enhanced fields
await pool.query(
`INSERT INTO product_click_events
(product_id, store_id, brand_id, campaign_id, action, source, user_id, ip_address, user_agent, occurred_at, event_type, page_type, url_path, device_type)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14)`,
(product_id, store_id, brand_id, campaign_id, dispensary_name, action, source, user_id, ip_address, user_agent, occurred_at, event_type, page_type, url_path, device_type, visitor_city, visitor_state, visitor_lat, visitor_lng)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19)`,
[
payload.product_id,
payload.store_id || null,
payload.brand_id || null,
payload.campaign_id || null,
payload.dispensary_name || null,
payload.action,
payload.source,
userId,
@@ -93,7 +100,11 @@ router.post('/product-click', optionalAuthMiddleware, async (req: Request, res:
'product_click', // event_type
payload.page_type || null,
payload.url_path || null,
deviceType
deviceType,
payload.visitor_city || null,
payload.visitor_state || null,
payload.visitor_lat || null,
payload.visitor_lng || null
]
);
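A sketch of a tracking call that exercises the new dispensary_name and visitor location fields; the mount path and anything not in the interface above are assumptions.

// Fire-and-forget click tracking from the storefront (illustrative; assumed /api/analytics prefix).
async function trackProductClick(baseUrl: string, payload: {
  product_id: string;
  store_id?: string;
  dispensary_name?: string;
  action: 'view' | 'open_store' | 'open_product' | 'compare' | 'other';
  source: string;
  visitor_city?: string;
  visitor_state?: string;
  visitor_lat?: number;
  visitor_lng?: number;
}) {
  try {
    await fetch(`${baseUrl}/api/analytics/product-click`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ ...payload, occurred_at: new Date().toISOString() }),
    });
  } catch {
    // Tracking must never break the page; swallow network errors.
  }
}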

View File

@@ -45,6 +45,8 @@ interface ApiHealth extends HealthStatus {
uptime: number;
timestamp: string;
version: string;
build_sha: string | null;
build_time: string | null;
}
interface DbHealth extends HealthStatus {
@@ -113,6 +115,8 @@ async function getApiHealth(): Promise<ApiHealth> {
uptime: Math.floor((Date.now() - serverStartTime) / 1000),
timestamp: new Date().toISOString(),
version: packageVersion,
build_sha: process.env.APP_GIT_SHA && process.env.APP_GIT_SHA !== 'unknown' ? process.env.APP_GIT_SHA : null,
build_time: process.env.APP_BUILD_TIME && process.env.APP_BUILD_TIME !== 'unknown' ? process.env.APP_BUILD_TIME : null,
};
}
@@ -138,14 +142,16 @@ async function getDbHealth(): Promise<DbHealth> {
async function getRedisHealth(): Promise<RedisHealth> {
const start = Date.now();
const isLocal = process.env.NODE_ENV === 'development' || process.env.NODE_ENV === 'local' || !process.env.NODE_ENV;
// Check if Redis is configured
if (!process.env.REDIS_URL && !process.env.REDIS_HOST) {
// Redis is optional in local dev, required in prod/staging
return {
status: 'ok', // Redis is optional
status: isLocal ? 'ok' : 'error',
connected: false,
latency_ms: 0,
error: 'Redis not configured',
error: isLocal ? 'Redis not configured (optional in local)' : 'Redis not configured (required in production)',
};
}
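Since Redis is now treated as required outside local development, deployments are expected to set REDIS_URL or REDIS_HOST. A small startup-guard sketch mirroring the same check (placement and error wording are assumptions):

// Fail fast at boot in prod/staging if Redis is unconfigured, matching the health check above.
const isLocalEnv = ['development', 'local', undefined].includes(process.env.NODE_ENV);
if (!isLocalEnv && !process.env.REDIS_URL && !process.env.REDIS_HOST) {
  throw new Error('REDIS_URL or REDIS_HOST must be set outside local development');
}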

View File

@@ -0,0 +1,214 @@
/**
* Image Proxy Route
*
* On-demand image resizing service. Serves images with URL-based transforms.
*
* Usage:
* /img/<path>?w=200&h=200&q=80&fit=cover
*
* Parameters:
* w - width (pixels)
* h - height (pixels)
* q - quality (1-100, default 80)
* fit - resize fit: cover, contain, fill, inside, outside (default: inside)
* blur - blur sigma (0.3-1000)
* gray - grayscale (1 = enabled)
* format - output format: webp, jpeg, png, avif (default: webp)
*
* Examples:
* /img/products/az/store/brand/product/image.webp?w=200
* /img/products/az/store/brand/product/image.webp?w=600&h=400&fit=cover
* /img/products/az/store/brand/product/image.webp?w=100&blur=5&gray=1
*/
import { Router, Request, Response } from 'express';
import * as fs from 'fs/promises';
import * as path from 'path';
// @ts-ignore
const sharp = require('sharp');
const router = Router();
// Base path for images
function getImagesBasePath(): string {
if (process.env.IMAGES_PATH) {
return process.env.IMAGES_PATH;
}
if (process.env.STORAGE_BASE_PATH) {
return path.join(process.env.STORAGE_BASE_PATH, 'images');
}
return './storage/images';
}
const IMAGES_BASE_PATH = getImagesBasePath();
// Allowed fit modes
const ALLOWED_FITS = ['cover', 'contain', 'fill', 'inside', 'outside'] as const;
type FitMode = typeof ALLOWED_FITS[number];
// Allowed formats
const ALLOWED_FORMATS = ['webp', 'jpeg', 'jpg', 'png', 'avif'] as const;
type OutputFormat = typeof ALLOWED_FORMATS[number];
// Cache headers (1 year for immutable content-addressed images)
const CACHE_MAX_AGE = 31536000; // 1 year in seconds
interface TransformParams {
width?: number;
height?: number;
quality: number;
fit: FitMode;
blur?: number;
grayscale: boolean;
format: OutputFormat;
}
function parseTransformParams(query: any): TransformParams {
return {
width: query.w ? Math.min(Math.max(parseInt(query.w, 10), 1), 4000) : undefined,
height: query.h ? Math.min(Math.max(parseInt(query.h, 10), 1), 4000) : undefined,
quality: query.q ? Math.min(Math.max(parseInt(query.q, 10), 1), 100) : 80,
fit: ALLOWED_FITS.includes(query.fit) ? query.fit : 'inside',
blur: query.blur ? Math.min(Math.max(parseFloat(query.blur), 0.3), 1000) : undefined,
grayscale: query.gray === '1' || query.grayscale === '1',
format: ALLOWED_FORMATS.includes(query.format) ? query.format : 'webp',
};
}
function getContentType(format: OutputFormat): string {
switch (format) {
case 'jpeg':
case 'jpg':
return 'image/jpeg';
case 'png':
return 'image/png';
case 'avif':
return 'image/avif';
case 'webp':
default:
return 'image/webp';
}
}
/**
* Image proxy endpoint
* GET /img/*
*/
router.get('/*', async (req: Request, res: Response) => {
try {
// Get the image path from URL (everything after /img/)
const imagePath = req.params[0];
if (!imagePath) {
return res.status(400).json({ error: 'Image path required' });
}
// Security: prevent directory traversal
const normalizedPath = path.normalize(imagePath).replace(/^(\.\.(\/|\\|$))+/, '');
const basePath = path.resolve(IMAGES_BASE_PATH);
const fullPath = path.resolve(path.join(IMAGES_BASE_PATH, normalizedPath));
// Ensure path is within base directory
if (!fullPath.startsWith(basePath)) {
console.error(`[ImageProxy] Path traversal attempt: ${fullPath} not in ${basePath}`);
return res.status(403).json({ error: 'Access denied' });
}
// Check if file exists
try {
await fs.access(fullPath);
} catch {
return res.status(404).json({ error: 'Image not found' });
}
// Parse transform parameters
const params = parseTransformParams(req.query);
// Check if any transforms are requested
const hasTransforms = params.width || params.height || params.blur || params.grayscale;
// Read the original image
const imageBuffer = await fs.readFile(fullPath);
let outputBuffer: Buffer;
if (hasTransforms) {
// Apply transforms
let pipeline = sharp(imageBuffer);
// Resize
if (params.width || params.height) {
pipeline = pipeline.resize(params.width, params.height, {
fit: params.fit,
withoutEnlargement: true,
});
}
// Blur
if (params.blur) {
pipeline = pipeline.blur(params.blur);
}
// Grayscale
if (params.grayscale) {
pipeline = pipeline.grayscale();
}
// Output format
switch (params.format) {
case 'jpeg':
case 'jpg':
pipeline = pipeline.jpeg({ quality: params.quality });
break;
case 'png':
pipeline = pipeline.png({ quality: params.quality });
break;
case 'avif':
pipeline = pipeline.avif({ quality: params.quality });
break;
case 'webp':
default:
pipeline = pipeline.webp({ quality: params.quality });
}
outputBuffer = await pipeline.toBuffer();
} else {
// No transforms - serve original (but maybe convert format)
if (params.format !== 'webp' || params.quality !== 80) {
let pipeline = sharp(imageBuffer);
switch (params.format) {
case 'jpeg':
case 'jpg':
pipeline = pipeline.jpeg({ quality: params.quality });
break;
case 'png':
pipeline = pipeline.png({ quality: params.quality });
break;
case 'avif':
pipeline = pipeline.avif({ quality: params.quality });
break;
case 'webp':
default:
pipeline = pipeline.webp({ quality: params.quality });
}
outputBuffer = await pipeline.toBuffer();
} else {
outputBuffer = imageBuffer;
}
}
// Set headers
res.setHeader('Content-Type', getContentType(params.format));
res.setHeader('Cache-Control', `public, max-age=${CACHE_MAX_AGE}, immutable`);
res.setHeader('X-Image-Size', outputBuffer.length);
// Send image
res.send(outputBuffer);
} catch (error: any) {
console.error('[ImageProxy] Error:', error.message);
res.status(500).json({ error: 'Failed to process image' });
}
});
export default router;
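On the consuming side, the transform parameters documented at the top of this file can be assembled into a URL. A small helper sketch; the parameter names come from the route above, while the helper itself and the /img mount point are assumptions.

interface ImgOptions {
  w?: number;
  h?: number;
  q?: number;
  fit?: 'cover' | 'contain' | 'fill' | 'inside' | 'outside';
  blur?: number;
  gray?: boolean;
  format?: 'webp' | 'jpeg' | 'png' | 'avif';
}

// Build /img/<path>?w=...&h=... style URLs for the proxy above.
function imgUrl(imagePath: string, opts: ImgOptions = {}): string {
  const qs = new URLSearchParams();
  if (opts.w) qs.set('w', String(opts.w));
  if (opts.h) qs.set('h', String(opts.h));
  if (opts.q) qs.set('q', String(opts.q));
  if (opts.fit) qs.set('fit', opts.fit);
  if (opts.blur) qs.set('blur', String(opts.blur));
  if (opts.gray) qs.set('gray', '1');
  if (opts.format) qs.set('format', opts.format);
  const query = qs.toString();
  return `/img/${imagePath}${query ? `?${query}` : ''}`;
}

// imgUrl('products/az/store/brand/product/image.webp', { w: 200, fit: 'cover' })
// -> "/img/products/az/store/brand/product/image.webp?w=200&fit=cover"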

View File

@@ -0,0 +1,253 @@
/**
* Intelligence API Routes
*
* Brand and pricing intelligence endpoints for the CannaiQ admin dashboard.
* Uses canonical store_products table for aggregated analytics.
*/
import { Router, Request, Response } from 'express';
import { authMiddleware } from '../auth/middleware';
import { pool } from '../db/pool';
const router = Router();
router.use(authMiddleware);
/**
* GET /api/admin/intelligence/brands
* List all brands with state presence, store counts, and pricing
*/
router.get('/brands', async (req: Request, res: Response) => {
try {
const { limit = '500', offset = '0' } = req.query;
const limitNum = Math.min(parseInt(limit as string, 10), 1000);
const offsetNum = parseInt(offset as string, 10);
const { rows } = await pool.query(`
SELECT
sp.brand_name_raw as brand_name,
array_agg(DISTINCT d.state) FILTER (WHERE d.state IS NOT NULL) as states,
COUNT(DISTINCT d.id) as store_count,
COUNT(DISTINCT sp.id) as sku_count,
ROUND(AVG(sp.price_rec) FILTER (WHERE sp.price_rec > 0)::numeric, 2) as avg_price_rec,
ROUND(AVG(sp.price_med) FILTER (WHERE sp.price_med > 0)::numeric, 2) as avg_price_med
FROM store_products sp
JOIN dispensaries d ON sp.dispensary_id = d.id
WHERE sp.brand_name_raw IS NOT NULL AND sp.brand_name_raw != ''
GROUP BY sp.brand_name_raw
ORDER BY store_count DESC, sku_count DESC
LIMIT $1 OFFSET $2
`, [limitNum, offsetNum]);
// Get total count
const { rows: countRows } = await pool.query(`
SELECT COUNT(DISTINCT brand_name_raw) as total
FROM store_products
WHERE brand_name_raw IS NOT NULL AND brand_name_raw != ''
`);
res.json({
brands: rows.map((r: any) => ({
brandName: r.brand_name,
states: r.states || [],
storeCount: parseInt(r.store_count, 10),
skuCount: parseInt(r.sku_count, 10),
avgPriceRec: r.avg_price_rec ? parseFloat(r.avg_price_rec) : null,
avgPriceMed: r.avg_price_med ? parseFloat(r.avg_price_med) : null,
})),
total: parseInt(countRows[0]?.total || '0', 10),
limit: limitNum,
offset: offsetNum,
});
} catch (error: any) {
console.error('[Intelligence] Error fetching brands:', error.message);
res.status(500).json({ error: error.message });
}
});
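A sketch of consuming the camelCase payload built above; the /api/admin/intelligence prefix comes from the file header, the bearer-token header is illustrative.

interface BrandRow {
  brandName: string;
  states: string[];
  storeCount: number;
  skuCount: number;
  avgPriceRec: number | null;
  avgPriceMed: number | null;
}

async function fetchBrands(baseUrl: string, token: string, limit = 500, offset = 0) {
  const res = await fetch(
    `${baseUrl}/api/admin/intelligence/brands?limit=${limit}&offset=${offset}`,
    { headers: { Authorization: `Bearer ${token}` } }
  );
  return (await res.json()) as { brands: BrandRow[]; total: number; limit: number; offset: number };
}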
/**
* GET /api/admin/intelligence/brands/:brandName/penetration
* Get state-by-state penetration for a specific brand
*/
router.get('/brands/:brandName/penetration', async (req: Request, res: Response) => {
try {
const { brandName } = req.params;
const { rows } = await pool.query(`
WITH state_totals AS (
SELECT
d.state,
s.name AS state_name,
COUNT(DISTINCT d.id) AS total_stores
FROM dispensaries d
JOIN states s ON d.state = s.code
WHERE d.state IS NOT NULL
GROUP BY d.state, s.name
),
brand_presence AS (
SELECT
d.state,
COUNT(DISTINCT d.id) AS stores_with_brand,
COUNT(DISTINCT sp.id) AS product_count,
ROUND(AVG(sp.price_rec)::numeric, 2) AS avg_price
FROM store_products sp
JOIN dispensaries d ON sp.dispensary_id = d.id
WHERE sp.brand_name_raw ILIKE $1
AND d.state IS NOT NULL
GROUP BY d.state
)
SELECT
st.state,
st.state_name AS "stateName",
st.total_stores AS "totalStores",
COALESCE(bp.stores_with_brand, 0) AS "storesWithBrand",
CASE
WHEN st.total_stores > 0
THEN ROUND((COALESCE(bp.stores_with_brand, 0)::numeric / st.total_stores) * 100, 2)
ELSE 0
END AS "penetrationPct",
COALESCE(bp.product_count, 0) AS "productCount",
bp.avg_price AS "avgPrice"
FROM state_totals st
LEFT JOIN brand_presence bp ON st.state = bp.state
WHERE COALESCE(bp.stores_with_brand, 0) > 0
ORDER BY COALESCE(bp.stores_with_brand, 0) DESC
`, [brandName]);
// Calculate national metrics
const { rows: nationalRows } = await pool.query(`
SELECT
COUNT(DISTINCT d.id) AS total_stores,
COUNT(DISTINCT CASE WHEN sp.brand_name_raw ILIKE $1 THEN d.id END) AS stores_with_brand,
AVG(sp.price_rec) FILTER (WHERE sp.brand_name_raw ILIKE $1) AS avg_price
FROM dispensaries d
LEFT JOIN store_products sp ON d.id = sp.dispensary_id
WHERE d.state IS NOT NULL
`, [brandName]);
const national = nationalRows[0];
const nationalPenetration = national.total_stores > 0
? (national.stores_with_brand / national.total_stores) * 100
: 0;
res.json({
brandName,
states: rows,
nationalPenetration: Math.round(nationalPenetration * 100) / 100,
nationalAvgPrice: national.avg_price
? Math.round(parseFloat(national.avg_price) * 100) / 100
: null,
bestPerformingState: rows[0]?.state || null,
worstPerformingState: rows[rows.length - 1]?.state || null,
});
} catch (error: any) {
console.error('[Intelligence] Error fetching brand penetration:', error.message);
res.status(500).json({ error: error.message });
}
});
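The penetration figures reduce to a simple ratio: stores carrying the brand over total tracked stores, per state and nationally. A small worked sketch of the same arithmetic the SQL performs, with made-up numbers:

// penetrationPct = storesWithBrand / totalStores * 100, rounded to 2 decimals.
function penetrationPct(storesWithBrand: number, totalStores: number): number {
  if (totalStores === 0) return 0;
  return Math.round((storesWithBrand / totalStores) * 10000) / 100;
}

// Example: a brand stocked in 42 of a state's 168 tracked stores -> 25% penetration.
console.log(penetrationPct(42, 168)); // 25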
/**
* GET /api/admin/intelligence/pricing
* Get pricing analytics by category
*/
router.get('/pricing', async (req: Request, res: Response) => {
try {
const { rows: categoryRows } = await pool.query(`
SELECT
sp.category_raw as category,
ROUND(AVG(sp.price_rec)::numeric, 2) as avg_price,
MIN(sp.price_rec) as min_price,
MAX(sp.price_rec) as max_price,
ROUND(PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY sp.price_rec)::numeric, 2) as median_price,
COUNT(*) as product_count
FROM store_products sp
WHERE sp.category_raw IS NOT NULL AND sp.price_rec > 0
GROUP BY sp.category_raw
ORDER BY product_count DESC
`);
const { rows: stateRows } = await pool.query(`
SELECT
d.state,
ROUND(AVG(sp.price_rec)::numeric, 2) as avg_price,
MIN(sp.price_rec) as min_price,
MAX(sp.price_rec) as max_price,
COUNT(DISTINCT sp.id) as product_count
FROM store_products sp
JOIN dispensaries d ON sp.dispensary_id = d.id
WHERE d.state IS NOT NULL AND sp.price_rec > 0
GROUP BY d.state
ORDER BY avg_price DESC
`);
res.json({
byCategory: categoryRows.map((r: any) => ({
category: r.category,
avgPrice: r.avg_price ? parseFloat(r.avg_price) : null,
minPrice: r.min_price ? parseFloat(r.min_price) : null,
maxPrice: r.max_price ? parseFloat(r.max_price) : null,
medianPrice: r.median_price ? parseFloat(r.median_price) : null,
productCount: parseInt(r.product_count, 10),
})),
byState: stateRows.map((r: any) => ({
state: r.state,
avgPrice: r.avg_price ? parseFloat(r.avg_price) : null,
minPrice: r.min_price ? parseFloat(r.min_price) : null,
maxPrice: r.max_price ? parseFloat(r.max_price) : null,
productCount: parseInt(r.product_count, 10),
})),
});
} catch (error: any) {
console.error('[Intelligence] Error fetching pricing:', error.message);
res.status(500).json({ error: error.message });
}
});
/**
* GET /api/admin/intelligence/stores
* Get store intelligence summary
*/
router.get('/stores', async (req: Request, res: Response) => {
try {
const { rows: storeRows } = await pool.query(`
SELECT
d.id,
d.name,
d.dba_name,
d.city,
d.state,
d.menu_type,
d.crawl_enabled,
COUNT(DISTINCT sp.id) as product_count,
COUNT(DISTINCT sp.brand_name_raw) as brand_count,
ROUND(AVG(sp.price_rec)::numeric, 2) as avg_price,
MAX(sp.updated_at) as last_product_update
FROM dispensaries d
LEFT JOIN store_products sp ON sp.dispensary_id = d.id
WHERE d.state IS NOT NULL
GROUP BY d.id, d.name, d.dba_name, d.city, d.state, d.menu_type, d.crawl_enabled
ORDER BY product_count DESC
LIMIT 200
`);
res.json({
stores: storeRows.map((r: any) => ({
id: r.id,
name: r.name,
dbaName: r.dba_name,
city: r.city,
state: r.state,
menuType: r.menu_type,
crawlEnabled: r.crawl_enabled,
productCount: parseInt(r.product_count || '0', 10),
brandCount: parseInt(r.brand_count || '0', 10),
avgPrice: r.avg_price ? parseFloat(r.avg_price) : null,
lastProductUpdate: r.last_product_update,
})),
total: storeRows.length,
});
} catch (error: any) {
console.error('[Intelligence] Error fetching stores:', error.message);
res.status(500).json({ error: error.message });
}
});
export default router;

View File

@@ -0,0 +1,773 @@
/**
* Job Queue Management API Routes
*
* Endpoints for viewing and managing the crawl job queue:
* GET /api/job-queue - List all jobs (with filters)
* GET /api/job-queue/stats - Queue statistics
* GET /api/job-queue/:id - Get single job details
* PUT /api/job-queue/:id/priority - Update job priority
* POST /api/job-queue/:id/cancel - Cancel a pending job
* POST /api/job-queue/:id/retry - Retry a failed job
* POST /api/job-queue/bulk-priority - Bulk update priorities
* POST /api/job-queue/pause - Pause queue processing
* POST /api/job-queue/resume - Resume queue processing
*/
import { Router, Request, Response } from 'express';
import { pool } from '../db/pool';
const router = Router();
// In-memory queue state (would be in Redis in production)
let queuePaused = false;
/**
* GET /api/job-queue - List jobs with filters
*/
router.get('/', async (req: Request, res: Response) => {
try {
const {
status = 'pending',
limit = '50',
offset = '0',
job_type,
dispensary_id,
sort_by = 'priority',
sort_order = 'desc'
} = req.query;
let query = `
SELECT
j.id,
j.dispensary_id,
d.name as dispensary_name,
d.city,
d.state,
j.job_type,
j.trigger_type,
j.priority,
j.status,
j.scheduled_at,
j.started_at,
j.completed_at,
j.duration_ms,
j.products_found,
j.error_message,
j.retry_count,
j.max_retries,
j.worker_id,
j.locked_by,
j.created_at
FROM dispensary_crawl_jobs j
LEFT JOIN dispensaries d ON d.id = j.dispensary_id
WHERE 1=1
`;
const params: any[] = [];
let paramIndex = 1;
if (status && status !== 'all') {
params.push(status);
query += ` AND j.status = $${paramIndex++}`;
}
if (job_type) {
params.push(job_type);
query += ` AND j.job_type = $${paramIndex++}`;
}
if (dispensary_id) {
params.push(dispensary_id);
query += ` AND j.dispensary_id = $${paramIndex++}`;
}
// Sorting
const validSortColumns = ['priority', 'created_at', 'scheduled_at', 'dispensary_name'];
const sortCol = validSortColumns.includes(sort_by as string) ? sort_by : 'priority';
const sortDir = sort_order === 'asc' ? 'ASC' : 'DESC';
if (sortCol === 'dispensary_name') {
query += ` ORDER BY d.name ${sortDir} NULLS LAST`;
} else {
query += ` ORDER BY j.${sortCol} ${sortDir} NULLS LAST`;
}
// Add secondary sort by created_at for consistent ordering
if (sortCol !== 'created_at') {
query += `, j.created_at ASC`;
}
params.push(parseInt(limit as string));
query += ` LIMIT $${paramIndex++}`;
params.push(parseInt(offset as string));
query += ` OFFSET $${paramIndex++}`;
const { rows } = await pool.query(query, params);
// Get total count for pagination
let countQuery = `
SELECT COUNT(*) as total
FROM dispensary_crawl_jobs j
WHERE 1=1
`;
const countParams: any[] = [];
let countParamIndex = 1;
if (status && status !== 'all') {
countParams.push(status);
countQuery += ` AND j.status = $${countParamIndex++}`;
}
if (job_type) {
countParams.push(job_type);
countQuery += ` AND j.job_type = $${countParamIndex++}`;
}
if (dispensary_id) {
countParams.push(dispensary_id);
countQuery += ` AND j.dispensary_id = $${countParamIndex++}`;
}
const countResult = await pool.query(countQuery, countParams);
const total = parseInt(countResult.rows[0].total);
res.json({
success: true,
jobs: rows,
total,
limit: parseInt(limit as string),
offset: parseInt(offset as string),
queue_paused: queuePaused
});
} catch (error: any) {
console.error('[JobQueue] Error listing jobs:', error);
res.status(500).json({ success: false, error: error.message });
}
});
/**
* GET /api/job-queue/available - List dispensaries available for crawling
* Query: { state_code?: string, limit?: number }
* NOTE: Must be defined BEFORE /:id route to avoid conflict
*/
router.get('/available', async (req: Request, res: Response) => {
try {
const { state_code, limit = '100' } = req.query;
let query = `
SELECT
d.id,
d.name,
d.city,
s.code as state_code,
d.platform_dispensary_id,
d.crawl_enabled,
(SELECT MAX(created_at) FROM dispensary_crawl_jobs WHERE dispensary_id = d.id AND status = 'completed') as last_crawl,
EXISTS (
SELECT 1 FROM dispensary_crawl_jobs
WHERE dispensary_id = d.id AND status IN ('pending', 'running')
) as has_pending_job
FROM dispensaries d
LEFT JOIN states s ON s.id = d.state_id
WHERE d.crawl_enabled = true
AND d.platform_dispensary_id IS NOT NULL
`;
const params: any[] = [];
let paramIndex = 1;
if (state_code) {
params.push((state_code as string).toUpperCase());
query += ` AND s.code = $${paramIndex++}`;
}
query += ` ORDER BY d.name LIMIT $${paramIndex}`;
params.push(parseInt(limit as string));
const { rows } = await pool.query(query, params);
// Get counts by state
const { rows: stateCounts } = await pool.query(`
SELECT s.code, COUNT(*) as count
FROM dispensaries d
JOIN states s ON s.id = d.state_id
WHERE d.crawl_enabled = true
AND d.platform_dispensary_id IS NOT NULL
GROUP BY s.code
ORDER BY count DESC
`);
res.json({
success: true,
dispensaries: rows,
total: rows.length,
by_state: stateCounts
});
} catch (error: any) {
console.error('[JobQueue] Error listing available:', error);
res.status(500).json({ success: false, error: error.message });
}
});
/**
* GET /api/job-queue/history - Get recent job history with results
* Query: { state_code?: string, status?: string, limit?: number, hours?: number }
* NOTE: Must be defined BEFORE /:id route to avoid conflict
*/
router.get('/history', async (req: Request, res: Response) => {
try {
const {
state_code,
status,
limit = '50',
hours = '24'
} = req.query;
let query = `
SELECT
j.id,
j.dispensary_id,
d.name as dispensary_name,
s.code as state_code,
j.job_type,
j.status,
j.products_found,
j.error_message,
j.started_at,
j.completed_at,
j.duration_ms,
j.created_at
FROM dispensary_crawl_jobs j
LEFT JOIN dispensaries d ON d.id = j.dispensary_id
LEFT JOIN states s ON s.id = d.state_id
WHERE j.created_at > NOW() - INTERVAL '${parseInt(hours as string)} hours'
`;
const params: any[] = [];
let paramIndex = 1;
if (status && status !== 'all') {
params.push(status);
query += ` AND j.status = $${paramIndex++}`;
}
if (state_code) {
params.push((state_code as string).toUpperCase());
query += ` AND s.code = $${paramIndex++}`;
}
query += ` ORDER BY j.created_at DESC LIMIT $${paramIndex}`;
params.push(parseInt(limit as string));
const { rows } = await pool.query(query, params);
// Get summary stats
const { rows: stats } = await pool.query(`
SELECT
COUNT(*) FILTER (WHERE status = 'completed') as completed,
COUNT(*) FILTER (WHERE status = 'failed') as failed,
COUNT(*) FILTER (WHERE status = 'running') as running,
COUNT(*) FILTER (WHERE status = 'pending') as pending,
SUM(products_found) FILTER (WHERE status = 'completed') as total_products,
AVG(duration_ms) FILTER (WHERE status = 'completed') as avg_duration_ms
FROM dispensary_crawl_jobs
WHERE created_at > NOW() - INTERVAL '${parseInt(hours as string)} hours'
`);
res.json({
success: true,
jobs: rows,
summary: {
completed: parseInt(stats[0].completed) || 0,
failed: parseInt(stats[0].failed) || 0,
running: parseInt(stats[0].running) || 0,
pending: parseInt(stats[0].pending) || 0,
total_products: parseInt(stats[0].total_products) || 0,
avg_duration_ms: Math.round(parseFloat(stats[0].avg_duration_ms)) || null
},
hours: parseInt(hours as string)
});
} catch (error: any) {
console.error('[JobQueue] Error getting history:', error);
res.status(500).json({ success: false, error: error.message });
}
});
/**
* GET /api/job-queue/stats - Queue statistics
*/
router.get('/stats', async (_req: Request, res: Response) => {
try {
const { rows } = await pool.query(`
SELECT
COUNT(*) FILTER (WHERE status = 'pending') as pending_count,
COUNT(*) FILTER (WHERE status = 'running') as running_count,
COUNT(*) FILTER (WHERE status = 'completed' AND completed_at > NOW() - INTERVAL '24 hours') as completed_24h,
COUNT(*) FILTER (WHERE status = 'failed' AND completed_at > NOW() - INTERVAL '24 hours') as failed_24h,
COUNT(*) FILTER (WHERE status = 'cancelled') as cancelled_count,
AVG(duration_ms) FILTER (WHERE status = 'completed' AND completed_at > NOW() - INTERVAL '24 hours') as avg_duration_ms,
MAX(priority) FILTER (WHERE status = 'pending') as max_priority,
MIN(created_at) FILTER (WHERE status = 'pending') as oldest_pending
FROM dispensary_crawl_jobs
`);
const stats = rows[0];
// Get jobs by type
const { rows: byType } = await pool.query(`
SELECT job_type, COUNT(*) as count
FROM dispensary_crawl_jobs
WHERE status = 'pending'
GROUP BY job_type
ORDER BY count DESC
`);
// Get top priority jobs
const { rows: topPriority } = await pool.query(`
SELECT
j.id,
j.dispensary_id,
d.name as dispensary_name,
j.job_type,
j.priority,
j.created_at
FROM dispensary_crawl_jobs j
LEFT JOIN dispensaries d ON d.id = j.dispensary_id
WHERE j.status = 'pending'
ORDER BY j.priority DESC, j.created_at ASC
LIMIT 5
`);
// Estimate wait time based on avg processing rate
const pendingCount = parseInt(stats.pending_count) || 0;
const avgDuration = parseFloat(stats.avg_duration_ms) || 30000; // default 30s
const runningCount = parseInt(stats.running_count) || 1;
const estimatedWaitMs = (pendingCount * avgDuration) / Math.max(runningCount, 1);
res.json({
success: true,
stats: {
pending: parseInt(stats.pending_count) || 0,
running: parseInt(stats.running_count) || 0,
completed_24h: parseInt(stats.completed_24h) || 0,
failed_24h: parseInt(stats.failed_24h) || 0,
cancelled: parseInt(stats.cancelled_count) || 0,
avg_duration_ms: Math.round(parseFloat(stats.avg_duration_ms)) || null,
max_priority: parseInt(stats.max_priority) || 0,
oldest_pending: stats.oldest_pending,
estimated_wait_ms: Math.round(estimatedWaitMs),
queue_paused: queuePaused
},
by_type: byType,
top_priority: topPriority
});
} catch (error: any) {
console.error('[JobQueue] Error getting stats:', error);
res.status(500).json({ success: false, error: error.message });
}
});
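The estimated wait above is a rough heuristic: pending jobs times average duration, divided by the number of currently running jobs. A sketch of surfacing it on a dashboard, with the /api/job-queue mount taken from the file header and everything else illustrative:

async function getQueueEta(baseUrl: string): Promise<string> {
  const res = await fetch(`${baseUrl}/api/job-queue/stats`);
  const { stats } = (await res.json()) as {
    stats: { pending: number; running: number; estimated_wait_ms: number; queue_paused: boolean };
  };
  if (stats.queue_paused) return 'Queue paused';
  const minutes = Math.ceil(stats.estimated_wait_ms / 60000);
  return `${stats.pending} pending, ~${minutes} min to drain at the current rate`;
}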
/**
* GET /api/job-queue/:id - Get single job
*/
router.get('/:id', async (req: Request, res: Response) => {
try {
const { id } = req.params;
const { rows } = await pool.query(`
SELECT
j.*,
d.name as dispensary_name,
d.city,
d.state,
d.menu_url
FROM dispensary_crawl_jobs j
LEFT JOIN dispensaries d ON d.id = j.dispensary_id
WHERE j.id = $1
`, [id]);
if (rows.length === 0) {
return res.status(404).json({ success: false, error: 'Job not found' });
}
res.json({ success: true, job: rows[0] });
} catch (error: any) {
console.error('[JobQueue] Error getting job:', error);
res.status(500).json({ success: false, error: error.message });
}
});
/**
* PUT /api/job-queue/:id/priority - Update job priority
*/
router.put('/:id/priority', async (req: Request, res: Response) => {
try {
const { id } = req.params;
const { priority } = req.body;
if (typeof priority !== 'number' || priority < 0 || priority > 100) {
return res.status(400).json({
success: false,
error: 'Priority must be a number between 0 and 100'
});
}
const { rows } = await pool.query(`
UPDATE dispensary_crawl_jobs
SET priority = $1, updated_at = NOW()
WHERE id = $2 AND status = 'pending'
RETURNING id, priority, status
`, [priority, id]);
if (rows.length === 0) {
return res.status(404).json({
success: false,
error: 'Job not found or not in pending status'
});
}
res.json({ success: true, job: rows[0] });
} catch (error: any) {
console.error('[JobQueue] Error updating priority:', error);
res.status(500).json({ success: false, error: error.message });
}
});
/**
* POST /api/job-queue/:id/cancel - Cancel a pending job
*/
router.post('/:id/cancel', async (req: Request, res: Response) => {
try {
const { id } = req.params;
const { rows } = await pool.query(`
UPDATE dispensary_crawl_jobs
SET status = 'cancelled', completed_at = NOW(), updated_at = NOW()
WHERE id = $1 AND status = 'pending'
RETURNING id, status
`, [id]);
if (rows.length === 0) {
return res.status(404).json({
success: false,
error: 'Job not found or not in pending status'
});
}
res.json({ success: true, job: rows[0], message: 'Job cancelled' });
} catch (error: any) {
console.error('[JobQueue] Error cancelling job:', error);
res.status(500).json({ success: false, error: error.message });
}
});
/**
* POST /api/job-queue/:id/retry - Retry a failed job
*/
router.post('/:id/retry', async (req: Request, res: Response) => {
try {
const { id } = req.params;
const { priority } = req.body;
const { rows } = await pool.query(`
UPDATE dispensary_crawl_jobs
SET
status = 'pending',
priority = COALESCE($2, priority),
error_message = NULL,
started_at = NULL,
completed_at = NULL,
duration_ms = NULL,
worker_id = NULL,
locked_by = NULL,
locked_at = NULL,
retry_count = retry_count + 1,
updated_at = NOW()
WHERE id = $1 AND status IN ('failed', 'cancelled')
RETURNING id, status, priority, retry_count
`, [id, priority]);
if (rows.length === 0) {
return res.status(404).json({
success: false,
error: 'Job not found or not in failed/cancelled status'
});
}
res.json({ success: true, job: rows[0], message: 'Job queued for retry' });
} catch (error: any) {
console.error('[JobQueue] Error retrying job:', error);
res.status(500).json({ success: false, error: error.message });
}
});
/**
* POST /api/job-queue/bulk-priority - Bulk update priorities
*/
router.post('/bulk-priority', async (req: Request, res: Response) => {
try {
const { jobs } = req.body; // Array of { id, priority }
if (!Array.isArray(jobs) || jobs.length === 0) {
return res.status(400).json({
success: false,
error: 'jobs array is required'
});
}
const client = await pool.connect();
try {
await client.query('BEGIN');
let updated = 0;
for (const job of jobs) {
if (typeof job.id === 'number' && typeof job.priority === 'number') {
const result = await client.query(`
UPDATE dispensary_crawl_jobs
SET priority = $1, updated_at = NOW()
WHERE id = $2 AND status = 'pending'
`, [job.priority, job.id]);
updated += result.rowCount || 0;
}
}
await client.query('COMMIT');
res.json({ success: true, updated, message: `Updated ${updated} jobs` });
} catch (err) {
await client.query('ROLLBACK');
throw err;
} finally {
client.release();
}
} catch (error: any) {
console.error('[JobQueue] Error bulk updating priorities:', error);
res.status(500).json({ success: false, error: error.message });
}
});
/**
* POST /api/job-queue/enqueue - Add a new job to the queue
*/
router.post('/enqueue', async (req: Request, res: Response) => {
try {
const { dispensary_id, job_type = 'dutchie_product_crawl', priority = 0 } = req.body;
if (!dispensary_id) {
return res.status(400).json({ success: false, error: 'dispensary_id is required' });
}
// Check if job already pending for this dispensary
const existing = await pool.query(`
SELECT id FROM dispensary_crawl_jobs
WHERE dispensary_id = $1 AND job_type = $2 AND status = 'pending'
`, [dispensary_id, job_type]);
if (existing.rows.length > 0) {
// Update priority if higher
await pool.query(`
UPDATE dispensary_crawl_jobs
SET priority = GREATEST(priority, $1), updated_at = NOW()
WHERE id = $2
`, [priority, existing.rows[0].id]);
return res.json({
success: true,
job_id: existing.rows[0].id,
message: 'Job already queued, priority updated'
});
}
const { rows } = await pool.query(`
INSERT INTO dispensary_crawl_jobs (dispensary_id, job_type, priority, trigger_type)
VALUES ($1, $2, $3, 'manual')
RETURNING id
`, [dispensary_id, job_type, priority]);
res.json({ success: true, job_id: rows[0].id, message: 'Job enqueued' });
} catch (error: any) {
console.error('[JobQueue] Error enqueuing job:', error);
res.status(500).json({ success: false, error: error.message });
}
});
/**
* POST /api/job-queue/pause - Pause queue processing
*/
router.post('/pause', async (_req: Request, res: Response) => {
queuePaused = true;
res.json({ success: true, queue_paused: true, message: 'Queue paused' });
});
/**
* POST /api/job-queue/resume - Resume queue processing
*/
router.post('/resume', async (_req: Request, res: Response) => {
queuePaused = false;
res.json({ success: true, queue_paused: false, message: 'Queue resumed' });
});
/**
* GET /api/job-queue/paused - Check if queue is paused
*/
router.get('/paused', async (_req: Request, res: Response) => {
res.json({ success: true, queue_paused: queuePaused });
});
/**
* POST /api/job-queue/enqueue-batch - Queue multiple dispensaries at once
* Body: { dispensary_ids: number[], job_type?: string, priority?: number }
*/
router.post('/enqueue-batch', async (req: Request, res: Response) => {
try {
const { dispensary_ids, job_type = 'dutchie_product_crawl', priority = 0 } = req.body;
if (!Array.isArray(dispensary_ids) || dispensary_ids.length === 0) {
return res.status(400).json({ success: false, error: 'dispensary_ids array is required' });
}
if (dispensary_ids.length > 500) {
return res.status(400).json({ success: false, error: 'Maximum 500 dispensaries per batch' });
}
// Insert jobs, skipping duplicates
const { rows } = await pool.query(`
INSERT INTO dispensary_crawl_jobs (dispensary_id, job_type, priority, trigger_type, status, created_at)
SELECT
d.id,
$2::text,
$3::integer,
'api_batch',
'pending',
NOW()
FROM dispensaries d
WHERE d.id = ANY($1::int[])
AND d.crawl_enabled = true
AND d.platform_dispensary_id IS NOT NULL
AND NOT EXISTS (
SELECT 1 FROM dispensary_crawl_jobs cj
WHERE cj.dispensary_id = d.id
AND cj.job_type = $2::text
AND cj.status IN ('pending', 'running')
)
RETURNING id, dispensary_id
`, [dispensary_ids, job_type, priority]);
res.json({
success: true,
queued: rows.length,
requested: dispensary_ids.length,
job_ids: rows.map(r => r.id),
message: `Queued ${rows.length} of ${dispensary_ids.length} dispensaries`
});
} catch (error: any) {
console.error('[JobQueue] Error batch enqueuing:', error);
res.status(500).json({ success: false, error: error.message });
}
});
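A sketch of batch-enqueuing from a script: the dispensary_ids could come from the /available endpoint above, and the response reports how many jobs were actually created after the duplicate check. Mount path per the file header; the helper itself is illustrative.

async function enqueueBatch(baseUrl: string, dispensaryIds: number[], priority = 10) {
  const res = await fetch(`${baseUrl}/api/job-queue/enqueue-batch`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ dispensary_ids: dispensaryIds, priority }),
  });
  const body = (await res.json()) as {
    success: boolean; queued: number; requested: number; job_ids: number[];
  };
  console.log(`Queued ${body.queued}/${body.requested} jobs`);
  return body.job_ids;
}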
/**
* POST /api/job-queue/enqueue-state - Queue all crawl-enabled dispensaries for a state
* Body: { state_code: string, job_type?: string, priority?: number, limit?: number }
*/
router.post('/enqueue-state', async (req: Request, res: Response) => {
try {
const { state_code, job_type = 'dutchie_product_crawl', priority = 0, limit = 200 } = req.body;
if (!state_code) {
return res.status(400).json({ success: false, error: 'state_code is required (e.g., "AZ")' });
}
// Get state_id and queue jobs
const { rows } = await pool.query(`
WITH target_state AS (
SELECT id FROM states WHERE code = $1
)
INSERT INTO dispensary_crawl_jobs (dispensary_id, job_type, priority, trigger_type, status, created_at)
SELECT
d.id,
$2::text,
$3::integer,
'api_state',
'pending',
NOW()
FROM dispensaries d, target_state
WHERE d.state_id = target_state.id
AND d.crawl_enabled = true
AND d.platform_dispensary_id IS NOT NULL
AND NOT EXISTS (
SELECT 1 FROM dispensary_crawl_jobs cj
WHERE cj.dispensary_id = d.id
AND cj.job_type = $2::text
AND cj.status IN ('pending', 'running')
)
LIMIT $4::integer
RETURNING id, dispensary_id
`, [state_code.toUpperCase(), job_type, priority, limit]);
// Get total available count
const countResult = await pool.query(`
WITH target_state AS (
SELECT id FROM states WHERE code = $1
)
SELECT COUNT(*) as total
FROM dispensaries d, target_state
WHERE d.state_id = target_state.id
AND d.crawl_enabled = true
AND d.platform_dispensary_id IS NOT NULL
`, [state_code.toUpperCase()]);
res.json({
success: true,
queued: rows.length,
total_available: parseInt(countResult.rows[0].total),
state: state_code.toUpperCase(),
job_type,
message: `Queued ${rows.length} dispensaries for ${state_code.toUpperCase()}`
});
} catch (error: any) {
console.error('[JobQueue] Error enqueuing state:', error);
res.status(500).json({ success: false, error: error.message });
}
});
/**
* POST /api/job-queue/clear-pending - Clear all pending jobs (optionally filtered)
* Body: { state_code?: string, job_type?: string }
*/
router.post('/clear-pending', async (req: Request, res: Response) => {
try {
const { state_code, job_type } = req.body;
let query = `
UPDATE dispensary_crawl_jobs
SET status = 'cancelled', completed_at = NOW(), updated_at = NOW()
WHERE status = 'pending'
`;
const params: any[] = [];
let paramIndex = 1;
if (job_type) {
params.push(job_type);
query += ` AND job_type = $${paramIndex++}`;
}
if (state_code) {
params.push((state_code as string).toUpperCase());
query += ` AND dispensary_id IN (
SELECT d.id FROM dispensaries d
JOIN states s ON s.id = d.state_id
WHERE s.code = $${paramIndex++}
)`;
}
const result = await pool.query(query, params);
res.json({
success: true,
cleared: result.rowCount,
message: `Cancelled ${result.rowCount} pending jobs`
});
} catch (error: any) {
console.error('[JobQueue] Error clearing pending:', error);
res.status(500).json({ success: false, error: error.message });
}
});
export default router;
export { queuePaused };

View File

@@ -0,0 +1,667 @@
/**
* Markets API Routes
*
* Provider-agnostic store and product endpoints for the CannaiQ admin dashboard.
* Queries the dispensaries table and the canonical store_products tables directly.
*/
import { Router, Request, Response } from 'express';
import { authMiddleware } from '../auth/middleware';
import { pool } from '../db/pool';
const router = Router();
router.use(authMiddleware);
/**
* GET /api/markets/dashboard
* Dashboard summary with counts for dispensaries, products, brands, etc.
*/
router.get('/dashboard', async (req: Request, res: Response) => {
try {
// Get dispensary count
const { rows: dispRows } = await pool.query(
`SELECT COUNT(*) as count FROM dispensaries`
);
// Get product count from the canonical store_products table
const { rows: productRows } = await pool.query(`
SELECT COUNT(*) as count FROM store_products
`);
// Get brand count
const { rows: brandRows } = await pool.query(`
SELECT COUNT(DISTINCT brand_name_raw) as count
FROM store_products
WHERE brand_name_raw IS NOT NULL
`);
// Get category count
const { rows: categoryRows } = await pool.query(`
SELECT COUNT(DISTINCT category_raw) as count
FROM store_products
WHERE category_raw IS NOT NULL
`);
// Get snapshot count in last 24 hours
const { rows: snapshotRows } = await pool.query(`
SELECT COUNT(*) as count
FROM store_product_snapshots
WHERE captured_at >= NOW() - INTERVAL '24 hours'
`);
// Get last crawl time
const { rows: lastCrawlRows } = await pool.query(`
SELECT MAX(completed_at) as last_crawl
FROM crawl_orchestration_traces
WHERE success = true
`);
// Get failed job count (jobs in last 24h that failed)
const { rows: failedRows } = await pool.query(`
SELECT COUNT(*) as count
FROM crawl_orchestration_traces
WHERE success = false
AND started_at >= NOW() - INTERVAL '24 hours'
`);
res.json({
dispensaryCount: parseInt(dispRows[0]?.count || '0', 10),
productCount: parseInt(productRows[0]?.count || '0', 10),
brandCount: parseInt(brandRows[0]?.count || '0', 10),
categoryCount: parseInt(categoryRows[0]?.count || '0', 10),
snapshotCount24h: parseInt(snapshotRows[0]?.count || '0', 10),
lastCrawlTime: lastCrawlRows[0]?.last_crawl || null,
failedJobCount: parseInt(failedRows[0]?.count || '0', 10),
});
} catch (error: any) {
console.error('[Markets] Error fetching dashboard:', error.message);
res.status(500).json({ error: error.message });
}
});
/**
* GET /api/markets/stores
* List all stores from the dispensaries table
*/
router.get('/stores', async (req: Request, res: Response) => {
try {
const { city, hasPlatformId, limit = '100', offset = '0' } = req.query;
let whereClause = 'WHERE 1=1';
const params: any[] = [];
let paramIndex = 1;
if (city) {
whereClause += ` AND d.city ILIKE $${paramIndex}`;
params.push(`%${city}%`);
paramIndex++;
}
if (hasPlatformId === 'true') {
whereClause += ` AND d.platform_dispensary_id IS NOT NULL`;
} else if (hasPlatformId === 'false') {
whereClause += ` AND d.platform_dispensary_id IS NULL`;
}
params.push(parseInt(limit as string, 10), parseInt(offset as string, 10));
const { rows } = await pool.query(`
SELECT
d.id,
d.name,
d.dba_name,
d.city,
d.state,
d.address1 as address,
d.zipcode as zip,
d.phone,
d.website,
d.menu_url,
d.menu_type,
d.platform_dispensary_id,
d.crawl_enabled,
d.dutchie_verified,
d.last_crawl_at,
d.product_count,
d.created_at,
d.updated_at
FROM dispensaries d
${whereClause}
ORDER BY d.name
LIMIT $${paramIndex} OFFSET $${paramIndex + 1}
`, params);
// Get total count
const { rows: countRows } = await pool.query(
`SELECT COUNT(*) as total FROM dispensaries d ${whereClause}`,
params.slice(0, -2)
);
res.json({
stores: rows,
total: parseInt(countRows[0]?.total || '0', 10),
});
} catch (error: any) {
console.error('[Markets] Error fetching stores:', error.message);
res.status(500).json({ error: error.message });
}
});
/**
* GET /api/markets/stores/:id
* Get a single store by ID
*/
router.get('/stores/:id', async (req: Request, res: Response) => {
try {
const { id } = req.params;
const { rows } = await pool.query(`
SELECT
d.id,
d.name,
d.dba_name,
d.city,
d.state,
d.address1 as address,
d.zipcode as zip,
d.phone,
d.website,
d.menu_url,
d.menu_type,
d.platform_dispensary_id,
d.crawl_enabled,
d.dutchie_verified,
d.last_crawl_at,
d.product_count,
d.created_at,
d.updated_at
FROM dispensaries d
WHERE d.id = $1
`, [parseInt(id, 10)]);
if (rows.length === 0) {
return res.status(404).json({ error: 'Store not found' });
}
res.json(rows[0]);
} catch (error: any) {
console.error('[Markets] Error fetching store:', error.message);
res.status(500).json({ error: error.message });
}
});
/**
* GET /api/markets/stores/:id/summary
* Get store summary with aggregated metrics, brands, and categories
*/
router.get('/stores/:id/summary', async (req: Request, res: Response) => {
try {
const { id } = req.params;
const dispensaryId = parseInt(id, 10);
// Get dispensary info
const { rows: dispRows } = await pool.query(`
SELECT
d.id,
d.name,
d.dba_name,
d.c_name as company_name,
d.city,
d.state,
d.address1 as address,
d.zipcode as zip,
d.phone,
d.website,
d.menu_url,
d.menu_type,
d.platform_dispensary_id,
d.crawl_enabled,
d.last_crawl_at
FROM dispensaries d
WHERE d.id = $1
`, [dispensaryId]);
if (dispRows.length === 0) {
return res.status(404).json({ error: 'Store not found' });
}
const dispensary = dispRows[0];
// Get product counts using canonical store_products table
const { rows: countRows } = await pool.query(`
SELECT
COUNT(*) as total,
COUNT(*) FILTER (WHERE stock_status = 'in_stock') as in_stock,
COUNT(*) FILTER (WHERE stock_status = 'out_of_stock') as out_of_stock,
COUNT(*) FILTER (WHERE stock_status NOT IN ('in_stock', 'out_of_stock') OR stock_status IS NULL) as unknown,
COUNT(*) FILTER (WHERE stock_status = 'missing_from_feed') as missing_from_feed
FROM store_products
WHERE dispensary_id = $1
`, [dispensaryId]);
const counts = countRows[0] || {};
// Get brands using canonical table
const { rows: brandRows } = await pool.query(`
SELECT brand_name_raw as brand_name, COUNT(*) as product_count
FROM store_products
WHERE dispensary_id = $1 AND brand_name_raw IS NOT NULL
GROUP BY brand_name_raw
ORDER BY product_count DESC, brand_name_raw
`, [dispensaryId]);
// Get categories using canonical table
const { rows: categoryRows } = await pool.query(`
SELECT category_raw as type, subcategory_raw as subcategory, COUNT(*) as product_count
FROM store_products
WHERE dispensary_id = $1
GROUP BY category_raw, subcategory_raw
ORDER BY product_count DESC
`, [dispensaryId]);
// Get last crawl info from job_run_logs or crawl_orchestration_traces
const { rows: crawlRows } = await pool.query(`
SELECT
completed_at,
CASE WHEN success THEN 'completed' ELSE 'failed' END as status,
error_message
FROM crawl_orchestration_traces
WHERE dispensary_id = $1
ORDER BY completed_at DESC
LIMIT 1
`, [dispensaryId]);
const lastCrawl = crawlRows.length > 0 ? crawlRows[0] : null;
res.json({
dispensary,
totalProducts: parseInt(counts.total || '0', 10),
inStockCount: parseInt(counts.in_stock || '0', 10),
outOfStockCount: parseInt(counts.out_of_stock || '0', 10),
unknownStockCount: parseInt(counts.unknown || '0', 10),
missingFromFeedCount: parseInt(counts.missing_from_feed || '0', 10),
brands: brandRows,
brandCount: brandRows.length,
categories: categoryRows,
categoryCount: categoryRows.length,
lastCrawl,
});
} catch (error: any) {
console.error('[Markets] Error fetching store summary:', error.message);
res.status(500).json({ error: error.message });
}
});
/**
* GET /api/markets/stores/:id/products
* Get products for a store with filtering and pagination
*/
router.get('/stores/:id/products', async (req: Request, res: Response) => {
try {
const { id } = req.params;
const {
stockStatus,
type,
subcategory,
brandName,
search,
limit = '25',
offset = '0'
} = req.query;
const dispensaryId = parseInt(id, 10);
let whereClause = 'WHERE sp.dispensary_id = $1';
const params: any[] = [dispensaryId];
let paramIndex = 2;
if (stockStatus) {
whereClause += ` AND sp.stock_status = $${paramIndex}`;
params.push(stockStatus);
paramIndex++;
}
if (type) {
whereClause += ` AND sp.category_raw = $${paramIndex}`;
params.push(type);
paramIndex++;
}
if (subcategory) {
whereClause += ` AND sp.subcategory_raw = $${paramIndex}`;
params.push(subcategory);
paramIndex++;
}
if (brandName) {
whereClause += ` AND sp.brand_name_raw ILIKE $${paramIndex}`;
params.push(`%${brandName}%`);
paramIndex++;
}
if (search) {
whereClause += ` AND (sp.name_raw ILIKE $${paramIndex} OR sp.brand_name_raw ILIKE $${paramIndex})`;
params.push(`%${search}%`);
paramIndex++;
}
const limitNum = Math.min(parseInt(limit as string, 10), 100);
const offsetNum = parseInt(offset as string, 10);
params.push(limitNum, offsetNum);
// Get products with latest snapshot data using canonical tables
const { rows } = await pool.query(`
SELECT
sp.id,
sp.external_product_id as external_id,
sp.name_raw as name,
sp.brand_name_raw as brand,
sp.category_raw as type,
sp.subcategory_raw as subcategory,
sp.strain_type,
sp.stock_status,
sp.stock_status = 'in_stock' as in_stock,
sp.stock_status != 'missing_from_feed' as is_present_in_feed,
sp.stock_status = 'missing_from_feed' as missing_from_feed,
sp.thc_percent as thc_percentage,
sp.cbd_percent as cbd_percentage,
sp.primary_image_url as image_url,
sp.description,
sp.total_quantity_available as total_quantity,
sp.first_seen_at,
sp.last_seen_at,
sp.updated_at,
(
SELECT jsonb_build_object(
'regular_price', COALESCE(sps.price_rec, 0)::numeric,
'sale_price', CASE WHEN sps.price_rec_special > 0
THEN sps.price_rec_special::numeric
ELSE NULL END,
'med_price', COALESCE(sps.price_med, 0)::numeric,
'med_sale_price', CASE WHEN sps.price_med_special > 0
THEN sps.price_med_special::numeric
ELSE NULL END,
'snapshot_at', sps.captured_at
)
FROM store_product_snapshots sps
WHERE sps.store_product_id = sp.id
ORDER BY sps.captured_at DESC
LIMIT 1
) as pricing
FROM store_products sp
${whereClause}
ORDER BY sp.name_raw
LIMIT $${paramIndex} OFFSET $${paramIndex + 1}
`, params);
// Flatten pricing into the product object
const products = rows.map((row: any) => {
const pricing = row.pricing || {};
return {
...row,
regular_price: pricing.regular_price || null,
sale_price: pricing.sale_price || null,
med_price: pricing.med_price || null,
med_sale_price: pricing.med_sale_price || null,
snapshot_at: pricing.snapshot_at || null,
pricing: undefined, // Remove the nested object
};
});
// Get total count
const { rows: countRows } = await pool.query(
`SELECT COUNT(*) as total FROM store_products sp ${whereClause}`,
params.slice(0, -2)
);
res.json({
products,
total: parseInt(countRows[0]?.total || '0', 10),
limit: limitNum,
offset: offsetNum,
});
} catch (error: any) {
console.error('[Markets] Error fetching store products:', error.message);
res.status(500).json({ error: error.message });
}
});
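// Example request against the route above (illustrative only; the /api/markets
// mount path is taken from the JSDoc, parameter values are hypothetical):
//   GET /api/markets/stores/42/products?stockStatus=in_stock&type=Flower&brandName=wana&limit=25&offset=0
// The handler flattens the latest snapshot's pricing onto each product, so a
// client reads p.regular_price / p.sale_price directly instead of p.pricing.*.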
/**
* GET /api/markets/stores/:id/brands
* Get brands for a store
*/
router.get('/stores/:id/brands', async (req: Request, res: Response) => {
try {
const { id } = req.params;
const dispensaryId = parseInt(id, 10);
const { rows } = await pool.query(`
SELECT brand_name_raw as brand, COUNT(*) as product_count
FROM store_products
WHERE dispensary_id = $1 AND brand_name_raw IS NOT NULL
GROUP BY brand_name_raw
ORDER BY product_count DESC, brand_name_raw
`, [dispensaryId]);
res.json({ brands: rows });
} catch (error: any) {
console.error('[Markets] Error fetching store brands:', error.message);
res.status(500).json({ error: error.message });
}
});
/**
* GET /api/markets/stores/:id/categories
* Get categories for a store
*/
router.get('/stores/:id/categories', async (req: Request, res: Response) => {
try {
const { id } = req.params;
const dispensaryId = parseInt(id, 10);
const { rows } = await pool.query(`
SELECT category_raw as type, subcategory_raw as subcategory, COUNT(*) as product_count
FROM store_products
WHERE dispensary_id = $1
GROUP BY category_raw, subcategory_raw
ORDER BY product_count DESC
`, [dispensaryId]);
res.json({ categories: rows });
} catch (error: any) {
console.error('[Markets] Error fetching store categories:', error.message);
res.status(500).json({ error: error.message });
}
});
/**
* POST /api/markets/stores/:id/crawl
* Trigger a crawl for a store (alias for existing crawl endpoint)
*/
router.post('/stores/:id/crawl', async (req: Request, res: Response) => {
try {
const { id } = req.params;
const dispensaryId = parseInt(id, 10);
// Verify store exists and has platform_dispensary_id
const { rows } = await pool.query(`
SELECT id, name, platform_dispensary_id, menu_type
FROM dispensaries
WHERE id = $1
`, [dispensaryId]);
if (rows.length === 0) {
return res.status(404).json({ error: 'Store not found' });
}
const store = rows[0];
if (!store.platform_dispensary_id) {
return res.status(400).json({
error: 'Store does not have a platform ID resolved. Cannot crawl.',
store: { id: store.id, name: store.name, menu_type: store.menu_type }
});
}
// Insert a job into the crawl queue
await pool.query(`
INSERT INTO crawl_jobs (dispensary_id, job_type, status, created_at)
VALUES ($1, 'dutchie_product_crawl', 'pending', NOW())
`, [dispensaryId]);
res.json({
success: true,
message: `Crawl queued for ${store.name}`,
store: { id: store.id, name: store.name }
});
} catch (error: any) {
console.error('[Markets] Error triggering crawl:', error.message);
res.status(500).json({ error: error.message });
}
});
/**
* GET /api/markets/brands
* List all brands with product counts and store presence
*/
router.get('/brands', async (req: Request, res: Response) => {
try {
const { search, limit = '100', offset = '0', sortBy = 'products' } = req.query;
const limitNum = Math.min(parseInt(limit as string, 10), 500);
const offsetNum = parseInt(offset as string, 10);
let whereClause = 'WHERE brand_name_raw IS NOT NULL AND brand_name_raw != \'\'';
const params: any[] = [];
let paramIndex = 1;
if (search) {
whereClause += ` AND brand_name_raw ILIKE $${paramIndex}`;
params.push(`%${search}%`);
paramIndex++;
}
// Determine sort column
let orderBy = 'product_count DESC';
if (sortBy === 'stores') {
orderBy = 'store_count DESC';
} else if (sortBy === 'name') {
orderBy = 'brand_name ASC';
}
params.push(limitNum, offsetNum);
const { rows } = await pool.query(`
SELECT
brand_name_raw as brand_name,
COUNT(*) as product_count,
COUNT(DISTINCT dispensary_id) as store_count,
AVG(price_rec) FILTER (WHERE price_rec > 0) as avg_price,
array_agg(DISTINCT category_raw) FILTER (WHERE category_raw IS NOT NULL) as categories,
MIN(first_seen_at) as first_seen_at,
MAX(last_seen_at) as last_seen_at
FROM store_products
${whereClause}
GROUP BY brand_name_raw
ORDER BY ${orderBy}
LIMIT $${paramIndex} OFFSET $${paramIndex + 1}
`, params);
// Get total count
const { rows: countRows } = await pool.query(`
SELECT COUNT(DISTINCT brand_name_raw) as total
FROM store_products
${whereClause}
`, params.slice(0, -2));
// Calculate summary stats
const { rows: summaryRows } = await pool.query(`
SELECT
COUNT(DISTINCT brand_name_raw) as total_brands,
AVG(product_count) as avg_products_per_brand
FROM (
SELECT brand_name_raw, COUNT(*) as product_count
FROM store_products
WHERE brand_name_raw IS NOT NULL AND brand_name_raw != ''
GROUP BY brand_name_raw
) brand_counts
`);
res.json({
brands: rows.map((r: any, idx: number) => ({
id: idx + 1 + offsetNum,
name: r.brand_name,
normalized_name: null,
product_count: parseInt(r.product_count, 10),
store_count: parseInt(r.store_count, 10),
avg_price: r.avg_price ? parseFloat(r.avg_price) : null,
categories: r.categories || [],
is_portfolio: false,
first_seen_at: r.first_seen_at,
last_seen_at: r.last_seen_at,
})),
total: parseInt(countRows[0]?.total || '0', 10),
summary: {
total_brands: parseInt(summaryRows[0]?.total_brands || '0', 10),
portfolio_brands: 0,
avg_products_per_brand: Math.round(parseFloat(summaryRows[0]?.avg_products_per_brand || '0')),
top_categories: [],
},
limit: limitNum,
offset: offsetNum,
});
} catch (error: any) {
console.error('[Markets] Error fetching brands:', error.message);
res.status(500).json({ error: error.message });
}
});
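// Example request (illustrative; values are hypothetical):
//   GET /api/markets/brands?search=wana&sortBy=stores&limit=50&offset=0
// sortBy accepts "products" (default), "stores", or "name"; anything else falls
// back to product_count DESC because orderBy is chosen from a fixed whitelist above.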
/**
* GET /api/markets/categories
* List all categories with product counts
*/
router.get('/categories', async (req: Request, res: Response) => {
try {
const { search, limit = '100' } = req.query;
const limitNum = Math.min(parseInt(limit as string, 10), 500);
let whereClause = 'WHERE category_raw IS NOT NULL AND category_raw != \'\'';
const params: any[] = [];
let paramIndex = 1;
if (search) {
whereClause += ` AND category_raw ILIKE $${paramIndex}`;
params.push(`%${search}%`);
paramIndex++;
}
params.push(limitNum);
const { rows } = await pool.query(`
SELECT
category_raw as name,
COUNT(*) as product_count,
COUNT(DISTINCT dispensary_id) as store_count,
AVG(price_rec) FILTER (WHERE price_rec > 0) as avg_price
FROM store_products
${whereClause}
GROUP BY category_raw
ORDER BY product_count DESC
LIMIT $${paramIndex}
`, params);
res.json({
categories: rows.map((r: any, idx: number) => ({
id: idx + 1,
name: r.name,
product_count: parseInt(r.product_count, 10),
store_count: parseInt(r.store_count, 10),
avg_price: r.avg_price ? parseFloat(r.avg_price) : null,
})),
total: rows.length,
});
} catch (error: any) {
console.error('[Markets] Error fetching categories:', error.message);
res.status(500).json({ error: error.message });
}
});
export default router;
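A minimal sketch of how this markets router might be wired into the Express app, assuming the /api/markets base path implied by the JSDoc comments above (the app bootstrap is not part of this diff, so the import path and port are illustrative):

import express from 'express';
import marketsRouter from './routes/markets'; // path is an assumption

const app = express();
app.use(express.json());
// Mount so that router.get('/brands') resolves to GET /api/markets/brands, etc.
app.use('/api/markets', marketsRouter);
app.listen(3000, () => console.log('API listening on :3000'));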

View File

@@ -24,37 +24,22 @@ const router = Router();
*/
router.get('/metrics', async (_req: Request, res: Response) => {
try {
// Get aggregate metrics
// Get aggregate metrics using 7-stage pipeline
const { rows: metrics } = await pool.query(`
SELECT
(SELECT COUNT(*) FROM dutchie_products) as total_products,
(SELECT COUNT(DISTINCT brand_name) FROM dutchie_products WHERE brand_name IS NOT NULL) as total_brands,
(SELECT COUNT(*) FROM dispensaries WHERE state = 'AZ') as total_stores,
(
SELECT COUNT(*)
FROM dispensary_crawler_profiles dcp
WHERE dcp.enabled = true
AND (dcp.status = 'production' OR (dcp.config->>'status')::text = 'production')
) as healthy_count,
(
SELECT COUNT(*)
FROM dispensary_crawler_profiles dcp
WHERE dcp.enabled = true
AND (dcp.status = 'sandbox' OR (dcp.config->>'status')::text = 'sandbox')
) as sandbox_count,
(
SELECT COUNT(*)
FROM dispensary_crawler_profiles dcp
WHERE dcp.enabled = true
AND (dcp.status = 'needs_manual' OR (dcp.config->>'status')::text = 'needs_manual')
) as needs_manual_count,
(
SELECT COUNT(*)
FROM dispensary_crawler_profiles dcp
JOIN dispensaries d ON d.id = dcp.dispensary_id
WHERE d.state = 'AZ'
AND dcp.status = 'needs_manual'
) as failing_count
(SELECT COUNT(*) FROM store_products) as total_products,
(SELECT COUNT(DISTINCT brand_name_raw) FROM store_products WHERE brand_name_raw IS NOT NULL) as total_brands,
(SELECT COUNT(*) FROM dispensaries WHERE menu_type = 'dutchie' AND crawl_enabled = true) as total_stores,
-- Stage counts from dispensaries table (7-stage pipeline)
(SELECT COUNT(*) FROM dispensaries WHERE stage = 'discovered') as discovered_count,
(SELECT COUNT(*) FROM dispensaries WHERE stage = 'validated') as validated_count,
(SELECT COUNT(*) FROM dispensaries WHERE stage = 'promoted') as promoted_count,
(SELECT COUNT(*) FROM dispensaries WHERE stage = 'sandbox') as sandbox_count,
(SELECT COUNT(*) FROM dispensaries WHERE stage = 'hydrating') as hydrating_count,
(SELECT COUNT(*) FROM dispensaries WHERE stage = 'production') as production_count,
(SELECT COUNT(*) FROM dispensaries WHERE stage = 'failing') as failing_count,
-- Discovery pipeline counts
(SELECT COUNT(*) FROM dutchie_discovery_locations WHERE stage = 'discovered' AND active = true) as discovery_pending
`);
const row = metrics[0] || {};
@@ -63,13 +48,22 @@ router.get('/metrics', async (_req: Request, res: Response) => {
total_products: parseInt(row.total_products || '0', 10),
total_brands: parseInt(row.total_brands || '0', 10),
total_stores: parseInt(row.total_stores || '0', 10),
// Placeholder sentiment values - these would come from actual analytics
market_sentiment: 'neutral',
market_direction: 'stable',
// Health counts
healthy_count: parseInt(row.healthy_count || '0', 10),
// 7-Stage Pipeline Counts
stages: {
discovered: parseInt(row.discovered_count || '0', 10),
validated: parseInt(row.validated_count || '0', 10),
promoted: parseInt(row.promoted_count || '0', 10),
sandbox: parseInt(row.sandbox_count || '0', 10),
hydrating: parseInt(row.hydrating_count || '0', 10),
production: parseInt(row.production_count || '0', 10),
failing: parseInt(row.failing_count || '0', 10),
},
// Discovery pipeline
discovery_pending: parseInt(row.discovery_pending || '0', 10),
// Legacy compatibility
healthy_count: parseInt(row.production_count || '0', 10),
sandbox_count: parseInt(row.sandbox_count || '0', 10),
needs_manual_count: parseInt(row.needs_manual_count || '0', 10),
needs_manual_count: parseInt(row.failing_count || '0', 10),
failing_count: parseInt(row.failing_count || '0', 10),
});
} catch (error: any) {
@@ -117,12 +111,13 @@ router.get('/states', async (_req: Request, res: Response) => {
* Returns list of stores with orchestrator status info
* Query params:
* - state: Filter by state (e.g., "AZ")
* - crawl_enabled: Filter by crawl status (default: true, use "all" to show all, "false" for disabled only)
* - limit: Max results (default 100)
* - offset: Pagination offset
*/
router.get('/stores', async (req: Request, res: Response) => {
try {
const { state, limit = '100', offset = '0' } = req.query;
const { state, crawl_enabled, limit = '100', offset = '0' } = req.query;
let whereClause = 'WHERE 1=1';
const params: any[] = [];
@@ -134,6 +129,16 @@ router.get('/stores', async (req: Request, res: Response) => {
paramIndex++;
}
// Filter by crawl_enabled - defaults to showing only enabled
if (crawl_enabled === 'false' || crawl_enabled === '0') {
whereClause += ` AND (d.crawl_enabled = false OR d.crawl_enabled IS NULL)`;
} else if (crawl_enabled === 'all') {
// Show all (no filter)
} else {
// Default: show only enabled
whereClause += ` AND d.crawl_enabled = true`;
}
params.push(parseInt(limit as string, 10), parseInt(offset as string, 10));
const { rows } = await pool.query(`
@@ -145,9 +150,15 @@ router.get('/stores', async (req: Request, res: Response) => {
d.menu_type as provider,
d.platform_dispensary_id,
d.last_crawl_at,
d.crawl_enabled,
d.stage,
d.stage_changed_at,
d.first_crawl_at,
d.last_successful_crawl_at,
dcp.id as profile_id,
dcp.profile_key,
COALESCE(dcp.status, dcp.config->>'status', 'legacy') as crawler_status,
dcp.consecutive_successes,
dcp.consecutive_failures,
(
SELECT MAX(cot.completed_at)
FROM crawl_orchestration_traces cot
@@ -160,8 +171,8 @@ router.get('/stores', async (req: Request, res: Response) => {
) as last_failure_at,
(
SELECT COUNT(*)
FROM dutchie_products dp
WHERE dp.dispensary_id = d.id
FROM store_products sp
WHERE sp.dispensary_id = d.id
) as product_count
FROM dispensaries d
LEFT JOIN dispensary_crawler_profiles dcp
@@ -185,9 +196,17 @@ router.get('/stores', async (req: Request, res: Response) => {
state: r.state,
provider: r.provider || 'unknown',
provider_raw: r.provider || null,
provider_display: getProviderDisplayName(r.provider),
// Admin routes show actual provider names (not anonymized)
provider_display: r.provider || 'Unknown',
platformDispensaryId: r.platform_dispensary_id,
status: r.crawler_status || (r.platform_dispensary_id ? 'legacy' : 'pending'),
crawlEnabled: r.crawl_enabled ?? false,
// Use stage from dispensaries table (6-stage pipeline)
stage: r.stage || 'discovered',
stageChangedAt: r.stage_changed_at,
firstCrawlAt: r.first_crawl_at,
lastSuccessfulCrawlAt: r.last_successful_crawl_at,
consecutiveSuccesses: r.consecutive_successes || 0,
consecutiveFailures: r.consecutive_failures || 0,
profileId: r.profile_id,
profileKey: r.profile_key,
lastCrawlAt: r.last_crawl_at,
@@ -425,4 +444,392 @@ router.get('/crawl-traces/:traceId', async (req: Request, res: Response) => {
}
});
// ============================================================
// STATUS MANAGEMENT
// ============================================================
// 6-Stage Pipeline Statuses
const VALID_STAGES = ['discovered', 'validated', 'promoted', 'sandbox', 'production', 'failing'] as const;
/**
* POST /api/admin/orchestrator/stores/:id/stage
* Manually update the stage for a store (use /api/pipeline for proper transitions)
* Body: { stage: 'discovered' | 'validated' | 'promoted' | 'sandbox' | 'production' | 'failing', reason?: string }
*/
router.post('/stores/:id/stage', async (req: Request, res: Response) => {
try {
const { id } = req.params;
const { stage: status, reason } = req.body;
if (!status || !VALID_STAGES.includes(status)) {
return res.status(400).json({
error: `Invalid stage. Must be one of: ${VALID_STAGES.join(', ')}`,
});
}
const dispensaryId = parseInt(id, 10);
// Get current profile and status
const { rows: profileRows } = await pool.query(`
SELECT dcp.id, dcp.status as current_status, d.name as dispensary_name
FROM dispensary_crawler_profiles dcp
JOIN dispensaries d ON d.id = dcp.dispensary_id
WHERE dcp.dispensary_id = $1 AND dcp.enabled = true
ORDER BY dcp.updated_at DESC
LIMIT 1
`, [dispensaryId]);
if (profileRows.length === 0) {
return res.status(404).json({ error: 'No crawler profile found for this store' });
}
const profileId = profileRows[0].id;
const currentStatus = profileRows[0].current_status;
const dispensaryName = profileRows[0].dispensary_name;
// Update the status
await pool.query(`
UPDATE dispensary_crawler_profiles
SET
status = $1,
status_reason = $2,
status_changed_at = CURRENT_TIMESTAMP,
updated_at = CURRENT_TIMESTAMP
WHERE id = $3
`, [status, reason || `Manual status change to ${status}`, profileId]);
// Create status alert
const severity = status === 'production' ? 'info'
: status === 'needs_manual' ? 'warning'
: status === 'failing' ? 'error'
: 'info';
await pool.query(`
INSERT INTO crawler_status_alerts
(dispensary_id, profile_id, alert_type, severity, message, previous_status, new_status, metadata)
VALUES ($1, $2, 'status_change', $3, $4, $5, $6, $7)
`, [
dispensaryId,
profileId,
severity,
`${dispensaryName}: Status changed from ${currentStatus || 'unknown'} to ${status}`,
currentStatus,
status,
JSON.stringify({ reason, changedBy: 'admin_api' }),
]);
res.json({
success: true,
dispensaryId,
profileId,
previousStatus: currentStatus,
newStatus: status,
message: `Status updated to ${status}`,
});
} catch (error: any) {
console.error('[OrchestratorAdmin] Error updating status:', error.message);
res.status(500).json({ error: error.message });
}
});
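// Example stage change (illustrative; the /api/admin/orchestrator mount path is
// taken from the JSDoc above, the store id and reason are hypothetical):
//   POST /api/admin/orchestrator/stores/42/stage
//   { "stage": "sandbox", "reason": "Re-validating after selector fix" }
// Stages outside VALID_STAGES are rejected with a 400 listing the allowed values.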
/**
* GET /api/admin/orchestrator/alerts
* Get recent status alerts for the dashboard
* Query params:
* - severity: Filter by severity (info, warning, error, critical)
* - acknowledged: Filter by acknowledged status (true/false)
* - limit: Max results (default 50)
*/
router.get('/alerts', async (req: Request, res: Response) => {
try {
const { severity, acknowledged, dispensary_id, limit = '50' } = req.query;
let whereClause = 'WHERE 1=1';
const params: any[] = [];
let paramIndex = 1;
if (severity) {
whereClause += ` AND csa.severity = $${paramIndex}`;
params.push(severity);
paramIndex++;
}
if (acknowledged === 'true') {
whereClause += ' AND csa.acknowledged = true';
} else if (acknowledged === 'false') {
whereClause += ' AND csa.acknowledged = false';
}
if (dispensary_id) {
whereClause += ` AND csa.dispensary_id = $${paramIndex}`;
params.push(parseInt(dispensary_id as string, 10));
paramIndex++;
}
params.push(parseInt(limit as string, 10));
const { rows } = await pool.query(`
SELECT
csa.*,
d.name as dispensary_name,
d.city,
d.state
FROM crawler_status_alerts csa
LEFT JOIN dispensaries d ON csa.dispensary_id = d.id
${whereClause}
ORDER BY csa.created_at DESC
LIMIT $${paramIndex}
`, params);
// Get unacknowledged count by severity
const { rows: countRows } = await pool.query(`
SELECT severity, COUNT(*) as count
FROM crawler_status_alerts
WHERE acknowledged = false
GROUP BY severity
`);
const unacknowledgedCounts = countRows.reduce((acc: Record<string, number>, row: any) => {
acc[row.severity] = parseInt(row.count, 10);
return acc;
}, {});
res.json({
alerts: rows.map((r: any) => ({
id: r.id,
dispensaryId: r.dispensary_id,
dispensaryName: r.dispensary_name,
city: r.city,
state: r.state,
profileId: r.profile_id,
alertType: r.alert_type,
severity: r.severity,
message: r.message,
previousStatus: r.previous_status,
newStatus: r.new_status,
errorDetails: r.error_details,
metadata: r.metadata,
acknowledged: r.acknowledged,
acknowledgedAt: r.acknowledged_at,
acknowledgedBy: r.acknowledged_by,
createdAt: r.created_at,
})),
unacknowledgedCounts,
});
} catch (error: any) {
console.error('[OrchestratorAdmin] Error fetching alerts:', error.message);
res.status(500).json({ error: error.message });
}
});
/**
* POST /api/admin/orchestrator/alerts/:id/acknowledge
* Acknowledge an alert
*/
router.post('/alerts/:id/acknowledge', async (req: Request, res: Response) => {
try {
const { id } = req.params;
const { acknowledgedBy = 'admin' } = req.body;
await pool.query(`
UPDATE crawler_status_alerts
SET acknowledged = true, acknowledged_at = CURRENT_TIMESTAMP, acknowledged_by = $1
WHERE id = $2
`, [acknowledgedBy, parseInt(id, 10)]);
res.json({ success: true, alertId: parseInt(id, 10) });
} catch (error: any) {
console.error('[OrchestratorAdmin] Error acknowledging alert:', error.message);
res.status(500).json({ error: error.message });
}
});
/**
* POST /api/admin/orchestrator/alerts/acknowledge-all
* Acknowledge all unacknowledged alerts (optionally filtered)
*/
router.post('/alerts/acknowledge-all', async (req: Request, res: Response) => {
try {
const { severity, dispensaryId, acknowledgedBy = 'admin' } = req.body;
let whereClause = 'WHERE acknowledged = false';
const params: any[] = [acknowledgedBy];
let paramIndex = 2;
if (severity) {
whereClause += ` AND severity = $${paramIndex}`;
params.push(severity);
paramIndex++;
}
if (dispensaryId) {
whereClause += ` AND dispensary_id = $${paramIndex}`;
params.push(dispensaryId);
paramIndex++;
}
const result = await pool.query(`
UPDATE crawler_status_alerts
SET acknowledged = true, acknowledged_at = CURRENT_TIMESTAMP, acknowledged_by = $1
${whereClause}
`, params);
res.json({ success: true, acknowledgedCount: result.rowCount });
} catch (error: any) {
console.error('[OrchestratorAdmin] Error acknowledging alerts:', error.message);
res.status(500).json({ error: error.message });
}
});
/**
* POST /api/admin/orchestrator/crawl-outcome
* Record a crawl outcome and update status based on success/failure
* This endpoint is called by the crawler after each crawl attempt
*/
router.post('/crawl-outcome', async (req: Request, res: Response) => {
try {
const {
dispensaryId,
success,
productsFound = 0,
error,
metadata = {},
} = req.body;
if (!dispensaryId) {
return res.status(400).json({ error: 'dispensaryId is required' });
}
// Get current profile
const { rows: profileRows } = await pool.query(`
SELECT
dcp.id,
dcp.status,
dcp.consecutive_successes,
dcp.consecutive_failures,
d.name as dispensary_name
FROM dispensary_crawler_profiles dcp
JOIN dispensaries d ON d.id = dcp.dispensary_id
WHERE dcp.dispensary_id = $1 AND dcp.enabled = true
ORDER BY dcp.updated_at DESC
LIMIT 1
`, [dispensaryId]);
if (profileRows.length === 0) {
return res.status(404).json({ error: 'No crawler profile found' });
}
const profile = profileRows[0];
const currentStatus = profile.status;
let newStatus = currentStatus;
let statusChanged = false;
let consecutiveSuccesses = profile.consecutive_successes || 0;
let consecutiveFailures = profile.consecutive_failures || 0;
if (success) {
consecutiveSuccesses++;
consecutiveFailures = 0;
// Auto-promote from sandbox to production after 3 consecutive successes
if (currentStatus === 'sandbox' && consecutiveSuccesses >= 3) {
newStatus = 'production';
statusChanged = true;
}
// Auto-recover from needs_manual/failing after 2 consecutive successes
else if ((currentStatus === 'needs_manual' || currentStatus === 'failing') && consecutiveSuccesses >= 2) {
newStatus = 'production';
statusChanged = true;
}
} else {
consecutiveFailures++;
consecutiveSuccesses = 0;
// Demote to needs_manual after 2 consecutive failures
if (currentStatus === 'production' && consecutiveFailures >= 2) {
newStatus = 'needs_manual';
statusChanged = true;
}
// Demote to failing after 5 consecutive failures
else if (currentStatus === 'needs_manual' && consecutiveFailures >= 5) {
newStatus = 'failing';
statusChanged = true;
}
// Move sandbox to needs_manual after 3 consecutive failures (requires manual intervention to fix)
else if (currentStatus === 'sandbox' && consecutiveFailures >= 3) {
newStatus = 'needs_manual';
statusChanged = true;
}
}
// Update profile
await pool.query(`
UPDATE dispensary_crawler_profiles
SET
consecutive_successes = $1,
consecutive_failures = $2,
status = $3,
status_reason = CASE WHEN $4 THEN $5 ELSE status_reason END,
status_changed_at = CASE WHEN $4 THEN CURRENT_TIMESTAMP ELSE status_changed_at END,
updated_at = CURRENT_TIMESTAMP
WHERE id = $6
`, [
consecutiveSuccesses,
consecutiveFailures,
newStatus,
statusChanged,
statusChanged ? (success ? 'Auto-promoted after consecutive successes' : `Auto-demoted after ${consecutiveFailures} consecutive failures`) : null,
profile.id,
]);
// Create alert if status changed or error occurred
if (statusChanged) {
const severity = newStatus === 'production' ? 'info'
: newStatus === 'needs_manual' ? 'warning'
: 'error';
await pool.query(`
INSERT INTO crawler_status_alerts
(dispensary_id, profile_id, alert_type, severity, message, previous_status, new_status, metadata)
VALUES ($1, $2, 'status_change', $3, $4, $5, $6, $7)
`, [
dispensaryId,
profile.id,
severity,
`${profile.dispensary_name}: ${success ? 'Promoted' : 'Demoted'} from ${currentStatus} to ${newStatus}`,
currentStatus,
newStatus,
JSON.stringify({ productsFound, consecutiveSuccesses, consecutiveFailures, ...metadata }),
]);
} else if (!success && error) {
// Log crawl error as alert
await pool.query(`
INSERT INTO crawler_status_alerts
(dispensary_id, profile_id, alert_type, severity, message, error_details, metadata)
VALUES ($1, $2, 'crawl_error', $3, $4, $5, $6)
`, [
dispensaryId,
profile.id,
consecutiveFailures >= 2 ? 'warning' : 'info',
`${profile.dispensary_name}: Crawl failed - ${error}`,
JSON.stringify({ error, stack: metadata.stack }),
JSON.stringify({ consecutiveFailures, ...metadata }),
]);
}
res.json({
success: true,
dispensaryId,
profileId: profile.id,
statusChanged,
previousStatus: currentStatus,
newStatus,
consecutiveSuccesses,
consecutiveFailures,
});
} catch (error: any) {
console.error('[OrchestratorAdmin] Error recording crawl outcome:', error.message);
res.status(500).json({ error: error.message });
}
});
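// Summary of the auto-transition rules implemented above (descriptive only):
//   sandbox      + 3 consecutive successes -> production
//   needs_manual + 2 consecutive successes -> production
//   failing      + 2 consecutive successes -> production
//   production   + 2 consecutive failures  -> needs_manual
//   needs_manual + 5 consecutive failures  -> failing
//   sandbox      + 3 consecutive failures  -> needs_manual
// Example caller payload (illustrative values): POST /api/admin/orchestrator/crawl-outcome
//   { "dispensaryId": 42, "success": false, "productsFound": 0, "error": "timeout" }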
export default router;

File diff suppressed because it is too large

View File

@@ -0,0 +1,472 @@
/**
* Price Analytics API Routes
*
* Endpoints for price history, specials, and price comparison analytics
* Uses the new product_variants and product_variant_snapshots tables
*/
import { Router, Request, Response } from 'express';
import { pool } from '../db/pool';
const router = Router();
// ============================================================
// PRICE HISTORY
// ============================================================
/**
* GET /api/price-analytics/products/:id/history
* Get price and stock history for a product variant
*
* Query params:
* - days: Number of days to look back (default: 30, max: 90)
* - option: Specific variant option (e.g., "1g", "3.5g")
*/
router.get('/products/:id/history', async (req: Request, res: Response) => {
try {
const { id } = req.params;
const { days = '30', option } = req.query;
const daysNum = Math.min(parseInt(days as string, 10) || 30, 90);
// Get product info
const productResult = await pool.query(`
SELECT
sp.id, sp.name_raw as name, sp.brand_name_raw as brand,
sp.category_raw as category, sp.dispensary_id,
d.name as dispensary_name
FROM store_products sp
JOIN dispensaries d ON d.id = sp.dispensary_id
WHERE sp.id = $1
`, [id]);
if (productResult.rows.length === 0) {
return res.status(404).json({ error: 'Product not found' });
}
const product = productResult.rows[0];
// Get variant history
let variantQuery = `
SELECT
pv.id as variant_id,
pv.option,
pvs.price_rec,
pvs.price_med,
pvs.price_rec_special,
pvs.price_med_special,
pvs.quantity,
pvs.in_stock,
pvs.is_on_special,
pvs.captured_at
FROM product_variant_snapshots pvs
JOIN product_variants pv ON pv.id = pvs.product_variant_id
WHERE pv.store_product_id = $1
AND pvs.captured_at >= NOW() - ($2 || ' days')::INTERVAL
`;
const params: any[] = [id, daysNum];
if (option) {
variantQuery += ` AND pv.option = $3`;
params.push(option);
}
variantQuery += ` ORDER BY pv.option, pvs.captured_at ASC`;
const historyResult = await pool.query(variantQuery, params);
// Get current variants
const currentResult = await pool.query(`
SELECT
id, option, price_rec, price_med, price_rec_special, price_med_special,
quantity, in_stock, is_on_special, last_price_change_at, last_stock_change_at
FROM product_variants
WHERE store_product_id = $1
ORDER BY option
`, [id]);
// Get sale stats using the function
const saleStatsResult = await pool.query(`
SELECT
pv.option,
(get_variant_sale_stats(pv.id, $2)).*
FROM product_variants pv
WHERE pv.store_product_id = $1
`, [id, daysNum]);
// Group history by variant
const historyByVariant: Record<string, any[]> = {};
for (const row of historyResult.rows) {
if (!historyByVariant[row.option]) {
historyByVariant[row.option] = [];
}
historyByVariant[row.option].push({
price_rec: row.price_rec ? parseFloat(row.price_rec) : null,
price_med: row.price_med ? parseFloat(row.price_med) : null,
price_rec_special: row.price_rec_special ? parseFloat(row.price_rec_special) : null,
quantity: row.quantity,
in_stock: row.in_stock,
is_on_special: row.is_on_special,
captured_at: row.captured_at,
});
}
res.json({
product: {
id: product.id,
name: product.name,
brand: product.brand,
category: product.category,
dispensary_id: product.dispensary_id,
dispensary_name: product.dispensary_name,
},
current_variants: currentResult.rows.map((v: any) => ({
...v,
price_rec: v.price_rec ? parseFloat(v.price_rec) : null,
price_med: v.price_med ? parseFloat(v.price_med) : null,
price_rec_special: v.price_rec_special ? parseFloat(v.price_rec_special) : null,
})),
history: historyByVariant,
sale_stats: saleStatsResult.rows.reduce((acc: any, row: any) => {
acc[row.option] = {
total_snapshots: parseInt(row.total_snapshots),
times_on_special: parseInt(row.times_on_special),
special_frequency_pct: row.special_frequency_pct ? parseFloat(row.special_frequency_pct) : 0,
avg_discount_pct: row.avg_discount_pct ? parseFloat(row.avg_discount_pct) : null,
min_price: row.min_price ? parseFloat(row.min_price) : null,
max_price: row.max_price ? parseFloat(row.max_price) : null,
avg_price: row.avg_price ? parseFloat(row.avg_price) : null,
};
return acc;
}, {}),
days: daysNum,
});
} catch (error: any) {
console.error('Product history error:', error);
res.status(500).json({ error: 'Failed to fetch product history', message: error.message });
}
});
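// Example request (illustrative; the /api/price-analytics mount path is taken from
// the header comment, product id and option are hypothetical):
//   GET /api/price-analytics/products/1234/history?days=14&option=3.5g
// The response groups snapshots per option under `history`, with per-option
// sale_stats (times_on_special, avg_discount_pct, min/max/avg price).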
// ============================================================
// CURRENT SPECIALS
// ============================================================
/**
* GET /api/price-analytics/specials
* Get all products currently on special
*
* Query params:
* - state: Filter by state code
* - city: Filter by city
* - category: Filter by category
* - min_discount: Minimum discount percentage
* - limit: Max results (default: 100, max: 500)
* - offset: Pagination offset
*/
router.get('/specials', async (req: Request, res: Response) => {
try {
const {
state,
city,
category,
min_discount = '0',
limit = '100',
offset = '0',
} = req.query;
const limitNum = Math.min(parseInt(limit as string, 10) || 100, 500);
const offsetNum = parseInt(offset as string, 10) || 0;
const minDiscountNum = parseFloat(min_discount as string) || 0;
let whereClause = `WHERE pv.is_on_special = TRUE AND pv.in_stock = TRUE`;
const params: any[] = [];
let paramIndex = 1;
if (state) {
whereClause += ` AND d.state = $${paramIndex}`;
params.push(state);
paramIndex++;
}
if (city) {
whereClause += ` AND LOWER(d.city) LIKE LOWER($${paramIndex})`;
params.push(`%${city}%`);
paramIndex++;
}
if (category) {
whereClause += ` AND LOWER(sp.category_raw) = LOWER($${paramIndex})`;
params.push(category);
paramIndex++;
}
// Calculate discount and filter
const discountCalc = `ROUND(((pv.price_rec - pv.price_rec_special) / NULLIF(pv.price_rec, 0)) * 100, 1)`;
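// Worked example (illustrative numbers): price_rec = 50.00, price_rec_special = 35.00
//   discount_percent = ROUND(((50 - 35) / 50) * 100, 1) = 30.0
// NULLIF guards against division by zero when price_rec is 0.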
if (minDiscountNum > 0) {
whereClause += ` AND ${discountCalc} >= $${paramIndex}`;
params.push(minDiscountNum);
paramIndex++;
}
params.push(limitNum, offsetNum);
const { rows: specials } = await pool.query(`
SELECT
pv.id as variant_id,
sp.id as product_id,
sp.name_raw as product_name,
sp.brand_name_raw as brand_name,
sp.category_raw as category,
sp.primary_image_url as image_url,
d.id as dispensary_id,
d.name as dispensary_name,
d.city,
d.state,
pv.option,
pv.price_rec,
pv.price_rec_special,
${discountCalc} as discount_percent,
pv.quantity,
pv.last_seen_at
FROM product_variants pv
JOIN store_products sp ON sp.id = pv.store_product_id
JOIN dispensaries d ON d.id = pv.dispensary_id
${whereClause}
AND pv.price_rec_special IS NOT NULL
AND pv.price_rec_special < pv.price_rec
ORDER BY ${discountCalc} DESC
LIMIT $${paramIndex} OFFSET $${paramIndex + 1}
`, params);
// Get count
const countParams = params.slice(0, -2);
const { rows: countRows } = await pool.query(`
SELECT COUNT(*) as total
FROM product_variants pv
JOIN store_products sp ON sp.id = pv.store_product_id
JOIN dispensaries d ON d.id = pv.dispensary_id
${whereClause}
AND pv.price_rec_special IS NOT NULL
AND pv.price_rec_special < pv.price_rec
`, countParams);
res.json({
specials: specials.map((s: any) => ({
...s,
price_rec: s.price_rec ? parseFloat(s.price_rec) : null,
price_rec_special: s.price_rec_special ? parseFloat(s.price_rec_special) : null,
discount_percent: s.discount_percent ? parseFloat(s.discount_percent) : null,
})),
pagination: {
total: parseInt(countRows[0]?.total || '0', 10),
limit: limitNum,
offset: offsetNum,
has_more: offsetNum + specials.length < parseInt(countRows[0]?.total || '0', 10),
},
});
} catch (error: any) {
console.error('Specials error:', error);
res.status(500).json({ error: 'Failed to fetch specials', message: error.message });
}
});
// ============================================================
// PRICE COMPARISON
// ============================================================
/**
* GET /api/price-analytics/compare
* Compare prices for a product across stores
*
* Query params:
* - name: Product name to search
* - option: Variant option (e.g., "1g", "3.5g")
* - state: Filter by state
* - limit: Max results (default: 50)
*/
router.get('/compare', async (req: Request, res: Response) => {
try {
const { name, option, state, limit = '50' } = req.query;
if (!name) {
return res.status(400).json({ error: 'Product name is required' });
}
const limitNum = Math.min(parseInt(limit as string, 10) || 50, 200);
let whereClause = `WHERE sp.name_raw ILIKE $1 AND pv.in_stock = TRUE`;
const params: any[] = [`%${name}%`];
let paramIndex = 2;
if (option) {
whereClause += ` AND pv.option = $${paramIndex}`;
params.push(option);
paramIndex++;
}
if (state) {
whereClause += ` AND d.state = $${paramIndex}`;
params.push(state);
paramIndex++;
}
params.push(limitNum);
const { rows } = await pool.query(`
SELECT
sp.id as product_id,
sp.name_raw as product_name,
sp.brand_name_raw as brand_name,
sp.category_raw as category,
sp.primary_image_url as image_url,
d.id as dispensary_id,
d.name as dispensary_name,
d.city,
d.state,
pv.option,
pv.price_rec,
pv.price_rec_special,
pv.is_on_special,
pv.quantity,
COALESCE(pv.price_rec_special, pv.price_rec) as effective_price,
RANK() OVER (PARTITION BY pv.option ORDER BY COALESCE(pv.price_rec_special, pv.price_rec) ASC) as price_rank
FROM product_variants pv
JOIN store_products sp ON sp.id = pv.store_product_id
JOIN dispensaries d ON d.id = pv.dispensary_id
${whereClause}
AND (pv.price_rec IS NOT NULL OR pv.price_rec_special IS NOT NULL)
ORDER BY pv.option, effective_price ASC
LIMIT $${paramIndex}
`, params);
// Group by option
const byOption: Record<string, any[]> = {};
for (const row of rows) {
if (!byOption[row.option]) {
byOption[row.option] = [];
}
byOption[row.option].push({
product_id: row.product_id,
product_name: row.product_name,
brand_name: row.brand_name,
category: row.category,
image_url: row.image_url,
dispensary_id: row.dispensary_id,
dispensary_name: row.dispensary_name,
city: row.city,
state: row.state,
price_rec: row.price_rec ? parseFloat(row.price_rec) : null,
price_rec_special: row.price_rec_special ? parseFloat(row.price_rec_special) : null,
effective_price: row.effective_price ? parseFloat(row.effective_price) : null,
is_on_special: row.is_on_special,
quantity: row.quantity,
price_rank: parseInt(row.price_rank),
});
}
// Calculate stats per option
const stats: Record<string, any> = {};
for (const [opt, items] of Object.entries(byOption)) {
const prices = items.map((i: any) => i.effective_price).filter((p: any) => p !== null);
stats[opt] = {
count: items.length,
min_price: Math.min(...prices),
max_price: Math.max(...prices),
avg_price: prices.reduce((a: number, b: number) => a + b, 0) / prices.length,
cheapest_store: items[0]?.dispensary_name,
on_special_count: items.filter((i: any) => i.is_on_special).length,
};
}
res.json({
search_term: name,
results: byOption,
stats,
options: Object.keys(byOption),
});
} catch (error: any) {
console.error('Price compare error:', error);
res.status(500).json({ error: 'Failed to compare prices', message: error.message });
}
});
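// Example request (illustrative; product name, option, and state are hypothetical):
//   GET /api/price-analytics/compare?name=Blue%20Dream&option=3.5g&state=AZ
// Results are grouped by option; within each option, price_rank = 1 marks the
// cheapest store by effective price (special price if present, otherwise regular).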
// ============================================================
// MARKET SUMMARY
// ============================================================
/**
* GET /api/price-analytics/market-summary
* Get overall market analytics summary
*/
router.get('/market-summary', async (req: Request, res: Response) => {
try {
const { state } = req.query;
let stateFilter = '';
const params: any[] = [];
if (state) {
stateFilter = 'WHERE d.state = $1';
params.push(state);
}
// Get variant counts
const variantStats = await pool.query(`
SELECT
COUNT(DISTINCT pv.id) as total_variants,
COUNT(DISTINCT pv.id) FILTER (WHERE pv.is_on_special) as on_special,
COUNT(DISTINCT pv.id) FILTER (WHERE pv.in_stock) as in_stock,
COUNT(DISTINCT pv.store_product_id) as total_products,
COUNT(DISTINCT pv.dispensary_id) as total_stores
FROM product_variants pv
JOIN dispensaries d ON d.id = pv.dispensary_id
${stateFilter}
`, params);
// Get category breakdown
const categoryStats = await pool.query(`
SELECT
sp.category_raw as category,
COUNT(DISTINCT pv.id) as variant_count,
AVG(COALESCE(pv.price_rec_special, pv.price_rec)) as avg_price,
COUNT(DISTINCT pv.id) FILTER (WHERE pv.is_on_special) as on_special_count
FROM product_variants pv
JOIN store_products sp ON sp.id = pv.store_product_id
JOIN dispensaries d ON d.id = pv.dispensary_id
${stateFilter}
GROUP BY sp.category_raw
ORDER BY variant_count DESC
LIMIT 10
`, params);
// Get recent price changes (last 24h)
const recentChanges = await pool.query(`
SELECT COUNT(*) as price_changes_24h
FROM product_variants pv
JOIN dispensaries d ON d.id = pv.dispensary_id
${stateFilter ? stateFilter + ' AND' : 'WHERE'}
pv.last_price_change_at >= NOW() - INTERVAL '24 hours'
`, params);
res.json({
summary: {
total_variants: parseInt(variantStats.rows[0]?.total_variants || '0'),
on_special: parseInt(variantStats.rows[0]?.on_special || '0'),
in_stock: parseInt(variantStats.rows[0]?.in_stock || '0'),
total_products: parseInt(variantStats.rows[0]?.total_products || '0'),
total_stores: parseInt(variantStats.rows[0]?.total_stores || '0'),
price_changes_24h: parseInt(recentChanges.rows[0]?.price_changes_24h || '0'),
},
categories: categoryStats.rows.map((c: any) => ({
category: c.category || 'Unknown',
variant_count: parseInt(c.variant_count),
avg_price: c.avg_price ? parseFloat(c.avg_price).toFixed(2) : null,
on_special_count: parseInt(c.on_special_count),
})),
});
} catch (error: any) {
console.error('Market summary error:', error);
res.status(500).json({ error: 'Failed to fetch market summary', message: error.message });
}
});
export default router;

View File

@@ -1,11 +1,29 @@
import { Router } from 'express';
import { authMiddleware } from '../auth/middleware';
import { pool } from '../db/pool';
import { getImageUrl } from '../utils/minio';
const router = Router();
router.use(authMiddleware);
/**
* Convert local image path to proxy URL
* /images/products/... -> /img/products/...
*/
function getImageUrl(localPath: string): string {
if (!localPath) return '';
// If already a full URL, return as-is
if (localPath.startsWith('http')) return localPath;
// Convert /images/ path to /img/ proxy path
if (localPath.startsWith('/images/')) {
return '/img' + localPath.substring(7);
}
// Handle paths without leading slash
if (localPath.startsWith('images/')) {
return '/img/' + localPath.substring(7);
}
return '/img/' + localPath;
}
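// Illustrative mappings (hypothetical paths):
//   getImageUrl('/images/products/abc.jpg')        -> '/img/products/abc.jpg'
//   getImageUrl('images/products/abc.jpg')         -> '/img/products/abc.jpg'
//   getImageUrl('https://cdn.example.com/abc.jpg') -> unchanged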
// Freshness threshold: data older than this is considered stale
const STALE_THRESHOLD_HOURS = 4;
@@ -101,10 +119,27 @@ router.get('/', async (req, res) => {
const sortDirection = (sort_order as string).toLowerCase() === 'asc' ? 'ASC' : 'DESC';
let query = `
SELECT p.*, s.name as store_name, c.name as category_name
FROM products p
LEFT JOIN stores s ON p.store_id = s.id
LEFT JOIN categories c ON p.category_id = c.id
SELECT
p.id,
p.dispensary_id as store_id,
p.name_raw as name,
p.brand_name_raw as brand,
p.category_raw as category_name,
p.subcategory_raw as subcategory,
p.description,
p.price_rec as price,
p.thc_percent as thc_percentage,
p.cbd_percent as cbd_percentage,
p.strain_type,
p.primary_image_url as image_url,
p.stock_status,
p.stock_status = 'in_stock' as in_stock,
p.created_at,
p.updated_at,
p.last_seen_at,
d.name as store_name
FROM store_products p
LEFT JOIN dispensaries d ON p.dispensary_id = d.id
WHERE 1=1
`;
const params: any[] = [];
@@ -112,61 +147,60 @@ router.get('/', async (req, res) => {
// Store filter
if (store_id) {
query += ` AND p.store_id = $${paramCount}`;
query += ` AND p.dispensary_id = $${paramCount}`;
params.push(store_id);
paramCount++;
}
// Category filter
// Category filter (uses category name now)
if (category_id) {
query += ` AND p.category_id = $${paramCount}`;
query += ` AND p.category_raw = $${paramCount}`;
params.push(category_id);
paramCount++;
}
// Stock filter
if (in_stock !== undefined) {
query += ` AND p.in_stock = $${paramCount}`;
params.push(in_stock === 'true');
paramCount++;
const inStockVal = in_stock === 'true';
query += inStockVal ? ` AND p.stock_status = 'in_stock'` : ` AND p.stock_status != 'in_stock'`;
}
// Search filter
if (search) {
query += ` AND (p.name ILIKE $${paramCount} OR p.brand ILIKE $${paramCount} OR p.description ILIKE $${paramCount})`;
query += ` AND (p.name_raw ILIKE $${paramCount} OR p.brand_name_raw ILIKE $${paramCount} OR p.description ILIKE $${paramCount})`;
params.push(`%${search}%`);
paramCount++;
}
// Brand filter
if (brand) {
query += ` AND p.brand ILIKE $${paramCount}`;
query += ` AND p.brand_name_raw ILIKE $${paramCount}`;
params.push(`%${brand}%`);
paramCount++;
}
// Price range filter
if (min_price) {
query += ` AND p.price >= $${paramCount}`;
query += ` AND p.price_rec >= $${paramCount}`;
params.push(parseFloat(min_price as string));
paramCount++;
}
if (max_price) {
query += ` AND p.price <= $${paramCount}`;
query += ` AND p.price_rec <= $${paramCount}`;
params.push(parseFloat(max_price as string));
paramCount++;
}
// THC range filter
if (min_thc) {
query += ` AND p.thc_percentage >= $${paramCount}`;
query += ` AND p.thc_percent >= $${paramCount}`;
params.push(parseFloat(min_thc as string));
paramCount++;
}
if (max_thc) {
query += ` AND p.thc_percentage <= $${paramCount}`;
query += ` AND p.thc_percent <= $${paramCount}`;
params.push(parseFloat(max_thc as string));
paramCount++;
}
@@ -199,60 +233,59 @@ router.get('/', async (req, res) => {
}
// Get total count (reuse same filters)
let countQuery = `SELECT COUNT(*) FROM products p WHERE 1=1`;
let countQuery = `SELECT COUNT(*) FROM store_products p WHERE 1=1`;
const countParams: any[] = [];
let countParamCount = 1;
if (store_id) {
countQuery += ` AND p.store_id = $${countParamCount}`;
countQuery += ` AND p.dispensary_id = $${countParamCount}`;
countParams.push(store_id);
countParamCount++;
}
if (category_id) {
countQuery += ` AND p.category_id = $${countParamCount}`;
countQuery += ` AND p.category_raw = $${countParamCount}`;
countParams.push(category_id);
countParamCount++;
}
if (in_stock !== undefined) {
countQuery += ` AND p.in_stock = $${countParamCount}`;
countParams.push(in_stock === 'true');
countParamCount++;
const inStockVal = in_stock === 'true';
countQuery += inStockVal ? ` AND p.stock_status = 'in_stock'` : ` AND p.stock_status != 'in_stock'`;
}
if (search) {
countQuery += ` AND (p.name ILIKE $${countParamCount} OR p.brand ILIKE $${countParamCount} OR p.description ILIKE $${countParamCount})`;
countQuery += ` AND (p.name_raw ILIKE $${countParamCount} OR p.brand_name_raw ILIKE $${countParamCount} OR p.description ILIKE $${countParamCount})`;
countParams.push(`%${search}%`);
countParamCount++;
}
if (brand) {
countQuery += ` AND p.brand ILIKE $${countParamCount}`;
countQuery += ` AND p.brand_name_raw ILIKE $${countParamCount}`;
countParams.push(`%${brand}%`);
countParamCount++;
}
if (min_price) {
countQuery += ` AND p.price >= $${countParamCount}`;
countQuery += ` AND p.price_rec >= $${countParamCount}`;
countParams.push(parseFloat(min_price as string));
countParamCount++;
}
if (max_price) {
countQuery += ` AND p.price <= $${countParamCount}`;
countQuery += ` AND p.price_rec <= $${countParamCount}`;
countParams.push(parseFloat(max_price as string));
countParamCount++;
}
if (min_thc) {
countQuery += ` AND p.thc_percentage >= $${countParamCount}`;
countQuery += ` AND p.thc_percent >= $${countParamCount}`;
countParams.push(parseFloat(min_thc as string));
countParamCount++;
}
if (max_thc) {
countQuery += ` AND p.thc_percentage <= $${countParamCount}`;
countQuery += ` AND p.thc_percent <= $${countParamCount}`;
countParams.push(parseFloat(max_thc as string));
countParamCount++;
}
@@ -271,7 +304,7 @@ router.get('/', async (req, res) => {
if (store_id) {
const storeResult = await pool.query(
'SELECT id, name, last_scraped_at FROM stores WHERE id = $1',
'SELECT id, name, last_crawled_at as last_scraped_at FROM dispensaries WHERE id = $1',
[store_id]
);
if (storeResult.rows.length > 0) {
@@ -322,10 +355,27 @@ router.get('/:id', async (req, res) => {
const { fields } = req.query;
const result = await pool.query(`
SELECT p.*, s.name as store_name, c.name as category_name
FROM products p
LEFT JOIN stores s ON p.store_id = s.id
LEFT JOIN categories c ON p.category_id = c.id
SELECT
p.id,
p.dispensary_id as store_id,
p.name_raw as name,
p.brand_name_raw as brand,
p.category_raw as category_name,
p.subcategory_raw as subcategory,
p.description,
p.price_rec as price,
p.thc_percent as thc_percentage,
p.cbd_percent as cbd_percentage,
p.strain_type,
p.primary_image_url as image_url,
p.stock_status,
p.stock_status = 'in_stock' as in_stock,
p.created_at,
p.updated_at,
p.last_seen_at,
d.name as store_name
FROM store_products p
LEFT JOIN dispensaries d ON p.dispensary_id = d.id
WHERE p.id = $1
`, [id]);
@@ -359,18 +409,18 @@ router.get('/meta/brands', async (req, res) => {
const { store_id } = req.query;
let query = `
SELECT DISTINCT brand
FROM products
WHERE brand IS NOT NULL AND brand != ''
SELECT DISTINCT brand_name_raw as brand
FROM store_products
WHERE brand_name_raw IS NOT NULL AND brand_name_raw != ''
`;
const params: any[] = [];
if (store_id) {
query += ' AND store_id = $1';
query += ' AND dispensary_id = $1';
params.push(store_id);
}
query += ' ORDER BY brand';
query += ' ORDER BY brand_name_raw';
const result = await pool.query(query, params);
const brands = result.rows.map((row: { brand: string }) => row.brand);
@@ -389,16 +439,16 @@ router.get('/meta/price-range', async (req, res) => {
let query = `
SELECT
MIN(price) as min_price,
MAX(price) as max_price,
AVG(price) as avg_price
FROM products
WHERE price IS NOT NULL
MIN(price_rec) as min_price,
MAX(price_rec) as max_price,
AVG(price_rec) as avg_price
FROM store_products
WHERE price_rec IS NOT NULL
`;
const params: any[] = [];
if (store_id) {
query += ' AND store_id = $1';
query += ' AND dispensary_id = $1';
params.push(store_id);
}
@@ -415,4 +465,133 @@ router.get('/meta/price-range', async (req, res) => {
}
});
// Get product stats - inventory movement, price history, etc.
router.get('/:id/stats', async (req, res) => {
try {
const { id } = req.params;
// Get current product info
const productResult = await pool.query(`
SELECT id, name_raw as name, stock_quantity, total_quantity_available,
price_rec, price_rec_special, price_med, price_med_special,
first_seen_at, last_seen_at
FROM store_products
WHERE id = $1
`, [id]);
if (productResult.rows.length === 0) {
return res.status(404).json({ error: 'Product not found' });
}
const product = productResult.rows[0];
const currentQty = product.stock_quantity || product.total_quantity_available || 0;
const currentPrice = parseFloat(product.price_rec) || 0;
// Get snapshot history for the last 30 days
const historyResult = await pool.query(`
SELECT
DATE(crawled_at) as date,
AVG(COALESCE(stock_quantity, total_quantity_available, 0)) as avg_quantity,
MIN(COALESCE(stock_quantity, total_quantity_available, 0)) as min_quantity,
MAX(COALESCE(stock_quantity, total_quantity_available, 0)) as max_quantity,
AVG(price_rec) as avg_price,
MIN(price_rec) as min_price,
MAX(price_rec) as max_price,
COUNT(*) as snapshot_count
FROM store_product_snapshots
WHERE store_product_id = $1
AND crawled_at >= NOW() - INTERVAL '30 days'
GROUP BY DATE(crawled_at)
ORDER BY date DESC
`, [id]);
// Calculate inventory movement stats
const history = historyResult.rows;
const today = history[0] || null;
const weekAgo = history.find((h: any) => {
const date = new Date(h.date);
const diff = (Date.now() - date.getTime()) / (1000 * 60 * 60 * 24);
return diff >= 6 && diff <= 8;
});
const monthAgo = history.find((h: any) => {
const date = new Date(h.date);
const diff = (Date.now() - date.getTime()) / (1000 * 60 * 60 * 24);
return diff >= 27 && diff <= 31;
});
// Inventory movement calculations
const inventoryStats = {
current: currentQty,
daily: today ? {
change: currentQty - (parseFloat(today.avg_quantity) || 0),
start: parseFloat(today.avg_quantity) || 0,
end: currentQty
} : null,
weekly: weekAgo ? {
change: currentQty - (parseFloat(weekAgo.avg_quantity) || 0),
start: parseFloat(weekAgo.avg_quantity) || 0,
end: currentQty,
percent_change: weekAgo.avg_quantity > 0
? ((currentQty - parseFloat(weekAgo.avg_quantity)) / parseFloat(weekAgo.avg_quantity) * 100).toFixed(1)
: null
} : null,
monthly: monthAgo ? {
change: currentQty - (parseFloat(monthAgo.avg_quantity) || 0),
start: parseFloat(monthAgo.avg_quantity) || 0,
end: currentQty,
percent_change: monthAgo.avg_quantity > 0
? ((currentQty - parseFloat(monthAgo.avg_quantity)) / parseFloat(monthAgo.avg_quantity) * 100).toFixed(1)
: null
} : null
};
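// Worked example (illustrative): if avg_quantity a week ago was 40 and currentQty is 30,
// weekly.change = -10 and weekly.percent_change = ((30 - 40) / 40 * 100).toFixed(1) = '-25.0'.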
// Price movement calculations
const priceStats = {
current: currentPrice,
weekly: weekAgo ? {
change: currentPrice - (parseFloat(weekAgo.avg_price) || 0),
start: parseFloat(weekAgo.avg_price) || 0,
end: currentPrice,
percent_change: weekAgo.avg_price > 0
? ((currentPrice - parseFloat(weekAgo.avg_price)) / parseFloat(weekAgo.avg_price) * 100).toFixed(1)
: null
} : null,
monthly: monthAgo ? {
change: currentPrice - (parseFloat(monthAgo.avg_price) || 0),
start: parseFloat(monthAgo.avg_price) || 0,
end: currentPrice,
percent_change: monthAgo.avg_price > 0
? ((currentPrice - parseFloat(monthAgo.avg_price)) / parseFloat(monthAgo.avg_price) * 100).toFixed(1)
: null
} : null
};
// Get total snapshots count
const snapshotCountResult = await pool.query(`
SELECT COUNT(*) as total_snapshots
FROM store_product_snapshots
WHERE store_product_id = $1
`, [id]);
res.json({
product_id: parseInt(id),
product_name: product.name,
first_seen: product.first_seen_at,
last_seen: product.last_seen_at,
total_snapshots: parseInt(snapshotCountResult.rows[0].total_snapshots),
inventory: inventoryStats,
price: priceStats,
history: history.slice(0, 30).map((h: any) => ({
date: h.date,
avg_quantity: parseFloat(h.avg_quantity) || 0,
avg_price: parseFloat(h.avg_price) || 0,
snapshots: parseInt(h.snapshot_count)
}))
});
} catch (error) {
console.error('Error fetching product stats:', error);
res.status(500).json({ error: 'Failed to fetch product stats' });
}
});
export default router;

View File

@@ -2,7 +2,7 @@ import { Router } from 'express';
import { authMiddleware, requireRole } from '../auth/middleware';
import { pool } from '../db/pool';
import { testProxy, addProxy, addProxiesFromList } from '../services/proxy';
import { createProxyTestJob, getProxyTestJob, getActiveProxyTestJob, cancelProxyTestJob } from '../services/proxyTestQueue';
import { createProxyTestJob, getProxyTestJob, getActiveProxyTestJob, cancelProxyTestJob, ProxyTestMode } from '../services/proxyTestQueue';
const router = Router();
router.use(authMiddleware);
@@ -11,9 +11,10 @@ router.use(authMiddleware);
router.get('/', async (req, res) => {
try {
const result = await pool.query(`
SELECT id, host, port, protocol, active, is_anonymous,
SELECT id, host, port, protocol, username, password, active, is_anonymous,
last_tested_at, test_result, response_time_ms, created_at,
city, state, country, country_code, location_updated_at
city, state, country, country_code, location_updated_at,
COALESCE(max_connections, 1) as max_connections
FROM proxies
ORDER BY created_at DESC
`);
@@ -166,13 +167,39 @@ router.post('/:id/test', requireRole('superadmin', 'admin'), async (req, res) =>
});
// Start proxy test job
// Query params: mode=all|failed|inactive, concurrency=10
router.post('/test-all', requireRole('superadmin', 'admin'), async (req, res) => {
try {
const jobId = await createProxyTestJob();
res.json({ jobId, message: 'Proxy test job started' });
} catch (error) {
const mode = (req.query.mode as ProxyTestMode) || 'all';
const concurrency = parseInt(req.query.concurrency as string) || 10;
// Validate mode
if (!['all', 'failed', 'inactive'].includes(mode)) {
return res.status(400).json({ error: 'Invalid mode. Use: all, failed, or inactive' });
}
// Validate concurrency (1-50)
if (concurrency < 1 || concurrency > 50) {
return res.status(400).json({ error: 'Concurrency must be between 1 and 50' });
}
const { jobId, totalProxies } = await createProxyTestJob(mode, concurrency);
res.json({ jobId, total: totalProxies, mode, concurrency, message: `Proxy test job started (mode: ${mode}, concurrency: ${concurrency})` });
} catch (error: any) {
console.error('Error starting proxy test job:', error);
res.status(500).json({ error: 'Failed to start proxy test job' });
res.status(500).json({ error: error.message || 'Failed to start proxy test job' });
}
});
// Convenience endpoint: Test only failed proxies
router.post('/test-failed', requireRole('superadmin', 'admin'), async (req, res) => {
try {
const concurrency = parseInt(req.query.concurrency as string) || 10;
const { jobId, totalProxies } = await createProxyTestJob('failed', concurrency);
res.json({ jobId, total: totalProxies, mode: 'failed', concurrency, message: 'Retesting failed proxies...' });
} catch (error: any) {
console.error('Error starting failed proxy test:', error);
res.status(500).json({ error: error.message || 'Failed to start proxy test job' });
}
});
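// Example calls (illustrative; the mount path for this router is not shown in the diff):
//   POST <proxies-base>/test-all?mode=failed&concurrency=20  -> retest only failed proxies
//   POST <proxies-base>/test-failed?concurrency=20            -> convenience alias for mode=failed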
@@ -197,7 +224,7 @@ router.post('/test-job/:jobId/cancel', requireRole('superadmin', 'admin'), async
router.put('/:id', requireRole('superadmin', 'admin'), async (req, res) => {
try {
const { id } = req.params;
const { host, port, protocol, username, password, active } = req.body;
const { host, port, protocol, username, password, active, max_connections } = req.body;
const result = await pool.query(`
UPDATE proxies
@@ -207,10 +234,11 @@ router.put('/:id', requireRole('superadmin', 'admin'), async (req, res) => {
username = COALESCE($4, username),
password = COALESCE($5, password),
active = COALESCE($6, active),
max_connections = COALESCE($7, max_connections),
updated_at = CURRENT_TIMESTAMP
WHERE id = $7
WHERE id = $8
RETURNING *
`, [host, port, protocol, username, password, active, id]);
`, [host, port, protocol, username, password, active, max_connections, id]);
if (result.rows.length === 0) {
return res.status(404).json({ error: 'Proxy not found' });

File diff suppressed because it is too large

View File

@@ -92,9 +92,9 @@ router.get('/history', async (req, res) => {
dcj.error_message,
(
SELECT COUNT(*)
FROM products p
WHERE p.dispensary_id = d.id
AND p.last_seen_at >= NOW() - INTERVAL '7 days'
FROM store_products sp
WHERE sp.dispensary_id = d.id
AND sp.last_seen_at >= NOW() - INTERVAL '7 days'
) as product_count
FROM dispensary_crawl_jobs dcj
JOIN dispensaries d ON d.id = dcj.dispensary_id

View File

@@ -10,6 +10,25 @@ import { getPool } from '../db/pool';
import { authMiddleware } from '../auth/middleware';
import { ContentValidator } from '../utils/ContentValidator';
import { generateSeoPageWithClaude } from '../services/seoGenerator';
import {
getAllSettings,
setSetting,
setMultipleSettings,
resetToDefaults,
ensureSettingsExist,
DEFAULT_SETTINGS,
} from '../seo/settings';
import {
applyTemplateVariables,
getTemplateForPageType,
generatePreview,
generatePageContent,
regenerateContent,
getAllTemplates,
validateTemplate,
MOCK_DATA,
PageType,
} from '../seo/template-engine';
const router = Router();
@@ -160,10 +179,12 @@ router.get('/pages', authMiddleware, async (req: Request, res: Response) => {
const metricsResult = await pool.query(`
SELECT COUNT(DISTINCT d.id) as dispensary_count,
COUNT(DISTINCT p.id) as product_count,
COUNT(DISTINCT p.brand_name) as brand_count
COUNT(DISTINCT p.brand_name_raw) as brand_count
FROM dispensaries d
LEFT JOIN dutchie_products p ON p.dispensary_id = d.id
LEFT JOIN store_products p ON p.dispensary_id = d.id
WHERE d.state = $1
AND d.menu_type = 'dutchie'
AND d.platform_dispensary_id IS NOT NULL
`, [stateCode]);
const m = metricsResult.rows[0];
metrics = {
@@ -199,11 +220,13 @@ router.post('/sync-state-pages', authMiddleware, async (req: Request, res: Respo
try {
const pool = getPool();
// Get all states that have dispensaries
// Get all states that have active/crawlable dispensaries
const statesResult = await pool.query(`
SELECT DISTINCT state, COUNT(*) as dispensary_count
FROM dispensaries
WHERE state IS NOT NULL AND state != ''
AND menu_type = 'dutchie'
AND platform_dispensary_id IS NOT NULL
GROUP BY state
HAVING COUNT(*) > 0
ORDER BY state
@@ -245,6 +268,45 @@ router.post('/sync-state-pages', authMiddleware, async (req: Request, res: Respo
}
});
/**
* GET /api/seo/state-metrics - Get all state metrics for SEO dashboard
*/
router.get('/state-metrics', authMiddleware, async (req: Request, res: Response) => {
try {
const pool = getPool();
const result = await pool.query(`
SELECT
d.state as state_code,
COALESCE(s.name, d.state) as state_name,
COUNT(DISTINCT d.id) as dispensary_count,
COUNT(DISTINCT sp.id) as product_count,
COUNT(DISTINCT sp.brand_name_raw) FILTER (WHERE sp.brand_name_raw IS NOT NULL) as brand_count
FROM dispensaries d
LEFT JOIN states s ON d.state = s.code
LEFT JOIN store_products sp ON sp.dispensary_id = d.id
WHERE d.state IS NOT NULL AND d.state != ''
AND d.menu_type = 'dutchie'
AND d.platform_dispensary_id IS NOT NULL
GROUP BY d.state, s.name
ORDER BY dispensary_count DESC
`);
const states = result.rows.map(row => ({
stateCode: row.state_code,
stateName: row.state_name || row.state_code,
dispensaryCount: parseInt(row.dispensary_count, 10) || 0,
productCount: parseInt(row.product_count, 10) || 0,
brandCount: parseInt(row.brand_count, 10) || 0,
}));
res.json({ states });
} catch (error: any) {
console.error('[SEO] Error fetching state metrics:', error.message);
res.status(500).json({ error: 'Failed to fetch state metrics' });
}
});
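A sample payload from the new endpoint, with illustrative numbers only:
// GET /api/seo/state-metrics
// {
//   "states": [
//     { "stateCode": "AZ", "stateName": "Arizona",
//       "dispensaryCount": 150, "productCount": 42000, "brandCount": 310 }
//   ]
// }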
/**
* GET /api/seo/state/:stateCode - State SEO data with metrics
*/
@@ -257,16 +319,20 @@ router.get('/state/:stateCode', async (req: Request, res: Response) => {
const metricsResult = await pool.query(`
SELECT COUNT(DISTINCT d.id) as dispensary_count,
COUNT(DISTINCT p.id) as product_count,
COUNT(DISTINCT p.brand_name) as brand_count
COUNT(DISTINCT p.brand_name_raw) as brand_count
FROM dispensaries d
LEFT JOIN dutchie_products p ON p.dispensary_id = d.id
WHERE d.state = $1`, [code]);
LEFT JOIN store_products p ON p.dispensary_id = d.id
WHERE d.state = $1
AND d.menu_type = 'dutchie'
AND d.platform_dispensary_id IS NOT NULL`, [code]);
const brandsResult = await pool.query(`
SELECT brand_name, COUNT(*) as product_count
FROM dutchie_products p JOIN dispensaries d ON p.dispensary_id = d.id
WHERE d.state = $1 AND p.brand_name IS NOT NULL
GROUP BY brand_name ORDER BY product_count DESC LIMIT 10`, [code]);
SELECT brand_name_raw as brand_name, COUNT(*) as product_count
FROM store_products p JOIN dispensaries d ON p.dispensary_id = d.id
WHERE d.state = $1 AND p.brand_name_raw IS NOT NULL
AND d.menu_type = 'dutchie'
AND d.platform_dispensary_id IS NOT NULL
GROUP BY brand_name_raw ORDER BY product_count DESC LIMIT 10`, [code]);
const metrics = metricsResult.rows[0];
const response = ContentValidator.sanitizeContent({
@@ -359,4 +425,259 @@ router.get('/public/content', async (req: Request, res: Response) => {
}
});
// ============================================================================
// SEO Settings Endpoints
// ============================================================================
/**
* GET /api/seo/settings - Get all SEO settings
*/
router.get('/settings', authMiddleware, async (req: Request, res: Response) => {
try {
// Ensure settings exist on first access
await ensureSettingsExist();
const settings = await getAllSettings();
res.json({ settings });
} catch (error: any) {
console.error('[SEO] Error fetching settings:', error.message);
res.status(500).json({ error: 'Failed to fetch SEO settings' });
}
});
/**
* POST /api/seo/settings - Save a single setting
*/
router.post('/settings', authMiddleware, async (req: Request, res: Response) => {
try {
const { key, value } = req.body;
if (!key || typeof key !== 'string') {
return res.status(400).json({ error: 'key is required' });
}
if (value === undefined) {
return res.status(400).json({ error: 'value is required' });
}
await setSetting(key, value);
res.json({ success: true, key, value });
} catch (error: any) {
console.error('[SEO] Error saving setting:', error.message);
res.status(500).json({ error: 'Failed to save SEO setting' });
}
});
/**
* POST /api/seo/settings/bulk - Save multiple settings at once
*/
router.post('/settings/bulk', authMiddleware, async (req: Request, res: Response) => {
try {
const { settings } = req.body;
if (!settings || typeof settings !== 'object') {
return res.status(400).json({ error: 'settings object is required' });
}
await setMultipleSettings(settings);
res.json({ success: true, count: Object.keys(settings).length });
} catch (error: any) {
console.error('[SEO] Error saving bulk settings:', error.message);
res.status(500).json({ error: 'Failed to save SEO settings' });
}
});
/**
* POST /api/seo/settings/reset - Reset all settings to defaults
*/
router.post('/settings/reset', authMiddleware, async (req: Request, res: Response) => {
try {
const settings = await resetToDefaults();
res.json({
success: true,
message: 'Settings reset to defaults',
settings,
});
} catch (error: any) {
console.error('[SEO] Error resetting settings:', error.message);
res.status(500).json({ error: 'Failed to reset SEO settings' });
}
});
/**
* GET /api/seo/settings/defaults - Get default settings (without modifying DB)
*/
router.get('/settings/defaults', authMiddleware, async (req: Request, res: Response) => {
res.json({ settings: DEFAULT_SETTINGS });
});
/**
 * POST /api/seo/settings/preview - Preview merged prompt with sample variables
*/
router.post('/settings/preview', authMiddleware, async (req: Request, res: Response) => {
try {
const { template, variables } = req.body;
if (!template || typeof template !== 'string') {
return res.status(400).json({ error: 'template is required' });
}
// Sample variables for preview
const sampleVariables: Record<string, string> = {
page_type: 'state',
subject: 'Arizona Dispensaries',
focus_areas: 'local stores, product variety, pricing',
tone: 'informational',
length: 'medium',
state_name: 'Arizona',
state_code: 'AZ',
state_code_lower: 'az',
dispensary_count: '150',
improvement_areas: 'SEO keywords, local relevance',
...variables,
};
let preview = template;
for (const [key, value] of Object.entries(sampleVariables)) {
preview = preview.replace(new RegExp(`{{${key}}}`, 'g'), value);
}
res.json({ preview, variables: sampleVariables });
} catch (error: any) {
console.error('[SEO] Error generating preview:', error.message);
res.status(500).json({ error: 'Failed to generate preview' });
}
});
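To make the substitution concrete, a standalone sketch of the same {{variable}} replacement loop used by the preview handler (the template string and values here are made up):
const template = 'Top dispensaries in {{state_name}} ({{state_code}})';
const vars: Record<string, string> = { state_name: 'Arizona', state_code: 'AZ' };
let preview = template;
for (const [key, value] of Object.entries(vars)) {
  // Global replace, matching the handler above
  preview = preview.replace(new RegExp(`{{${key}}}`, 'g'), value);
}
// preview === 'Top dispensaries in Arizona (AZ)'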
// ============================================================================
// Template Library Endpoints
// ============================================================================
/**
* GET /api/seo/templates - Get all templates with metadata
*/
router.get('/templates', authMiddleware, async (req: Request, res: Response) => {
try {
const templates = await getAllTemplates();
res.json({ templates });
} catch (error: any) {
console.error('[SEO] Error fetching templates:', error.message);
res.status(500).json({ error: 'Failed to fetch templates' });
}
});
/**
* POST /api/seo/templates/preview - Preview a template with mock data by page type
*/
router.post('/templates/preview', authMiddleware, async (req: Request, res: Response) => {
try {
const { pageType, customTemplate } = req.body;
if (!pageType || typeof pageType !== 'string') {
return res.status(400).json({ error: 'pageType is required' });
}
const result = await generatePreview(pageType, customTemplate);
res.json(result);
} catch (error: any) {
console.error('[SEO] Error generating template preview:', error.message);
res.status(500).json({ error: 'Failed to generate template preview' });
}
});
/**
* POST /api/seo/templates/validate - Validate a template string
*/
router.post('/templates/validate', authMiddleware, async (req: Request, res: Response) => {
try {
const { template } = req.body;
if (!template || typeof template !== 'string') {
return res.status(400).json({ error: 'template is required' });
}
const validation = validateTemplate(template);
res.json(validation);
} catch (error: any) {
console.error('[SEO] Error validating template:', error.message);
res.status(500).json({ error: 'Failed to validate template' });
}
});
/**
* POST /api/seo/templates/generate - Generate content using a template
*/
router.post('/templates/generate', authMiddleware, async (req: Request, res: Response) => {
try {
const { pageType, data } = req.body;
if (!pageType || typeof pageType !== 'string') {
return res.status(400).json({ error: 'pageType is required' });
}
if (!data || typeof data !== 'object') {
return res.status(400).json({ error: 'data object is required' });
}
const result = await generatePageContent(pageType, data);
res.json(result);
} catch (error: any) {
console.error('[SEO] Error generating from template:', error.message);
res.status(500).json({ error: 'Failed to generate content from template' });
}
});
/**
* POST /api/seo/templates/regenerate - Regenerate content with improvements
*/
router.post('/templates/regenerate', authMiddleware, async (req: Request, res: Response) => {
try {
const { pageType, originalContent, newData, improvementAreas } = req.body;
if (!pageType || typeof pageType !== 'string') {
return res.status(400).json({ error: 'pageType is required' });
}
if (!originalContent || typeof originalContent !== 'string') {
return res.status(400).json({ error: 'originalContent is required' });
}
const result = await regenerateContent(
pageType,
originalContent,
newData || {},
improvementAreas
);
res.json(result);
} catch (error: any) {
console.error('[SEO] Error regenerating content:', error.message);
res.status(500).json({ error: 'Failed to regenerate content' });
}
});
/**
* GET /api/seo/templates/variables/:pageType - Get available variables for a page type
*/
router.get('/templates/variables/:pageType', authMiddleware, async (req: Request, res: Response) => {
try {
const { pageType } = req.params;
const normalizedType = (pageType?.toLowerCase().trim() || 'state') as PageType;
const mockData = MOCK_DATA[normalizedType] || MOCK_DATA.state;
res.json({
pageType: normalizedType,
variables: Object.keys(mockData),
sampleValues: mockData,
});
} catch (error: any) {
console.error('[SEO] Error fetching template variables:', error.message);
res.status(500).json({ error: 'Failed to fetch template variables' });
}
});
export default router;
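A minimal sketch of driving the template endpoints from an admin client; the api() helper and the `valid` field on the validation response are assumptions, not defined in this diff:
// Validate a custom template, then preview it with the mock data for a page type.
const validation = await api('POST', '/api/seo/templates/validate', { template: myTemplate });
if (validation.valid) {
  const preview = await api('POST', '/api/seo/templates/preview', {
    pageType: 'state',
    customTemplate: myTemplate,
  });
  console.log(preview);
}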

View File

@@ -78,6 +78,60 @@ router.put('/:key', requireRole('superadmin', 'admin'), async (req, res) => {
}
});
// Test AI provider connection
router.post('/test-ai', requireRole('superadmin', 'admin'), async (req, res) => {
try {
const { provider, apiKey } = req.body;
if (!provider || !apiKey) {
return res.status(400).json({ success: false, error: 'Provider and API key required' });
}
if (provider === 'anthropic') {
// Test Anthropic API
const response = await fetch('https://api.anthropic.com/v1/messages', {
method: 'POST',
headers: {
'Content-Type': 'application/json',
'x-api-key': apiKey,
'anthropic-version': '2023-06-01'
},
body: JSON.stringify({
model: 'claude-3-haiku-20240307',
max_tokens: 10,
messages: [{ role: 'user', content: 'Hi' }]
})
});
if (response.ok) {
res.json({ success: true, model: 'claude-3-haiku-20240307' });
} else {
const error = await response.json().catch(() => ({ error: { message: 'Unknown error' } }));
res.json({ success: false, error: error.error?.message || 'Invalid API key' });
}
} else if (provider === 'openai') {
// Test OpenAI API
const response = await fetch('https://api.openai.com/v1/models', {
headers: {
'Authorization': `Bearer ${apiKey}`
}
});
if (response.ok) {
res.json({ success: true, model: 'gpt-4' });
} else {
const error = await response.json().catch(() => ({ error: { message: 'Unknown error' } }));
res.json({ success: false, error: error.error?.message || 'Invalid API key' });
}
} else {
res.status(400).json({ success: false, error: 'Unknown provider' });
}
} catch (error: any) {
console.error('Error testing AI connection:', error);
res.json({ success: false, error: error.message || 'Connection failed' });
}
});
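For manual testing, a hedged example of calling the connection test (the /api/settings mount point is an assumption; this hunk does not show where the router is mounted):
// Responds { success: true, model } for a valid key, { success: false, error } otherwise.
const res = await fetch('/api/settings/test-ai', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({ provider: 'anthropic', apiKey: process.env.ANTHROPIC_API_KEY }),
});
console.log(await res.json());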
// Update multiple settings at once
router.put('/', requireRole('superadmin', 'admin'), async (req, res) => {
try {

View File

@@ -70,7 +70,7 @@ function detectProvider(menuUrl: string | null): string {
// Get all stores (from dispensaries table)
router.get('/', async (req, res) => {
try {
const { city, state, menu_type } = req.query;
const { city, state, menu_type, crawl_enabled, dutchie_verified } = req.query;
let query = `
SELECT
@@ -79,18 +79,36 @@ router.get('/', async (req, res) => {
slug,
city,
state,
address,
zip,
address1,
address2,
zipcode,
phone,
website,
email,
latitude,
longitude,
timezone,
menu_url,
menu_type,
platform,
platform_dispensary_id,
c_name,
chain_slug,
enterprise_id,
description,
logo_image,
banner_image,
offer_pickup,
offer_delivery,
offer_curbside_pickup,
is_medical,
is_recreational,
status,
country,
product_count,
last_crawl_at,
crawl_enabled,
dutchie_verified,
created_at,
updated_at
FROM dispensaries
@@ -99,21 +117,45 @@ router.get('/', async (req, res) => {
const params: any[] = [];
const conditions: string[] = [];
// Filter by city (partial match)
if (city) {
conditions.push(`city ILIKE $${params.length + 1}`);
params.push(city);
params.push(`%${city}%`);
}
// Filter by state
if (state) {
conditions.push(`state = $${params.length + 1}`);
params.push(state);
}
// Filter by menu_type
if (menu_type) {
conditions.push(`menu_type = $${params.length + 1}`);
params.push(menu_type);
}
// Filter by crawl_enabled - defaults to showing only enabled
if (crawl_enabled === 'false' || crawl_enabled === '0') {
// Explicitly show disabled only
conditions.push(`(crawl_enabled = false OR crawl_enabled IS NULL)`);
} else if (crawl_enabled === 'all') {
// Show all (no filter)
} else {
// Default: show only enabled
conditions.push(`crawl_enabled = true`);
}
// Filter by dutchie_verified
if (dutchie_verified !== undefined) {
const verified = dutchie_verified === 'true' || dutchie_verified === '1';
if (verified) {
conditions.push(`dutchie_verified = true`);
} else {
conditions.push(`(dutchie_verified = false OR dutchie_verified IS NULL)`);
}
}
if (conditions.length > 0) {
query += ` WHERE ${conditions.join(' AND ')}`;
}
@@ -129,7 +171,7 @@ router.get('/', async (req, res) => {
...calculateFreshness(row.last_crawl_at)
}));
res.json({ stores });
res.json({ stores, total: result.rowCount });
} catch (error) {
console.error('Error fetching stores:', error);
res.status(500).json({ error: 'Failed to fetch stores' });
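Illustrative requests against the extended listing endpoint (assuming the router is mounted at /api/stores):
// Default: only crawl-enabled stores in a state
// GET /api/stores?state=AZ
// Include disabled stores as well
// GET /api/stores?state=AZ&crawl_enabled=all
// Only stores not yet verified on Dutchie
// GET /api/stores?dutchie_verified=false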
@@ -148,18 +190,33 @@ router.get('/:id', async (req, res) => {
slug,
city,
state,
address,
zip,
address1,
address2,
zipcode,
phone,
website,
email,
dba_name,
company_name,
latitude,
longitude,
timezone,
menu_url,
menu_type,
platform,
platform_dispensary_id,
c_name,
chain_slug,
enterprise_id,
description,
logo_image,
banner_image,
offer_pickup,
offer_delivery,
offer_curbside_pickup,
is_medical,
is_recreational,
status,
country,
product_count,
last_crawl_at,
raw_metadata,
@@ -203,16 +260,32 @@ router.post('/', requireRole('superadmin', 'admin'), async (req, res) => {
slug,
city,
state,
address,
zip,
address1,
address2,
zipcode,
phone,
website,
email,
menu_url,
menu_type,
platform,
platform_dispensary_id,
c_name,
chain_slug,
enterprise_id,
latitude,
longitude
longitude,
timezone,
description,
logo_image,
banner_image,
offer_pickup,
offer_delivery,
offer_curbside_pickup,
is_medical,
is_recreational,
status,
country
} = req.body;
if (!name || !slug || !city || !state) {
@@ -221,16 +294,19 @@ router.post('/', requireRole('superadmin', 'admin'), async (req, res) => {
const result = await pool.query(`
INSERT INTO dispensaries (
name, slug, city, state, address, zip, phone, website,
menu_url, menu_type, platform, platform_dispensary_id,
latitude, longitude, created_at, updated_at
name, slug, city, state, address1, address2, zipcode, phone, website, email,
menu_url, menu_type, platform, platform_dispensary_id, c_name, chain_slug, enterprise_id,
latitude, longitude, timezone, description, logo_image, banner_image,
offer_pickup, offer_delivery, offer_curbside_pickup, is_medical, is_recreational, status, country,
created_at, updated_at
)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20, $21, $22, $23, $24, $25, $26, $27, $28, $29, $30, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP)
RETURNING *
`, [
name, slug, city, state, address, zip, phone, website,
menu_url, menu_type, platform || 'dutchie', platform_dispensary_id,
latitude, longitude
name, slug, city, state, address1, address2, zipcode, phone, website, email,
menu_url, menu_type, platform || 'dutchie', platform_dispensary_id, c_name, chain_slug, enterprise_id,
latitude, longitude, timezone, description, logo_image, banner_image,
offer_pickup, offer_delivery, offer_curbside_pickup, is_medical, is_recreational, status, country || 'United States'
]);
res.status(201).json(result.rows[0]);
@@ -253,16 +329,32 @@ router.put('/:id', requireRole('superadmin', 'admin'), async (req, res) => {
slug,
city,
state,
address,
zip,
address1,
address2,
zipcode,
phone,
website,
email,
menu_url,
menu_type,
platform,
platform_dispensary_id,
c_name,
chain_slug,
enterprise_id,
latitude,
longitude
longitude,
timezone,
description,
logo_image,
banner_image,
offer_pickup,
offer_delivery,
offer_curbside_pickup,
is_medical,
is_recreational,
status,
country
} = req.body;
const result = await pool.query(`
@@ -272,23 +364,40 @@ router.put('/:id', requireRole('superadmin', 'admin'), async (req, res) => {
slug = COALESCE($2, slug),
city = COALESCE($3, city),
state = COALESCE($4, state),
address = COALESCE($5, address),
zip = COALESCE($6, zip),
phone = COALESCE($7, phone),
website = COALESCE($8, website),
menu_url = COALESCE($9, menu_url),
menu_type = COALESCE($10, menu_type),
platform = COALESCE($11, platform),
platform_dispensary_id = COALESCE($12, platform_dispensary_id),
latitude = COALESCE($13, latitude),
longitude = COALESCE($14, longitude),
address1 = COALESCE($5, address1),
address2 = COALESCE($6, address2),
zipcode = COALESCE($7, zipcode),
phone = COALESCE($8, phone),
website = COALESCE($9, website),
email = COALESCE($10, email),
menu_url = COALESCE($11, menu_url),
menu_type = COALESCE($12, menu_type),
platform = COALESCE($13, platform),
platform_dispensary_id = COALESCE($14, platform_dispensary_id),
c_name = COALESCE($15, c_name),
chain_slug = COALESCE($16, chain_slug),
enterprise_id = COALESCE($17, enterprise_id),
latitude = COALESCE($18, latitude),
longitude = COALESCE($19, longitude),
timezone = COALESCE($20, timezone),
description = COALESCE($21, description),
logo_image = COALESCE($22, logo_image),
banner_image = COALESCE($23, banner_image),
offer_pickup = COALESCE($24, offer_pickup),
offer_delivery = COALESCE($25, offer_delivery),
offer_curbside_pickup = COALESCE($26, offer_curbside_pickup),
is_medical = COALESCE($27, is_medical),
is_recreational = COALESCE($28, is_recreational),
status = COALESCE($29, status),
country = COALESCE($30, country),
updated_at = CURRENT_TIMESTAMP
WHERE id = $15
WHERE id = $31
RETURNING *
`, [
name, slug, city, state, address, zip, phone, website,
menu_url, menu_type, platform, platform_dispensary_id,
latitude, longitude, id
name, slug, city, state, address1, address2, zipcode, phone, website, email,
menu_url, menu_type, platform, platform_dispensary_id, c_name, chain_slug, enterprise_id,
latitude, longitude, timezone, description, logo_image, banner_image,
offer_pickup, offer_delivery, offer_curbside_pickup, is_medical, is_recreational, status, country, id
]);
if (result.rows.length === 0) {
@@ -320,28 +429,49 @@ router.delete('/:id', requireRole('superadmin'), async (req, res) => {
}
});
// Get products for a store (uses dutchie_products table)
// Get products for a store (uses store_products via v_products view with snapshot pricing)
router.get('/:id/products', async (req, res) => {
try {
const { id } = req.params;
const result = await pool.query(`
SELECT
id,
name,
brand_name,
type,
subcategory,
stock_status,
thc_content,
cbd_content,
primary_image_url,
external_product_id,
created_at,
updated_at
FROM dutchie_products
WHERE dispensary_id = $1
ORDER BY name
p.id,
p.name,
p.brand_name,
p.type,
p.subcategory,
p.strain_type,
p.stock_status,
p.thc as thc_content,
p.cbd as cbd_content,
sp.description,
sp.total_quantity_available as quantity,
p.primary_image_url,
p.external_product_id,
p.created_at,
p.updated_at,
COALESCE(snap.rec_min_price_cents, 0)::numeric / 100.0 as regular_price,
CASE WHEN snap.rec_min_special_price_cents > 0
THEN snap.rec_min_special_price_cents::numeric / 100.0
ELSE NULL END as sale_price,
COALESCE(snap.med_min_price_cents, 0)::numeric / 100.0 as med_price,
CASE WHEN snap.med_min_special_price_cents > 0
THEN snap.med_min_special_price_cents::numeric / 100.0
ELSE NULL END as med_sale_price,
snap.special as on_special
FROM v_products p
JOIN store_products sp ON sp.id = p.id
LEFT JOIN LATERAL (
SELECT rec_min_price_cents, rec_min_special_price_cents,
med_min_price_cents, med_min_special_price_cents, special
FROM v_product_snapshots vps
WHERE vps.store_product_id = p.id
ORDER BY vps.crawled_at DESC
LIMIT 1
) snap ON true
WHERE p.dispensary_id = $1
ORDER BY p.name
`, [id]);
res.json({ products: result.rows });
@@ -351,6 +481,55 @@ router.get('/:id/products', async (req, res) => {
}
});
// Get specials for a store (products with sale prices or on_special flag)
router.get('/:id/specials', async (req, res) => {
try {
const { id } = req.params;
const result = await pool.query(`
SELECT
p.id,
p.name,
p.brand_name,
p.type,
p.subcategory,
p.strain_type,
p.stock_status,
p.thc as thc_content,
p.cbd as cbd_content,
sp.description,
sp.total_quantity_available as quantity,
p.primary_image_url,
p.external_product_id,
p.created_at,
p.updated_at,
COALESCE(snap.rec_min_price_cents, 0)::numeric / 100.0 as regular_price,
snap.rec_min_special_price_cents::numeric / 100.0 as sale_price,
COALESCE(snap.med_min_price_cents, 0)::numeric / 100.0 as med_price,
snap.med_min_special_price_cents::numeric / 100.0 as med_sale_price,
true as on_special
FROM v_products p
JOIN store_products sp ON sp.id = p.id
INNER JOIN LATERAL (
SELECT rec_min_price_cents, rec_min_special_price_cents,
med_min_price_cents, med_min_special_price_cents, special
FROM v_product_snapshots vps
WHERE vps.store_product_id = p.id
AND (vps.special = true OR vps.rec_min_special_price_cents > 0 OR vps.med_min_special_price_cents > 0)
ORDER BY vps.crawled_at DESC
LIMIT 1
) snap ON true
WHERE p.dispensary_id = $1
ORDER BY p.name
`, [id]);
res.json({ specials: result.rows });
} catch (error) {
console.error('Error fetching store specials:', error);
res.status(500).json({ error: 'Failed to fetch specials' });
}
});
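Both routes above resolve the latest snapshot per product via the LATERAL subquery and convert cents to dollars; a response row might look like this (values are illustrative):
// {
//   "id": 123, "name": "Example Flower 3.5g", "brand_name": "Example Brand",
//   "regular_price": 35.00, "sale_price": 29.75,
//   "med_price": 32.00, "med_sale_price": null,
//   "on_special": true, "quantity": 14
// }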
// Get brands for a store
router.get('/:id/brands', async (req, res) => {
try {
@@ -358,7 +537,7 @@ router.get('/:id/brands', async (req, res) => {
const result = await pool.query(`
SELECT DISTINCT brand_name as name, COUNT(*) as product_count
FROM dutchie_products
FROM v_products
WHERE dispensary_id = $1 AND brand_name IS NOT NULL
GROUP BY brand_name
ORDER BY product_count DESC, brand_name

backend/src/routes/tasks.ts (new file, 595 lines)
View File

@@ -0,0 +1,595 @@
/**
* Task Queue API Routes
*
* Endpoints for managing worker tasks, viewing capacity metrics,
* and generating batch tasks.
*/
import { Router, Request, Response } from 'express';
import {
taskService,
TaskRole,
TaskStatus,
TaskFilter,
} from '../tasks/task-service';
import { pool } from '../db/pool';
const router = Router();
/**
* GET /api/tasks
* List tasks with optional filters
*
* Query params:
* - role: Filter by role
* - status: Filter by status (comma-separated for multiple)
* - dispensary_id: Filter by dispensary
* - worker_id: Filter by worker
* - limit: Max results (default 100)
* - offset: Pagination offset
*/
router.get('/', async (req: Request, res: Response) => {
try {
const filter: TaskFilter = {};
if (req.query.role) {
filter.role = req.query.role as TaskRole;
}
if (req.query.status) {
const statuses = (req.query.status as string).split(',') as TaskStatus[];
filter.status = statuses.length === 1 ? statuses[0] : statuses;
}
if (req.query.dispensary_id) {
filter.dispensary_id = parseInt(req.query.dispensary_id as string, 10);
}
if (req.query.worker_id) {
filter.worker_id = req.query.worker_id as string;
}
if (req.query.limit) {
filter.limit = parseInt(req.query.limit as string, 10);
}
if (req.query.offset) {
filter.offset = parseInt(req.query.offset as string, 10);
}
const tasks = await taskService.listTasks(filter);
res.json({ tasks, count: tasks.length });
} catch (error: unknown) {
console.error('Error listing tasks:', error);
res.status(500).json({ error: 'Failed to list tasks' });
}
});
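Example listing calls using the documented query params (IDs and limits are illustrative):
// Pending and running product_refresh tasks for one store, newest 50
// GET /api/tasks?role=product_refresh&status=pending,running&dispensary_id=42&limit=50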
/**
* GET /api/tasks/counts
* Get task counts by status
*/
router.get('/counts', async (_req: Request, res: Response) => {
try {
const counts = await taskService.getTaskCounts();
res.json(counts);
} catch (error: unknown) {
console.error('Error getting task counts:', error);
res.status(500).json({ error: 'Failed to get task counts' });
}
});
/**
* GET /api/tasks/capacity
* Get capacity metrics for all roles
*/
router.get('/capacity', async (_req: Request, res: Response) => {
try {
const metrics = await taskService.getCapacityMetrics();
res.json({ metrics });
} catch (error: unknown) {
console.error('Error getting capacity metrics:', error);
res.status(500).json({ error: 'Failed to get capacity metrics' });
}
});
/**
* GET /api/tasks/capacity/:role
* Get capacity metrics for a specific role
*/
router.get('/capacity/:role', async (req: Request, res: Response) => {
try {
const role = req.params.role as TaskRole;
const capacity = await taskService.getRoleCapacity(role);
if (!capacity) {
return res.status(404).json({ error: 'Role not found or no data' });
}
// Calculate workers needed for different SLAs
const workersFor1Hour = await taskService.calculateWorkersNeeded(role, 1);
const workersFor4Hours = await taskService.calculateWorkersNeeded(role, 4);
const workersFor8Hours = await taskService.calculateWorkersNeeded(role, 8);
res.json({
...capacity,
workers_needed: {
for_1_hour: workersFor1Hour,
for_4_hours: workersFor4Hours,
for_8_hours: workersFor8Hours,
},
});
} catch (error: unknown) {
console.error('Error getting role capacity:', error);
res.status(500).json({ error: 'Failed to get role capacity' });
}
});
/**
* GET /api/tasks/:id
* Get a specific task by ID
*/
router.get('/:id', async (req: Request, res: Response) => {
try {
const taskId = parseInt(req.params.id, 10);
const task = await taskService.getTask(taskId);
if (!task) {
return res.status(404).json({ error: 'Task not found' });
}
res.json(task);
} catch (error: unknown) {
console.error('Error getting task:', error);
res.status(500).json({ error: 'Failed to get task' });
}
});
/**
* DELETE /api/tasks/:id
* Delete a specific task by ID
* Only allows deletion of failed, completed, or pending tasks (not running)
*/
router.delete('/:id', async (req: Request, res: Response) => {
try {
const taskId = parseInt(req.params.id, 10);
// First check if task exists and its status
const task = await taskService.getTask(taskId);
if (!task) {
return res.status(404).json({ error: 'Task not found' });
}
// Don't allow deleting running tasks
if (task.status === 'running' || task.status === 'claimed') {
return res.status(400).json({ error: 'Cannot delete a running or claimed task' });
}
// Delete the task
await pool.query('DELETE FROM worker_tasks WHERE id = $1', [taskId]);
res.json({ success: true, message: `Task ${taskId} deleted` });
} catch (error: unknown) {
console.error('Error deleting task:', error);
res.status(500).json({ error: 'Failed to delete task' });
}
});
/**
* POST /api/tasks
* Create a new task
*
* Body:
* - role: TaskRole (required)
* - dispensary_id: number (optional)
* - platform: string (optional)
* - priority: number (optional, default 0)
* - scheduled_for: ISO date string (optional)
*/
router.post('/', async (req: Request, res: Response) => {
try {
const { role, dispensary_id, platform, priority, scheduled_for } = req.body;
if (!role) {
return res.status(400).json({ error: 'Role is required' });
}
// Check if store already has an active task
if (dispensary_id) {
const hasActive = await taskService.hasActiveTask(dispensary_id);
if (hasActive) {
return res.status(409).json({
error: 'Store already has an active task',
dispensary_id,
});
}
}
const task = await taskService.createTask({
role,
dispensary_id,
platform,
priority,
scheduled_for: scheduled_for ? new Date(scheduled_for) : undefined,
});
res.status(201).json(task);
} catch (error: unknown) {
console.error('Error creating task:', error);
res.status(500).json({ error: 'Failed to create task' });
}
});
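A minimal sketch of creating a task from an admin tool; the 409 branch fires when the store already has a pending or running task:
const res = await fetch('/api/tasks', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({ role: 'product_refresh', dispensary_id: 42, priority: 5 }),
});
if (res.status === 409) {
  console.warn('Store already has an active task');
}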
/**
* POST /api/tasks/generate/resync
* Generate daily resync tasks for all active stores
*
* Body:
* - batches_per_day: number (optional, default 6 = every 4 hours)
* - date: ISO date string (optional, default today)
*/
router.post('/generate/resync', async (req: Request, res: Response) => {
try {
const { batches_per_day, date } = req.body;
const batchesPerDay = batches_per_day ?? 6;
const targetDate = date ? new Date(date) : new Date();
const createdCount = await taskService.generateDailyResyncTasks(
batchesPerDay,
targetDate
);
res.json({
success: true,
tasks_created: createdCount,
batches_per_day: batchesPerDay,
date: targetDate.toISOString().split('T')[0],
});
} catch (error: unknown) {
console.error('Error generating resync tasks:', error);
res.status(500).json({ error: 'Failed to generate resync tasks' });
}
});
/**
* POST /api/tasks/generate/discovery
* Generate store discovery tasks for a platform
*
* Body:
* - platform: string (required, e.g., 'dutchie')
* - state_code: string (optional, e.g., 'AZ')
* - priority: number (optional)
*/
router.post('/generate/discovery', async (req: Request, res: Response) => {
try {
const { platform, state_code, priority } = req.body;
if (!platform) {
return res.status(400).json({ error: 'Platform is required' });
}
const task = await taskService.createStoreDiscoveryTask(
platform,
state_code,
priority ?? 0
);
res.status(201).json(task);
} catch (error: unknown) {
console.error('Error creating discovery task:', error);
res.status(500).json({ error: 'Failed to create discovery task' });
}
});
/**
* POST /api/tasks/recover-stale
* Recover stale tasks from dead workers
*
* Body:
* - threshold_minutes: number (optional, default 10)
*/
router.post('/recover-stale', async (req: Request, res: Response) => {
try {
const { threshold_minutes } = req.body;
const recovered = await taskService.recoverStaleTasks(threshold_minutes ?? 10);
res.json({
success: true,
tasks_recovered: recovered,
});
} catch (error: unknown) {
console.error('Error recovering stale tasks:', error);
res.status(500).json({ error: 'Failed to recover stale tasks' });
}
});
/**
* GET /api/tasks/role/:role/last-completion
* Get the last completion time for a role
*/
router.get('/role/:role/last-completion', async (req: Request, res: Response) => {
try {
const role = req.params.role as TaskRole;
const lastCompletion = await taskService.getLastCompletion(role);
res.json({
role,
last_completion: lastCompletion?.toISOString() ?? null,
time_since: lastCompletion
? Math.floor((Date.now() - lastCompletion.getTime()) / 1000)
: null,
});
} catch (error: unknown) {
console.error('Error getting last completion:', error);
res.status(500).json({ error: 'Failed to get last completion' });
}
});
/**
* GET /api/tasks/role/:role/recent
* Get recent completions for a role
*/
router.get('/role/:role/recent', async (req: Request, res: Response) => {
try {
const role = req.params.role as TaskRole;
const limit = parseInt(req.query.limit as string, 10) || 10;
const tasks = await taskService.getRecentCompletions(role, limit);
res.json({ tasks });
} catch (error: unknown) {
console.error('Error getting recent completions:', error);
res.status(500).json({ error: 'Failed to get recent completions' });
}
});
/**
* GET /api/tasks/store/:dispensaryId/active
* Check if a store has an active task
*/
router.get('/store/:dispensaryId/active', async (req: Request, res: Response) => {
try {
const dispensaryId = parseInt(req.params.dispensaryId, 10);
const hasActive = await taskService.hasActiveTask(dispensaryId);
res.json({
dispensary_id: dispensaryId,
has_active_task: hasActive,
});
} catch (error: unknown) {
console.error('Error checking active task:', error);
res.status(500).json({ error: 'Failed to check active task' });
}
});
// ============================================================
// MIGRATION ROUTES - Disable old job systems
// ============================================================
/**
* GET /api/tasks/migration/status
* Get status of old job systems vs new task queue
*/
router.get('/migration/status', async (_req: Request, res: Response) => {
try {
// Get old job system counts
const [schedules, crawlJobs, rawPayloads, taskCounts] = await Promise.all([
pool.query(`
SELECT
COUNT(*) as total,
COUNT(*) FILTER (WHERE enabled = true) as enabled
FROM job_schedules
`),
pool.query(`
SELECT
COUNT(*) as total,
COUNT(*) FILTER (WHERE status = 'pending') as pending,
COUNT(*) FILTER (WHERE status = 'running') as running
FROM dispensary_crawl_jobs
`),
pool.query(`
SELECT
COUNT(*) as total,
COUNT(*) FILTER (WHERE processed = false) as unprocessed
FROM raw_payloads
`),
taskService.getTaskCounts(),
]);
res.json({
old_systems: {
job_schedules: {
total: parseInt(schedules.rows[0].total) || 0,
enabled: parseInt(schedules.rows[0].enabled) || 0,
},
dispensary_crawl_jobs: {
total: parseInt(crawlJobs.rows[0].total) || 0,
pending: parseInt(crawlJobs.rows[0].pending) || 0,
running: parseInt(crawlJobs.rows[0].running) || 0,
},
raw_payloads: {
total: parseInt(rawPayloads.rows[0].total) || 0,
unprocessed: parseInt(rawPayloads.rows[0].unprocessed) || 0,
},
},
new_task_queue: taskCounts,
recommendation: schedules.rows[0].enabled > 0
? 'Disable old job schedules before switching to new task queue'
: 'Ready to use new task queue',
});
} catch (error: unknown) {
console.error('Error getting migration status:', error);
res.status(500).json({ error: 'Failed to get migration status' });
}
});
/**
* POST /api/tasks/migration/disable-old-schedules
* Disable all old job schedules to prepare for new task queue
*/
router.post('/migration/disable-old-schedules', async (_req: Request, res: Response) => {
try {
const result = await pool.query(`
UPDATE job_schedules
SET enabled = false,
updated_at = NOW()
WHERE enabled = true
RETURNING id, job_name
`);
res.json({
success: true,
disabled_count: result.rowCount,
disabled_schedules: result.rows.map(r => ({ id: r.id, job_name: r.job_name })),
});
} catch (error: unknown) {
console.error('Error disabling old schedules:', error);
res.status(500).json({ error: 'Failed to disable old schedules' });
}
});
/**
* POST /api/tasks/migration/cancel-pending-crawl-jobs
* Cancel all pending crawl jobs from the old system
*/
router.post('/migration/cancel-pending-crawl-jobs', async (_req: Request, res: Response) => {
try {
const result = await pool.query(`
UPDATE dispensary_crawl_jobs
SET status = 'cancelled',
completed_at = NOW(),
updated_at = NOW()
WHERE status = 'pending'
RETURNING id
`);
res.json({
success: true,
cancelled_count: result.rowCount,
});
} catch (error: unknown) {
console.error('Error cancelling pending crawl jobs:', error);
res.status(500).json({ error: 'Failed to cancel pending crawl jobs' });
}
});
/**
* POST /api/tasks/migration/create-resync-tasks
* Create product_refresh tasks for all crawl-enabled dispensaries
*/
router.post('/migration/create-resync-tasks', async (req: Request, res: Response) => {
try {
const { priority = 0, state_code } = req.body;
let query = `
SELECT id, name FROM dispensaries
WHERE crawl_enabled = true
AND platform_dispensary_id IS NOT NULL
`;
const params: any[] = [];
if (state_code) {
query += `
AND state_id = (SELECT id FROM states WHERE code = $1)
`;
params.push(state_code.toUpperCase());
}
query += ` ORDER BY id`;
const dispensaries = await pool.query(query, params);
let created = 0;
for (const disp of dispensaries.rows) {
// Check if already has pending/running task
const hasActive = await taskService.hasActiveTask(disp.id);
if (!hasActive) {
await taskService.createTask({
role: 'product_refresh',
dispensary_id: disp.id,
platform: 'dutchie',
priority,
});
created++;
}
}
res.json({
success: true,
tasks_created: created,
dispensaries_checked: dispensaries.rows.length,
state_filter: state_code || 'all',
});
} catch (error: unknown) {
console.error('Error creating resync tasks:', error);
res.status(500).json({ error: 'Failed to create resync tasks' });
}
});
/**
* POST /api/tasks/migration/full-migrate
* One-click migration: disable old systems, create new tasks
*/
router.post('/migration/full-migrate', async (req: Request, res: Response) => {
try {
const results: any = {
success: true,
steps: [],
};
// Step 1: Disable old job schedules
const disableResult = await pool.query(`
UPDATE job_schedules
SET enabled = false, updated_at = NOW()
WHERE enabled = true
RETURNING id
`);
results.steps.push({
step: 'disable_job_schedules',
count: disableResult.rowCount,
});
// Step 2: Cancel pending crawl jobs
const cancelResult = await pool.query(`
UPDATE dispensary_crawl_jobs
SET status = 'cancelled', completed_at = NOW(), updated_at = NOW()
WHERE status = 'pending'
RETURNING id
`);
results.steps.push({
step: 'cancel_pending_crawl_jobs',
count: cancelResult.rowCount,
});
// Step 3: Generate initial resync tasks
const resyncCount = await taskService.generateDailyResyncTasks(6);
results.steps.push({
step: 'generate_resync_tasks',
count: resyncCount,
});
// Step 4: Create store discovery task
const discoveryTask = await taskService.createStoreDiscoveryTask('dutchie', undefined, 0);
results.steps.push({
step: 'create_discovery_task',
task_id: discoveryTask.id,
});
// Step 5: Create analytics refresh task
const analyticsTask = await taskService.createTask({
role: 'analytics_refresh',
priority: 0,
});
results.steps.push({
step: 'create_analytics_task',
task_id: analyticsTask.id,
});
results.message = 'Migration complete. New task workers will pick up tasks.';
res.json(results);
} catch (error: unknown) {
console.error('Error during full migration:', error);
res.status(500).json({ error: 'Failed to complete migration' });
}
});
export default router;

View File

@@ -14,23 +14,36 @@ router.get('/', async (req: AuthRequest, res) => {
try {
const { search, domain } = req.query;
let query = `
SELECT id, email, role, first_name, last_name, phone, domain, created_at, updated_at
FROM users
WHERE 1=1
`;
// Check which columns exist (schema-tolerant)
const columnsResult = await pool.query(`
SELECT column_name FROM information_schema.columns
WHERE table_name = 'users' AND column_name IN ('first_name', 'last_name', 'phone', 'domain')
`);
const existingColumns = new Set(columnsResult.rows.map((r: any) => r.column_name));
// Build column list based on what exists
const selectCols = ['id', 'email', 'role', 'created_at', 'updated_at'];
if (existingColumns.has('first_name')) selectCols.push('first_name');
if (existingColumns.has('last_name')) selectCols.push('last_name');
if (existingColumns.has('phone')) selectCols.push('phone');
if (existingColumns.has('domain')) selectCols.push('domain');
let query = `SELECT ${selectCols.join(', ')} FROM users WHERE 1=1`;
const params: any[] = [];
let paramIndex = 1;
// Search by email, first_name, or last_name
// Search by email (and optionally first_name, last_name if they exist)
if (search && typeof search === 'string') {
query += ` AND (email ILIKE $${paramIndex} OR first_name ILIKE $${paramIndex} OR last_name ILIKE $${paramIndex})`;
const searchClauses = ['email ILIKE $' + paramIndex];
if (existingColumns.has('first_name')) searchClauses.push('first_name ILIKE $' + paramIndex);
if (existingColumns.has('last_name')) searchClauses.push('last_name ILIKE $' + paramIndex);
query += ` AND (${searchClauses.join(' OR ')})`;
params.push(`%${search}%`);
paramIndex++;
}
// Filter by domain
if (domain && typeof domain === 'string') {
// Filter by domain (if column exists)
if (domain && typeof domain === 'string' && existingColumns.has('domain')) {
query += ` AND domain = $${paramIndex}`;
params.push(domain);
paramIndex++;
@@ -50,8 +63,22 @@ router.get('/', async (req: AuthRequest, res) => {
router.get('/:id', async (req: AuthRequest, res) => {
try {
const { id } = req.params;
// Check which columns exist (schema-tolerant)
const columnsResult = await pool.query(`
SELECT column_name FROM information_schema.columns
WHERE table_name = 'users' AND column_name IN ('first_name', 'last_name', 'phone', 'domain')
`);
const existingColumns = new Set(columnsResult.rows.map((r: any) => r.column_name));
const selectCols = ['id', 'email', 'role', 'created_at', 'updated_at'];
if (existingColumns.has('first_name')) selectCols.push('first_name');
if (existingColumns.has('last_name')) selectCols.push('last_name');
if (existingColumns.has('phone')) selectCols.push('phone');
if (existingColumns.has('domain')) selectCols.push('domain');
const result = await pool.query(`
SELECT id, email, role, first_name, last_name, phone, domain, created_at, updated_at
SELECT ${selectCols.join(', ')}
FROM users
WHERE id = $1
`, [id]);
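For example, on a database where only first_name exists among the optional columns, the generated statement collapses to:
// SELECT id, email, role, created_at, updated_at, first_name
// FROM users
// WHERE id = $1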

View File

@@ -1,18 +1,32 @@
import { Router, Request, Response } from 'express';
import { readFileSync } from 'fs';
import { join } from 'path';
const router = Router();
// Read package.json version at startup
let packageVersion = 'unknown';
try {
const packageJson = JSON.parse(readFileSync(join(__dirname, '../../package.json'), 'utf-8'));
packageVersion = packageJson.version || 'unknown';
} catch {
// Fallback if package.json not found
}
/**
* GET /api/version
* Returns build version information for display in admin UI
*/
router.get('/', async (req: Request, res: Response) => {
try {
const gitSha = process.env.APP_GIT_SHA || 'unknown';
const versionInfo = {
build_version: process.env.APP_BUILD_VERSION || 'dev',
git_sha: process.env.APP_GIT_SHA || 'local',
build_time: process.env.APP_BUILD_TIME || new Date().toISOString(),
image_tag: process.env.CONTAINER_IMAGE_TAG || 'local',
version: packageVersion,
build_version: process.env.APP_BUILD_VERSION?.slice(0, 8) || 'dev',
git_sha: gitSha.slice(0, 8) || 'unknown',
git_sha_full: gitSha,
build_time: process.env.APP_BUILD_TIME || 'unknown',
image_tag: process.env.CONTAINER_IMAGE_TAG?.slice(0, 8) || 'local',
};
res.json(versionInfo);
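A sample /api/version payload with the shortened identifiers (all values are placeholders):
// {
//   "version": "1.2.3",
//   "build_version": "a1b2c3d4",
//   "git_sha": "deadbeef",
//   "git_sha_full": "deadbeef0123...",   // full 40-character SHA
//   "build_time": "2025-01-01T00:00:00Z",
//   "image_tag": "a1b2c3d4"
// }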

View File

@@ -0,0 +1,675 @@
/**
* Worker Registry API Routes
*
* Dynamic worker management - workers register on startup, get assigned names,
* and report heartbeats. Everything is API-driven, no hardcoding.
*
* Endpoints:
* POST /api/worker-registry/register - Worker reports for duty
* POST /api/worker-registry/heartbeat - Worker heartbeat
* POST /api/worker-registry/deregister - Worker signing off
* GET /api/worker-registry/workers - List all workers (for dashboard)
* GET /api/worker-registry/workers/:id - Get specific worker
* POST /api/worker-registry/cleanup - Mark stale workers offline
*
* GET /api/worker-registry/names - List all names in pool
* POST /api/worker-registry/names - Add names to pool
* DELETE /api/worker-registry/names/:name - Remove name from pool
*
* GET /api/worker-registry/roles - List available task roles
* POST /api/worker-registry/roles - Add a new role (future)
*/
import { Router, Request, Response } from 'express';
import { pool } from '../db/pool';
import os from 'os';
const router = Router();
// ============================================================
// WORKER REGISTRATION
// ============================================================
/**
* POST /api/worker-registry/register
* Worker reports for duty - gets assigned a friendly name
*
* Body:
* - role: string (optional) - task role, or null for role-agnostic workers
* - worker_id: string (optional) - custom ID, auto-generated if not provided
* - pod_name: string (optional) - k8s pod name
* - hostname: string (optional) - machine hostname
* - metadata: object (optional) - additional worker info
*
* Returns:
* - worker_id: assigned worker ID
* - friendly_name: assigned name from pool
* - role: confirmed role (or null if agnostic)
* - message: welcome message
*/
router.post('/register', async (req: Request, res: Response) => {
try {
const {
role = null, // Role is now optional - null means agnostic
worker_id,
pod_name,
hostname,
ip_address,
metadata = {}
} = req.body;
// Generate worker_id if not provided
const finalWorkerId = worker_id || `worker-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
const finalHostname = hostname || os.hostname();
const clientIp = ip_address || req.ip || req.socket.remoteAddress;
// Check if worker already registered
const existing = await pool.query(
'SELECT id, friendly_name, status FROM worker_registry WHERE worker_id = $1',
[finalWorkerId]
);
if (existing.rows.length > 0) {
// Re-activate existing worker
const { rows } = await pool.query(`
UPDATE worker_registry
SET status = 'active',
role = $1,
pod_name = $2,
hostname = $3,
ip_address = $4,
last_heartbeat_at = NOW(),
started_at = NOW(),
metadata = $5,
updated_at = NOW()
WHERE worker_id = $6
RETURNING id, worker_id, friendly_name, role
`, [role, pod_name, finalHostname, clientIp, metadata, finalWorkerId]);
const worker = rows[0];
const roleMsg = role ? `for ${role}` : 'as role-agnostic';
console.log(`[WorkerRegistry] Worker "${worker.friendly_name}" (${finalWorkerId}) re-registered ${roleMsg}`);
return res.json({
success: true,
worker_id: worker.worker_id,
friendly_name: worker.friendly_name,
role: worker.role,
message: role
? `Welcome back, ${worker.friendly_name}! You are assigned to ${role}.`
: `Welcome back, ${worker.friendly_name}! You are ready to take any task.`
});
}
// Assign a friendly name
const nameResult = await pool.query('SELECT assign_worker_name($1) as name', [finalWorkerId]);
const friendlyName = nameResult.rows[0].name;
// Register the worker
const { rows } = await pool.query(`
INSERT INTO worker_registry (
worker_id, friendly_name, role, pod_name, hostname, ip_address, status, metadata
) VALUES ($1, $2, $3, $4, $5, $6, 'active', $7)
RETURNING id, worker_id, friendly_name, role
`, [finalWorkerId, friendlyName, role, pod_name, finalHostname, clientIp, metadata]);
const worker = rows[0];
const roleMsg = role ? `for ${role}` : 'as role-agnostic';
console.log(`[WorkerRegistry] New worker "${friendlyName}" (${finalWorkerId}) reporting for duty ${roleMsg}`);
res.json({
success: true,
worker_id: worker.worker_id,
friendly_name: worker.friendly_name,
role: worker.role,
message: role
? `Hello ${friendlyName}! You are now registered for ${role}. Ready for work!`
: `Hello ${friendlyName}! You are ready to take any task from the pool.`
});
} catch (error: any) {
console.error('[WorkerRegistry] Registration error:', error);
res.status(500).json({ success: false, error: error.message });
}
});
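A worker-side registration sketch against the contract documented above (the API base URL env var and its default are assumptions):
import os from 'os';

const base = process.env.API_URL ?? 'http://localhost:3000'; // assumed base URL
const res = await fetch(`${base}/api/worker-registry/register`, {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({
    role: null,               // null = role-agnostic worker, takes any task
    hostname: os.hostname(),
  }),
});
const { worker_id, friendly_name } = await res.json();
console.log(`Registered as ${friendly_name} (${worker_id})`);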
/**
* POST /api/worker-registry/heartbeat
* Worker sends heartbeat to stay alive
*
* Body:
* - worker_id: string (required)
* - current_task_id: number (optional) - task currently being processed
* - status: string (optional) - 'active', 'idle'
*/
router.post('/heartbeat', async (req: Request, res: Response) => {
try {
const { worker_id, current_task_id, status = 'active', resources } = req.body;
if (!worker_id) {
return res.status(400).json({ success: false, error: 'worker_id is required' });
}
// Store resources in metadata jsonb column
const { rows } = await pool.query(`
UPDATE worker_registry
SET last_heartbeat_at = NOW(),
current_task_id = $1,
status = $2,
metadata = COALESCE(metadata, '{}'::jsonb) || COALESCE($4::jsonb, '{}'::jsonb),
updated_at = NOW()
WHERE worker_id = $3
RETURNING id, friendly_name, status
`, [current_task_id || null, status, worker_id, resources ? JSON.stringify(resources) : null]);
if (rows.length === 0) {
return res.status(404).json({ success: false, error: 'Worker not found - please register first' });
}
res.json({
success: true,
worker: rows[0]
});
} catch (error: any) {
console.error('[WorkerRegistry] Heartbeat error:', error);
res.status(500).json({ success: false, error: error.message });
}
});
/**
* POST /api/worker-registry/task-completed
* Worker reports task completion
*
* Body:
* - worker_id: string (required)
* - success: boolean (required)
*/
router.post('/task-completed', async (req: Request, res: Response) => {
try {
const { worker_id, success } = req.body;
if (!worker_id) {
return res.status(400).json({ success: false, error: 'worker_id is required' });
}
const incrementField = success ? 'tasks_completed' : 'tasks_failed';
const { rows } = await pool.query(`
UPDATE worker_registry
SET ${incrementField} = ${incrementField} + 1,
last_task_at = NOW(),
current_task_id = NULL,
status = 'idle',
updated_at = NOW()
WHERE worker_id = $1
RETURNING id, friendly_name, tasks_completed, tasks_failed
`, [worker_id]);
if (rows.length === 0) {
return res.status(404).json({ success: false, error: 'Worker not found' });
}
res.json({ success: true, worker: rows[0] });
} catch (error: any) {
res.status(500).json({ success: false, error: error.message });
}
});
/**
* POST /api/worker-registry/deregister
* Worker signing off (graceful shutdown)
*
* Body:
* - worker_id: string (required)
*/
router.post('/deregister', async (req: Request, res: Response) => {
try {
const { worker_id } = req.body;
if (!worker_id) {
return res.status(400).json({ success: false, error: 'worker_id is required' });
}
// Release the name back to the pool
await pool.query('SELECT release_worker_name($1)', [worker_id]);
// Mark as terminated
const { rows } = await pool.query(`
UPDATE worker_registry
SET status = 'terminated',
current_task_id = NULL,
updated_at = NOW()
WHERE worker_id = $1
RETURNING id, friendly_name
`, [worker_id]);
if (rows.length === 0) {
return res.status(404).json({ success: false, error: 'Worker not found' });
}
console.log(`[WorkerRegistry] Worker "${rows[0].friendly_name}" (${worker_id}) signed off`);
res.json({
success: true,
message: `Goodbye ${rows[0].friendly_name}! Thanks for your work.`
});
} catch (error: any) {
console.error('[WorkerRegistry] Deregister error:', error);
res.status(500).json({ success: false, error: error.message });
}
});
// ============================================================
// WORKER LISTING (for Dashboard)
// ============================================================
/**
* GET /api/worker-registry/workers
* List all workers (for dashboard)
*
* Query params:
* - status: filter by status (active, idle, offline, all)
* - role: filter by role
* - include_terminated: include terminated workers (default: false)
*/
router.get('/workers', async (req: Request, res: Response) => {
try {
// Check if worker_registry table exists
const tableCheck = await pool.query(`
SELECT EXISTS (
SELECT FROM information_schema.tables
WHERE table_name = 'worker_registry'
) as exists
`);
if (!tableCheck.rows[0].exists) {
// Return empty result if table doesn't exist yet
return res.json({
success: true,
workers: [],
summary: {
active_count: 0,
idle_count: 0,
offline_count: 0,
total_count: 0,
active_roles: 0
}
});
}
const { status, role, include_terminated = 'false' } = req.query;
let whereClause = include_terminated === 'true' ? 'WHERE 1=1' : "WHERE status != 'terminated'";
const params: any[] = [];
let paramIndex = 1;
if (status && status !== 'all') {
whereClause += ` AND status = $${paramIndex}`;
params.push(status);
paramIndex++;
}
if (role) {
whereClause += ` AND role = $${paramIndex}`;
params.push(role);
paramIndex++;
}
const { rows } = await pool.query(`
SELECT
id,
worker_id,
friendly_name,
role,
pod_name,
hostname,
ip_address,
status,
started_at,
last_heartbeat_at,
last_task_at,
tasks_completed,
tasks_failed,
current_task_id,
metadata,
EXTRACT(EPOCH FROM (NOW() - last_heartbeat_at)) as seconds_since_heartbeat,
CASE
WHEN status = 'offline' OR status = 'terminated' THEN status
WHEN last_heartbeat_at < NOW() - INTERVAL '2 minutes' THEN 'stale'
WHEN current_task_id IS NOT NULL THEN 'busy'
ELSE 'ready'
END as health_status,
created_at
FROM worker_registry
${whereClause}
ORDER BY
CASE status
WHEN 'active' THEN 1
WHEN 'idle' THEN 2
WHEN 'offline' THEN 3
ELSE 4
END,
last_heartbeat_at DESC
`, params);
// Get summary counts
const { rows: summary } = await pool.query(`
SELECT
COUNT(*) FILTER (WHERE status = 'active') as active_count,
COUNT(*) FILTER (WHERE status = 'idle') as idle_count,
COUNT(*) FILTER (WHERE status = 'offline') as offline_count,
COUNT(*) FILTER (WHERE status != 'terminated') as total_count,
COUNT(DISTINCT role) FILTER (WHERE status IN ('active', 'idle')) as active_roles
FROM worker_registry
`);
res.json({
success: true,
workers: rows,
summary: summary[0]
});
} catch (error: any) {
console.error('[WorkerRegistry] List workers error:', error);
res.status(500).json({ success: false, error: error.message });
}
});
/**
* GET /api/worker-registry/workers/:workerId
* Get specific worker details
*/
router.get('/workers/:workerId', async (req: Request, res: Response) => {
try {
const { workerId } = req.params;
const { rows } = await pool.query(`
SELECT * FROM worker_registry WHERE worker_id = $1
`, [workerId]);
if (rows.length === 0) {
return res.status(404).json({ success: false, error: 'Worker not found' });
}
res.json({ success: true, worker: rows[0] });
} catch (error: any) {
res.status(500).json({ success: false, error: error.message });
}
});
/**
* DELETE /api/worker-registry/workers/:workerId
* Remove a worker (admin action)
*/
router.delete('/workers/:workerId', async (req: Request, res: Response) => {
try {
const { workerId } = req.params;
// Release name
await pool.query('SELECT release_worker_name($1)', [workerId]);
// Delete worker
const { rows } = await pool.query(`
DELETE FROM worker_registry WHERE worker_id = $1 RETURNING friendly_name
`, [workerId]);
if (rows.length === 0) {
return res.status(404).json({ success: false, error: 'Worker not found' });
}
res.json({ success: true, message: `Worker ${rows[0].friendly_name} removed` });
} catch (error: any) {
res.status(500).json({ success: false, error: error.message });
}
});
/**
* POST /api/worker-registry/cleanup
* Mark stale workers as offline
*
* Body:
* - stale_threshold_minutes: number (default: 5)
*/
router.post('/cleanup', async (req: Request, res: Response) => {
try {
const { stale_threshold_minutes = 5 } = req.body;
const { rows } = await pool.query(
'SELECT mark_stale_workers($1) as count',
[stale_threshold_minutes]
);
res.json({
success: true,
stale_workers_marked: rows[0].count,
message: `Marked ${rows[0].count} stale workers as offline`
});
} catch (error: any) {
res.status(500).json({ success: false, error: error.message });
}
});
// ============================================================
// NAME POOL MANAGEMENT
// ============================================================
/**
* GET /api/worker-registry/names
* List all names in the pool
*/
router.get('/names', async (_req: Request, res: Response) => {
try {
const { rows } = await pool.query(`
SELECT
id,
name,
in_use,
assigned_to,
assigned_at
FROM worker_name_pool
ORDER BY in_use DESC, name ASC
`);
const { rows: summary } = await pool.query(`
SELECT
COUNT(*) as total,
COUNT(*) FILTER (WHERE in_use = true) as in_use,
COUNT(*) FILTER (WHERE in_use = false) as available
FROM worker_name_pool
`);
res.json({
success: true,
names: rows,
summary: summary[0]
});
} catch (error: any) {
res.status(500).json({ success: false, error: error.message });
}
});
/**
* POST /api/worker-registry/names
* Add names to the pool
*
* Body:
* - names: string[] (required) - array of names to add
*/
router.post('/names', async (req: Request, res: Response) => {
try {
const { names } = req.body;
if (!names || !Array.isArray(names) || names.length === 0) {
return res.status(400).json({ success: false, error: 'names array is required' });
}
const values = names.map(n => `('${n.replace(/'/g, "''")}')`).join(', ');
const { rowCount } = await pool.query(`
INSERT INTO worker_name_pool (name)
VALUES ${values}
ON CONFLICT (name) DO NOTHING
`);
res.json({
success: true,
added: rowCount,
message: `Added ${rowCount} new names to the pool`
});
} catch (error: any) {
res.status(500).json({ success: false, error: error.message });
}
});
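The VALUES list above is built by string concatenation with quote escaping; an equivalent fully parameterized form (a sketch, not part of this change) would hand the array straight to Postgres:
// Same effect as the dynamic VALUES list, but parameterized.
await pool.query(
  `INSERT INTO worker_name_pool (name)
   SELECT unnest($1::text[])
   ON CONFLICT (name) DO NOTHING`,
  [names]
);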
/**
* DELETE /api/worker-registry/names/:name
* Remove a name from the pool (only if not in use)
*/
router.delete('/names/:name', async (req: Request, res: Response) => {
try {
const { name } = req.params;
const { rows } = await pool.query(`
DELETE FROM worker_name_pool
WHERE name = $1 AND in_use = false
RETURNING name
`, [name]);
if (rows.length === 0) {
return res.status(400).json({
success: false,
error: 'Name not found or currently in use'
});
}
res.json({ success: true, message: `Name "${name}" removed from pool` });
} catch (error: any) {
res.status(500).json({ success: false, error: error.message });
}
});
// ============================================================
// ROLE MANAGEMENT
// ============================================================
/**
* GET /api/worker-registry/roles
* List available task roles
*/
router.get('/roles', async (_req: Request, res: Response) => {
// These are the roles the task handlers support
const roles = [
{
id: 'product_refresh',
name: 'Product Refresh',
description: 'Re-crawl dispensary products for price/stock changes',
handler: 'handleProductRefresh'
},
{
id: 'product_discovery',
name: 'Product Discovery',
description: 'Initial product discovery for new dispensaries',
handler: 'handleProductDiscovery'
},
{
id: 'store_discovery',
name: 'Store Discovery',
description: 'Discover new dispensary locations',
handler: 'handleStoreDiscovery'
},
{
id: 'entry_point_discovery',
name: 'Entry Point Discovery',
description: 'Resolve platform IDs from menu URLs',
handler: 'handleEntryPointDiscovery'
},
{
id: 'analytics_refresh',
name: 'Analytics Refresh',
description: 'Refresh materialized views and analytics',
handler: 'handleAnalyticsRefresh'
}
];
// Get active worker counts per role
try {
const { rows } = await pool.query(`
SELECT role, COUNT(*) as worker_count
FROM worker_registry
WHERE status IN ('active', 'idle')
GROUP BY role
`);
const countMap = new Map(rows.map(r => [r.role, parseInt(r.worker_count)]));
const rolesWithCounts = roles.map(r => ({
...r,
active_workers: countMap.get(r.id) || 0
}));
res.json({ success: true, roles: rolesWithCounts });
} catch {
// If table doesn't exist yet, just return roles without counts
res.json({ success: true, roles: roles.map(r => ({ ...r, active_workers: 0 })) });
}
});
/**
* GET /api/worker-registry/capacity
* Get capacity planning info
*/
router.get('/capacity', async (_req: Request, res: Response) => {
try {
// Get worker counts by role
const { rows: workerCounts } = await pool.query(`
SELECT role, COUNT(*) as count
FROM worker_registry
WHERE status IN ('active', 'idle')
GROUP BY role
`);
// Get pending task counts by role (if worker_tasks exists)
let taskCounts: any[] = [];
try {
const result = await pool.query(`
SELECT role, COUNT(*) as pending_count
FROM worker_tasks
WHERE status = 'pending'
GROUP BY role
`);
taskCounts = result.rows;
} catch {
// worker_tasks might not exist yet
}
// Get crawl-enabled store count
const storeCountResult = await pool.query(`
SELECT COUNT(*) as count
FROM dispensaries
WHERE crawl_enabled = true AND platform_dispensary_id IS NOT NULL
`);
const totalStores = parseInt(storeCountResult.rows[0].count);
const workerMap = new Map(workerCounts.map(r => [r.role, parseInt(r.count)]));
const taskMap = new Map(taskCounts.map(r => [r.role, parseInt(r.pending_count)]));
const roles = ['product_refresh', 'product_discovery', 'store_discovery', 'entry_point_discovery', 'analytics_refresh'];
const capacity = roles.map(role => ({
role,
active_workers: workerMap.get(role) || 0,
pending_tasks: taskMap.get(role) || 0,
// Rough estimate: ~20 seconds per task over a 4-hour cycle (14,400 s / 20 s ≈ 720 tasks per worker)
tasks_per_worker_per_cycle: 720,
workers_needed_for_all_stores: Math.ceil(totalStores / 720)
}));
res.json({
success: true,
total_stores: totalStores,
capacity
});
} catch (error: any) {
res.status(500).json({ success: false, error: error.message });
}
});
export default router;

View File

@@ -24,6 +24,95 @@ import { pool } from '../db/pool';
const router = Router();
// ============================================================
// STATIC ROUTES (must come before parameterized routes)
// ============================================================
/**
* GET /api/workers/roles - List available worker roles
*/
router.get('/roles', async (_req: Request, res: Response) => {
const roles = [
{ id: 'product_sync', name: 'Product Sync', description: 'Crawls products from dispensary menus' },
{ id: 'store_discovery', name: 'Store Discovery', description: 'Discovers new dispensary locations' },
{ id: 'entry_point_finder', name: 'Entry Point Finder', description: 'Detects menu providers and resolves platform IDs' },
{ id: 'analytics_refresh', name: 'Analytics Refresh', description: 'Refreshes materialized views and analytics' },
{ id: 'price_monitor', name: 'Price Monitor', description: 'Monitors price changes and triggers alerts' },
{ id: 'inventory_sync', name: 'Inventory Sync', description: 'Syncs inventory levels' },
{ id: 'image_processor', name: 'Image Processor', description: 'Downloads and processes product images' },
{ id: 'data_validator', name: 'Data Validator', description: 'Validates data integrity' },
{ id: 'custom', name: 'Custom', description: 'Custom worker role' },
];
res.json({ success: true, roles });
});
/**
* GET /api/workers/states - List available states for assignment
*/
router.get('/states', async (_req: Request, res: Response) => {
try {
const { rows } = await pool.query(`
SELECT state_code, state_name, dispensary_count
FROM states
WHERE active = true
ORDER BY state_name ASC
`);
res.json({ success: true, states: rows });
} catch (error: any) {
// Fallback if states table doesn't exist
res.json({ success: true, states: [
{ state_code: 'AZ', state_name: 'Arizona', dispensary_count: 0 },
{ state_code: 'CA', state_name: 'California', dispensary_count: 0 },
{ state_code: 'CO', state_name: 'Colorado', dispensary_count: 0 },
{ state_code: 'MI', state_name: 'Michigan', dispensary_count: 0 },
{ state_code: 'NV', state_name: 'Nevada', dispensary_count: 0 },
]});
}
});
/**
* GET /api/workers/dispensaries - List dispensaries for assignment (paginated search)
*/
router.get('/dispensaries', async (req: Request, res: Response) => {
try {
const search = (req.query.search as string) || '';
const limit = parseInt(req.query.limit as string) || 50;
const { rows } = await pool.query(`
SELECT id, name, city, state_code
FROM dispensaries
WHERE ($1 = '' OR name ILIKE $2)
ORDER BY name ASC
LIMIT $3
`, [search, `%${search}%`, limit]);
res.json({ success: true, dispensaries: rows });
} catch (error: any) {
console.error('[Workers] Error fetching dispensaries:', error);
res.status(500).json({ success: false, error: error.message });
}
});
/**
* GET /api/workers/chains - List chains for assignment
*/
router.get('/chains', async (_req: Request, res: Response) => {
try {
const { rows } = await pool.query(`
SELECT DISTINCT chain_id as id, chain_name as name, COUNT(*) as dispensary_count
FROM dispensaries
WHERE chain_id IS NOT NULL AND chain_name IS NOT NULL
GROUP BY chain_id, chain_name
ORDER BY chain_name ASC
`);
res.json({ success: true, chains: rows });
} catch (error: any) {
// Fallback if chain columns don't exist
res.json({ success: true, chains: [] });
}
});
// ============================================================
// WORKER TYPES
// ============================================================
@@ -32,6 +121,7 @@ interface Worker {
id: number;
worker_name: string;
run_role: string;
job_name?: string;
scope: string[];
description: string;
enabled: boolean;
@@ -40,6 +130,8 @@ interface Worker {
next_run_at: string | null;
last_run_at: string | null;
last_status: string | null;
last_error_message?: string | null;
last_duration_ms?: number | null;
last_seen: string | null;
visibility_lost: number;
visibility_restored: number;
@@ -124,15 +216,20 @@ router.get('/', async (_req: Request, res: Response) => {
next_run_at,
last_run_at,
last_status,
job_config
last_error_message,
last_duration_ms,
job_config,
worker_name,
worker_role
FROM job_schedules
ORDER BY enabled DESC, last_run_at DESC NULLS LAST
`);
const workers: Worker[] = rows.map((row: any) => ({
id: row.id,
worker_name: extractWorkerName(row.job_name, row.job_config),
run_role: extractRunRole(row.job_name, row.job_config),
worker_name: row.worker_name || extractWorkerName(row.job_name, row.job_config),
run_role: row.worker_role || extractRunRole(row.job_name, row.job_config),
job_name: row.job_name,
scope: parseScope(row.job_config),
description: row.description || row.job_name,
enabled: row.enabled,
@@ -141,6 +238,8 @@ router.get('/', async (_req: Request, res: Response) => {
next_run_at: row.next_run_at?.toISOString() || null,
last_run_at: row.last_run_at?.toISOString() || null,
last_status: row.last_status,
last_error_message: row.last_error_message,
last_duration_ms: row.last_duration_ms,
last_seen: row.last_run_at?.toISOString() || null,
visibility_lost: 0,
visibility_restored: 0,
@@ -619,4 +718,323 @@ router.get('/summary', async (req: Request, res: Response) => {
}
});
// ============================================================
// WORKER CRUD ROUTES (using new workers table)
// ============================================================
/**
* GET /api/workers/definitions - List all worker definitions from workers table
*/
router.get('/definitions', async (_req: Request, res: Response) => {
try {
const { rows } = await pool.query(`
SELECT
w.*,
(SELECT COUNT(*) FROM dispensary_crawl_jobs j WHERE j.assigned_worker_id = w.id AND j.status = 'pending') as pending_jobs,
(SELECT COUNT(*) FROM dispensary_crawl_jobs j WHERE j.assigned_worker_id = w.id AND j.status = 'running') as running_jobs
FROM workers w
ORDER BY w.enabled DESC, w.name ASC
`);
res.json({ success: true, workers: rows });
} catch (error: any) {
console.error('[Workers] Error listing worker definitions:', error);
res.status(500).json({ success: false, error: error.message });
}
});
/**
* POST /api/workers/definitions - Create a new worker definition
*/
router.post('/definitions', async (req: Request, res: Response) => {
try {
const {
name,
role,
description,
enabled = true,
schedule_type = 'interval',
interval_minutes = 240,
cron_expression,
jitter_minutes = 30,
assignment_type = 'all',
assigned_state_codes,
assigned_dispensary_ids,
assigned_chain_ids,
job_type = 'dutchie_product_crawl',
job_config = {},
priority = 0,
max_concurrent = 1
} = req.body;
if (!name || !role) {
return res.status(400).json({ success: false, error: 'name and role are required' });
}
const { rows } = await pool.query(`
INSERT INTO workers (
name, role, description, enabled,
schedule_type, interval_minutes, cron_expression, jitter_minutes,
assignment_type, assigned_state_codes, assigned_dispensary_ids, assigned_chain_ids,
job_type, job_config, priority, max_concurrent
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16)
RETURNING *
`, [
name, role, description, enabled,
schedule_type, interval_minutes, cron_expression, jitter_minutes,
assignment_type, assigned_state_codes, assigned_dispensary_ids, assigned_chain_ids,
job_type, job_config, priority, max_concurrent
]);
// Also create a job_schedule entry for backwards compatibility
await pool.query(`
INSERT INTO job_schedules (job_name, description, enabled, base_interval_minutes, jitter_minutes, worker_name, worker_role, job_config)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
ON CONFLICT (job_name) DO UPDATE SET
description = EXCLUDED.description,
enabled = EXCLUDED.enabled,
base_interval_minutes = EXCLUDED.base_interval_minutes,
jitter_minutes = EXCLUDED.jitter_minutes,
worker_name = EXCLUDED.worker_name,
worker_role = EXCLUDED.worker_role,
updated_at = NOW()
`, [
`worker_${name.toLowerCase().replace(/\s+/g, '_')}`,
description || `Worker: ${name}`,
enabled,
interval_minutes,
jitter_minutes,
name,
role,
job_config
]);
res.json({ success: true, worker: rows[0], message: 'Worker created' });
} catch (error: any) {
console.error('[Workers] Error creating worker:', error);
if (error.code === '23505') { // unique violation
return res.status(400).json({ success: false, error: 'Worker name already exists' });
}
res.status(500).json({ success: false, error: error.message });
}
});
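// Illustrative request (field values are examples; only name and role are required):
//   curl -X POST /api/workers/definitions -H 'Content-Type: application/json' \
//     -d '{"name": "AZ Product Refresh", "role": "product_refresh", "assignment_type": "state", "assigned_state_codes": ["AZ"]}'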
/**
* PUT /api/workers/definitions/:id - Update a worker definition
*/
router.put('/definitions/:id', async (req: Request, res: Response) => {
try {
const { id } = req.params;
const {
name,
role,
description,
enabled,
schedule_type,
interval_minutes,
cron_expression,
jitter_minutes,
assignment_type,
assigned_state_codes,
assigned_dispensary_ids,
assigned_chain_ids,
job_type,
job_config,
priority,
max_concurrent
} = req.body;
const { rows } = await pool.query(`
UPDATE workers SET
name = COALESCE($1, name),
role = COALESCE($2, role),
description = COALESCE($3, description),
enabled = COALESCE($4, enabled),
schedule_type = COALESCE($5, schedule_type),
interval_minutes = COALESCE($6, interval_minutes),
cron_expression = COALESCE($7, cron_expression),
jitter_minutes = COALESCE($8, jitter_minutes),
assignment_type = COALESCE($9, assignment_type),
assigned_state_codes = COALESCE($10, assigned_state_codes),
assigned_dispensary_ids = COALESCE($11, assigned_dispensary_ids),
assigned_chain_ids = COALESCE($12, assigned_chain_ids),
job_type = COALESCE($13, job_type),
job_config = COALESCE($14, job_config),
priority = COALESCE($15, priority),
max_concurrent = COALESCE($16, max_concurrent),
updated_at = NOW()
WHERE id = $17
RETURNING *
`, [
name, role, description, enabled,
schedule_type, interval_minutes, cron_expression, jitter_minutes,
assignment_type, assigned_state_codes, assigned_dispensary_ids, assigned_chain_ids,
job_type, job_config, priority, max_concurrent,
id
]);
if (rows.length === 0) {
return res.status(404).json({ success: false, error: 'Worker not found' });
}
res.json({ success: true, worker: rows[0], message: 'Worker updated' });
} catch (error: any) {
console.error('[Workers] Error updating worker:', error);
res.status(500).json({ success: false, error: error.message });
}
});
/**
* DELETE /api/workers/definitions/:id - Delete a worker definition
*/
router.delete('/definitions/:id', async (req: Request, res: Response) => {
try {
const { id } = req.params;
const { rows } = await pool.query(`
DELETE FROM workers WHERE id = $1 RETURNING name
`, [id]);
if (rows.length === 0) {
return res.status(404).json({ success: false, error: 'Worker not found' });
}
res.json({ success: true, message: `Worker "${rows[0].name}" deleted` });
} catch (error: any) {
console.error('[Workers] Error deleting worker:', error);
res.status(500).json({ success: false, error: error.message });
}
});
/**
* POST /api/workers/definitions/:id/toggle - Enable/disable worker
*/
router.post('/definitions/:id/toggle', async (req: Request, res: Response) => {
try {
const { id } = req.params;
const { rows } = await pool.query(`
UPDATE workers SET enabled = NOT enabled, updated_at = NOW()
WHERE id = $1
RETURNING id, name, enabled
`, [id]);
if (rows.length === 0) {
return res.status(404).json({ success: false, error: 'Worker not found' });
}
res.json({ success: true, worker: rows[0], message: `Worker ${rows[0].enabled ? 'enabled' : 'disabled'}` });
} catch (error: any) {
res.status(500).json({ success: false, error: error.message });
}
});
/**
* POST /api/workers/definitions/:id/assign-dispensary - Assign dispensary to worker
*/
router.post('/definitions/:id/assign-dispensary', async (req: Request, res: Response) => {
try {
const { id } = req.params;
const { dispensary_id } = req.body;
if (!dispensary_id) {
return res.status(400).json({ success: false, error: 'dispensary_id is required' });
}
const { rows } = await pool.query(`
UPDATE workers SET
assigned_dispensary_ids = array_append(
COALESCE(assigned_dispensary_ids, ARRAY[]::integer[]),
$1::integer
),
assignment_type = 'dispensary',
updated_at = NOW()
WHERE id = $2 AND NOT ($1 = ANY(COALESCE(assigned_dispensary_ids, ARRAY[]::integer[])))
RETURNING id, name, assigned_dispensary_ids
`, [dispensary_id, id]);
if (rows.length === 0) {
// Check if dispensary was already assigned
const existing = await pool.query(`
SELECT assigned_dispensary_ids FROM workers WHERE id = $1
`, [id]);
if (existing.rows.length === 0) {
return res.status(404).json({ success: false, error: 'Worker not found' });
}
return res.json({ success: true, message: 'Dispensary already assigned', worker: existing.rows[0] });
}
res.json({ success: true, worker: rows[0], message: 'Dispensary assigned to worker' });
} catch (error: any) {
res.status(500).json({ success: false, error: error.message });
}
});
/**
* DELETE /api/workers/definitions/:id/assign-dispensary/:dispensaryId - Remove dispensary from worker
*/
router.delete('/definitions/:id/assign-dispensary/:dispensaryId', async (req: Request, res: Response) => {
try {
const { id, dispensaryId } = req.params;
const { rows } = await pool.query(`
UPDATE workers SET
assigned_dispensary_ids = array_remove(assigned_dispensary_ids, $1::integer),
updated_at = NOW()
WHERE id = $2
RETURNING id, name, assigned_dispensary_ids
`, [dispensaryId, id]);
if (rows.length === 0) {
return res.status(404).json({ success: false, error: 'Worker not found' });
}
res.json({ success: true, worker: rows[0], message: 'Dispensary removed from worker' });
} catch (error: any) {
res.status(500).json({ success: false, error: error.message });
}
});
/**
* PUT /api/workers/:id/schedule - Update worker schedule (for job_schedules table)
*/
router.put('/:id/schedule', async (req: Request, res: Response) => {
try {
const { id } = req.params;
const {
worker_name,
worker_role,
description,
enabled,
base_interval_minutes,
jitter_minutes,
job_config
} = req.body;
const { rows } = await pool.query(`
UPDATE job_schedules SET
worker_name = COALESCE($1, worker_name),
worker_role = COALESCE($2, worker_role),
description = COALESCE($3, description),
enabled = COALESCE($4, enabled),
base_interval_minutes = COALESCE($5, base_interval_minutes),
jitter_minutes = COALESCE($6, jitter_minutes),
job_config = COALESCE($7, job_config),
updated_at = NOW()
WHERE id = $8
RETURNING *
`, [worker_name, worker_role, description, enabled, base_interval_minutes, jitter_minutes, job_config, id]);
if (rows.length === 0) {
return res.status(404).json({ success: false, error: 'Schedule not found' });
}
res.json({ success: true, schedule: rows[0], message: 'Schedule updated' });
} catch (error: any) {
res.status(500).json({ success: false, error: error.message });
}
});
export default router;

View File

@@ -0,0 +1,353 @@
/**
* Canonical Database Pipeline
*
* Writes scraped products to the canonical tables:
* - store_products (current state)
* - store_product_snapshots (historical)
* - product_variants (per-weight pricing)
* - product_variant_snapshots (variant history)
*
* This replaces the legacy DatabasePipeline, which wrote to the `products` table.
*/
import { ItemPipeline, Product } from './types';
import { logger } from '../services/logger';
import { pool } from '../db/pool';
import { v4 as uuidv4 } from 'uuid';
interface VariantData {
option: string;
priceRec: number | null;
priceMed: number | null;
priceRecSpecial: number | null;
priceMedSpecial: number | null;
quantity: number | null;
inStock: boolean;
isOnSpecial: boolean;
}
/**
* Parse weight string like "1g", "3.5g", "1/8oz" into value and unit
*/
function parseWeight(option: string): { value: number | null; unit: string | null } {
if (!option) return { value: null, unit: null };
// Match patterns like "1g", "3.5g", "1/8oz", "100mg"
const match = option.match(/^([\d.\/]+)\s*(g|oz|mg|ml|each|pk|ct)?$/i);
if (!match) return { value: null, unit: null };
let value: number | null = null;
const rawValue = match[1];
const unit = match[2]?.toLowerCase() || null;
// Handle fractions like "1/8"
if (rawValue.includes('/')) {
const [num, denom] = rawValue.split('/');
value = parseFloat(num) / parseFloat(denom);
} else {
value = parseFloat(rawValue);
}
if (isNaN(value)) value = null;
return { value, unit };
}
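// Example behavior (illustrative):
//   parseWeight('3.5g')   -> { value: 3.5, unit: 'g' }
//   parseWeight('1/8oz')  -> { value: 0.125, unit: 'oz' }
//   parseWeight('2-pack') -> { value: null, unit: null }  (format not recognized)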
/**
* Canonical Database Pipeline - saves items to canonical tables
*
* TABLES:
* - store_products: Current product state per store
* - store_product_snapshots: Historical snapshot per crawl
* - product_variants: Current variant state (per-weight pricing)
* - product_variant_snapshots: Historical variant snapshots
*/
export class CanonicalDatabasePipeline implements ItemPipeline<Product> {
name = 'CanonicalDatabasePipeline';
priority = 10; // Low priority - runs last
private crawlRunId: number | null = null;
setCrawlRunId(id: number): void {
this.crawlRunId = id;
}
async process(item: Product, spider: string): Promise<Product | null> {
const client = await pool.connect();
try {
// Extract metadata set by spider
const dispensaryId = (item as any).dispensaryId;
const categoryName = (item as any).categoryName;
const variants: VariantData[] = (item as any).variants || [];
if (!dispensaryId) {
logger.error('pipeline', `Missing dispensaryId for ${item.name}`);
return null;
}
const externalProductId = item.dutchieProductId || null;
const provider = 'dutchie';
// Determine stock status
const isInStock = (item as any).inStock !== false;
const stockQuantity = (item as any).stockQuantity || null;
// Extract pricing
const priceRec = item.price || null;
const priceMed = (item as any).priceMed || null;
let storeProductId: number | null = null;
let isNewProduct = false;
// ============================================================
// UPSERT store_products
// ============================================================
const upsertResult = await client.query(`
INSERT INTO store_products (
dispensary_id, provider, provider_product_id,
name_raw, brand_name_raw, category_raw,
price_rec, price_med,
thc_percent, cbd_percent,
is_in_stock, stock_quantity,
image_url, source_url,
raw_data,
first_seen_at, last_seen_at,
created_at, updated_at
) VALUES (
$1, $2, $3,
$4, $5, $6,
$7, $8,
$9, $10,
$11, $12,
$13, $14,
$15,
NOW(), NOW(),
NOW(), NOW()
)
ON CONFLICT (dispensary_id, provider, provider_product_id)
DO UPDATE SET
name_raw = EXCLUDED.name_raw,
brand_name_raw = EXCLUDED.brand_name_raw,
category_raw = EXCLUDED.category_raw,
price_rec = EXCLUDED.price_rec,
price_med = EXCLUDED.price_med,
thc_percent = EXCLUDED.thc_percent,
cbd_percent = EXCLUDED.cbd_percent,
is_in_stock = EXCLUDED.is_in_stock,
stock_quantity = EXCLUDED.stock_quantity,
image_url = COALESCE(EXCLUDED.image_url, store_products.image_url),
source_url = EXCLUDED.source_url,
raw_data = EXCLUDED.raw_data,
last_seen_at = NOW(),
updated_at = NOW()
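-- (xmax = 0) is true only for rows inserted by this statement, so is_new distinguishes inserts from conflict-updates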
RETURNING id, (xmax = 0) as is_new
`, [
dispensaryId, provider, externalProductId,
item.name, item.brand || null, categoryName || null,
priceRec, priceMed,
item.thcPercentage || null, item.cbdPercentage || null,
isInStock, stockQuantity,
item.imageUrl || null, item.dutchieUrl || null,
JSON.stringify(item.metadata || {}),
]);
storeProductId = upsertResult.rows[0].id;
isNewProduct = upsertResult.rows[0].is_new;
logger.debug('pipeline', `${isNewProduct ? 'Inserted' : 'Updated'} canonical product: ${item.name} (ID: ${storeProductId})`);
// ============================================================
// INSERT store_product_snapshots
// ============================================================
await client.query(`
INSERT INTO store_product_snapshots (
store_product_id, dispensary_id, crawl_run_id,
price_rec, price_med,
is_in_stock, stock_quantity,
is_present_in_feed,
captured_at, created_at
) VALUES (
$1, $2, $3,
$4, $5,
$6, $7,
TRUE,
NOW(), NOW()
)
ON CONFLICT (store_product_id, crawl_run_id) WHERE crawl_run_id IS NOT NULL
DO UPDATE SET
price_rec = EXCLUDED.price_rec,
price_med = EXCLUDED.price_med,
is_in_stock = EXCLUDED.is_in_stock,
stock_quantity = EXCLUDED.stock_quantity
`, [
storeProductId, dispensaryId, this.crawlRunId,
priceRec, priceMed,
isInStock, stockQuantity,
]);
// ============================================================
// UPSERT product_variants (if variants exist)
// ============================================================
if (variants.length > 0) {
for (const variant of variants) {
const { value: weightValue, unit: weightUnit } = parseWeight(variant.option);
const variantResult = await client.query(`
INSERT INTO product_variants (
store_product_id, dispensary_id,
option,
price_rec, price_med, price_rec_special, price_med_special,
quantity, quantity_available, in_stock, is_on_special,
weight_value, weight_unit,
first_seen_at, last_seen_at,
created_at, updated_at
) VALUES (
$1, $2,
$3,
$4, $5, $6, $7,
$8, $8, $9, $10,
$11, $12,
NOW(), NOW(),
NOW(), NOW()
)
ON CONFLICT (store_product_id, option)
DO UPDATE SET
price_rec = EXCLUDED.price_rec,
price_med = EXCLUDED.price_med,
price_rec_special = EXCLUDED.price_rec_special,
price_med_special = EXCLUDED.price_med_special,
quantity = EXCLUDED.quantity,
quantity_available = EXCLUDED.quantity_available,
in_stock = EXCLUDED.in_stock,
is_on_special = EXCLUDED.is_on_special,
weight_value = EXCLUDED.weight_value,
weight_unit = EXCLUDED.weight_unit,
last_seen_at = NOW(),
last_price_change_at = CASE
WHEN product_variants.price_rec IS DISTINCT FROM EXCLUDED.price_rec
OR product_variants.price_rec_special IS DISTINCT FROM EXCLUDED.price_rec_special
THEN NOW()
ELSE product_variants.last_price_change_at
END,
last_stock_change_at = CASE
WHEN product_variants.in_stock IS DISTINCT FROM EXCLUDED.in_stock
THEN NOW()
ELSE product_variants.last_stock_change_at
END,
updated_at = NOW()
RETURNING id
`, [
storeProductId, dispensaryId,
variant.option,
variant.priceRec, variant.priceMed, variant.priceRecSpecial, variant.priceMedSpecial,
variant.quantity, variant.inStock, variant.isOnSpecial,
weightValue, weightUnit,
]);
const variantId = variantResult.rows[0].id;
// Insert variant snapshot
await client.query(`
INSERT INTO product_variant_snapshots (
product_variant_id, store_product_id, dispensary_id, crawl_run_id,
option,
price_rec, price_med, price_rec_special, price_med_special,
quantity, in_stock, is_on_special,
is_present_in_feed,
captured_at, created_at
) VALUES (
$1, $2, $3, $4,
$5,
$6, $7, $8, $9,
$10, $11, $12,
TRUE,
NOW(), NOW()
)
`, [
variantId, storeProductId, dispensaryId, this.crawlRunId,
variant.option,
variant.priceRec, variant.priceMed, variant.priceRecSpecial, variant.priceMedSpecial,
variant.quantity, variant.inStock, variant.isOnSpecial,
]);
}
logger.debug('pipeline', `Upserted ${variants.length} variants for ${item.name}`);
}
// Attach metadata for stats tracking
(item as any).isNewProduct = isNewProduct;
(item as any).storeProductId = storeProductId;
return item;
} catch (error) {
logger.error('pipeline', `Failed to save canonical product ${item.name}: ${error}`);
return null;
} finally {
client.release();
}
}
}
/**
* Create a crawl run record before starting crawl
*/
export async function createCrawlRun(
dispensaryId: number,
provider: string = 'dutchie',
triggerType: string = 'manual'
): Promise<number> {
const result = await pool.query(`
INSERT INTO crawl_runs (
dispensary_id, provider,
started_at, status, trigger_type
) VALUES ($1, $2, NOW(), 'running', $3)
RETURNING id
`, [dispensaryId, provider, triggerType]);
return result.rows[0].id;
}
/**
* Complete a crawl run with stats
*/
export async function completeCrawlRun(
crawlRunId: number,
stats: {
productsFound: number;
productsNew: number;
productsUpdated: number;
snapshotsWritten: number;
variantsUpserted?: number;
status?: 'completed' | 'failed' | 'partial';
error?: string;
}
): Promise<void> {
await pool.query(`
UPDATE crawl_runs SET
finished_at = NOW(),
status = $2,
products_found = $3,
products_new = $4,
products_updated = $5,
snapshots_written = $6,
metadata = jsonb_build_object(
'variants_upserted', $7,
'error', $8
)
WHERE id = $1
`, [
crawlRunId,
stats.status || 'completed',
stats.productsFound,
stats.productsNew,
stats.productsUpdated,
stats.snapshotsWritten,
stats.variantsUpserted || 0,
stats.error || null,
]);
}
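// Illustrative wiring (a sketch; the actual integration lives in ScraperEngine's pipeline setup):
//   const crawlRunId = await createCrawlRun(dispensaryId, 'dutchie', 'manual');
//   const pipeline = new CanonicalDatabasePipeline();
//   pipeline.setCrawlRunId(crawlRunId);
//   for (const product of scrapedProducts) await pipeline.process(product, 'dutchie');
//   await completeCrawlRun(crawlRunId, { productsFound, productsNew, productsUpdated, snapshotsWritten });
//   (scrapedProducts and the stats counters are placeholders the caller would track itself)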

View File

@@ -2,6 +2,7 @@ import { RequestScheduler } from './scheduler';
import { Downloader } from './downloader';
import { MiddlewareEngine, UserAgentMiddleware, ProxyMiddleware, RateLimitMiddleware, RetryMiddleware, BotDetectionMiddleware, StealthMiddleware } from './middlewares';
import { PipelineEngine, ValidationPipeline, SanitizationPipeline, DeduplicationPipeline, ImagePipeline, DatabasePipeline, StatsPipeline } from './pipelines';
import { CanonicalDatabasePipeline, createCrawlRun, completeCrawlRun } from './canonical-pipeline';
import { ScraperRequest, ScraperResponse, ParseResult, Product, ScraperStats } from './types';
import { logger } from '../services/logger';
import { pool } from '../db/pool';
@@ -65,6 +66,9 @@ export class ScraperEngine {
this.pipelineEngine.use(new DeduplicationPipeline());
this.pipelineEngine.use(new ImagePipeline());
this.pipelineEngine.use(new StatsPipeline());
// Use canonical pipeline for writing to store_products/product_variants
this.pipelineEngine.use(new CanonicalDatabasePipeline());
// Keep the legacy DatabasePipeline for backwards compatibility with the legacy `products` table
this.pipelineEngine.use(new DatabasePipeline());
}

View File

@@ -39,6 +39,11 @@ export {
DatabasePipeline,
StatsPipeline
} from './pipelines';
export {
CanonicalDatabasePipeline,
createCrawlRun,
completeCrawlRun
} from './canonical-pipeline';
export * from './types';
// Main API functions

View File

@@ -0,0 +1,250 @@
#!/usr/bin/env npx tsx
/**
* Crawl Single Store - Verbose test showing each step
*
* Usage:
* DATABASE_URL="postgresql://dutchie:dutchie_local_pass@localhost:54320/dutchie_menus" \
* npx tsx src/scripts/crawl-single-store.ts <dispensaryId>
*
* Example:
* DATABASE_URL="..." npx tsx src/scripts/crawl-single-store.ts 112
*/
import { Pool } from 'pg';
import dotenv from 'dotenv';
import {
executeGraphQL,
startSession,
endSession,
getFingerprint,
GRAPHQL_HASHES,
DUTCHIE_CONFIG,
} from '../platforms/dutchie';
dotenv.config();
// ============================================================
// DATABASE CONNECTION
// ============================================================
function getConnectionString(): string {
if (process.env.DATABASE_URL) {
return process.env.DATABASE_URL;
}
if (process.env.CANNAIQ_DB_URL) {
return process.env.CANNAIQ_DB_URL;
}
const host = process.env.CANNAIQ_DB_HOST || 'localhost';
const port = process.env.CANNAIQ_DB_PORT || '54320';
const name = process.env.CANNAIQ_DB_NAME || 'dutchie_menus';
const user = process.env.CANNAIQ_DB_USER || 'dutchie';
const pass = process.env.CANNAIQ_DB_PASS || 'dutchie_local_pass';
return `postgresql://${user}:${pass}@${host}:${port}/${name}`;
}
const pool = new Pool({ connectionString: getConnectionString() });
// ============================================================
// MAIN
// ============================================================
async function main() {
const dispensaryId = parseInt(process.argv[2], 10);
if (!dispensaryId) {
console.error('Usage: npx tsx src/scripts/crawl-single-store.ts <dispensaryId>');
console.error('Example: npx tsx src/scripts/crawl-single-store.ts 112');
process.exit(1);
}
console.log('');
console.log('╔════════════════════════════════════════════════════════════╗');
console.log('║ SINGLE STORE CRAWL - VERBOSE OUTPUT ║');
console.log('╚════════════════════════════════════════════════════════════╝');
console.log('');
try {
// ============================================================
// STEP 1: Get dispensary info from database
// ============================================================
console.log('┌─────────────────────────────────────────────────────────────┐');
console.log('│ STEP 1: Load Dispensary Info from Database │');
console.log('└─────────────────────────────────────────────────────────────┘');
const dispResult = await pool.query(`
SELECT
id,
name,
platform_dispensary_id,
menu_url,
menu_type,
city,
state
FROM dispensaries
WHERE id = $1
`, [dispensaryId]);
if (dispResult.rows.length === 0) {
throw new Error(`Dispensary ${dispensaryId} not found`);
}
const disp = dispResult.rows[0];
console.log(` Dispensary ID: ${disp.id}`);
console.log(` Name: ${disp.name}`);
console.log(` City, State: ${disp.city}, ${disp.state}`);
console.log(` Menu Type: ${disp.menu_type}`);
console.log(` Platform ID: ${disp.platform_dispensary_id}`);
console.log(` Menu URL: ${disp.menu_url}`);
if (!disp.platform_dispensary_id) {
throw new Error('Dispensary does not have a platform_dispensary_id - cannot crawl');
}
// Extract cName from menu_url
const cNameMatch = disp.menu_url?.match(/\/(?:embedded-menu|dispensary)\/([^/?]+)/);
const cName = cNameMatch ? cNameMatch[1] : 'dispensary';
console.log(` cName (derived): ${cName}`);
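// e.g. https://dutchie.com/embedded-menu/some-slug -> cName "some-slug"; falls back to "dispensary" if the URL has another shape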
console.log('');
// ============================================================
// STEP 2: Start stealth session
// ============================================================
console.log('┌─────────────────────────────────────────────────────────────┐');
console.log('│ STEP 2: Start Stealth Session │');
console.log('└─────────────────────────────────────────────────────────────┘');
// Use Arizona timezone for this store
const session = startSession(disp.state || 'AZ', 'America/Phoenix');
const fp = getFingerprint();
console.log(` Session ID: ${session.sessionId}`);
console.log(` User-Agent: ${fp.userAgent.slice(0, 60)}...`);
console.log(` Accept-Language: ${fp.acceptLanguage}`);
console.log(` Sec-CH-UA: ${fp.secChUa || '(not set)'}`);
console.log('');
// ============================================================
// STEP 3: Execute GraphQL query
// ============================================================
console.log('┌─────────────────────────────────────────────────────────────┐');
console.log('│ STEP 3: Execute GraphQL Query (FilteredProducts) │');
console.log('└─────────────────────────────────────────────────────────────┘');
const variables = {
includeEnterpriseSpecials: false,
productsFilter: {
dispensaryId: disp.platform_dispensary_id,
pricingType: 'rec',
Status: 'Active',
types: [],
useCache: true,
isDefaultSort: true,
sortBy: 'popularSortIdx',
sortDirection: 1,
bypassOnlineThresholds: true,
isKioskMenu: false,
removeProductsBelowOptionThresholds: false,
},
page: 0,
perPage: 100,
};
console.log(` Endpoint: ${DUTCHIE_CONFIG.graphqlEndpoint}`);
console.log(` Operation: FilteredProducts`);
console.log(` Hash: ${GRAPHQL_HASHES.FilteredProducts.slice(0, 20)}...`);
console.log(` dispensaryId: ${variables.productsFilter.dispensaryId}`);
console.log(` pricingType: ${variables.productsFilter.pricingType}`);
console.log(` Status: ${variables.productsFilter.Status}`);
console.log(` perPage: ${variables.perPage}`);
console.log('');
console.log(' Sending request...');
const startTime = Date.now();
const result = await executeGraphQL(
'FilteredProducts',
variables,
GRAPHQL_HASHES.FilteredProducts,
{ cName, maxRetries: 3 }
);
const elapsed = Date.now() - startTime;
console.log(` Response time: ${elapsed}ms`);
console.log('');
// ============================================================
// STEP 4: Process response
// ============================================================
console.log('┌─────────────────────────────────────────────────────────────┐');
console.log('│ STEP 4: Process Response │');
console.log('└─────────────────────────────────────────────────────────────┘');
const data = result?.data?.filteredProducts;
if (!data) {
console.log(' ERROR: No data returned from GraphQL');
console.log(' Raw result:', JSON.stringify(result, null, 2).slice(0, 500));
endSession();
return;
}
const products = data.products || [];
const totalCount = data.queryInfo?.totalCount || 0;
const totalPages = Math.ceil(totalCount / 100);
console.log(` Total products: ${totalCount}`);
console.log(` Products in page: ${products.length}`);
console.log(` Total pages: ${totalPages}`);
console.log('');
// Show first few products
console.log(' First 5 products:');
console.log(' ─────────────────────────────────────────────────────────');
for (let i = 0; i < Math.min(5, products.length); i++) {
const p = products[i];
const name = (p.name || 'Unknown').slice(0, 40);
const brand = (p.brand?.name || 'Unknown').slice(0, 15);
const price = p.Prices?.[0]?.price || p.medPrice || p.recPrice || 'N/A';
const category = p.type || p.category || 'N/A';
console.log(` ${i + 1}. ${name.padEnd(42)} | ${brand.padEnd(17)} | ${String(category).padEnd(12)} | $${price}`);
}
console.log('');
// ============================================================
// STEP 5: End session
// ============================================================
console.log('┌─────────────────────────────────────────────────────────────┐');
console.log('│ STEP 5: End Session │');
console.log('└─────────────────────────────────────────────────────────────┘');
endSession();
console.log('');
// ============================================================
// SUMMARY
// ============================================================
console.log('╔════════════════════════════════════════════════════════════╗');
console.log('║ SUMMARY ║');
console.log('╠════════════════════════════════════════════════════════════╣');
console.log(`║ Store: ${disp.name.slice(0, 38).padEnd(38)}`);
console.log(`║ Products Found: ${String(totalCount).padEnd(38)}`);
console.log(`║ Response Time: ${(elapsed + 'ms').padEnd(38)}`);
console.log(`║ Status: ${'SUCCESS'.padEnd(38)}`);
console.log('╚════════════════════════════════════════════════════════════╝');
} catch (error: any) {
console.error('');
console.error('╔════════════════════════════════════════════════════════════╗');
console.error('║ ERROR ║');
console.error('╚════════════════════════════════════════════════════════════╝');
console.error(` ${error.message}`);
if (error.stack) {
console.error('');
console.error('Stack trace:');
console.error(error.stack.split('\n').slice(0, 5).join('\n'));
}
process.exit(1);
} finally {
await pool.end();
}
}
main();

View File

@@ -0,0 +1,385 @@
#!/usr/bin/env npx tsx
/**
* Discover All States - Sequential State-by-State Dutchie Discovery
*
* This script discovers all Dutchie dispensaries for every US state,
* processing one state at a time with delays between states.
*
* Progress is automatically saved to /tmp/discovery-progress.json
* so the script can resume from where it left off if interrupted.
*
* Usage:
* DATABASE_URL="..." npx tsx src/scripts/discover-all-states.ts
* DATABASE_URL="..." npx tsx src/scripts/discover-all-states.ts --dry-run
* DATABASE_URL="..." npx tsx src/scripts/discover-all-states.ts --start-from CA
* DATABASE_URL="..." npx tsx src/scripts/discover-all-states.ts --resume
* DATABASE_URL="..." npx tsx src/scripts/discover-all-states.ts --reset # Clear progress, start fresh
*
* Options:
* --dry-run Don't save to database, just show what would happen
* --start-from Start from a specific state (skip earlier states)
* --states Comma-separated list of specific states to run (e.g., AZ,CA,CO)
* --verbose Show detailed output
* --resume Auto-resume from last saved progress (default if progress file exists)
* --reset Clear progress file and start fresh
*/
import { Pool } from 'pg';
import * as fs from 'fs';
import * as path from 'path';
const PROGRESS_FILE = '/tmp/discovery-progress.json';
interface ProgressData {
lastCompletedState: string | null;
lastCompletedIndex: number;
startedAt: string;
updatedAt: string;
completedStates: string[];
}
function loadProgress(): ProgressData | null {
try {
if (fs.existsSync(PROGRESS_FILE)) {
const data = JSON.parse(fs.readFileSync(PROGRESS_FILE, 'utf-8'));
return data;
}
} catch (e) {
console.warn('[Progress] Could not load progress file:', e);
}
return null;
}
function saveProgress(progress: ProgressData): void {
try {
progress.updatedAt = new Date().toISOString();
fs.writeFileSync(PROGRESS_FILE, JSON.stringify(progress, null, 2));
} catch (e) {
console.warn('[Progress] Could not save progress:', e);
}
}
function clearProgress(): void {
try {
if (fs.existsSync(PROGRESS_FILE)) {
fs.unlinkSync(PROGRESS_FILE);
console.log('[Progress] Cleared progress file');
}
} catch (e) {
console.warn('[Progress] Could not clear progress:', e);
}
}
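// Example progress file contents (illustrative values):
// {
//   "lastCompletedState": "CO",
//   "lastCompletedIndex": 2,
//   "startedAt": "2025-12-10T18:00:00.000Z",
//   "updatedAt": "2025-12-10T19:30:00.000Z",
//   "completedStates": ["AZ", "CA", "CO"]
// }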
import { discoverState } from '../discovery';
// US states with legal cannabis (medical or recreational)
// Ordered roughly by market size / likelihood of Dutchie presence
const US_STATES = [
'AZ', // Arizona
'CA', // California
'CO', // Colorado
'FL', // Florida
'IL', // Illinois
'MA', // Massachusetts
'MI', // Michigan
'NV', // Nevada
'NJ', // New Jersey
'NY', // New York
'OH', // Ohio
'OR', // Oregon
'PA', // Pennsylvania
'WA', // Washington
'MD', // Maryland
'MO', // Missouri
'CT', // Connecticut
'NM', // New Mexico
'ME', // Maine
'VT', // Vermont
'MT', // Montana
'AK', // Alaska
'OK', // Oklahoma
'AR', // Arkansas
'ND', // North Dakota
'SD', // South Dakota
'MN', // Minnesota
'NH', // New Hampshire
'RI', // Rhode Island
'DE', // Delaware
'HI', // Hawaii
'WV', // West Virginia
'LA', // Louisiana
'UT', // Utah
'VA', // Virginia
'DC', // District of Columbia
];
interface DiscoveryResult {
stateCode: string;
citiesCrawled: number;
locationsFound: number;
locationsUpserted: number;
durationMs: number;
errors: string[];
}
function parseArgs() {
const args = process.argv.slice(2);
const flags: Record<string, string | boolean> = {};
for (let i = 0; i < args.length; i++) {
const arg = args[i];
if (arg.startsWith('--')) {
const [key, value] = arg.slice(2).split('=');
if (value !== undefined) {
flags[key] = value;
} else if (args[i + 1] && !args[i + 1].startsWith('--')) {
flags[key] = args[i + 1];
i++;
} else {
flags[key] = true;
}
}
}
return flags;
}
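// Example (illustrative): `--dry-run --start-from CA --states=AZ,CA,CO`
//   -> { 'dry-run': true, 'start-from': 'CA', states: 'AZ,CA,CO' }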
async function main() {
const flags = parseArgs();
const dryRun = Boolean(flags['dry-run']);
const verbose = Boolean(flags.verbose);
const reset = Boolean(flags.reset);
const resume = Boolean(flags.resume);
let startFrom = flags['start-from'] as string | undefined;
const specificStates = flags.states
? (flags.states as string).split(',').map((s) => s.trim().toUpperCase())
: null;
// Handle reset flag
if (reset) {
clearProgress();
}
// Determine which states to process
let statesToProcess = specificStates || US_STATES;
// Check for saved progress (auto-resume unless --reset or --start-from specified)
const savedProgress = loadProgress();
if (savedProgress && !reset && !startFrom && !specificStates) {
const nextIndex = savedProgress.lastCompletedIndex + 1;
if (nextIndex < US_STATES.length) {
startFrom = US_STATES[nextIndex];
console.log(`[Progress] Resuming from saved progress`);
console.log(`[Progress] Last completed: ${savedProgress.lastCompletedState} (${savedProgress.completedStates.length} states done)`);
console.log(`[Progress] Started at: ${savedProgress.startedAt}`);
console.log(`[Progress] Last update: ${savedProgress.updatedAt}`);
console.log('');
} else {
console.log(`[Progress] All states already completed! Use --reset to start over.`);
process.exit(0);
}
}
if (startFrom) {
const startIndex = statesToProcess.indexOf(startFrom.toUpperCase());
if (startIndex === -1) {
console.error(`ERROR: State ${startFrom} not found in list`);
process.exit(1);
}
statesToProcess = statesToProcess.slice(startIndex);
console.log(`Starting from ${startFrom}, ${statesToProcess.length} states remaining`);
}
// Initialize progress tracking
let progress: ProgressData = savedProgress || {
lastCompletedState: null,
lastCompletedIndex: -1,
startedAt: new Date().toISOString(),
updatedAt: new Date().toISOString(),
completedStates: [],
};
console.log('='.repeat(70));
console.log('DUTCHIE ALL-STATES DISCOVERY');
console.log('='.repeat(70));
console.log(`Mode: ${dryRun ? 'DRY RUN' : 'LIVE'}`);
console.log(`States to process: ${statesToProcess.length}`);
console.log(`States: ${statesToProcess.join(', ')}`);
console.log('');
// Create database pool
const connectionString = process.env.DATABASE_URL;
if (!connectionString) {
console.error('ERROR: DATABASE_URL environment variable is required');
process.exit(1);
}
const pool = new Pool({ connectionString });
const results: DiscoveryResult[] = [];
const startTime = Date.now();
try {
for (let i = 0; i < statesToProcess.length; i++) {
const stateCode = statesToProcess[i];
console.log('');
console.log('─'.repeat(70));
console.log(`[${i + 1}/${statesToProcess.length}] Discovering ${stateCode}...`);
console.log('─'.repeat(70));
try {
const result = await discoverState(pool, stateCode, {
dryRun,
verbose,
cityLimit: 200, // Allow up to 200 cities per state
});
const discoveryResult: DiscoveryResult = {
stateCode,
citiesCrawled: result.locations.length,
locationsFound: result.totalLocationsFound,
locationsUpserted: result.totalLocationsUpserted,
durationMs: result.durationMs,
errors: [],
};
// Collect errors from city results
result.locations.forEach((loc) => {
if (loc.errors && loc.errors.length > 0) {
discoveryResult.errors.push(...loc.errors);
}
});
results.push(discoveryResult);
// Save progress after each successful state
const stateIndex = US_STATES.indexOf(stateCode);
progress.lastCompletedState = stateCode;
progress.lastCompletedIndex = stateIndex;
if (!progress.completedStates.includes(stateCode)) {
progress.completedStates.push(stateCode);
}
saveProgress(progress);
console.log(`\n[${stateCode}] COMPLETE:`);
console.log(` Cities crawled: ${discoveryResult.citiesCrawled}`);
console.log(` Locations found: ${discoveryResult.locationsFound}`);
console.log(` Locations upserted: ${discoveryResult.locationsUpserted}`);
console.log(` Duration: ${(discoveryResult.durationMs / 1000).toFixed(1)}s`);
console.log(` Progress saved (${progress.completedStates.length}/${US_STATES.length} states)`);
if (discoveryResult.errors.length > 0) {
console.log(` Errors: ${discoveryResult.errors.length}`);
}
// Delay between states to avoid rate limiting
if (i < statesToProcess.length - 1) {
const delaySeconds = 5;
console.log(`\n Waiting ${delaySeconds}s before next state...`);
await new Promise((r) => setTimeout(r, delaySeconds * 1000));
}
} catch (error: any) {
console.error(`\n[${stateCode}] ERROR: ${error.message}`);
results.push({
stateCode,
citiesCrawled: 0,
locationsFound: 0,
locationsUpserted: 0,
durationMs: 0,
errors: [error.message],
});
// Continue to next state even on error
await new Promise((r) => setTimeout(r, 3000));
}
}
// Print summary
const totalDuration = Date.now() - startTime;
const totalLocations = results.reduce((sum, r) => sum + r.locationsFound, 0);
const totalUpserted = results.reduce((sum, r) => sum + r.locationsUpserted, 0);
const totalCities = results.reduce((sum, r) => sum + r.citiesCrawled, 0);
const statesWithErrors = results.filter((r) => r.errors.length > 0);
console.log('');
console.log('='.repeat(70));
console.log('DISCOVERY COMPLETE - SUMMARY');
console.log('='.repeat(70));
console.log(`Total states processed: ${results.length}`);
console.log(`Total cities crawled: ${totalCities}`);
console.log(`Total locations found: ${totalLocations}`);
console.log(`Total locations upserted: ${totalUpserted}`);
console.log(`Total duration: ${(totalDuration / 1000 / 60).toFixed(1)} minutes`);
console.log('');
if (statesWithErrors.length > 0) {
console.log('States with errors:');
statesWithErrors.forEach((r) => {
console.log(` ${r.stateCode}: ${r.errors.length} error(s)`);
});
console.log('');
}
// Print per-state breakdown
console.log('Per-state results:');
console.log('-'.repeat(70));
console.log('State\tCities\tFound\tUpserted\tDuration\tStatus');
console.log('-'.repeat(70));
results.forEach((r) => {
const status = r.errors.length > 0 ? 'ERRORS' : 'OK';
const duration = (r.durationMs / 1000).toFixed(1) + 's';
console.log(
`${r.stateCode}\t${r.citiesCrawled}\t${r.locationsFound}\t${r.locationsUpserted}\t\t${duration}\t\t${status}`
);
});
// Final count from database
console.log('');
console.log('='.repeat(70));
console.log('DATABASE TOTALS');
console.log('='.repeat(70));
const { rows: locationCounts } = await pool.query(`
SELECT
state_code,
COUNT(*) as count,
COUNT(CASE WHEN status = 'discovered' THEN 1 END) as discovered,
COUNT(CASE WHEN status = 'promoted' THEN 1 END) as promoted
FROM dutchie_discovery_locations
WHERE active = TRUE
GROUP BY state_code
ORDER BY count DESC
`);
console.log('State\tTotal\tDiscovered\tPromoted');
console.log('-'.repeat(50));
locationCounts.forEach((row: any) => {
console.log(`${row.state_code || 'N/A'}\t${row.count}\t${row.discovered}\t\t${row.promoted}`);
});
const { rows: totalRow } = await pool.query(`
SELECT COUNT(*) as total FROM dutchie_discovery_locations WHERE active = TRUE
`);
console.log('-'.repeat(50));
console.log(`TOTAL: ${totalRow[0].total} locations in discovery table`);
const { rows: dispRow } = await pool.query(`
SELECT COUNT(*) as total FROM dispensaries WHERE menu_type = 'dutchie'
`);
console.log(`DISPENSARIES: ${dispRow[0].total} Dutchie dispensaries in main table`);
// Clear progress file on successful completion of all states
if (results.length === US_STATES.length || (savedProgress && progress.completedStates.length === US_STATES.length)) {
clearProgress();
console.log('\n[Progress] All states completed! Progress file cleared.');
}
} finally {
await pool.end();
}
}
main().catch((error) => {
console.error('Fatal error:', error);
process.exit(1);
});

View File

@@ -0,0 +1,173 @@
import axios from 'axios';
import { Pool } from 'pg';
const DUTCHIE_GRAPHQL_URL = 'https://dutchie.com/graphql';
const MENU_PRODUCTS_QUERY = `
query FilteredProducts($productsFilter: ProductFilterInput!) {
filteredProducts(productsFilter: $productsFilter) {
products {
id
name
brand
category
subcategory
strainType
description
image
images {
id
url
}
posId
potencyCbd {
formatted
range
unit
}
potencyThc {
formatted
range
unit
}
variants {
id
option
price
priceMed
priceRec
quantity
specialPrice
}
status
}
}
}
`;
function formatBytes(bytes: number): string {
if (bytes < 1024) return `${bytes} B`;
if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(2)} KB`;
if (bytes < 1024 * 1024 * 1024) return `${(bytes / (1024 * 1024)).toFixed(2)} MB`;
return `${(bytes / (1024 * 1024 * 1024)).toFixed(2)} GB`;
}
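// Example outputs: formatBytes(512) -> "512 B", formatBytes(1536) -> "1.50 KB", formatBytes(5242880) -> "5.00 MB"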
async function measureRequest(dispensaryId: string, mode: 'A' | 'B') {
const variables: any = {
productsFilter: {
dispensaryId,
pricingType: 'rec',
Status: mode === 'A' ? 'Active' : null,
}
};
const requestBody = JSON.stringify({
query: MENU_PRODUCTS_QUERY,
variables,
});
const requestSize = Buffer.byteLength(requestBody, 'utf8');
try {
const response = await axios.post(DUTCHIE_GRAPHQL_URL, requestBody, {
headers: {
'Content-Type': 'application/json',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
'Origin': 'https://dutchie.com',
},
timeout: 30000,
});
const responseSize = Buffer.byteLength(JSON.stringify(response.data), 'utf8');
const productCount = response.data?.data?.filteredProducts?.products?.length || 0;
// Debug: show what we got
if (productCount === 0) {
console.log(` Response preview: ${JSON.stringify(response.data).slice(0, 300)}...`);
}
return { requestSize, responseSize, productCount };
} catch (error: any) {
console.error(` Error: ${error.message}`);
if (error.response) {
console.error(` Status: ${error.response.status}`);
console.error(` Data: ${JSON.stringify(error.response.data).slice(0, 200)}`);
}
return { requestSize, responseSize: 0, productCount: 0, error: error.message };
}
}
async function main() {
const pool = new Pool({ connectionString: process.env.DATABASE_URL });
// Pick the store with the most products in the DB as a known-good test target
const { rows } = await pool.query(`
SELECT d.platform_dispensary_id, d.name, COUNT(sp.id) as product_count
FROM dispensaries d
LEFT JOIN store_products sp ON d.id = sp.dispensary_id
WHERE d.platform_dispensary_id IS NOT NULL
GROUP BY d.id
ORDER BY product_count DESC
LIMIT 1
`);
if (rows.length === 0) {
console.log('No crawlable stores found');
await pool.end();
return;
}
const store = rows[0];
console.log('=== Dutchie GraphQL Bandwidth for One Store ===\n');
console.log(`Store: ${store.name}`);
console.log(`Platform ID: ${store.platform_dispensary_id}`);
console.log(`Products in DB: ${store.product_count || 'unknown'}\n`);
// Mode A (Active products with pricing)
console.log('Fetching Mode A (Active products)...');
const modeA = await measureRequest(store.platform_dispensary_id, 'A');
// Mode B (All products)
console.log('Fetching Mode B (All products)...');
const modeB = await measureRequest(store.platform_dispensary_id, 'B');
console.log('\n=== Results for ONE STORE ===');
console.log('\nMode A (Active products with pricing):');
console.log(` Request size: ${formatBytes(modeA.requestSize)}`);
console.log(` Response size: ${formatBytes(modeA.responseSize)}`);
console.log(` Products: ${modeA.productCount}`);
if (modeA.productCount > 0) {
console.log(` Per product: ${formatBytes(modeA.responseSize / modeA.productCount)}`);
}
console.log('\nMode B (All products incl. OOS):');
console.log(` Request size: ${formatBytes(modeB.requestSize)}`);
console.log(` Response size: ${formatBytes(modeB.responseSize)}`);
console.log(` Products: ${modeB.productCount}`);
if (modeB.productCount > 0) {
console.log(` Per product: ${formatBytes(modeB.responseSize / modeB.productCount)}`);
}
console.log('\nDual-Mode Crawl (what we actually do):');
const totalRequest = modeA.requestSize + modeB.requestSize;
const totalResponse = modeA.responseSize + modeB.responseSize;
const totalBandwidth = totalRequest + totalResponse;
console.log(` Total request: ${formatBytes(totalRequest)}`);
console.log(` Total response: ${formatBytes(totalResponse)}`);
console.log(` TOTAL BANDWIDTH: ${formatBytes(totalBandwidth)}`);
// Per-product average
const maxProducts = Math.max(modeA.productCount, modeB.productCount);
const bytesPerProduct = maxProducts > 0 ? totalResponse / maxProducts : 0;
console.log('\n=== Quick Reference ===');
console.log(`Average bytes per product: ~${formatBytes(bytesPerProduct)}`);
console.log(`\nTypical store sizes:`);
console.log(` Small (100 products): ~${formatBytes(bytesPerProduct * 100 + totalRequest)}`);
console.log(` Medium (300 products): ~${formatBytes(bytesPerProduct * 300 + totalRequest)}`);
console.log(` Large (500 products): ~${formatBytes(bytesPerProduct * 500 + totalRequest)}`);
await pool.end();
}
main().catch(console.error);

View File

@@ -0,0 +1,137 @@
#!/usr/bin/env npx tsx
/**
* Retry resolving platform IDs for Dutchie stores that have menu_url but no platform_dispensary_id
*
* Usage:
* npx tsx src/scripts/retry-platform-ids.ts
*/
import { Pool } from 'pg';
import dotenv from 'dotenv';
import { resolveDispensaryIdWithDetails } from '../platforms/dutchie/queries';
dotenv.config();
const pool = new Pool({
connectionString: process.env.DATABASE_URL ||
`postgresql://${process.env.CANNAIQ_DB_USER || 'dutchie'}:${process.env.CANNAIQ_DB_PASS || 'dutchie_local_pass'}@${process.env.CANNAIQ_DB_HOST || 'localhost'}:${process.env.CANNAIQ_DB_PORT || '54320'}/${process.env.CANNAIQ_DB_NAME || 'dutchie_menus'}`
});
interface DispensaryRow {
id: number;
name: string;
menu_url: string;
}
function extractSlugFromUrl(menuUrl: string): string | null {
// Extract slug from Dutchie URLs like:
// https://dutchie.com/stores/Nirvana-North-Phoenix
// https://dutchie.com/dispensary/curaleaf-dispensary-peoria
// https://dutchie.com/embedded-menu/some-slug
const patterns = [
/dutchie\.com\/stores\/([^/?]+)/i,
/dutchie\.com\/dispensary\/([^/?]+)/i,
/dutchie\.com\/embedded-menu\/([^/?]+)/i,
];
for (const pattern of patterns) {
const match = menuUrl.match(pattern);
if (match) {
return match[1];
}
}
return null;
}
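// e.g. extractSlugFromUrl('https://dutchie.com/stores/Nirvana-North-Phoenix') -> 'Nirvana-North-Phoenix'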
async function main() {
console.log('='.repeat(60));
console.log('Retry Platform ID Resolution');
console.log('='.repeat(60));
console.log('');
// Get Dutchie dispensaries with menu_url but no platform_dispensary_id
const result = await pool.query<DispensaryRow>(`
SELECT id, name, menu_url
FROM dispensaries
WHERE menu_type = 'dutchie'
AND menu_url IS NOT NULL AND menu_url != ''
AND (platform_dispensary_id IS NULL OR platform_dispensary_id = '')
ORDER BY name
`);
console.log(`Found ${result.rows.length} stores to retry\n`);
if (result.rows.length === 0) {
console.log('No stores need platform ID resolution.');
await pool.end();
return;
}
const successes: { id: number; name: string; platformId: string }[] = [];
const failures: { id: number; name: string; slug: string | null; error: string }[] = [];
for (const row of result.rows) {
console.log(`\n[${row.id}] ${row.name}`);
console.log(` URL: ${row.menu_url}`);
const slug = extractSlugFromUrl(row.menu_url);
if (!slug) {
console.log(` ❌ Could not extract slug from URL`);
failures.push({ id: row.id, name: row.name, slug: null, error: 'Could not extract slug' });
continue;
}
console.log(` Slug: ${slug}`);
try {
const resolveResult = await resolveDispensaryIdWithDetails(slug);
if (resolveResult.dispensaryId) {
console.log(` ✅ Resolved: ${resolveResult.dispensaryId}`);
// Update database
await pool.query(
'UPDATE dispensaries SET platform_dispensary_id = $1 WHERE id = $2',
[resolveResult.dispensaryId, row.id]
);
console.log(` 💾 Updated database`);
successes.push({ id: row.id, name: row.name, platformId: resolveResult.dispensaryId });
} else {
const errorMsg = resolveResult.error || 'Unknown error';
console.log(` ❌ Failed: ${errorMsg}`);
failures.push({ id: row.id, name: row.name, slug, error: errorMsg });
}
} catch (error: any) {
console.log(` ❌ Error: ${error.message}`);
failures.push({ id: row.id, name: row.name, slug, error: error.message });
}
// Small delay between requests
await new Promise(r => setTimeout(r, 500));
}
console.log('\n' + '='.repeat(60));
console.log('SUMMARY');
console.log('='.repeat(60));
console.log(`\n✅ Successes (${successes.length}):`);
for (const s of successes) {
console.log(` [${s.id}] ${s.name} -> ${s.platformId}`);
}
console.log(`\n❌ Failures (${failures.length}):`);
for (const f of failures) {
console.log(` [${f.id}] ${f.name} (slug: ${f.slug || 'N/A'})`);
console.log(` ${f.error}`);
}
await pool.end();
}
main().catch(e => {
console.error('Fatal error:', e);
process.exit(1);
});

View File

@@ -30,8 +30,8 @@ import {
discoverState,
getDiscoveryStats,
seedKnownCities,
ARIZONA_CITIES,
} from '../discovery';
import { getCitiesForState } from '../discovery/location-discovery';
// Parse command line arguments
function parseArgs() {
@@ -204,16 +204,22 @@ async function main() {
process.exit(1);
}
let cities: any[] = [];
if (stateCode.toUpperCase() === 'AZ') {
cities = ARIZONA_CITIES;
} else {
console.error(`No predefined cities for state: ${stateCode}`);
console.error('Add cities to city-discovery.ts ARIZONA_CITIES array (or add new state arrays)');
// Dynamically fetch cities from Dutchie
console.log(`\nFetching cities for ${stateCode} from Dutchie...\n`);
const cityNames = await getCitiesForState(stateCode.toUpperCase());
if (cityNames.length === 0) {
console.error(`No cities found for state: ${stateCode}`);
process.exit(1);
}
console.log(`\nSeeding ${cities.length} cities for ${stateCode}...\n`);
const cities = cityNames.map(name => ({
name,
slug: name.toLowerCase().replace(/\s+/g, '-').replace(/[^a-z0-9-]/g, ''),
stateCode: stateCode.toUpperCase(),
}));
console.log(`Seeding ${cities.length} cities for ${stateCode}...\n`);
const result = await seedKnownCities(pool, cities);
console.log(`Created: ${result.created} new cities`);
console.log(`Updated: ${result.updated} existing cities`);

View File

@@ -0,0 +1,277 @@
#!/usr/bin/env npx tsx
/**
* Test Script: Crawl a single dispensary and write to canonical tables
*
* This script:
* 1. Fetches products from Dutchie GraphQL
* 2. Normalizes via DutchieNormalizer
* 3. Writes to store_products, product_variants, snapshots via hydrateToCanonical
*
* Usage:
* npx tsx src/scripts/test-crawl-to-canonical.ts <dispensaryId>
* npx tsx src/scripts/test-crawl-to-canonical.ts 235
*/
import { Pool } from 'pg';
import dotenv from 'dotenv';
import {
executeGraphQL,
GRAPHQL_HASHES,
DUTCHIE_CONFIG,
} from '../platforms/dutchie';
import {
DutchieNormalizer,
hydrateToCanonical,
} from '../hydration';
import { initializeImageStorage } from '../utils/image-storage';
dotenv.config();
// ============================================================
// DATABASE CONNECTION
// ============================================================
function getConnectionString(): string {
if (process.env.CANNAIQ_DB_URL) {
return process.env.CANNAIQ_DB_URL;
}
if (process.env.DATABASE_URL) {
return process.env.DATABASE_URL;
}
const host = process.env.CANNAIQ_DB_HOST || 'localhost';
const port = process.env.CANNAIQ_DB_PORT || '54320';
const name = process.env.CANNAIQ_DB_NAME || 'dutchie_menus';
const user = process.env.CANNAIQ_DB_USER || 'dutchie';
const pass = process.env.CANNAIQ_DB_PASS || 'dutchie_local_pass';
return `postgresql://${user}:${pass}@${host}:${port}/${name}`;
}
const pool = new Pool({ connectionString: getConnectionString() });
// ============================================================
// FETCH PRODUCTS FROM DUTCHIE
// ============================================================
interface FetchResult {
products: any[];
totalPages: number;
totalProducts: number;
}
async function fetchAllProducts(platformDispensaryId: string, cName: string): Promise<FetchResult> {
const allProducts: any[] = [];
let page = 0;
let totalPages = 1;
let totalProducts = 0;
console.log(`[Fetch] Starting fetch for ${platformDispensaryId} (cName: ${cName})`);
while (page < totalPages && page < DUTCHIE_CONFIG.maxPages) {
const variables = {
includeEnterpriseSpecials: false,
productsFilter: {
dispensaryId: platformDispensaryId,
pricingType: 'rec',
Status: 'Active', // 'Active' = in-stock products with pricing
types: [],
useCache: true,
isDefaultSort: true,
sortBy: 'popularSortIdx',
sortDirection: 1,
bypassOnlineThresholds: true,
isKioskMenu: false,
removeProductsBelowOptionThresholds: false,
},
page,
perPage: DUTCHIE_CONFIG.perPage,
};
try {
const result = await executeGraphQL(
'FilteredProducts',
variables,
GRAPHQL_HASHES.FilteredProducts,
{ cName, maxRetries: 3 }
);
const data = result?.data?.filteredProducts;
if (!data) {
console.error(`[Fetch] No data returned for page ${page}`);
break;
}
const products = data.products || [];
totalProducts = data.queryInfo?.totalCount || 0;
totalPages = Math.ceil(totalProducts / DUTCHIE_CONFIG.perPage);
allProducts.push(...products);
console.log(`[Fetch] Page ${page + 1}/${totalPages}: ${products.length} products (total so far: ${allProducts.length})`);
page++;
if (page < totalPages) {
await new Promise(r => setTimeout(r, DUTCHIE_CONFIG.pageDelayMs));
}
} catch (error: any) {
console.error(`[Fetch] Error on page ${page}: ${error.message}`);
break;
}
}
return { products: allProducts, totalPages, totalProducts };
}
// ============================================================
// MAIN
// ============================================================
async function main() {
const dispensaryId = parseInt(process.argv[2], 10);
if (!dispensaryId) {
console.error('Usage: npx tsx src/scripts/test-crawl-to-canonical.ts <dispensaryId>');
console.error('Example: npx tsx src/scripts/test-crawl-to-canonical.ts 235');
process.exit(1);
}
console.log('============================================================');
console.log(`Test Crawl to Canonical - Dispensary ${dispensaryId}`);
console.log('============================================================\n');
// Initialize image storage
console.log('[Init] Initializing image storage...');
await initializeImageStorage();
console.log(' Image storage ready\n');
try {
// Step 1: Get dispensary info
console.log('[Step 1] Getting dispensary info...');
const dispResult = await pool.query(`
SELECT id, name, platform_dispensary_id, menu_url
FROM dispensaries
WHERE id = $1
`, [dispensaryId]);
if (dispResult.rows.length === 0) {
throw new Error(`Dispensary ${dispensaryId} not found`);
}
const disp = dispResult.rows[0];
console.log(` Name: ${disp.name}`);
console.log(` Platform ID: ${disp.platform_dispensary_id}`);
console.log(` Menu URL: ${disp.menu_url}`);
if (!disp.platform_dispensary_id) {
throw new Error('Dispensary does not have a platform_dispensary_id');
}
// Extract cName from menu_url
const cNameMatch = disp.menu_url?.match(/\/(?:embedded-menu|dispensary)\/([^/?]+)/);
const cName = cNameMatch ? cNameMatch[1] : 'dispensary';
console.log(` cName: ${cName}\n`);
// Step 2: Fetch products from Dutchie
console.log('[Step 2] Fetching products from Dutchie GraphQL...');
const fetchResult = await fetchAllProducts(disp.platform_dispensary_id, cName);
console.log(` Total products fetched: ${fetchResult.products.length}\n`);
if (fetchResult.products.length === 0) {
console.log('No products fetched. Exiting.');
process.exit(0);
}
// Step 3: Normalize
console.log('[Step 3] Normalizing products...');
const normalizer = new DutchieNormalizer();
// Construct a RawPayload structure that the normalizer expects
// The normalizer.normalize() expects: { raw_json, dispensary_id, ... }
const rawPayloadForValidation = {
products: fetchResult.products,
queryInfo: {
totalCount: fetchResult.totalProducts,
},
};
const validation = normalizer.validatePayload(rawPayloadForValidation);
if (!validation.valid) {
console.error(` Validation failed: ${validation.errors?.join(', ')}`);
process.exit(1);
}
console.log(` Validation: PASS`);
// Build proper RawPayload for normalize()
const rawPayload = {
id: `test-${Date.now()}`,
dispensary_id: dispensaryId,
crawl_run_id: null,
platform: 'dutchie',
payload_version: 1,
raw_json: rawPayloadForValidation,
product_count: fetchResult.totalProducts,
pricing_type: 'rec',
crawl_mode: 'active',
fetched_at: new Date(),
processed: false,
normalized_at: null,
hydration_error: null,
hydration_attempts: 0,
created_at: new Date(),
};
const normResult = normalizer.normalize(rawPayload);
console.log(` Normalized products: ${normResult.products.length}`);
console.log(` Brands extracted: ${normResult.brands.length}`);
console.log(` Sample product: ${normResult.products[0]?.name}\n`);
// Step 4: Write to canonical tables
console.log('[Step 4] Writing to canonical tables via hydrateToCanonical...');
const hydrateResult = await hydrateToCanonical(
pool,
dispensaryId,
normResult,
null // no crawl_run_id for this test
);
console.log(` Products upserted: ${hydrateResult.productsUpserted}`);
console.log(` Products new: ${hydrateResult.productsNew}`);
console.log(` Snapshots created: ${hydrateResult.snapshotsCreated}`);
console.log(` Variants upserted: ${hydrateResult.variantsUpserted}`);
console.log(` Brands created: ${hydrateResult.brandsCreated}\n`);
// Step 5: Verify
console.log('[Step 5] Verifying data in canonical tables...');
const productCount = await pool.query(`
SELECT COUNT(*) as count FROM store_products WHERE dispensary_id = $1
`, [dispensaryId]);
console.log(` store_products count: ${productCount.rows[0].count}`);
const variantCount = await pool.query(`
SELECT COUNT(*) as count FROM product_variants WHERE dispensary_id = $1
`, [dispensaryId]);
console.log(` product_variants count: ${variantCount.rows[0].count}`);
const snapshotCount = await pool.query(`
SELECT COUNT(*) as count FROM store_product_snapshots WHERE dispensary_id = $1
`, [dispensaryId]);
console.log(` store_product_snapshots count: ${snapshotCount.rows[0].count}`);
console.log('\n============================================================');
console.log('SUCCESS - Crawl and hydration complete!');
console.log('============================================================');
} catch (error: any) {
console.error('\n============================================================');
console.error('ERROR:', error.message);
console.error('============================================================');
if (error.stack) {
console.error(error.stack);
}
process.exit(1);
} finally {
await pool.end();
}
}
main();

View File

@@ -0,0 +1,80 @@
#!/usr/bin/env npx tsx
/**
* Test Image Proxy - Standalone test without backend
*
* Usage:
* npx tsx src/scripts/test-image-proxy.ts
*/
import express from 'express';
import imageProxyRoutes from '../routes/image-proxy';
const app = express();
const PORT = 3099;
// Mount the image proxy
app.use('/img', imageProxyRoutes);
// Start server
app.listen(PORT, async () => {
console.log(`Test image proxy running on http://localhost:${PORT}`);
console.log('');
console.log('Testing image proxy...');
console.log('');
const axios = require('axios');
// Test cases
const tests = [
{
name: 'Original image',
url: '/img/products/az/az-deeply-rooted/clout-king/68b4b20a0f9ef3e90eb51e96/image-268a6e44.webp',
},
{
name: 'Resize to 200px width',
url: '/img/products/az/az-deeply-rooted/clout-king/68b4b20a0f9ef3e90eb51e96/image-268a6e44.webp?w=200',
},
{
name: 'Resize to 100x100 cover',
url: '/img/products/az/az-deeply-rooted/clout-king/68b4b20a0f9ef3e90eb51e96/image-268a6e44.webp?w=100&h=100&fit=cover',
},
{
name: 'Grayscale + blur',
url: '/img/products/az/az-deeply-rooted/clout-king/68b4b20a0f9ef3e90eb51e96/image-268a6e44.webp?w=200&gray=1&blur=2',
},
{
name: 'Convert to JPEG',
url: '/img/products/az/az-deeply-rooted/clout-king/68b4b20a0f9ef3e90eb51e96/image-268a6e44.webp?w=200&format=jpeg&q=70',
},
{
name: 'Non-existent image',
url: '/img/products/az/nonexistent/image.webp',
},
];
for (const test of tests) {
try {
const response = await axios.get(`http://localhost:${PORT}${test.url}`, {
responseType: 'arraybuffer',
validateStatus: () => true,
});
const contentType = response.headers['content-type'];
const size = response.data.length;
const status = response.status;
console.log(`${test.name}:`);
console.log(` URL: ${test.url.slice(0, 80)}${test.url.length > 80 ? '...' : ''}`);
console.log(` Status: ${status}`);
console.log(` Content-Type: ${contentType}`);
console.log(` Size: ${(size / 1024).toFixed(1)} KB`);
console.log('');
} catch (error: any) {
console.log(`${test.name}: ERROR - ${error.message}`);
console.log('');
}
}
console.log('Tests complete!');
process.exit(0);
});

View File

@@ -0,0 +1,117 @@
/**
* Test script for stealth session management
*
* Tests:
* 1. Per-session fingerprint rotation
* 2. Geographic consistency (timezone → Accept-Language)
* 3. Proxy location loading from database
*
* Usage:
* npx tsx src/scripts/test-stealth-session.ts
*/
import {
startSession,
endSession,
getCurrentSession,
getFingerprint,
getRandomFingerprint,
getLocaleForTimezone,
buildHeaders,
} from '../platforms/dutchie';
console.log('='.repeat(60));
console.log('STEALTH SESSION TEST');
console.log('='.repeat(60));
// Test 1: Timezone to Locale mapping
console.log('\n[Test 1] Timezone to Locale Mapping:');
const testTimezones = [
'America/Phoenix',
'America/Los_Angeles',
'America/New_York',
'America/Chicago',
undefined,
'Invalid/Timezone',
];
for (const tz of testTimezones) {
const locale = getLocaleForTimezone(tz);
console.log(`  ${tz || '(undefined)'} → ${locale}`);
}
// Test 2: Random fingerprint selection
console.log('\n[Test 2] Random Fingerprint Selection (5 samples):');
for (let i = 0; i < 5; i++) {
const fp = getRandomFingerprint();
console.log(` ${i + 1}. ${fp.userAgent.slice(0, 60)}...`);
}
// Test 3: Session Management
console.log('\n[Test 3] Session Management:');
// Before session - should use default fingerprint
console.log(' Before session:');
const beforeFp = getFingerprint();
console.log(` getFingerprint(): ${beforeFp.userAgent.slice(0, 50)}...`);
console.log(` getCurrentSession(): ${getCurrentSession()}`);
// Start session with Arizona timezone
console.log('\n Starting session (AZ, America/Phoenix):');
const session1 = startSession('AZ', 'America/Phoenix');
console.log(` Session ID: ${session1.sessionId}`);
console.log(` Fingerprint UA: ${session1.fingerprint.userAgent.slice(0, 50)}...`);
console.log(` Accept-Language: ${session1.fingerprint.acceptLanguage}`);
console.log(` Timezone: ${session1.timezone}`);
// During session - should use session fingerprint
console.log('\n During session:');
const duringFp = getFingerprint();
console.log(` getFingerprint(): ${duringFp.userAgent.slice(0, 50)}...`);
console.log(` Same as session? ${duringFp.userAgent === session1.fingerprint.userAgent}`);
// Test buildHeaders with session
console.log('\n buildHeaders() during session:');
const headers = buildHeaders('/embedded-menu/test-store');
console.log(` User-Agent: ${headers['user-agent'].slice(0, 50)}...`);
console.log(` Accept-Language: ${headers['accept-language']}`);
console.log(` Origin: ${headers['origin']}`);
console.log(` Referer: ${headers['referer']}`);
// End session
console.log('\n Ending session:');
endSession();
console.log(` getCurrentSession(): ${getCurrentSession()}`);
// Test 4: Multiple sessions should have different fingerprints
console.log('\n[Test 4] Multiple Sessions (fingerprint variety):');
const fingerprints: string[] = [];
for (let i = 0; i < 10; i++) {
const session = startSession('CA', 'America/Los_Angeles');
fingerprints.push(session.fingerprint.userAgent);
endSession();
}
const uniqueCount = new Set(fingerprints).size;
console.log(` 10 sessions created, ${uniqueCount} unique fingerprints`);
console.log(` Variety: ${uniqueCount >= 3 ? '✅ Good' : '⚠️ Low - may need more fingerprint options'}`);
// Test 5: Geographic consistency check
console.log('\n[Test 5] Geographic Consistency:');
const geoTests = [
{ state: 'AZ', tz: 'America/Phoenix' },
{ state: 'CA', tz: 'America/Los_Angeles' },
{ state: 'NY', tz: 'America/New_York' },
{ state: 'IL', tz: 'America/Chicago' },
];
for (const { state, tz } of geoTests) {
const session = startSession(state, tz);
const consistent = session.fingerprint.acceptLanguage.includes('en-US');
console.log(` ${state} (${tz}): Accept-Language=${session.fingerprint.acceptLanguage} ${consistent ? '✅' : '❌'}`);
endSession();
}
console.log('\n' + '='.repeat(60));
console.log('TEST COMPLETE');
console.log('='.repeat(60));

backend/src/seo/settings.ts
View File

@@ -0,0 +1,521 @@
/**
* SEO Settings Helper Module
*
* Provides functions for managing SEO configuration stored in seo_settings table.
*/
import { getPool } from '../db/pool';
// Default settings - used when table is empty or for reset
export const DEFAULT_SETTINGS: Record<string, any> = {
// Section 1: Global Content Generation Settings
primary_prompt_template: `You are a cannabis industry content expert creating SEO-optimized content for {{page_type}} pages.
Topic: {{subject}}
Focus Areas: {{focus_areas}}
Tone: {{tone}}
Length: {{length}}
Generate engaging, informative content that:
1. Uses natural keyword placement
2. Provides value to cannabis consumers
3. Maintains compliance with industry standards
4. Includes relevant local market data
5. Avoids technical jargon about data collection
Write content that feels authentic and helpful, not automated.`,
regeneration_template: `You are improving existing SEO content for a {{page_type}} page.
=== ORIGINAL CONTENT ===
{{original_content}}
=== IMPROVEMENT AREAS ===
{{improvement_areas}}
=== FRESH DATA ===
{{fresh_data}}
=== REQUIREMENTS ===
- Tone: {{tone}}
- Length: {{length}}
- Preserve accurate information from original
- Update outdated statistics with fresh data
- Improve SEO keyword density naturally
- Enhance readability and engagement
- Maintain compliance with cannabis industry standards
- Keep the same content structure unless improvement is needed
Generate the improved version, preserving what works while addressing the improvement areas.`,
default_content_length: 'medium',
tone_voice: 'informational',
// ============================================================================
// TEMPLATE LIBRARY - Complete Page Type Templates
// ============================================================================
state_page_template: `# {{state_name}} Dispensaries - Your Cannabis Guide
Explore **{{dispensary_count}} licensed dispensaries** across {{state_name}}. Our comprehensive directory features {{product_count}}+ products from {{brand_count}} trusted brands, with real-time menu updates and pricing.
## Why Shop Cannabis in {{state_name}}?
{{state_name}} offers a thriving cannabis market with diverse product selections and competitive pricing. Whether you're looking for premium flower, convenient vapes, or precisely dosed edibles, you'll find options to match your preferences.
## Top Cannabis Brands in {{state_name}}
{{top_brands}}
These brands are available at dispensaries across the state, known for quality, consistency, and consumer trust.
## Popular Product Categories
{{top_categories}}
Find everything from traditional flower to innovative concentrates and wellness-focused CBD products.
## {{state_name}} Cannabis Market Overview
| Metric | Value |
|--------|-------|
| Licensed Dispensaries | {{dispensary_count}} |
| Products Available | {{product_count}}+ |
| Active Brands | {{brand_count}} |
| Average Price | \${{avg_price}} |
## Finding the Right Dispensary
Use our search tools to filter by location, product availability, and store hours. Compare menus across dispensaries to find the best selection for your needs.
---
*Market data continuously updated. Last refresh: {{last_updated}}*`,
city_page_template: `# {{city_name}}, {{state_code}} Cannabis Dispensaries
Discover **{{dispensary_count}} dispensaries** in {{city_name}}, {{state_name}}. Browse {{product_count}} products from {{brand_count}} local and national brands.
## Cannabis Shopping in {{city_name}}
{{city_name}} offers convenient access to quality cannabis products through licensed retail locations. Our directory helps you find the perfect dispensary based on location, selection, and reviews.
## Featured Dispensaries in {{city_name}}
{{popular_dispensaries}}
## Explore Nearby Cities
Looking for more options? Check out dispensaries in these nearby areas:
{{nearby_cities}}
## {{city_name}} Market Snapshot
- **Local Stores**: {{dispensary_count}}
- **Products Available**: {{product_count}}
- **Average Price**: \${{avg_price}}
## What to Expect
{{city_name}} dispensaries offer a range of experiences from boutique shops to high-volume retail stores. First-time visitors should bring valid ID and check store hours before visiting.
---
*Find your local dispensary and start shopping today.*`,
category_page_template: `# {{category_name}} Products in {{state_name}}
Explore **{{product_count}} {{category_name}} products** from {{brand_count}} trusted brands across {{state_name}} dispensaries.
## About {{category_name}}
{{category_name}} remains one of the most popular cannabis product categories, offering options for every preference and experience level.
## Popular {{category_name}} Varieties
{{top_strains}}
## Browse by Type
{{subcategories}}
## {{category_name}} Pricing in {{state_name}}
- **Average Price**: \${{avg_price}}
- **Budget Options**: Starting under $25
- **Premium Selection**: $50+
## How to Choose {{category_name}}
Consider potency levels, terpene profiles, and intended effects when selecting {{category_name}} products. Our filters help you narrow down options by THC/CBD content, brand, and price range.
## Shop {{category_name}} Near You
Find {{category_name}} products at dispensaries across {{state_name}}. Use our location search to find stores with current inventory.`,
brand_page_template: `# {{brand_name}} - Cannabis Products & Store Locator
{{description}}
## Where to Find {{brand_name}}
{{brand_name}} products are available at **{{store_count}} dispensaries** across multiple states:
{{state_presence}}
## {{brand_name}} Product Categories
{{categories}}
## Brand Statistics
| Metric | Value |
|--------|-------|
| Total Products | {{product_count}} |
| Retail Partners | {{store_count}} |
| Average Price | \${{avg_price}} |
## Why Choose {{brand_name}}?
Consumers trust {{brand_name}} for consistent quality, transparent lab testing, and innovative product development. Whether you're a long-time fan or discovering them for the first time, explore their full lineup at dispensaries near you.
## Shop {{brand_name}} Products
Find {{brand_name}} at a dispensary near you. Compare prices and availability across stores to get the best deal.`,
product_page_template: `# {{product_name}}
**{{brand_name}}** | {{category}}
## Product Details
| Attribute | Value |
|-----------|-------|
| THC Content | {{thc_percent}}% |
| CBD Content | {{cbd_percent}}% |
| Category | {{category}} |
| Brand | {{brand_name}} |
## Availability
{{#if in_stock}}
**In Stock** at {{dispensary_name}}
{{else}}
**Currently Unavailable** at {{dispensary_name}}
{{/if}}
📍 {{dispensary_city}}, {{state_name}}
## Pricing
**\${{price}}**
*Prices may vary by location. Check dispensary menu for current pricing.*
## About This Product
{{product_name}} from {{brand_name}} offers a quality {{category}} experience. Visit {{dispensary_name}} to learn more about this product and explore similar options.
## Find More {{brand_name}} Products
Browse the complete {{brand_name}} lineup and find products at dispensaries across {{state_name}}.`,
search_results_template: `# Search Results: "{{query}}"
Found **{{result_count}} results** across {{state_name}} dispensaries.
## Results Overview
| Category | Count |
|----------|-------|
| Products | {{product_results}} |
| Dispensaries | {{dispensary_results}} |
| Brands | {{brand_results}} |
## Top Categories for "{{query}}"
{{top_categories}}
## Refine Your Search
Use our filters to narrow results by:
- **Category**: Flower, Vape, Edibles, Concentrates, and more
- **Price Range**: Budget-friendly to premium options
- **Brand**: Shop your favorite brands
- **Location**: Find nearby dispensaries
## Popular Related Searches
Explore related products and categories to find exactly what you're looking for.
---
*Can't find what you need? Try broadening your search terms or browse by category.*`,
// ============================================================================
// Section 2: Automatic Refresh Rules
// ============================================================================
auto_refresh_interval: 'weekly',
trigger_pct_product_change: true,
trigger_pct_brand_change: true,
trigger_new_stores: true,
trigger_market_shift: false,
webhook_url: '',
notify_on_trigger: false,
// Section 3: Page-Level Defaults
default_title_template: '{{state_name}} Dispensaries | Find Cannabis Near You | CannaiQ',
default_meta_description_template: 'Discover the best dispensaries in {{state_name}}. Browse {{dispensary_count}}+ licensed retailers, compare prices, and find cannabis products near you.',
default_slug_template: 'dispensaries-{{state_code_lower}}',
default_og_image_template: '/images/seo/og-{{state_code_lower}}.jpg',
enable_ai_images: false,
// Section 4: Crawl / Dataset Configuration
primary_data_provider: 'cannaiq',
fallback_data_provider: 'dutchie',
min_data_freshness_hours: 24,
stale_data_behavior: 'allow_with_warning',
};
/**
* Get a single setting by key
*/
export async function getSetting(key: string): Promise<any> {
const pool = getPool();
try {
const result = await pool.query(
'SELECT value FROM seo_settings WHERE key = $1',
[key]
);
if (result.rows.length === 0) {
// Return default if not found
return DEFAULT_SETTINGS[key] ?? null;
}
return result.rows[0].value;
} catch (error: any) {
console.error(`[SEO Settings] Error getting setting "${key}":`, error.message);
// Return default on error
return DEFAULT_SETTINGS[key] ?? null;
}
}
/**
* Set a single setting
*/
export async function setSetting(key: string, value: any): Promise<void> {
const pool = getPool();
try {
await pool.query(
`INSERT INTO seo_settings (key, value, updated_at)
VALUES ($1, $2, NOW())
ON CONFLICT (key) DO UPDATE SET
value = EXCLUDED.value,
updated_at = NOW()`,
[key, JSON.stringify(value)]
);
} catch (error: any) {
console.error(`[SEO Settings] Error setting "${key}":`, error.message);
throw error;
}
}
/**
* Get all settings as a key/value object
*/
export async function getAllSettings(): Promise<Record<string, any>> {
const pool = getPool();
try {
const result = await pool.query('SELECT key, value FROM seo_settings');
// Start with defaults
const settings: Record<string, any> = { ...DEFAULT_SETTINGS };
// Override with stored values
for (const row of result.rows) {
settings[row.key] = row.value;
}
return settings;
} catch (error: any) {
console.error('[SEO Settings] Error getting all settings:', error.message);
// Return defaults on error
return { ...DEFAULT_SETTINGS };
}
}
/**
* Set multiple settings at once
*/
export async function setMultipleSettings(settings: Record<string, any>): Promise<void> {
const pool = getPool();
try {
// Use a transaction for bulk updates
await pool.query('BEGIN');
for (const [key, value] of Object.entries(settings)) {
await pool.query(
`INSERT INTO seo_settings (key, value, updated_at)
VALUES ($1, $2, NOW())
ON CONFLICT (key) DO UPDATE SET
value = EXCLUDED.value,
updated_at = NOW()`,
[key, JSON.stringify(value)]
);
}
await pool.query('COMMIT');
} catch (error: any) {
await pool.query('ROLLBACK');
console.error('[SEO Settings] Error setting multiple settings:', error.message);
throw error;
}
}
/**
* Reset all settings to defaults
*/
export async function resetToDefaults(): Promise<Record<string, any>> {
const pool = getPool();
try {
await pool.query('BEGIN');
// Delete all existing settings
await pool.query('DELETE FROM seo_settings');
// Insert all defaults
for (const [key, value] of Object.entries(DEFAULT_SETTINGS)) {
await pool.query(
`INSERT INTO seo_settings (key, value, created_at, updated_at)
VALUES ($1, $2, NOW(), NOW())`,
[key, JSON.stringify(value)]
);
}
await pool.query('COMMIT');
return { ...DEFAULT_SETTINGS };
} catch (error: any) {
await pool.query('ROLLBACK');
console.error('[SEO Settings] Error resetting to defaults:', error.message);
throw error;
}
}
/**
* Ensure settings table exists and has defaults
* Call this on app startup
*/
export async function ensureSettingsExist(): Promise<void> {
const pool = getPool();
try {
// Check if table exists
const tableCheck = await pool.query(`
SELECT EXISTS (
SELECT FROM information_schema.tables
WHERE table_name = 'seo_settings'
)
`);
if (!tableCheck.rows[0].exists) {
// Create table
await pool.query(`
CREATE TABLE IF NOT EXISTS seo_settings (
id SERIAL PRIMARY KEY,
key TEXT UNIQUE NOT NULL,
value JSONB NOT NULL,
created_at TIMESTAMP DEFAULT NOW(),
updated_at TIMESTAMP DEFAULT NOW()
)
`);
}
// Check if settings exist
const countResult = await pool.query('SELECT COUNT(*) FROM seo_settings');
const count = parseInt(countResult.rows[0].count, 10);
if (count === 0) {
// Seed with defaults
for (const [key, value] of Object.entries(DEFAULT_SETTINGS)) {
await pool.query(
`INSERT INTO seo_settings (key, value)
VALUES ($1, $2)
ON CONFLICT (key) DO NOTHING`,
[key, JSON.stringify(value)]
);
}
console.log('[SEO Settings] Seeded default settings');
}
} catch (error: any) {
console.error('[SEO Settings] Error ensuring settings exist:', error.message);
}
}
/**
* Build a prompt using settings and template variables
*/
export function buildPrompt(
template: string,
variables: Record<string, string>
): string {
let result = template;
for (const [key, value] of Object.entries(variables)) {
result = result.replace(new RegExp(`{{${key}}}`, 'g'), value);
}
return result;
}
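// Minimal usage sketch (hypothetical template and variables; buildPrompt does a
// global {{key}} substitution and nothing more):
//
//   const prompt = buildPrompt(
//     'You are writing {{page_type}} content about {{subject}}.',
//     { page_type: 'state', subject: 'Arizona dispensaries' }
//   );
//   // => 'You are writing state content about Arizona dispensaries.'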
/**
* Get content generation settings as a structured object
*/
export async function getContentGenerationSettings(): Promise<{
promptTemplate: string;
regenerationTemplate: string;
contentLength: 'short' | 'medium' | 'long';
tone: 'neutral' | 'informational' | 'consumer' | 'authoritative';
}> {
const settings = await getAllSettings();
return {
promptTemplate: settings.primary_prompt_template,
regenerationTemplate: settings.regeneration_template,
contentLength: settings.default_content_length,
tone: settings.tone_voice,
};
}
/**
* Check if data is stale based on settings
*/
export async function checkDataFreshness(lastCrawlAt: Date | null): Promise<{
isFresh: boolean;
behavior: 'block_generation' | 'allow_with_warning' | 'auto_trigger_crawl';
hoursStale: number;
}> {
const settings = await getAllSettings();
const maxHours = settings.min_data_freshness_hours || 24;
const behavior = settings.stale_data_behavior || 'allow_with_warning';
if (!lastCrawlAt) {
return {
isFresh: false,
behavior,
hoursStale: Infinity,
};
}
const hoursStale = (Date.now() - lastCrawlAt.getTime()) / (1000 * 60 * 60);
return {
isFresh: hoursStale <= maxHours,
behavior,
hoursStale: Math.round(hoursStale),
};
}
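// Hedged startup sketch (hypothetical call site; the import path assumes the
// backend src layout used elsewhere in this repo):
//
//   import { ensureSettingsExist, getContentGenerationSettings, checkDataFreshness } from './seo/settings';
//
//   await ensureSettingsExist();                        // create + seed seo_settings if empty
//   const gen = await getContentGenerationSettings();   // templates, tone, length
//   const freshness = await checkDataFreshness(lastCrawlAtFromDb);
//   if (!freshness.isFresh && freshness.behavior === 'block_generation') {
//     throw new Error(`SEO data is ${freshness.hoursStale}h stale; generation blocked`);
//   }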

View File

@@ -0,0 +1,369 @@
/**
* SEO Template Engine
*
* Handles template selection, variable injection, and content generation
* for different page types (state, city, category, brand, product, search).
*/
import { getAllSettings, getSetting } from './settings';
// Page types supported by the template engine
export type PageType = 'state' | 'city' | 'category' | 'brand' | 'product' | 'search';
// Template keys mapping
export const TEMPLATE_KEYS: Record<PageType, string> = {
state: 'state_page_template',
city: 'city_page_template',
category: 'category_page_template',
brand: 'brand_page_template',
product: 'product_page_template',
search: 'search_results_template',
};
// Sample mock data for previews
export const MOCK_DATA: Record<PageType, Record<string, any>> = {
state: {
state_name: 'Arizona',
state_code: 'AZ',
state_code_lower: 'az',
dispensary_count: 156,
product_count: 12450,
brand_count: 287,
category_count: 8,
top_brands: ['Raw Garden', 'Stiiizy', 'Select', 'Pax', 'Bloom'],
top_categories: ['Flower', 'Vape', 'Edibles', 'Concentrate', 'Pre-rolls'],
avg_price: 42.50,
last_updated: new Date().toISOString().split('T')[0],
},
city: {
city_name: 'Phoenix',
state_name: 'Arizona',
state_code: 'AZ',
dispensary_count: 45,
product_count: 3200,
brand_count: 120,
nearby_cities: ['Scottsdale', 'Tempe', 'Mesa', 'Glendale'],
popular_dispensaries: ['Harvest', 'Curaleaf', 'Zen Leaf'],
avg_price: 40.00,
},
category: {
category_name: 'Flower',
category_slug: 'flower',
product_count: 4500,
brand_count: 95,
state_name: 'Arizona',
avg_price: 35.00,
top_strains: ['Blue Dream', 'OG Kush', 'Girl Scout Cookies'],
subcategories: ['Indica', 'Sativa', 'Hybrid'],
},
brand: {
brand_name: 'Raw Garden',
brand_slug: 'raw-garden',
product_count: 156,
state_presence: ['AZ', 'CA', 'NV', 'CO'],
store_count: 89,
avg_price: 45.00,
categories: ['Concentrate', 'Vape', 'Live Resin'],
description: 'Premium cannabis products from California',
},
product: {
product_name: 'Blue Dream Cartridge',
brand_name: 'Select',
category: 'Vape',
thc_percent: 85.5,
cbd_percent: 0.5,
price: 45.00,
dispensary_name: 'Harvest HOC',
dispensary_city: 'Phoenix',
state_name: 'Arizona',
in_stock: true,
},
search: {
query: 'live resin',
result_count: 245,
product_results: 180,
dispensary_results: 45,
brand_results: 20,
state_name: 'Arizona',
top_categories: ['Concentrate', 'Vape'],
},
};
/**
* Apply template variables to a template string
* Replaces {{variable}} with values from data object
*
* Rules:
* - Replace {{variable}} occurrences
* - Leave unknown variables unchanged
* - Prevent undefined values (replace with empty string)
* - Support arrays by joining with comma
*/
export function applyTemplateVariables(
template: string,
data: Record<string, any>
): string {
if (!template) return '';
let result = template;
// Find all {{variable}} patterns
const variablePattern = /\{\{(\w+)\}\}/g;
let match;
while ((match = variablePattern.exec(template)) !== null) {
const fullMatch = match[0];
const variableName = match[1];
if (variableName in data) {
let value = data[variableName];
// Handle different value types
if (value === undefined || value === null) {
value = '';
} else if (Array.isArray(value)) {
value = value.join(', ');
} else if (typeof value === 'object') {
value = JSON.stringify(value);
} else {
value = String(value);
}
// Replace all occurrences of this variable
result = result.split(fullMatch).join(value);
}
// Leave unknown variables unchanged
}
return result;
}
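// Small illustration of the substitution rules above (hypothetical template and data):
//
//   applyTemplateVariables(
//     'Top brands in {{state_name}}: {{top_brands}}. See {{unknown_var}}.',
//     { state_name: 'Arizona', top_brands: ['Raw Garden', 'Stiiizy'] }
//   );
//   // => 'Top brands in Arizona: Raw Garden, Stiiizy. See {{unknown_var}}.'
//   // Arrays are joined with ', ', unknown variables are left untouched,
//   // and an undefined value would render as an empty string.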
/**
* Get the correct template for a page type
* Uses case-insensitive matching
*/
export async function getTemplateForPageType(pageType: string): Promise<string> {
const normalizedType = pageType.toLowerCase().trim() as PageType;
const templateKey = TEMPLATE_KEYS[normalizedType];
if (!templateKey) {
console.warn(`[TemplateEngine] Unknown page type: ${pageType}, falling back to state template`);
return getSetting('state_page_template');
}
return getSetting(templateKey);
}
/**
* Get regeneration template
*/
export async function getRegenerationTemplate(): Promise<string> {
return getSetting('regeneration_template');
}
/**
* Generate content for a page using the appropriate template
*/
export async function generatePageContent(
pageType: string,
data: Record<string, any>
): Promise<{
content: string;
templateUsed: string;
variablesApplied: string[];
}> {
const template = await getTemplateForPageType(pageType);
const content = applyTemplateVariables(template, data);
// Extract which variables were actually used
const variablePattern = /\{\{(\w+)\}\}/g;
const variablesInTemplate: string[] = [];
let match;
while ((match = variablePattern.exec(template)) !== null) {
if (!variablesInTemplate.includes(match[1])) {
variablesInTemplate.push(match[1]);
}
}
const variablesApplied = variablesInTemplate.filter(v => v in data);
return {
content,
templateUsed: TEMPLATE_KEYS[pageType.toLowerCase() as PageType] || 'state_page_template',
variablesApplied,
};
}
/**
* Generate a preview with mock data
*/
export async function generatePreview(
pageType: string,
customTemplate?: string
): Promise<{
preview: string;
template: string;
mockData: Record<string, any>;
availableVariables: string[];
}> {
const normalizedType = (pageType?.toLowerCase().trim() || 'state') as PageType;
const template = customTemplate || await getTemplateForPageType(normalizedType);
const mockData = MOCK_DATA[normalizedType] || MOCK_DATA.state;
const preview = applyTemplateVariables(template, mockData);
return {
preview,
template,
mockData,
availableVariables: Object.keys(mockData),
};
}
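// Example preview call (hypothetical; MOCK_DATA supplies the values):
//
//   const { preview, availableVariables } = await generatePreview('brand');
//   // preview: the brand_page_template rendered with MOCK_DATA.brand
//   // availableVariables: ['brand_name', 'brand_slug', 'product_count', ...]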
/**
* Regenerate content using regeneration template
*/
export async function regenerateContent(
pageType: string,
originalContent: string,
newData: Record<string, any>,
improvementAreas?: string[]
): Promise<{
content: string;
regenerationPrompt: string;
}> {
const regenerationTemplate = await getRegenerationTemplate();
const settings = await getAllSettings();
// Build regeneration context
const regenerationData = {
...newData,
original_content: originalContent,
page_type: pageType,
improvement_areas: improvementAreas?.join(', ') || 'SEO keywords, local relevance, data freshness',
tone: settings.tone_voice || 'informational',
length: settings.default_content_length || 'medium',
};
const regenerationPrompt = applyTemplateVariables(regenerationTemplate, regenerationData);
// Generate new content using the page template
const pageTemplate = await getTemplateForPageType(pageType);
const content = applyTemplateVariables(pageTemplate, newData);
return {
content,
regenerationPrompt,
};
}
/**
* Get all available templates and their metadata
*/
export async function getAllTemplates(): Promise<Record<string, {
key: string;
template: string;
description: string;
availableVariables: string[];
}>> {
const settings = await getAllSettings();
return {
state: {
key: 'state_page_template',
template: settings.state_page_template || '',
description: 'Template for state landing pages (e.g., "Arizona Dispensaries")',
availableVariables: Object.keys(MOCK_DATA.state),
},
city: {
key: 'city_page_template',
template: settings.city_page_template || '',
description: 'Template for city landing pages (e.g., "Phoenix Dispensaries")',
availableVariables: Object.keys(MOCK_DATA.city),
},
category: {
key: 'category_page_template',
template: settings.category_page_template || '',
description: 'Template for category pages (e.g., "Flower", "Edibles")',
availableVariables: Object.keys(MOCK_DATA.category),
},
brand: {
key: 'brand_page_template',
template: settings.brand_page_template || '',
description: 'Template for brand pages (e.g., "Raw Garden Products")',
availableVariables: Object.keys(MOCK_DATA.brand),
},
product: {
key: 'product_page_template',
template: settings.product_page_template || '',
description: 'Template for individual product pages',
availableVariables: Object.keys(MOCK_DATA.product),
},
search: {
key: 'search_results_template',
template: settings.search_results_template || '',
description: 'Template for search results pages',
availableVariables: Object.keys(MOCK_DATA.search),
},
regeneration: {
key: 'regeneration_template',
template: settings.regeneration_template || '',
description: 'Template used when regenerating/improving existing content',
availableVariables: ['original_content', 'page_type', 'improvement_areas', 'tone', 'length', '...page-specific variables'],
},
};
}
/**
* Validate a template string
*/
export function validateTemplate(template: string): {
valid: boolean;
variables: string[];
unknownVariables: string[];
errors: string[];
} {
const errors: string[] = [];
const variables: string[] = [];
// Find all variables
const variablePattern = /\{\{(\w+)\}\}/g;
let match;
while ((match = variablePattern.exec(template)) !== null) {
if (!variables.includes(match[1])) {
variables.push(match[1]);
}
}
// Check for unclosed brackets
const openBrackets = (template.match(/\{\{/g) || []).length;
const closeBrackets = (template.match(/\}\}/g) || []).length;
if (openBrackets !== closeBrackets) {
errors.push('Mismatched template brackets: {{ and }} counts do not match');
}
// Check for empty variable names
if (template.includes('{{}}')) {
errors.push('Empty variable name found: {{}}');
}
// Get all known variables
const allKnownVariables = new Set<string>();
Object.values(MOCK_DATA).forEach(data => {
Object.keys(data).forEach(key => allKnownVariables.add(key));
});
allKnownVariables.add('original_content');
allKnownVariables.add('page_type');
allKnownVariables.add('improvement_areas');
allKnownVariables.add('tone');
allKnownVariables.add('length');
const unknownVariables = variables.filter(v => !allKnownVariables.has(v));
return {
valid: errors.length === 0,
variables,
unknownVariables,
errors,
};
}
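// Quick illustration of the validator (hypothetical input with one unclosed variable):
//
//   validateTemplate('# {{state_name}} Guide\n{{dispensary_count}} stores {{typo_var');
//   // => {
//   //   valid: false,
//   //   variables: ['state_name', 'dispensary_count'],
//   //   unknownVariables: [],
//   //   errors: ['Mismatched template brackets: {{ and }} counts do not match'],
//   // }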

View File

@@ -115,7 +115,7 @@ export class LegalStateService {
}
/**
* Get all states with dispensary counts
* Get all states with dispensary counts (active/crawlable dispensaries only)
*/
async getAllStatesWithDispensaryCounts(): Promise<StateWithDispensaryCount[]> {
const { rows } = await this.pool.query<StateWithDispensaryCount>(`
@@ -127,6 +127,8 @@ export class LegalStateService {
SELECT state_id, COUNT(*) AS cnt
FROM dispensaries
WHERE state_id IS NOT NULL
AND menu_type = 'dutchie'
AND platform_dispensary_id IS NOT NULL
GROUP BY state_id
) d ON d.state_id = s.id
ORDER BY s.name ASC
@@ -324,6 +326,8 @@ export class LegalStateService {
SELECT state_id, COUNT(*) AS cnt
FROM dispensaries
WHERE state_id IS NOT NULL
AND menu_type = 'dutchie'
AND platform_dispensary_id IS NOT NULL
GROUP BY state_id
) d ON d.state_id = s.id
ORDER BY s.name ASC

View File

@@ -26,6 +26,8 @@ import {
PenetrationDataPoint,
BrandMarketPosition,
BrandRecVsMedFootprint,
BrandPromotionalSummary,
BrandPromotionalEvent,
} from './types';
export class BrandPenetrationService {
@@ -44,16 +46,17 @@ export class BrandPenetrationService {
// Get current brand presence
const currentResult = await this.pool.query(`
SELECT
sp.brand_name,
sp.brand_name_raw AS brand_name,
COUNT(DISTINCT sp.dispensary_id) AS total_dispensaries,
COUNT(*) AS total_skus,
ROUND(COUNT(*)::NUMERIC / NULLIF(COUNT(DISTINCT sp.dispensary_id), 0), 2) AS avg_skus_per_dispensary,
ARRAY_AGG(DISTINCT s.code) FILTER (WHERE s.code IS NOT NULL) AS states_present
FROM store_products sp
LEFT JOIN states s ON s.id = sp.state_id
WHERE sp.brand_name = $1
JOIN dispensaries d ON d.id = sp.dispensary_id
LEFT JOIN states s ON s.id = d.state_id
WHERE sp.brand_name_raw = $1
AND sp.is_in_stock = TRUE
GROUP BY sp.brand_name
GROUP BY sp.brand_name_raw
`, [brandName]);
if (currentResult.rows.length === 0) {
@@ -72,7 +75,7 @@ export class BrandPenetrationService {
DATE(sps.captured_at) AS date,
COUNT(DISTINCT sps.dispensary_id) AS dispensary_count
FROM store_product_snapshots sps
WHERE sps.brand_name = $1
WHERE sps.brand_name_raw = $1
AND sps.captured_at >= $2
AND sps.captured_at <= $3
AND sps.is_in_stock = TRUE
@@ -123,8 +126,9 @@ export class BrandPenetrationService {
COUNT(DISTINCT sp.dispensary_id) AS dispensary_count,
COUNT(*) AS sku_count
FROM store_products sp
JOIN states s ON s.id = sp.state_id
WHERE sp.brand_name = $1
JOIN dispensaries d ON d.id = sp.dispensary_id
JOIN states s ON s.id = d.state_id
WHERE sp.brand_name_raw = $1
AND sp.is_in_stock = TRUE
GROUP BY s.code, s.name, s.recreational_legal, s.medical_legal
),
@@ -133,7 +137,8 @@ export class BrandPenetrationService {
s.code AS state_code,
COUNT(DISTINCT sp.dispensary_id) AS total_dispensaries
FROM store_products sp
JOIN states s ON s.id = sp.state_id
JOIN dispensaries d ON d.id = sp.dispensary_id
JOIN states s ON s.id = d.state_id
WHERE sp.is_in_stock = TRUE
GROUP BY s.code
)
@@ -169,7 +174,7 @@ export class BrandPenetrationService {
let filters = '';
if (options.category) {
filters += ` AND sp.category = $${paramIdx}`;
filters += ` AND sp.category_raw = $${paramIdx}`;
params.push(options.category);
paramIdx++;
}
@@ -183,31 +188,33 @@ export class BrandPenetrationService {
const result = await this.pool.query(`
WITH brand_metrics AS (
SELECT
sp.brand_name,
sp.category,
sp.brand_name_raw AS brand_name,
sp.category_raw AS category,
s.code AS state_code,
COUNT(*) AS sku_count,
COUNT(DISTINCT sp.dispensary_id) AS dispensary_count,
AVG(sp.price_rec) AS avg_price
FROM store_products sp
JOIN states s ON s.id = sp.state_id
WHERE sp.brand_name = $1
JOIN dispensaries d ON d.id = sp.dispensary_id
JOIN states s ON s.id = d.state_id
WHERE sp.brand_name_raw = $1
AND sp.is_in_stock = TRUE
AND sp.category IS NOT NULL
AND sp.category_raw IS NOT NULL
${filters}
GROUP BY sp.brand_name, sp.category, s.code
GROUP BY sp.brand_name_raw, sp.category_raw, s.code
),
category_totals AS (
SELECT
sp.category,
sp.category_raw AS category,
s.code AS state_code,
COUNT(*) AS total_skus,
AVG(sp.price_rec) AS category_avg_price
FROM store_products sp
JOIN states s ON s.id = sp.state_id
JOIN dispensaries d ON d.id = sp.dispensary_id
JOIN states s ON s.id = d.state_id
WHERE sp.is_in_stock = TRUE
AND sp.category IS NOT NULL
GROUP BY sp.category, s.code
AND sp.category_raw IS NOT NULL
GROUP BY sp.category_raw, s.code
)
SELECT
bm.*,
@@ -243,8 +250,9 @@ export class BrandPenetrationService {
COUNT(DISTINCT sp.dispensary_id) AS dispensary_count,
ROUND(COUNT(*)::NUMERIC / NULLIF(COUNT(DISTINCT sp.dispensary_id), 0), 2) AS avg_skus
FROM store_products sp
JOIN states s ON s.id = sp.state_id
WHERE sp.brand_name = $1
JOIN dispensaries d ON d.id = sp.dispensary_id
JOIN states s ON s.id = d.state_id
WHERE sp.brand_name_raw = $1
AND sp.is_in_stock = TRUE
AND s.recreational_legal = TRUE
),
@@ -255,8 +263,9 @@ export class BrandPenetrationService {
COUNT(DISTINCT sp.dispensary_id) AS dispensary_count,
ROUND(COUNT(*)::NUMERIC / NULLIF(COUNT(DISTINCT sp.dispensary_id), 0), 2) AS avg_skus
FROM store_products sp
JOIN states s ON s.id = sp.state_id
WHERE sp.brand_name = $1
JOIN dispensaries d ON d.id = sp.dispensary_id
JOIN states s ON s.id = d.state_id
WHERE sp.brand_name_raw = $1
AND sp.is_in_stock = TRUE
AND s.medical_legal = TRUE
AND (s.recreational_legal = FALSE OR s.recreational_legal IS NULL)
@@ -311,23 +320,24 @@ export class BrandPenetrationService {
}
if (category) {
filters += ` AND sp.category = $${paramIdx}`;
filters += ` AND sp.category_raw = $${paramIdx}`;
params.push(category);
paramIdx++;
}
const result = await this.pool.query(`
SELECT
sp.brand_name,
sp.brand_name_raw AS brand_name,
COUNT(DISTINCT sp.dispensary_id) AS dispensary_count,
COUNT(*) AS sku_count,
COUNT(DISTINCT s.code) AS state_count
FROM store_products sp
LEFT JOIN states s ON s.id = sp.state_id
WHERE sp.brand_name IS NOT NULL
JOIN dispensaries d ON d.id = sp.dispensary_id
LEFT JOIN states s ON s.id = d.state_id
WHERE sp.brand_name_raw IS NOT NULL
AND sp.is_in_stock = TRUE
${filters}
GROUP BY sp.brand_name
GROUP BY sp.brand_name_raw
ORDER BY dispensary_count DESC, sku_count DESC
LIMIT $1
`, params);
@@ -358,23 +368,23 @@ export class BrandPenetrationService {
const result = await this.pool.query(`
WITH start_counts AS (
SELECT
brand_name,
brand_name_raw AS brand_name,
COUNT(DISTINCT dispensary_id) AS dispensary_count
FROM store_product_snapshots
WHERE captured_at >= $1 AND captured_at < $1 + INTERVAL '1 day'
AND brand_name IS NOT NULL
AND brand_name_raw IS NOT NULL
AND is_in_stock = TRUE
GROUP BY brand_name
GROUP BY brand_name_raw
),
end_counts AS (
SELECT
brand_name,
brand_name_raw AS brand_name,
COUNT(DISTINCT dispensary_id) AS dispensary_count
FROM store_product_snapshots
WHERE captured_at >= $2 - INTERVAL '1 day' AND captured_at <= $2
AND brand_name IS NOT NULL
AND brand_name_raw IS NOT NULL
AND is_in_stock = TRUE
GROUP BY brand_name
GROUP BY brand_name_raw
)
SELECT
COALESCE(sc.brand_name, ec.brand_name) AS brand_name,
@@ -401,6 +411,225 @@ export class BrandPenetrationService {
change_percent: row.change_percent ? parseFloat(row.change_percent) : 0,
}));
}
/**
* Get brand promotional history
*
* Tracks when products went on special, how long, what discount,
* and estimated quantity sold during the promotion.
*/
async getBrandPromotionalHistory(
brandName: string,
options: { window?: TimeWindow; customRange?: DateRange; stateCode?: string; category?: string } = {}
): Promise<BrandPromotionalSummary> {
const { window = '90d', customRange, stateCode, category } = options;
const { start, end } = getDateRangeFromWindow(window, customRange);
// Build filters
const params: any[] = [brandName, start, end];
let paramIdx = 4;
let filters = '';
if (stateCode) {
filters += ` AND s.code = $${paramIdx}`;
params.push(stateCode);
paramIdx++;
}
if (category) {
filters += ` AND sp.category_raw = $${paramIdx}`;
params.push(category);
paramIdx++;
}
// Find promotional events by detecting when is_on_special transitions to TRUE
// and tracking until it transitions back to FALSE
const eventsResult = await this.pool.query(`
WITH snapshot_with_lag AS (
SELECT
sps.id,
sps.store_product_id,
sps.dispensary_id,
sps.brand_name_raw,
sps.name_raw,
sps.category_raw,
sps.is_on_special,
sps.price_rec,
sps.price_rec_special,
sps.stock_quantity,
sps.captured_at,
LAG(sps.is_on_special) OVER (
PARTITION BY sps.store_product_id
ORDER BY sps.captured_at
) AS prev_is_on_special,
LAG(sps.stock_quantity) OVER (
PARTITION BY sps.store_product_id
ORDER BY sps.captured_at
) AS prev_stock_quantity
FROM store_product_snapshots sps
JOIN store_products sp ON sp.id = sps.store_product_id
JOIN dispensaries dd ON dd.id = sp.dispensary_id
LEFT JOIN states s ON s.id = dd.state_id
WHERE sps.brand_name_raw = $1
AND sps.captured_at >= $2
AND sps.captured_at <= $3
${filters}
),
special_starts AS (
-- Find when specials START (transition from not-on-special to on-special)
SELECT
store_product_id,
dispensary_id,
name_raw,
category_raw,
captured_at AS special_start,
price_rec AS regular_price,
price_rec_special AS special_price,
stock_quantity AS quantity_at_start
FROM snapshot_with_lag
WHERE is_on_special = TRUE
AND (prev_is_on_special = FALSE OR prev_is_on_special IS NULL)
AND price_rec_special IS NOT NULL
AND price_rec IS NOT NULL
),
special_ends AS (
-- Find when specials END (transition from on-special to not-on-special)
SELECT
store_product_id,
captured_at AS special_end,
prev_stock_quantity AS quantity_at_end
FROM snapshot_with_lag
WHERE is_on_special = FALSE
AND prev_is_on_special = TRUE
),
matched_events AS (
SELECT
ss.store_product_id,
ss.dispensary_id,
ss.name_raw AS product_name,
ss.category_raw AS category,
ss.special_start,
se.special_end,
ss.regular_price,
ss.special_price,
ss.quantity_at_start,
COALESCE(se.quantity_at_end, ss.quantity_at_start) AS quantity_at_end
FROM special_starts ss
LEFT JOIN special_ends se ON se.store_product_id = ss.store_product_id
AND se.special_end > ss.special_start
AND se.special_end = (
SELECT MIN(se2.special_end)
FROM special_ends se2
WHERE se2.store_product_id = ss.store_product_id
AND se2.special_end > ss.special_start
)
)
SELECT
me.store_product_id,
me.dispensary_id,
d.name AS dispensary_name,
s.code AS state_code,
me.product_name,
me.category,
me.special_start,
me.special_end,
EXTRACT(DAY FROM COALESCE(me.special_end, NOW()) - me.special_start)::INT AS duration_days,
me.regular_price,
me.special_price,
ROUND(((me.regular_price - me.special_price) / NULLIF(me.regular_price, 0)) * 100, 1) AS discount_percent,
me.quantity_at_start,
me.quantity_at_end,
GREATEST(0, COALESCE(me.quantity_at_start, 0) - COALESCE(me.quantity_at_end, 0)) AS quantity_sold_estimate
FROM matched_events me
JOIN dispensaries d ON d.id = me.dispensary_id
LEFT JOIN states s ON s.id = d.state_id
ORDER BY me.special_start DESC
`, params);
const events: BrandPromotionalEvent[] = eventsResult.rows.map((row: any) => ({
product_name: row.product_name,
store_product_id: parseInt(row.store_product_id),
dispensary_id: parseInt(row.dispensary_id),
dispensary_name: row.dispensary_name,
state_code: row.state_code || 'Unknown',
category: row.category,
special_start: row.special_start.toISOString().split('T')[0],
special_end: row.special_end ? row.special_end.toISOString().split('T')[0] : null,
duration_days: row.duration_days ? parseInt(row.duration_days) : null,
regular_price: parseFloat(row.regular_price) || 0,
special_price: parseFloat(row.special_price) || 0,
discount_percent: parseFloat(row.discount_percent) || 0,
quantity_at_start: row.quantity_at_start ? parseInt(row.quantity_at_start) : null,
quantity_at_end: row.quantity_at_end ? parseInt(row.quantity_at_end) : null,
quantity_sold_estimate: row.quantity_sold_estimate ? parseInt(row.quantity_sold_estimate) : null,
}));
// Calculate summary stats
const totalEvents = events.length;
const uniqueProducts = new Set(events.map(e => e.store_product_id)).size;
const uniqueDispensaries = new Set(events.map(e => e.dispensary_id)).size;
const uniqueStates = [...new Set(events.map(e => e.state_code))];
const avgDiscount = totalEvents > 0
? events.reduce((sum, e) => sum + e.discount_percent, 0) / totalEvents
: 0;
const durations = events.filter(e => e.duration_days !== null).map(e => e.duration_days!);
const avgDuration = durations.length > 0
? durations.reduce((sum, d) => sum + d, 0) / durations.length
: null;
const totalQuantitySold = events
.filter(e => e.quantity_sold_estimate !== null)
.reduce((sum, e) => sum + (e.quantity_sold_estimate || 0), 0);
// Calculate frequency
const windowDays = Math.ceil((end.getTime() - start.getTime()) / (1000 * 60 * 60 * 24));
const weeklyAvg = windowDays > 0 ? (totalEvents / windowDays) * 7 : 0;
const monthlyAvg = windowDays > 0 ? (totalEvents / windowDays) * 30 : 0;
// Group by category
const categoryMap = new Map<string, { count: number; discounts: number[]; quantity: number }>();
for (const event of events) {
const cat = event.category || 'Uncategorized';
if (!categoryMap.has(cat)) {
categoryMap.set(cat, { count: 0, discounts: [], quantity: 0 });
}
const entry = categoryMap.get(cat)!;
entry.count++;
entry.discounts.push(event.discount_percent);
if (event.quantity_sold_estimate !== null) {
entry.quantity += event.quantity_sold_estimate;
}
}
const byCategory = Array.from(categoryMap.entries()).map(([category, data]) => ({
category,
event_count: data.count,
avg_discount_percent: data.discounts.length > 0
? Math.round((data.discounts.reduce((a, b) => a + b, 0) / data.discounts.length) * 10) / 10
: 0,
quantity_sold_estimate: data.quantity > 0 ? data.quantity : null,
})).sort((a, b) => b.event_count - a.event_count);
return {
brand_name: brandName,
window,
total_promotional_events: totalEvents,
total_products_on_special: uniqueProducts,
total_dispensaries_with_specials: uniqueDispensaries,
states_with_specials: uniqueStates,
avg_discount_percent: Math.round(avgDiscount * 10) / 10,
avg_duration_days: avgDuration !== null ? Math.round(avgDuration * 10) / 10 : null,
total_quantity_sold_estimate: totalQuantitySold > 0 ? totalQuantitySold : null,
promotional_frequency: {
weekly_avg: Math.round(weeklyAvg * 10) / 10,
monthly_avg: Math.round(monthlyAvg * 10) / 10,
},
by_category: byCategory,
events,
};
}
}
export default BrandPenetrationService;
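// To make the LAG-based transition detection in getBrandPromotionalHistory concrete,
// here is a hedged sketch of how one product's snapshot sequence would map to a
// single promotional event (all values hypothetical):
//
//   captured_at  is_on_special  price_rec  price_rec_special  stock_quantity
//   Jan 01       false          50.00      null               40
//   Jan 03       true           50.00      35.00              38   <- special START (prev_is_on_special = false)
//   Jan 06       true           50.00      35.00              21
//   Jan 09       false          50.00      null               20   <- special END (prev_is_on_special = true)
//
//   Resulting event: special_start = Jan 03, special_end = Jan 09, duration_days = 6,
//   discount_percent = 30.0 ((50 - 35) / 50 * 100),
//   quantity_at_start = 38, quantity_at_end = 21 (the last on-special quantity, via LAG),
//   quantity_sold_estimate = GREATEST(0, 38 - 21) = 17.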

View File

@@ -259,6 +259,122 @@ export class StoreAnalyticsService {
}));
}
/**
* Get quantity changes for a store (increases/decreases)
* Useful for estimating sales (decreases) or restocks (increases)
*
* @param direction - 'decrease' for likely sales, 'increase' for restocks, 'all' for both
*/
async getQuantityChanges(
dispensaryId: number,
options: {
window?: TimeWindow;
customRange?: DateRange;
direction?: 'increase' | 'decrease' | 'all';
limit?: number;
} = {}
): Promise<{
dispensary_id: number;
window: TimeWindow;
direction: string;
total_changes: number;
total_units_decreased: number;
total_units_increased: number;
changes: Array<{
store_product_id: number;
product_name: string;
brand_name: string | null;
category: string | null;
old_quantity: number;
new_quantity: number;
quantity_delta: number;
direction: 'increase' | 'decrease';
captured_at: string;
}>;
}> {
const { window = '7d', customRange, direction = 'all', limit = 100 } = options;
const { start, end } = getDateRangeFromWindow(window, customRange);
// Build direction filter
let directionFilter = '';
if (direction === 'decrease') {
directionFilter = 'AND qty_delta < 0';
} else if (direction === 'increase') {
directionFilter = 'AND qty_delta > 0';
}
const result = await this.pool.query(`
WITH qty_changes AS (
SELECT
sps.store_product_id,
sp.name_raw AS product_name,
sp.brand_name_raw AS brand_name,
sp.category_raw AS category,
LAG(sps.stock_quantity) OVER w AS old_quantity,
sps.stock_quantity AS new_quantity,
sps.stock_quantity - LAG(sps.stock_quantity) OVER w AS qty_delta,
sps.captured_at
FROM store_product_snapshots sps
JOIN store_products sp ON sp.id = sps.store_product_id
WHERE sps.dispensary_id = $1
AND sps.captured_at >= $2
AND sps.captured_at <= $3
AND sps.stock_quantity IS NOT NULL
WINDOW w AS (PARTITION BY sps.store_product_id ORDER BY sps.captured_at)
)
SELECT *
FROM qty_changes
WHERE old_quantity IS NOT NULL
AND qty_delta != 0
${directionFilter}
ORDER BY captured_at DESC
LIMIT $4
`, [dispensaryId, start, end, limit]);
// Calculate totals
const totalsResult = await this.pool.query(`
WITH qty_changes AS (
SELECT
sps.stock_quantity - LAG(sps.stock_quantity) OVER w AS qty_delta
FROM store_product_snapshots sps
WHERE sps.dispensary_id = $1
AND sps.captured_at >= $2
AND sps.captured_at <= $3
AND sps.stock_quantity IS NOT NULL
AND sps.store_product_id IS NOT NULL
WINDOW w AS (PARTITION BY sps.store_product_id ORDER BY sps.captured_at)
)
SELECT
COUNT(*) FILTER (WHERE qty_delta != 0) AS total_changes,
COALESCE(SUM(ABS(qty_delta)) FILTER (WHERE qty_delta < 0), 0) AS units_decreased,
COALESCE(SUM(qty_delta) FILTER (WHERE qty_delta > 0), 0) AS units_increased
FROM qty_changes
WHERE qty_delta IS NOT NULL
`, [dispensaryId, start, end]);
const totals = totalsResult.rows[0] || {};
return {
dispensary_id: dispensaryId,
window,
direction,
total_changes: parseInt(totals.total_changes) || 0,
total_units_decreased: parseInt(totals.units_decreased) || 0,
total_units_increased: parseInt(totals.units_increased) || 0,
changes: result.rows.map((row: any) => ({
store_product_id: row.store_product_id,
product_name: row.product_name,
brand_name: row.brand_name,
category: row.category,
old_quantity: row.old_quantity,
new_quantity: row.new_quantity,
quantity_delta: row.qty_delta,
direction: row.qty_delta > 0 ? 'increase' : 'decrease',
captured_at: row.captured_at?.toISOString() || null,
})),
};
}
/**
* Get store inventory composition (categories and brands breakdown)
*/
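A minimal usage sketch for the new getQuantityChanges method, assuming StoreAnalyticsService is constructed with a pg Pool (the import paths and dispensary id below are hypothetical):

import { Pool } from 'pg';
import { StoreAnalyticsService } from './services/store-analytics'; // hypothetical path

const pool = new Pool({ connectionString: process.env.DATABASE_URL });
const analytics = new StoreAnalyticsService(pool);

async function estimateRecentSales(dispensaryId: number) {
  // direction: 'decrease' keeps only stock drops, i.e. likely sales.
  const report = await analytics.getQuantityChanges(dispensaryId, {
    window: '7d',
    direction: 'decrease',
    limit: 50,
  });
  console.log(`~${report.total_units_decreased} units sold at dispensary ${dispensaryId} in the last 7 days`);
  return report.changes;
}

Note that the totals come from a second, unfiltered query, so total_changes counts both increases and decreases even when direction is 'decrease'.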

View File

@@ -322,3 +322,48 @@ export interface RecVsMedPriceComparison {
};
price_diff_percent: number | null;
}
// ============================================================
// BRAND PROMOTIONAL ANALYTICS TYPES
// ============================================================
export interface BrandPromotionalEvent {
product_name: string;
store_product_id: number;
dispensary_id: number;
dispensary_name: string;
state_code: string;
category: string | null;
special_start: string; // ISO date when special started
special_end: string | null; // ISO date when special ended (null if ongoing)
duration_days: number | null;
regular_price: number;
special_price: number;
discount_percent: number;
quantity_at_start: number | null;
quantity_at_end: number | null;
quantity_sold_estimate: number | null; // quantity_at_start - quantity_at_end
}
export interface BrandPromotionalSummary {
brand_name: string;
window: TimeWindow;
total_promotional_events: number;
total_products_on_special: number;
total_dispensaries_with_specials: number;
states_with_specials: string[];
avg_discount_percent: number;
avg_duration_days: number | null;
total_quantity_sold_estimate: number | null;
promotional_frequency: {
weekly_avg: number;
monthly_avg: number;
};
by_category: Array<{
category: string;
event_count: number;
avg_discount_percent: number;
quantity_sold_estimate: number | null;
}>;
events: BrandPromotionalEvent[];
}
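To make the new types concrete, here is a hand-written BrandPromotionalEvent with made-up values; the derived fields follow the comments above:

const example: BrandPromotionalEvent = {
  product_name: 'Example Gummies 100mg',
  store_product_id: 101,
  dispensary_id: 42,
  dispensary_name: 'Example Dispensary',
  state_code: 'AZ',
  category: 'Edibles',
  special_start: '2025-12-01',
  special_end: '2025-12-08',     // null while the special is still running
  duration_days: 7,
  regular_price: 20,
  special_price: 15,
  discount_percent: 25,          // (20 - 15) / 20 * 100
  quantity_at_start: 40,
  quantity_at_end: 28,
  quantity_sold_estimate: 12,    // quantity_at_start - quantity_at_end
};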

View File

@@ -61,6 +61,13 @@ export interface Proxy {
failureCount: number;
successCount: number;
avgResponseTimeMs: number | null;
maxConnections: number; // Number of concurrent connections allowed (for rotating proxies)
// Location info (if known)
city?: string;
state?: string;
country?: string;
countryCode?: string;
timezone?: string;
}
export interface ProxyStats {
@@ -109,18 +116,27 @@ export class ProxyRotator {
username,
password,
protocol,
is_active as "isActive",
last_used_at as "lastUsedAt",
active as "isActive",
last_tested_at as "lastUsedAt",
failure_count as "failureCount",
success_count as "successCount",
avg_response_time_ms as "avgResponseTimeMs"
0 as "successCount",
response_time_ms as "avgResponseTimeMs",
COALESCE(max_connections, 1) as "maxConnections",
city,
state,
country,
country_code as "countryCode",
timezone
FROM proxies
WHERE is_active = true
ORDER BY failure_count ASC, last_used_at ASC NULLS FIRST
WHERE active = true
ORDER BY failure_count ASC, last_tested_at ASC NULLS FIRST
`);
this.proxies = result.rows;
console.log(`[ProxyRotator] Loaded ${this.proxies.length} active proxies`);
// Calculate total concurrent capacity
const totalCapacity = this.proxies.reduce((sum, p) => sum + p.maxConnections, 0);
console.log(`[ProxyRotator] Loaded ${this.proxies.length} active proxies (${totalCapacity} max concurrent connections)`);
} catch (error) {
// Table might not exist - that's okay
console.warn(`[ProxyRotator] Could not load proxies: ${error}`);
@@ -192,11 +208,11 @@ export class ProxyRotator {
UPDATE proxies
SET
failure_count = failure_count + 1,
last_failure_at = NOW(),
last_error = $2,
is_active = CASE WHEN failure_count >= 4 THEN false ELSE is_active END
updated_at = NOW(),
test_result = $2,
active = CASE WHEN failure_count >= 4 THEN false ELSE active END
WHERE id = $1
`, [proxyId, error || null]);
`, [proxyId, error || 'failed']);
} catch (err) {
console.error(`[ProxyRotator] Failed to update proxy ${proxyId}:`, err);
}
@@ -226,12 +242,13 @@ export class ProxyRotator {
await this.pool.query(`
UPDATE proxies
SET
success_count = success_count + 1,
last_used_at = NOW(),
avg_response_time_ms = CASE
WHEN avg_response_time_ms IS NULL THEN $2
ELSE (avg_response_time_ms * 0.8) + ($2 * 0.2)
END
last_tested_at = NOW(),
test_result = 'success',
response_time_ms = CASE
WHEN response_time_ms IS NULL THEN $2
ELSE (response_time_ms * 0.8 + $2 * 0.2)::integer
END,
updated_at = NOW()
WHERE id = $1
`, [proxyId, responseTimeMs || null]);
} catch (err) {
@@ -255,7 +272,7 @@ export class ProxyRotator {
*/
getStats(): ProxyStats {
const totalProxies = this.proxies.length;
const activeProxies = this.proxies.filter(p => p.isActive).length;
const activeProxies = this.proxies.reduce((sum, p) => sum + p.maxConnections, 0); // Total concurrent capacity
const blockedProxies = this.proxies.filter(p => p.failureCount >= 5).length;
const successRates = this.proxies
@@ -268,7 +285,7 @@ export class ProxyRotator {
return {
totalProxies,
activeProxies,
activeProxies, // Total concurrent capacity across all proxies
blockedProxies,
avgSuccessRate,
};
@@ -402,6 +419,26 @@ export class CrawlRotator {
await this.proxy.markFailed(current.id, error);
}
}
/**
* Get current proxy location info (for reporting)
* Note: For rotating proxies (like IPRoyal), the actual exit location varies per request
*/
getProxyLocation(): { city?: string; state?: string; country?: string; timezone?: string; isRotating: boolean } | null {
const current = this.proxy.getCurrent();
if (!current) return null;
// Check if this is a rotating proxy (max_connections > 1 usually indicates rotating)
const isRotating = current.maxConnections > 1;
return {
city: current.city,
state: current.state,
country: current.country,
timezone: current.timezone,
isRotating
};
}
}
// ============================================================
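The response-time update in the success path above is an exponential moving average with a 0.2 smoothing factor; a standalone sketch of the same arithmetic:

// Mirrors the SQL CASE above: the first sample is taken as-is, later samples blend 80% old / 20% new.
function smoothResponseTime(previousAvgMs: number | null, sampleMs: number): number {
  if (previousAvgMs === null) return sampleMs;
  return Math.round(previousAvgMs * 0.8 + sampleMs * 0.2); // the SQL casts to integer instead
}

smoothResponseTime(null, 500);  // 500 (first measurement)
smoothResponseTime(500, 1000);  // 600 (a slow outlier only nudges the average)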

View File

@@ -0,0 +1,134 @@
/**
* IP2Location Service
*
* Uses the local IP2Location LITE DB5 database for IP geolocation.
* No external API calls, no rate limits.
*
* Database: IP2Location LITE DB5 (free, monthly updates)
* Fields: country, region, city, latitude, longitude
*/
import path from 'path';
import fs from 'fs';
// @ts-ignore - no types for ip2location-nodejs
const { IP2Location } = require('ip2location-nodejs');
const DB_PATH = process.env.IP2LOCATION_DB_PATH ||
path.join(__dirname, '../../data/ip2location/IP2LOCATION-LITE-DB5.BIN');
let ip2location: any = null;
let dbLoaded = false;
/**
* Initialize IP2Location database
*/
export function initIP2Location(): boolean {
if (dbLoaded) return true;
try {
if (!fs.existsSync(DB_PATH)) {
console.warn(`IP2Location database not found at: ${DB_PATH}`);
console.warn('Run: ./scripts/download-ip2location.sh to download');
return false;
}
ip2location = new IP2Location();
ip2location.open(DB_PATH);
dbLoaded = true;
console.log('IP2Location database loaded successfully');
return true;
} catch (err) {
console.error('Failed to load IP2Location database:', err);
return false;
}
}
/**
* Close IP2Location database
*/
export function closeIP2Location(): void {
if (ip2location) {
ip2location.close();
ip2location = null;
dbLoaded = false;
}
}
export interface GeoLocation {
city: string | null;
state: string | null;
stateCode: string | null;
country: string | null;
countryCode: string | null;
lat: number | null;
lng: number | null;
}
/**
* Lookup IP address location
*
* @param ip - IPv4 or IPv6 address
* @returns Location data or null if not found
*/
export function lookupIP(ip: string): GeoLocation | null {
// Skip private/localhost IPs
if (!ip || ip === '127.0.0.1' || ip === '::1' ||
ip.startsWith('192.168.') || ip.startsWith('10.') ||
ip.startsWith('172.16.') || ip.startsWith('172.17.') ||
ip.startsWith('::ffff:127.') || ip.startsWith('::ffff:192.168.') ||
ip.startsWith('::ffff:10.')) {
return null;
}
// Strip IPv6 prefix if present
const cleanIP = ip.replace(/^::ffff:/, '');
// Initialize on first use if not already loaded
if (!dbLoaded) {
if (!initIP2Location()) {
return null;
}
}
try {
const result = ip2location.getAll(cleanIP);
if (!result || result.ip === '?' || result.countryShort === '-') {
return null;
}
// DB5 LITE includes lat/lng; treat 0 as "not available" (DB3 and below omit these fields)
const lat = typeof result.latitude === 'number' && result.latitude !== 0 ? result.latitude : null;
const lng = typeof result.longitude === 'number' && result.longitude !== 0 ? result.longitude : null;
return {
city: result.city !== '-' ? result.city : null,
state: result.region !== '-' ? result.region : null,
stateCode: null, // LITE databases expose region names only, not ISO state codes
country: result.countryLong !== '-' ? result.countryLong : null,
countryCode: result.countryShort !== '-' ? result.countryShort : null,
lat,
lng,
};
} catch (err) {
console.error('IP2Location lookup error:', err);
return null;
}
}
/**
* Check if IP2Location database is available
*/
export function isIP2LocationAvailable(): boolean {
if (dbLoaded) return true;
return fs.existsSync(DB_PATH);
}
// Export singleton-style interface
export default {
init: initIP2Location,
close: closeIP2Location,
lookup: lookupIP,
isAvailable: isIP2LocationAvailable,
};
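A usage sketch for the geolocation helper above, assuming the .BIN database has been downloaded to the default path (the import path is a guess):

import geo from './services/ip2location'; // hypothetical path

// The database is opened lazily on the first lookup; private and loopback IPs return null.
const location = geo.lookup('8.8.8.8');
if (location) {
  console.log(`${location.city ?? 'unknown city'}, ${location.state ?? 'unknown region'} (${location.countryCode ?? '??'})`);
} else {
  console.log('No geolocation available (private IP, unknown IP, or missing database)');
}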

View File

@@ -276,7 +276,6 @@ export async function addProxiesFromList(proxies: Array<{
await pool.query(`
INSERT INTO proxies (host, port, protocol, username, password, active)
VALUES ($1, $2, $3, $4, $5, false)
ON CONFLICT (host, port, protocol) DO NOTHING
`, [
proxy.host,
proxy.port,
@@ -285,28 +284,10 @@ export async function addProxiesFromList(proxies: Array<{
proxy.password
]);
// Check if it was actually inserted
const result = await pool.query(`
SELECT id FROM proxies
WHERE host = $1 AND port = $2 AND protocol = $3
`, [proxy.host, proxy.port, proxy.protocol]);
if (result.rows.length > 0) {
// Check if it was just inserted (no last_tested_at means new)
const checkResult = await pool.query(`
SELECT last_tested_at FROM proxies
WHERE host = $1 AND port = $2 AND protocol = $3
`, [proxy.host, proxy.port, proxy.protocol]);
if (checkResult.rows[0].last_tested_at === null) {
added++;
if (added % 100 === 0) {
console.log(`📥 Imported ${added} proxies...`);
}
} else {
duplicates++;
}
}
} catch (error: any) {
failed++;
const errorMsg = `${proxy.host}:${proxy.port} - ${error.message}`;
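The removed block above detected brand-new rows by re-querying for last_tested_at IS NULL. Where a unique constraint on (host, port, protocol) does exist, the added-vs-duplicate split can be read from a single statement; a sketch of that alternative (not what this commit does):

// RETURNING only yields a row when the INSERT actually happened; DO NOTHING swallows duplicates.
const inserted = await pool.query(
  `INSERT INTO proxies (host, port, protocol, username, password, active)
   VALUES ($1, $2, $3, $4, $5, false)
   ON CONFLICT (host, port, protocol) DO NOTHING
   RETURNING id`,
  [proxy.host, proxy.port, proxy.protocol, proxy.username, proxy.password]
);
if (inserted.rows.length > 0) {
  added++;
} else {
  duplicates++;
}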

View File

@@ -8,8 +8,12 @@ interface ProxyTestJob {
tested_proxies: number;
passed_proxies: number;
failed_proxies: number;
mode?: string; // 'all' | 'failed' | 'inactive'
}
// Concurrency settings
const DEFAULT_CONCURRENCY = 10; // Test 10 proxies at a time
// Simple in-memory queue - could be replaced with Bull/Bee-Queue for production
const activeJobs = new Map<number, { cancelled: boolean }>();
@@ -33,18 +37,40 @@ export async function cleanupOrphanedJobs(): Promise<void> {
}
}
export async function createProxyTestJob(): Promise<number> {
export type ProxyTestMode = 'all' | 'failed' | 'inactive';
export interface CreateJobResult {
jobId: number;
totalProxies: number;
}
export async function createProxyTestJob(mode: ProxyTestMode = 'all', concurrency: number = DEFAULT_CONCURRENCY): Promise<CreateJobResult> {
// Check for existing running jobs first
const existingJob = await getActiveProxyTestJob();
if (existingJob) {
throw new Error('A proxy test job is already running. Please cancel it first.');
}
const result = await pool.query(`
SELECT COUNT(*) as count FROM proxies
`);
// Get count based on mode
let countQuery: string;
switch (mode) {
case 'failed':
countQuery = `SELECT COUNT(*) as count FROM proxies WHERE test_result = 'failed' OR active = false`;
break;
case 'inactive':
countQuery = `SELECT COUNT(*) as count FROM proxies WHERE active = false`;
break;
default:
countQuery = `SELECT COUNT(*) as count FROM proxies`;
}
const result = await pool.query(countQuery);
const totalProxies = parseInt(result.rows[0].count);
if (totalProxies === 0) {
throw new Error(`No proxies to test with mode '${mode}'`);
}
const jobResult = await pool.query(`
INSERT INTO proxy_test_jobs (status, total_proxies)
VALUES ('pending', $1)
@@ -53,12 +79,12 @@ export async function createProxyTestJob(): Promise<number> {
const jobId = jobResult.rows[0].id;
// Start job in background
runProxyTestJob(jobId).catch(err => {
// Start job in background with mode and concurrency
runProxyTestJob(jobId, mode, concurrency).catch(err => {
console.error(`❌ Proxy test job ${jobId} failed:`, err);
});
return jobId;
return { jobId, totalProxies };
}
export async function getProxyTestJob(jobId: number): Promise<ProxyTestJob | null> {
@@ -111,7 +137,7 @@ export async function cancelProxyTestJob(jobId: number): Promise<boolean> {
return result.rows.length > 0;
}
async function runProxyTestJob(jobId: number): Promise<void> {
async function runProxyTestJob(jobId: number, mode: ProxyTestMode = 'all', concurrency: number = DEFAULT_CONCURRENCY): Promise<void> {
// Register job as active
activeJobs.set(jobId, { cancelled: false });
@@ -125,20 +151,30 @@ async function runProxyTestJob(jobId: number): Promise<void> {
WHERE id = $1
`, [jobId]);
console.log(`🔍 Starting proxy test job ${jobId}...`);
console.log(`🔍 Starting proxy test job ${jobId} (mode: ${mode}, concurrency: ${concurrency})...`);
// Get all proxies
const result = await pool.query(`
SELECT id, host, port, protocol, username, password
FROM proxies
ORDER BY id
`);
// Get proxies based on mode
let query: string;
switch (mode) {
case 'failed':
query = `SELECT id, host, port, protocol, username, password FROM proxies WHERE test_result = 'failed' OR active = false ORDER BY id`;
break;
case 'inactive':
query = `SELECT id, host, port, protocol, username, password FROM proxies WHERE active = false ORDER BY id`;
break;
default:
query = `SELECT id, host, port, protocol, username, password FROM proxies ORDER BY id`;
}
const result = await pool.query(query);
const proxies = result.rows;
let tested = 0;
let passed = 0;
let failed = 0;
for (const proxy of result.rows) {
// Process proxies in batches for parallel testing
for (let i = 0; i < proxies.length; i += concurrency) {
// Check if job was cancelled
const jobControl = activeJobs.get(jobId);
if (jobControl?.cancelled) {
@@ -146,7 +182,11 @@ async function runProxyTestJob(jobId: number): Promise<void> {
break;
}
// Test the proxy
const batch = proxies.slice(i, i + concurrency);
// Test batch in parallel
const batchResults = await Promise.all(
batch.map(async (proxy) => {
const testResult = await testProxy(
proxy.host,
proxy.port,
@@ -158,12 +198,19 @@ async function runProxyTestJob(jobId: number): Promise<void> {
// Save result
await saveProxyTestResult(proxy.id, testResult);
return testResult.success;
})
);
// Count results
for (const success of batchResults) {
tested++;
if (testResult.success) {
if (success) {
passed++;
} else {
failed++;
}
}
// Update job progress
await pool.query(`
@@ -175,10 +222,8 @@ async function runProxyTestJob(jobId: number): Promise<void> {
WHERE id = $4
`, [tested, passed, failed, jobId]);
// Log progress every 10 proxies
if (tested % 10 === 0) {
console.log(`📊 Job ${jobId}: ${tested}/${result.rows.length} proxies tested (${passed} passed, ${failed} failed)`);
}
// Log progress
console.log(`📊 Job ${jobId}: ${tested}/${proxies.length} proxies tested (${passed} passed, ${failed} failed)`);
}
// Mark job as completed
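The loop above tests proxies in fixed-size batches, awaiting each Promise.all round before starting the next; a generic version of that pattern (the helper name is mine, not from the source):

// Runs worker over items with at most batchSize promises in flight at a time.
async function runInBatches<T, R>(
  items: T[],
  batchSize: number,
  worker: (item: T) => Promise<R>
): Promise<R[]> {
  const results: R[] = [];
  for (let i = 0; i < items.length; i += batchSize) {
    const batch = items.slice(i, i + batchSize);
    results.push(...(await Promise.all(batch.map(worker))));
  }
  return results;
}

// e.g. retest only previously failed proxies, 20 at a time:
// const { jobId, totalProxies } = await createProxyTestJob('failed', 20);

One slow proxy stalls its whole batch; a small concurrency pool (e.g. p-limit) would keep every slot busy, at the cost of an extra dependency.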

View File

@@ -3,7 +3,7 @@ import StealthPlugin from 'puppeteer-extra-plugin-stealth';
import { Browser, Page } from 'puppeteer';
import { SocksProxyAgent } from 'socks-proxy-agent';
import { pool } from '../db/pool';
import { uploadImageFromUrl, getImageUrl } from '../utils/minio';
import { downloadProductImageLegacy } from '../utils/image-storage';
import { logger } from './logger';
import { registerScraper, updateScraperStats, completeScraper } from '../routes/scraper-monitor';
import { incrementProxyFailure, getActiveProxy, isBotDetectionError, putProxyInTimeout } from './proxy';
@@ -767,7 +767,8 @@ export async function saveProducts(storeId: number, categoryId: number, products
if (product.imageUrl && !localImagePath) {
try {
localImagePath = await uploadImageFromUrl(product.imageUrl, productId);
const result = await downloadProductImageLegacy(product.imageUrl, 0, productId);
localImagePath = result.urls?.original || null;
await client.query(`
UPDATE products
SET local_image_path = $1

Some files were not shown because too many files have changed in this diff.