Compare commits

...

198 Commits

Author SHA1 Message Date
Kelly
754a46c56f chore: trigger CI rebuild
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
2025-12-16 09:19:52 -07:00
Kelly
e450d2e99e fix(ci): use local registry mirror instead of mirror.gcr.io
Some checks failed
ci/woodpecker/push/woodpecker Pipeline failed
Switch Kaniko registry-mirror from mirror.gcr.io to 10.100.9.70:5000
to pull base images from the local registry instead of GCR.
2025-12-16 09:09:15 -07:00
Kelly
205a8b3159 chore: retry CI for visibility-events fix
Some checks failed
ci/woodpecker/push/woodpecker Pipeline failed
2025-12-16 08:56:59 -07:00
Kelly
8bd29d11bb fix: Use correct column names in visibility-events query
Some checks failed
ci/woodpecker/push/woodpecker Pipeline failed
Changed name -> name_raw and brand -> brand_name_raw to match
store_products table schema.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-16 02:21:01 -07:00
Kelly
4e7b3d2336 fix: Update DATABASE_URL to point to primary PostgreSQL server
Changed from 10.100.6.50 (secondary/replica in read-only mode) to
10.100.7.50 (primary) to fix read-only transaction errors.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-16 01:13:49 -07:00
Kelly
849123693a fix(ci): Use unquoted heredoc for kubeconfig token injection
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
- Changed heredoc from 'KUBEEOF' (quoted) to KUBEEOF (unquoted)
- This allows shell variable expansion of $K8S_TOKEN directly
- Removed sed replacement step that was failing due to YAML escaping issues
2025-12-15 21:55:52 -07:00
Kelly
a1227f77b9 chore: retry CI
Some checks failed
ci/woodpecker/push/woodpecker Pipeline failed
2025-12-15 21:53:00 -07:00
Kelly
415e89a012 chore: retry CI with k8s_token secret
Some checks failed
ci/woodpecker/push/woodpecker Pipeline failed
2025-12-15 21:26:06 -07:00
Kelly
45844c6281 ci: Embed kubeconfig, use k8s_token secret for token only 2025-12-15 21:19:26 -07:00
Kelly
24c9586d81 ci: Skip base64 - use raw kubeconfig in secret
Some checks failed
ci/woodpecker/push/woodpecker Pipeline failed
2025-12-15 21:09:54 -07:00
Kelly
f8d61446d5 chore: retry CI with correct kubeconfig
Some checks failed
ci/woodpecker/push/woodpecker Pipeline failed
2025-12-15 20:57:19 -07:00
Kelly
0f859d1c75 chore: retry CI after kubeconfig fix
Some checks failed
ci/woodpecker/push/woodpecker Pipeline failed
2025-12-15 20:43:40 -07:00
Kelly
52dc669782 ci: Remove clone/volume config (requires admin trust)
Some checks failed
ci/woodpecker/push/woodpecker Pipeline failed
Woodpecker doesn't allow custom clone or volumes without elevated trust.
Kaniko layer caching (--cache-repo) still works (registry-based).

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-15 20:16:18 -07:00
Kelly
2e47996354 ci: Add shallow git clone (depth: 1)
Only fetch latest commit instead of full history.
Reduces checkout time and bandwidth.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-15 20:05:33 -07:00
Kelly
f25d4eaf27 ci: Add npm and Docker layer caching
- PR steps: shared npm-cache volume for faster npm ci
- Docker builds: --cache-repo to local registry for layer caching
- Kaniko will reuse npm install layer when package.json unchanged

First build populates cache, subsequent builds much faster.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-15 20:04:15 -07:00
Kelly
61a6be888c ci: Consolidate back to 4 docker steps
Some checks failed
ci/woodpecker/push/woodpecker Pipeline failed
- Remove separate build steps (didn't save time)
- Use original multi-stage Dockerfiles
- Delete unused Dockerfile.ci files
- 4 parallel docker builds + deploy

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-15 20:01:11 -07:00
Kelly
09c2b3a0e1 ci: Use node:22 instead of node:22-alpine for builds
Some checks failed
ci/woodpecker/push/woodpecker Pipeline failed
Alpine uses musl libc which breaks Rollup's native bindings.
Debian-based node:22 uses glibc and works correctly.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-15 19:56:48 -07:00
Kelly
cec34198c7 ci: Add slim Dockerfile.ci files for faster CI builds
Some checks failed
ci/woodpecker/push/woodpecker Pipeline failed
- Add Dockerfile.ci for backend, cannaiq, findadispo, findagram
- Frontend Dockerfiles just copy pre-built assets to nginx
- Backend Dockerfile copies pre-built dist/node_modules
- Reduces Docker build time by doing npm ci/build in CI step

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-15 19:43:08 -07:00
Kelly
3c10e07e45 feat(ci): Push built images to local registry for faster K8s pulls
Some checks failed
ci/woodpecker/push/woodpecker Pipeline failed
- Build images push to 10.100.9.70:5000/cannaiq/*
- Deploy pulls from local registry (no external network)
- Removed git.spdy.io registry auth (not needed for local)
- Added --insecure-registry for HTTP local registry
2025-12-15 19:16:16 -07:00
Kelly
3582c2e9e2 fix(k8s): Use external Postgres/Redis/MinIO services
Some checks failed
ci/woodpecker/push/woodpecker Pipeline failed
- Update secrets.yaml with correct MinIO credentials
- Add Redis connection details
- Remove postgres.yaml (use external 10.100.6.50)
- Remove redis.yaml (use external 10.100.9.50)
2025-12-15 19:03:05 -07:00
Kelly
c6874977ee docs: Add spdy.io infrastructure credentials
Some checks failed
ci/woodpecker/push/woodpecker Pipeline failed
2025-12-15 18:59:18 -07:00
Kelly
68430f5c22 fix(ci): Use mirror.gcr.io as registry mirror for Kaniko
Some checks failed
ci/woodpecker/push/woodpecker Pipeline failed
2025-12-15 18:52:02 -07:00
Kelly
ccefd325aa fix(ci): Use hardcoded Woodpecker workspace path for Kaniko
Some checks failed
ci/woodpecker/push/woodpecker Pipeline failed
2025-12-15 18:49:56 -07:00
Kelly
e119c5af53 chore: trigger CI
Some checks failed
ci/woodpecker/push/woodpecker Pipeline failed
2025-12-15 18:44:24 -07:00
Kelly
e61224aaed fix(ci): Use CI_WORKSPACE for Kaniko context paths
Some checks failed
ci/woodpecker/push/woodpecker Pipeline failed
2025-12-15 18:42:33 -07:00
Kelly
7cf1b7643f feat(ci): Use local registry 10.100.9.70:5000 for base images
Some checks failed
ci/woodpecker/push/woodpecker Pipeline failed
2025-12-15 18:28:20 -07:00
Kelly
74f813d68f feat(ci): Switch to Kaniko for Docker builds (no daemon, better DNS)
Some checks failed
ci/woodpecker/push/woodpecker Pipeline failed
2025-12-15 18:20:53 -07:00
Kelly
f38f1024de fix(docker): Use mirror.gcr.io in all Dockerfiles to avoid rate limits
Some checks failed
ci/woodpecker/push/woodpecker Pipeline failed
2025-12-15 18:19:18 -07:00
Kelly
358099c58a chore: trigger CI
Some checks failed
ci/woodpecker/push/woodpecker Pipeline failed
2025-12-15 18:12:41 -07:00
Kelly
7fdcfc4fc4 fix(ci): Use mirror.gcr.io to avoid Docker Hub rate limits
Some checks failed
ci/woodpecker/push/woodpecker Pipeline failed
2025-12-15 18:11:16 -07:00
Kelly
541b461283 fix(ci): Use public node:20 image for typecheck steps
Some checks failed
ci/woodpecker/push/woodpecker Pipeline failed
2025-12-15 18:08:46 -07:00
Kelly
8f25cf10ab chore: retry CI
Some checks failed
ci/woodpecker/push/woodpecker Pipeline failed
2025-12-15 17:06:42 -07:00
Kelly
79e434212f chore: retry CI
Some checks failed
ci/woodpecker/push/woodpecker Pipeline failed
2025-12-15 16:45:05 -07:00
Kelly
600172eff6 chore: retry CI
Some checks are pending
ci/woodpecker/push/woodpecker Pipeline is running
2025-12-15 15:51:40 -07:00
Kelly
4c12763fa1 chore: retry CI
Some checks failed
ci/woodpecker/push/woodpecker Pipeline failed
2025-12-15 13:18:53 -07:00
Kelly
2cb9a093f4 chore: retry CI
Some checks failed
ci/woodpecker/push/woodpecker Pipeline failed
2025-12-15 12:29:45 -07:00
Kelly
15ab40a820 chore: trigger CI build
Some checks failed
ci/woodpecker/push/woodpecker Pipeline failed
2025-12-15 12:14:14 -07:00
Kelly
2708fbe319 feat(brands): Add calculated tags with configurable thresholds
Some checks failed
ci/woodpecker/push/woodpecker Pipeline failed
Tags assigned per store:
- must_win: High-revenue store with room to grow SKUs
- at_risk: High OOS% (losing shelf presence)
- top_performer: High sales + good inventory management
- growth: Above-average velocity
- low_inventory: Low days on hand

Configurable via query params:
- ?must_win_max_skus=5
- ?at_risk_oos_pct=30
- ?top_performer_max_oos=15
- ?low_inventory_days=7

Response includes tag_thresholds showing applied values.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-15 12:06:44 -07:00
Kelly
231d49e3e8 feat(brands): Add margin estimation to stores/performance endpoint
Some checks failed
ci/woodpecker/push/woodpecker Pipeline failed
- Add ?margin_pct query param (default 50% industry standard)
- Returns margin_pct and margin_est per store
- Includes margin_pct_assumed in response metadata

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-15 12:02:36 -07:00
Kelly
17defa046c feat(api): Add /api/brands/:brand/stores/performance endpoint
Some checks failed
ci/woodpecker/push/woodpecker Pipeline failed
Add comprehensive per-store performance endpoint for Cannabrands integration.
Returns all metrics in one call for easy merging with internal order data.

Response includes per store:
- active_skus, oos_skus, total_skus, oos_pct
- avg_daily_units (velocity from inventory deltas)
- avg_days_on_hand (stock / daily velocity)
- total_sales_est (units × price × days)
- lost_opportunity (OOS days × velocity × price)
- categories breakdown (JSON object)
- avg_price, total_stock

Query params: ?days=28&state=AZ&limit=100&offset=0

Matches Hoodie Analytics columns for Order Management view.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-15 11:57:38 -07:00
Kelly
d76a5fb3c5 feat(api): Add brand analytics API endpoints
Some checks failed
ci/woodpecker/push/woodpecker Pipeline failed
Add comprehensive brand-level analytics endpoints at /api/brands:

Brand Discovery:
- GET /api/brands - List all brands with summary metrics
- GET /api/brands/search - Search brands by name
- GET /api/brands/top - Top brands by distribution

Brand Overview:
- GET /api/brands/:brand - Full brand intelligence dashboard
- GET /api/brands/:brand/analytics - Alias for overview

Sales & Velocity:
- GET /api/brands/:brand/sales - Sales data (4wk, daily avg)
- GET /api/brands/:brand/velocity - Units/day by SKU
- GET /api/brands/:brand/trends - Weekly sales trends

Inventory & Stock:
- GET /api/brands/:brand/inventory - Current stock levels
- GET /api/brands/:brand/oos - Out-of-stock products
- GET /api/brands/:brand/low-stock - Products below threshold

Pricing:
- GET /api/brands/:brand/pricing - Current prices
- GET /api/brands/:brand/price-history - Price changes over time

Distribution:
- GET /api/brands/:brand/distribution - Store count, market coverage
- GET /api/brands/:brand/stores - Stores carrying brand
- GET /api/brands/:brand/gaps - Whitespace opportunities

Events & Alerts:
- GET /api/brands/:brand/events - Visibility events
- POST /api/brands/:brand/events/:id/ack - Acknowledge alert

Products:
- GET /api/brands/:brand/products - All SKUs with metrics
- GET /api/brands/:brand/products/:sku - Single product deep dive

All endpoints support ?state=XX, ?days=N, and ?category=X filters.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-15 11:06:23 -07:00
Kelly
f19fc59583 chore: retry CI
Some checks are pending
ci/woodpecker/push/woodpecker Pipeline is running
2025-12-15 09:59:11 -07:00
Kelly
4c183c87a9 chore: retry CI after registry fix
Some checks failed
ci/woodpecker/push/woodpecker Pipeline failed
2025-12-15 09:35:05 -07:00
Kelly
ffa05f89c4 chore: trigger CI on develop
Some checks failed
ci/woodpecker/push/woodpecker Pipeline failed
2025-12-15 09:06:43 -07:00
Kelly
9aa885211e ci: Allow builds from develop branch
Some checks failed
ci/woodpecker/push/woodpecker Pipeline failed
2025-12-15 09:00:42 -07:00
Kelly
b24b2fbc89 chore: trigger CI
Some checks failed
ci/woodpecker/push/woodpecker Pipeline failed
2025-12-15 08:55:00 -07:00
Kelly
f7371de3d1 fix: Use public Docker Hub images instead of private registry
Some checks failed
ci/woodpecker/push/woodpecker Pipeline failed
Switch FROM directives to use public Docker Hub images:
- node:20-slim (instead of git.spdy.io/creationshop/node:20-slim)
- nginx:alpine (instead of git.spdy.io/creationshop/nginx:alpine)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-15 08:23:35 -07:00
Kelly
98970acf13 fix: Update Dockerfiles to use git.spdy.io registry
Some checks failed
ci/woodpecker/push/woodpecker Pipeline failed
2025-12-15 08:18:53 -07:00
Kelly
f0e636ac70 chore: trigger CI
Some checks failed
ci/woodpecker/push/woodpecker Pipeline failed
2025-12-15 08:16:02 -07:00
Kelly
138ade07e1 chore: test CI 2025-12-15 08:11:33 -07:00
Kelly
728168e799 chore: trigger CI 2025-12-15 08:10:06 -07:00
Kelly
42c6bb7424 chore: trigger CI 2025-12-15 08:06:31 -07:00
Kelly
b32e847270 chore: test CI agent 2025-12-15 08:03:39 -07:00
Kelly
287627195c chore: trigger CI 2025-12-15 08:01:56 -07:00
Kelly
bfb965fa44 chore: trigger CI 2025-12-15 07:58:10 -07:00
Kelly
7bbc77a854 chore: trigger CI 2025-12-15 07:57:40 -07:00
Kelly
39ba522643 chore: trigger CI 2025-12-15 07:57:08 -07:00
Kelly
6ea4cd0d05 chore: trigger CI 2025-12-15 07:56:50 -07:00
Kelly
520cba9d31 chore: trigger CI 2025-12-15 07:56:44 -07:00
Kelly
331b6273ac chore: trigger build 2025-12-15 07:53:42 -07:00
Kelly
d4a18cc3ce chore: test CI agent 2025-12-15 07:51:47 -07:00
Kelly
977803d862 chore: trigger CI build 2025-12-15 07:48:39 -07:00
Kelly
48c640aae5 chore: trigger CI 2025-12-15 07:44:38 -07:00
Kelly
918a1c6b26 chore: trigger CI after Woodpecker activation 2025-12-15 07:26:25 -07:00
Kelly
c7541ec2eb chore: Rename all references from dispensary-scraper to cannaiq 2025-12-15 07:19:33 -07:00
Kelly
8676762d6b chore: trigger CI build 2025-12-15 06:51:58 -07:00
Kelly
3f393ef77f fix: Correct repo name in auto-merge URL 2025-12-15 06:42:16 -07:00
Kelly
a8360c7260 feat: Migrate to spdy.io infrastructure
- Namespace: dispensary-scraper → cannaiq
- Registry: code.cannabrands.app → git.spdy.io
- Database: External PostgreSQL at 10.100.6.50
- MinIO: Internal at 10.100.9.80:9000
- CI: ci.spdy.io

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-15 06:40:48 -07:00
Kelly
0979c9c37a Revert "feat(scheduler): Support sub-hour interval_minutes in task_schedules"
This reverts commit b607fd7f44.
2025-12-14 18:50:25 -07:00
Kelly
b607fd7f44 feat(scheduler): Support sub-hour interval_minutes in task_schedules
- Add interval_minutes column to TaskSchedule interface
- Prefer interval_minutes over interval_hours when calculating next_run_at
- Add jitter (0-20% of interval) for sub-hour schedules to prevent detection
- Update getSchedules() to include interval_minutes and dispensary_name
- Update updateSchedule() to allow setting interval_minutes
- Add migration 121 for interval_minutes column

Part of Real-Time Inventory Tracking feature.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-14 18:22:55 -07:00
Kelly
bf988529eb fix(ci): switch from buildx to regular docker plugin
BuildKit container driver has sysctl permission issues in LXC.
Using plugins/docker instead of woodpeckerci/plugin-docker-buildx.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-14 17:05:06 -07:00
Kelly
04153a2efa chore: retry CI after docker update 2025-12-14 17:01:05 -07:00
Kelly
a1a6876064 chore: retry CI after docker restart 2025-12-14 16:55:33 -07:00
Kelly
83466a03c3 chore: retry CI build 2025-12-14 16:40:52 -07:00
Kelly
35d6a17740 feat: Add daily baseline payload logic (12:01 AM - 3:00 AM window)
- Replace saveRawPayload with saveDailyBaseline in all handlers
- Full payloads only saved once per day per store during window
- Inventory snapshots still saved every crawl (lightweight tracking)
- Add last_baseline_at column to dispensaries table
- Show baseline status in Per-Store Schedules dashboard
- Display baseline window info (12:01 AM - 3:00 AM) in UI

Reduces storage by ~95% for high-frequency stores while maintaining
full audit capability via daily baselines.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-14 16:24:41 -07:00
Kelly
294d3db7a2 fix: Remove NOW() from partial indexes (not immutable)
🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-14 15:58:05 -07:00
Kelly
bbbd21ba94 chore: Ignore test scripts and .claude directory 2025-12-14 15:57:27 -07:00
Kelly
3496be3064 feat(treez): Fetch all products with match_all query (+19% more)
- Update buildProductQuery() to use match_all by default
- Captures hidden, below-threshold, and out-of-stock products
- Add extractPrimaryImage() and extractImages() to normalizer
- Add product_refresh_treez handler for platform-specific refresh
- Add product_refresh_treez to TaskRole type

Best Dispensary: 228 → 271 products (+43)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-14 15:56:06 -07:00
Kelly
af859a85f9 feat: Add Real-Time Inventory Tracking infrastructure
Implements per-store high-frequency crawl scheduling and inventory
snapshot tracking for sales velocity estimation (Hoodie Analytics parity).

Database migrations:
- 117: Per-store crawl_interval_minutes and next_crawl_at columns
- 118: inventory_snapshots table (30-day retention)
- 119: product_visibility_events table for OOS/brand alerts (90-day)

Backend changes:
- inventory-snapshots.ts: Shared utility normalizing Dutchie/Jane/Treez
- visibility-events.ts: Detects OOS, price changes, brand drops
- task-scheduler.ts: checkHighFrequencyStores() runs every 60s
- Handler updates: 2-line additions to save snapshots/events

API endpoints:
- GET /api/tasks/schedules/high-frequency
- PUT /api/tasks/schedules/high-frequency/:id
- DELETE /api/tasks/schedules/high-frequency/:id

Frontend:
- TasksDashboard: Per-Store Schedules section with stats

Features:
- Per-store intervals (15/30/60 min configurable)
- Jitter (0-20%) to avoid detection patterns
- Cross-platform support (Dutchie, Jane, Treez)
- No crawler core changes - scheduling/post-crawl only

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-14 15:53:04 -07:00
Kelly
d3f5e4ef4b feat(nav): Add Payloads menu item to admin sidebar
🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-14 14:26:37 -07:00
Kelly
abef265ae9 feat(workers): Add platform badge (D/J/T) to active tasks display
- Add PlatformBadge component showing D=Dutchie, J=Jane, T=Treez
- Include platform field in worker-registry API response
- Fix null running_seconds displaying as "nulls"

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-14 12:21:23 -07:00
Kelly
b28a91fca5 fix: Task completion result and null duration display bugs
1. task-worker.ts: Pass full result object to completeTask instead of
   non-existent result.data property (was causing {} to be stored)

2. WorkersDashboard.tsx: Handle null running_seconds in formatSecondsToTime
   (was displaying "nulls" due to JS type coercion)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-14 12:02:05 -07:00
Kelly
60b221e7fb feat: Add payloads dashboard, disable snapshots, fix scheduler
Frontend:
- Add PayloadsDashboard page with search, filter, view, and diff
- Update TasksDashboard default sort: pending → claimed → completed
- Add payload API methods to api.ts

Backend:
- Disable snapshot creation in product-refresh handler
- Remove product_refresh from schedule role options
- Disable compression in payload-storage (plain JSON for debugging)
- Fix task-scheduler: map 'embedded' menu_type to 'dutchie' platform
- Fix task-scheduler: use schedule.interval_hours as skipRecentHours

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-14 11:54:25 -07:00
Kelly
15cb657f13 fix(docker): Revert to libasound2 for Debian bookworm
- libasound2t64 is for Debian trixie (13), not bookworm (12)
- Keep build tools fix for native modules

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-14 11:07:03 -07:00
Kelly
f15920e508 fix(docker): Add build tools for native modules and fix Debian package name
- Add python3 and build-essential to builder stage for bcrypt/sharp compilation
- Change libasound2 to libasound2t64 for Debian bookworm compatibility
- Copy pre-built node_modules from builder instead of re-running npm install
- Prune dev dependencies in builder for smaller production image

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-14 11:01:22 -07:00
Kelly
9518ca48a5 feat(tasks): Task tracking, IP-per-store, and schedule edit fixes
- Add task completion verification with DB and output layers
- Add reconciliation loop to sync worker memory with DB state
- Implement IP-per-store-per-platform conflict detection
- Add task ID hash to MinIO payload filenames for traceability
- Fix schedule edit modal with dispensary info in API responses
- Add task ID display after dispensary name in worker dashboard
- Add migrations for proxy_ip and source tracking columns

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-14 10:49:21 -07:00
Kelly
3e9667571f fix(ui): Restore round worker slot circles with hover tooltips 2025-12-14 09:58:24 -07:00
Kelly
8f6efd377b fix(ui): Remove 'test' from fingerprint tooltip 2025-12-14 03:40:17 -07:00
Kelly
83e9718d78 fix(ui): Worker slot preflight checklist and fingerprint hover
- Fix fingerprint tooltip to use actual API field names (browserName, deviceCategory, detectedTimezone)
- Show real preflight steps: HTTP Preflight, Geo Session, Pool Ready
- Checkmarks appear as each step completes, spinners while in progress

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-14 03:38:46 -07:00
Kelly
f5cb17e1d4 feat(dutchie): Full payload with specials and all product statuses
- Set includeEnterpriseSpecials: true to get BOGO/sale deal names
- Set Status: 'All' to capture both Active and Inactive (sold out) products
- Make schedules query backward-compatible for missing pool_id column

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-14 03:35:25 -07:00
Kelly
f48a503e82 fix(tasks): Filter out disabled dispensaries in createStaggeredTasks
Tasks were being created for dispensaries with crawl_enabled=false
(duplicates, deprecated stores). Added an EXISTS check to keep only
crawl_enabled=true stores before creating tasks.

This prevents errors like:
"Dispensary 207 not found or not crawl_enabled"

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-14 03:07:03 -07:00
Kelly
e7b392141a feat(ui): Task pool toggle, sortable columns, worker slot visualization
Tasks Dashboard:
- Add clickable Pool Open/Paused toggle button in header
- Add sortable columns (ID, Role, Store, Status, Worker, Duration, Created)
- Show menu_type and pool badges under Store column
- Add Pool column to Schedules table
- Filter stores by platform in Create Task modal

Workers Dashboard:
- Redesign pod visualization to show 3 worker slots per pod
- Each slot shows preflight checklist (Overload? Terminating? Pool Query?)
- Once qualified, shows City/State, Proxy IP, Antidetect status
- Hover shows full fingerprint data (browser, platform, bot detection)

Backend:
- Add menu_type to listTasks query
- Add pool_id/pool_name to schedules query with task_pools JOIN
- Migration 114: Add pool_id column to task_schedules table

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-14 03:00:19 -07:00
Kelly
15a5a4239e fix(tasks): Make pool JOIN defensive when table doesn't exist
Auto-migrate fails early, so task_pools may not exist yet.
Check table existence before including pool columns/joins.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-14 02:29:07 -07:00
Kelly
20d7534b93 fix(ci): prefix docker tags with sha- to prevent scientific notation parsing
Git SHAs like 1861e183 or 698995e4 get parsed as scientific notation
by JSON parsers, breaking Docker tag creation.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-14 02:10:17 -07:00
Kelly
698995e46f chore: bump task worker version comment
Force new git SHA to avoid CI scientific notation bug.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-14 02:02:30 -07:00
Kelly
1861e18396 feat(workers): Implement geo-based task pools
Workers now follow the correct flow:
1. Check what pools have pending tasks
2. Claim a pool (e.g., Phoenix AZ)
3. Get Evomi proxy for that geo
4. Run preflight with geo proxy
5. Pull tasks from pool (up to 6 stores)
6. Execute tasks
7. Release pool when exhausted (6 stores visited)

Task pools group dispensaries by metro area (100mi radius):
- Phoenix AZ, Tucson AZ
- Los Angeles CA, San Francisco CA, San Diego CA, Sacramento CA
- Denver CO, Chicago IL, Boston MA, Detroit MI
- Las Vegas NV, Reno NV, Newark NJ, New York NY
- Oklahoma City OK, Tulsa OK, Portland OR, Seattle WA

Benefits:
- Workers know geo BEFORE getting proxy (no more "No geo assigned")
- IP diversity within metro area (Phoenix worker can use Tempe IP)
- Simpler worker logic - just match pool geo
- Pre-organized tasks, not grouped at claim time

New files:
- migrations/113_task_pools.sql - schema, seed data, functions
- src/services/task-pool.ts - TypeScript service

Env vars:
- USE_TASK_POOLS=true (new system)
- USE_IDENTITY_POOL=false (disabled)

🤖 Generated with [Claude Code](https://claude.ai/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-12-14 01:41:52 -07:00
Kelly
eedc027ff6 fix(workers): Report geo to worker_registry when identity claimed
Workers were showing "No geo assigned" on dashboard because geo info
was set internally but never reported to worker_registry after
identity pool claim.

Now updates current_state and current_city columns when identity
is claimed, so dashboard shows correct geo assignment.

Also documents CI/CD batching rule to minimize build time.

🤖 Generated with [Claude Code](https://claude.ai/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-12-14 01:14:31 -07:00
Kelly
ec5fcd9bc4 fix(proxy): Use rotating IPs instead of sticky sessions
Removes session parameter from Evomi proxy URL so each request
gets a different IP. Prevents all workers from sharing same IP.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-14 00:48:04 -07:00
Kelly
58150dafa6 docs: Add CI/CD workflow rule - commit and wait
🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-14 00:46:43 -07:00
Kelly
06adab7225 fix(preflight): Add state fallback when IP lookup fails
- Try ip-api.com first, then ipapi.co as fallback
- If both fail, use state coords from targetState param
- Prevents workers from getting stuck in preflight loop

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-14 00:24:31 -07:00
Kelly
38d7678a2e feat(antidetect): Use actual proxy IP location for browser fingerprint
- Replace hardcoded state coords with IP geolocation lookup via ip-api.com
- Browser timezone and geolocation now match actual proxy IP location
- City-level proxy targeting already in place via Evomi _city- parameter
- Add browser-factory.ts shared utility for antidetect setup

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-13 23:49:25 -07:00
Kelly
aac1181f3d perf(analytics): Fix 7.5s national summary endpoint
- Use denormalized d.product_count instead of JOIN to store_products
- Remove expensive per-product aggregations (avg_price, brand counts, stock)
- Query now runs in <100ms instead of 7.5s

The massive JOIN between dispensaries and store_products was causing
the slow load. State metrics now use the pre-computed product_count column.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-13 23:31:10 -07:00
Kelly
4eaf7e50d7 feat(ui): Add dropdown for Add User/Origin button
- Single dropdown button shows both options
- Selecting an option switches to that tab and opens modal
- Cleaner UX than separate buttons per tab

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-13 23:06:42 -07:00
Kelly
4cb4e1c502 feat(workers): Session pool system - claim tasks first, then get IP
New worker flow (enabled via USE_SESSION_POOL=true):
1. Worker claims up to 6 tasks for same geo (atomically marked claimed)
2. Gets Evomi proxy for that geo
3. Checks IP availability (not in use, not in 8hr cooldown)
4. Locks IP exclusively to this worker
5. Runs preflight with locked IP
6. Executes tasks (3 concurrent)
7. After 6 tasks, retires session (8hr IP cooldown)
8. Repeats with new IP

Key files:
- migrations/112_worker_session_pool.sql: Session table + atomic claiming
- services/worker-session.ts: Session lifecycle management
- tasks/task-worker.ts: sessionPoolMainLoop() with new flow
- services/crawl-rotator.ts: setFixedProxy() for session locking

Failed tasks return to pending for retry by another worker.
No two workers can share same IP simultaneously.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-13 22:54:45 -07:00
Kelly
f0bb454ca2 fix(workers): Require geo for worker qualification status
- Workers without geo now show orange "NO GEO" badge instead of gold qualified
- Orange ring + X badge on avatar when preflight OK but no geo
- Gold ring + checkmark only when fully qualified (preflight + geo)
- Add VenetianMask icon for antidetect status indicator
- Lock K8s replica count at exactly 8 pods in CLAUDE.md

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-13 21:58:47 -07:00
Kelly
b8bdc48c1e fix: Remove non-existent progress columns from worker tasks query 2025-12-13 21:24:55 -07:00
Kelly
8173fd2845 fix: Rename duplicate formatDuration function 2025-12-13 21:03:15 -07:00
Kelly
3921e66933 perf: Use denormalized product_count in pipeline and favorites routes
- pipeline.ts: Replace correlated subquery with d.product_count
- consumer-favorites.ts: Replace correlated subquery with d.product_count

Correlated subqueries were causing N+1 query patterns. Using the
denormalized column is an O(1) lookup per row.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-13 20:45:14 -07:00
Kelly
ad79605961 perf(dashboard): Fix slow activity endpoint with denormalized column + cache
- Use dispensaries.product_count instead of correlated subquery
- Add 1 minute in-memory cache for /dashboard/activity
- Reduces query time from ~30s to <100ms

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-13 20:43:12 -07:00
Kelly
6439de5cd4 feat(ui): Nested task slots in worker dashboard
Backend:
- Add active_tasks array to GET /worker-registry/workers response
- Include task details: role, dispensary, running_seconds, progress

Frontend:
- Show nested task list under each worker with duration
- Display empty slots when worker has capacity
- Update pod visualization to show 3 task slot nodes
- Active slots pulse blue, empty slots gray
- Hover for task details (dispensary, duration, progress)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-13 20:40:15 -07:00
Kelly
b51ba17d32 fix(ui): Fallback to fingerprint region for worker geo display
geoState was only using the current_state column, which is often null.
Now falls back to fingerprint.detectedLocation.region like geoCity does.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-13 20:34:50 -07:00
Kelly
2d631dfad0 feat(ui): Auto-detect trusted origin type from URL/pattern
- Remove Type dropdown from trusted origins form
- Auto-detect domain, IP, or regex from input
- Convert *.domain.com wildcards to proper regex
- Simplify form to just Name, URL/Pattern, Description

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-13 20:23:03 -07:00
Kelly
072388ffb2 fix(identity): Use unique session IDs for proxy rotation + add task pool gate
- Fix buildEvomiProxyUrl to use passed session ID from identity pool
  instead of truncating to worker+region (causing same IP for all workers)
- Add task pool gate feature with database-backed state
- Add /tasks/pool/toggle endpoint and UI toggle button
- Fix isTaskPoolPaused() missing await in claimTask

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-13 20:17:52 -07:00
Kelly
b456fe5097 feat: Add trusted origins management UI at /users
- Create trusted_origins table for DB-backed origin management
- Add API routes for CRUD operations on trusted origins
- Add tabbed interface on /users page with Users and Trusted Origins tabs
- Seeds default trusted origins (cannaiq.co, findadispo.com, findagram.co, etc.)
- Fix TypeScript error in WorkersDashboard fingerprint type

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-13 19:54:26 -07:00
Kelly
eb5b2a876e feat(pwa): Add update prompt for new versions
- Change registerType from 'autoUpdate' to 'prompt'
- Add UpdatePrompt component that shows when new version available
- Users see banner with "Update" or "Later" buttons
- Service worker checks for updates every hour

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-12-13 19:52:51 -07:00
Kelly
ad09aadcc9 feat(cannaiq): Add PWA support with vite-plugin-pwa
- Add vite-plugin-pwa for service worker and manifest generation
- Configure workbox for asset caching and API runtime caching
- Add sharp for icon generation from SVG
- Create generate-icons.js script to create 192x192 and 512x512 PNGs
- Update build script to auto-generate icons before build

App is now installable as a PWA with offline support.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-12-13 19:49:15 -07:00
Kelly
a020e31a46 feat(treez): CDP interception client for Elasticsearch API capture
Rewrites Treez platform client to use CDP (Chrome DevTools Protocol)
interception instead of DOM scraping. Key changes:

- Uses Puppeteer Stealth plugin to bypass headless detection
- Intercepts Elasticsearch API responses via CDP Network.responseReceived
- Captures full product data including inventory levels (availableUnits)
- Adds comprehensive TypeScript types for all Treez data structures
- Updates queries.ts with automatic session management
- Fixes product-discovery-treez handler for new API shape

Tested with Best Dispensary: 142 products across 10 categories captured
with inventory data, pricing, and lab results.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-12-13 19:25:49 -07:00
Kelly
83f629fec4 feat: Add identity pool for diverse IP/fingerprint rotation
- Add worker_identities table and metro_areas for city groupings
- Create IdentityPoolService for claiming/releasing identities
- Each identity used for 3-5 tasks, then 2-3 hour cooldown
- Integrate with task-worker via USE_IDENTITY_POOL feature flag
- Update puppeteer-preflight to accept custom proxy URLs

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-13 18:46:58 -07:00
Kelly
d810592bf2 fix(ui): Show city from fingerprint data in worker dashboard
City was captured in preflight fingerprint JSON but not displayed.
Now falls back to fingerprint.detectedLocation.city if current_city is null.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-13 18:08:41 -07:00
Kelly
d02c347ef6 fix(proxy): Use correct Evomi host rp.evomi.com
Was using rpc.evomi.com which doesn't exist.
Residential proxies use rp.evomi.com:1000

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-13 17:35:43 -07:00
Kelly
d779a08bbf fix(preflight): Use Evomi proxy API before falling back to DB pool
The preflight was only checking the DB proxy pool, which was empty.
Now checks Evomi API first (when configured), then falls back to DB.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-13 17:14:23 -07:00
Kelly
1490c60d2a fix(ui): Update TasksDashboard badges for consistency
- Platform badge now shows green (emerald) for dutchie even when null/undefined
- State badge shows "ALL" (uppercase) with indigo color when no state specified
- Remove "(HTTP transport)" from store discovery description

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-13 17:10:57 -07:00
Kelly
ba15802a77 perf(puppeteer): Block analytics/tracking domains to save proxy bandwidth
Block requests to non-essential domains:
- googletagmanager.com, google-analytics.com (analytics)
- launchdarkly.com (feature flags)
- assets2.dutchie.com (CDN assets - we only need GraphQL)
- sentry.io (error tracking)
- segment.io/segment.com, amplitude.com, mixpanel.com (analytics)
- hotjar.com, fullstory.com (session recording)

Applied to both product-discovery-dutchie.ts and puppeteer-preflight.ts

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-13 16:50:28 -07:00
Kelly
d8a22fba53 docs: Add Evomi residential proxy API documentation
- Document priority order (Evomi API first, DB fallback)
- List environment variables and defaults
- Show K8s secret location
- Explain proxy URL format with geo targeting

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-13 16:47:58 -07:00
Kelly
cf99ef9e09 fix(worker): Use Evomi API first, DB proxies as fallback
- Check Evomi API availability before waiting for DB proxies
- If EVOMI_USER/EVOMI_PASS configured, proceed immediately
- Only fall back to DB proxy polling if Evomi not configured
- Added clear comments explaining proxy initialization order

This fixes workers getting stuck waiting for DB proxies when
Evomi API is available for on-demand geo-targeted proxies.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-13 16:45:52 -07:00
Kelly
3d0ea21007 style(dashboard): Remove 'scraper-' prefix from worker IDs
Display 'worker-...' instead of 'scraper-worker-...' in admin view

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-13 16:41:33 -07:00
Kelly
023cfc127f fix(preflight): Apply stored fingerprint to task browser
- Add WorkerFingerprint interface with timezone, city, state, ip, locale
- Store fingerprint in TaskWorker after preflight passes
- Pass fingerprint through TaskContext to handlers
- Apply timezone via CDP and locale via Accept-Language header
- Ensures browser fingerprint matches proxy IP location

This fixes an anti-detect failure where a timezone/locale mismatch
with the proxy IP was getting requests blocked by Cloudflare.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-13 16:40:52 -07:00
Kelly
5ea92e25af fix(dashboard): Restore preflight display with gold shield + geo
- Show gold shield icon with city/state for qualified workers
- Restore IP address, fingerprint, and antidetect status rows
- Keep geo session fields in worker interface

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-13 16:31:06 -07:00
Kelly
3b8171d94e feat(admin): Add platform badge and selector to schedules
- Add Platform column to schedules table with colored badges
  - Dutchie: emerald, Jane: pink, Treez: amber
- Add platform dropdown to schedule edit modal
- Add platform selector buttons to create task modal
- Store discovery tasks now pass state in payload

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-13 16:09:53 -07:00
Kelly
d7da0b938d feat(jane): Direct Algolia product fetch and multi-platform product-refresh
- Add fetchProductsByStoreIdDirect() for reliable Algolia product fetching
- Update product-discovery-jane to use direct Algolia instead of network interception
- Fix product-refresh handler to support both Dutchie and Jane payloads
  - Handle both `products` (Dutchie) and `hits` (Jane) formats
  - Use platform-appropriate raw_json structure for normalizers
  - Fix consecutive_misses tracking to use correct provider
  - Extract product IDs correctly (Dutchie _id vs Jane product_id)
- Add store discovery deduplication (prefer REC over MED at same location)
- Add storeTypes field to DiscoveredStore interface
- Add scripts: run-jane-store-discovery.ts, run-jane-product-discovery.ts

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-13 16:05:50 -07:00
Kelly
88e590d026 feat: Worker geo sessions for state-based task assignment
Workers are now geo-locked to a specific state for their session:
- Session = 60 minutes OR 7 store visits (whichever comes first)
- Workers ONLY claim tasks matching their assigned state
- State assignment prioritizes: most pending tasks, fewest workers

Changes:
- Migration 108: geo session columns, claim_task with geo filter,
  assign_worker_geo(), check_worker_geo_session(), worker_state_capacity view
- task-worker.ts: ensureGeoSession() method before task claiming
- worker-registry.ts: /state-capacity and /geo-sessions API endpoints
- WorkersDashboard: Show qualified icon + geo state in Preflight column

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-13 16:00:09 -07:00
Kelly
c215d11a84 feat: Platform isolation, Evomi geo-targeting, proxy management
Platform isolation:
- Rename handlers to {task}-{platform}.ts convention
- Deprecate -curl variants (now _deprecated-*)
- Platform-based routing in task-worker.ts
- Add Jane platform handlers and client

Evomi geo-targeting:
- Add dynamic proxy URL builder with state/city targeting
- Session stickiness per worker per state (30 min)
- Fallback to static proxy table when API unavailable
- Add proxy tracking columns to worker_tasks

Proxy management:
- New /proxies admin page for visibility
- Track proxy_ip, proxy_geo, proxy_source per task
- Show active sessions and task history

Validation filtering:
- Filter by validated stores (platform_dispensary_id + menu_url)
- Mark incomplete stores as deprecated
- Update all dashboard/stats queries

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-13 15:16:48 -07:00
Kelly
59e0e45f8f feat(discovery): Add self-healing and rename schedule
- Rename 'store_discovery_dutchie' to 'Store Discovery' (platform badge via platform field)
- Add self-healing: scan for stores missing payloads and queue product_discovery
- Catches stores added before chaining was implemented
- Limits to 50 stores per run to avoid overwhelming the system

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-13 14:14:21 -07:00
Kelly
e9a688fbb3 feat(api): Add stage field to dispensary PUT endpoint
Allows updating dispensary stage via API for better data management.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-13 13:37:47 -07:00
Kelly
8b3ae40089 feat: Remove Run Now, add source tracking, optimize dashboard
- Remove /run-now endpoint (use task priority instead)
- Add source tracking to worker_tasks (source, source_schedule_id, source_metadata)
- Parallelize dashboard API calls (Promise.all)
- Add 1-5 min caching to /markets/dashboard and /national/summary
- Add performance indexes for dashboard queries

Migrations:
- 104: Task source tracking columns
- 105: Dashboard performance indexes

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-13 13:23:35 -07:00
Kelly
a8fec97bcb feat: Support per-dispensary schedules (not just per-state)
- Add dispensary_id column to task_schedules table
- Update scheduler to handle single-dispensary schedules
- Update run-now endpoint to handle single-dispensary schedules
- Update frontend modal to pass dispensary_id when 1 store selected
- Fix existing "Deeply Rooted Hourly" schedule with dispensary_id=112

Now when you select ONE store and check "Make recurring", it creates
a schedule that runs for that specific store every interval.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-13 12:03:08 -07:00
Kelly
c969c7385b fix: Handle product_refresh and payload_fetch in run-now endpoint
The run-now endpoint only fanned out to stores for product_discovery
schedules, not product_refresh or payload_fetch. This caused single
tasks to be created without dispensary_id, which then failed.

Now all crawl roles (product_discovery, product_refresh, payload_fetch)
with state_code properly fan out to individual store tasks.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-13 03:49:10 -07:00
Kelly
5084cb1a85 fix: Block images/fonts/media in Puppeteer to save bandwidth
Add request interception to all Puppeteer handlers to block unnecessary
resources (images, fonts, media, stylesheets). We only need HTML/JS for
the session cookie, then the GraphQL JSON response.

This was causing 2.4GB of bandwidth usage from assets2.dutchie.com - every
page visit downloaded all product thumbnails, logos, etc.

Files updated:
- product-discovery-http.ts
- entry-point-discovery.ts
- store-discovery-http.ts
- store-discovery-state.ts
- puppeteer-preflight.ts

Note: Product images from payload are still downloaded once to MinIO
via image-storage.ts - this only blocks browser-rendered page images.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-13 03:28:12 -07:00
Kelly
ec6843dfd6 feat: Add working hours for natural traffic patterns
Workers check their timezone (from preflight IP geolocation) and roll against
the current hour's weight to determine availability. This creates natural
traffic patterns - more workers active during peak hours, fewer during
off-peak. Tasks queue up at night and drain during the day.

Migrations:
- 099: working_hours table with hourly weights by profile
- 100: Add timezone column to worker_registry
- 101: Store timezone from preflight IP geolocation
- 102: check_working_hours() function with probability roll

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-13 03:24:42 -07:00
Kelly
268429b86c feat: Use MinIO for permanent product image storage
- Rewrite image-storage.ts to use MinIO instead of ephemeral local filesystem
- Images downloaded ONCE from Dutchie CDN, stored permanently in MinIO
- Check MinIO before downloading (skipIfExists) to avoid re-downloads
- Convert images to webp before storage
- Storage path: images/products/<state>/<store>/<brand>/<product>/image-<hash>.webp
- Public URL: https://cdn.cannabrands.app/cannaiq/images/...

This fixes the 2.4GB bandwidth issue from repeatedly downloading images
that were lost when K8s pods restarted.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-13 03:24:22 -07:00
Kelly
5c08135007 feat(plugin): Add Elementor dynamic tags and product loop widget v1.7.0
WordPress Plugin:
- Add dynamic tags for all product payload fields (name, brand, price, THC, effects, etc.)
- Add Product Loop widget with filtering, sorting, and layout options
- Register CannaIQ widget category in Elementor
- Update build script to auto-upload to MinIO CDN
- Remove legacy dutchie references
- Bump version to 1.7.0

Backend:
- Redirect /downloads/* to CDN instead of serving from local filesystem

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-13 03:10:01 -07:00
Kelly
9f0d68d4c9 Revert "feat: Store full Dutchie payload in latest_raw_payload"
This reverts commit e11400566e.
2025-12-13 02:33:30 -07:00
Kelly
e11400566e feat: Store full Dutchie payload in latest_raw_payload
Now stores the complete raw product JSON from Dutchie on every
product refresh. This enables querying any Dutchie field
(terpenes, effects, description, etc.) without schema changes.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-13 02:31:00 -07:00
Kelly
987ed062d5 feat(tasks): Add "Make recurring" toggle to Create Task modal
- Add checkbox to convert one-time task into recurring schedule
- When enabled, shows schedule name, interval, and state filter options
- Schedule runs immediately after creation so tasks appear right away
- Update button text to "Create Schedule & Run" when recurring
- Remove separate "New Schedule" button from schedules section
- Update empty state text to guide users to new flow

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-13 02:27:39 -07:00
Kelly
e50f54e621 revert: Remove brand aliasing migrations
Dutchie already provides unique brand UUIDs (provider_brand_id).
No need for separate brand normalization/aliasing logic.
Use provider_brand_id for brand grouping instead.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-13 02:16:05 -07:00
Kelly
983cd71fc2 feat: Performance optimizations and preflight improvements
- Add missing /api/analytics/national/summary endpoint
- Optimize dashboard activity queries (subquery vs JOIN+GROUP BY)
- Add PreflightSummary component to Workers page with gold qualified badge
- Add preflight retry logic - workers retry every 30s until qualified
- Run stale task cleanup on ALL workers (not just worker-0)
- Add preflight fields to worker-registry API (ip, fingerprint, is_qualified)

Database indexes added:
- idx_store_products_created_at (for recent products)
- idx_dispensaries_last_crawl_at (for recent scrapes)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-13 02:06:33 -07:00
Kelly
7849ee0256 feat: Add POST /api/tasks/fix-null-methods endpoint
Updates null method tasks to 'http' for proper worker qualification

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-13 01:33:09 -07:00
Kelly
432842f442 fix: Ensure all crawl tasks use method='http' transport
- product_discovery → product_refresh now sets method: 'http'
- product_refresh → entry_point_discovery now sets method: 'http'
- All crawl tasks now require HTTP preflight to claim

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-13 01:31:22 -07:00
Kelly
94ebbb2497 fix: State dropdown and locked platform in schedule modal
- State Code → State dropdown with available states
- Platform field locked to 'dutchie' (read-only)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-13 01:26:58 -07:00
Kelly
e826a4dd3e fix: Add consecutive_failures column to migration
🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-13 01:20:59 -07:00
Kelly
b7e96359ef feat: Auto-retry failed tasks with exponential backoff
- Hard failures now auto-retry up to 3 times
- Exponential backoff: 5, 10, 20 minutes
- Only permanently fails after max retries exceeded
- Soft failures still requeue immediately

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-13 01:19:48 -07:00
Kelly
b1c1955082 feat: Add POST /api/tasks/retry-failed endpoint
Resets failed tasks back to pending for retry.
Options: role (filter), max_age_hours (default 24), limit (default 100)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-13 01:18:07 -07:00
Kelly
95c23fcdff fix: Add consecutive_successes column to dispensaries table
Required for stage checkpoint tracking in task handlers

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-13 01:15:59 -07:00
Kelly
7067db68fc feat: Add server-side brand search to Intelligence page
- Backend: Add 'search' param to /api/admin/intelligence/brands
- Frontend: Debounced search triggers server-side query
- Now searches ALL brands, not just top 500

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-13 01:14:02 -07:00
Kelly
271faf0f00 perf: Optimize dashboard queries for faster load times
- Use pg_stat for approximate product count (instant vs full scan)
- LIMIT on DISTINCT queries for brand/category counts
- Single combined query (reduces round trips)
- Add index on store_product_snapshots.captured_at
- Add index on worker_tasks.worker_id and created_at

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-13 01:09:02 -07:00
Kelly
291a8279bd fix(entry-point-discovery): Self-healing duplicate detection
When resolving platform_dispensary_id, check if it already exists on
another dispensary. If so, mark current dispensary as duplicate instead
of failing with unique constraint violation.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-13 00:46:10 -07:00
Kelly
b69d03c02f feat: Add stage checkpoints to task handlers and fix worker name display
Stage checkpoints (observational, non-blocking):
- product_refresh: success → 'production', failure tracking → 'failing' after 3
- product_discovery: success → 'hydrating', failure tracking
- entry_point_discovery: success → 'promoted', failure tracking

Worker name fix:
- Join worker_registry in tasks query to get friendly_name directly
- Update TasksDashboard to use worker_name from joined query
- Fallback to registry lookup then pod ID suffix

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-13 00:43:00 -07:00
Kelly
54f59c6082 fix(analytics): Fix market-summary store count and add search indexes
- market-summary now counts from store_products table (not product_variants)
- Added trigram indexes for fast ILIKE product searches

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-13 00:35:17 -07:00
Kelly
c16c3083b1 fix(cannaiq): Fix TasksDashboard worker API call
- Add getWorkerRegistry() method to API client
- Change TasksDashboard to use getWorkerRegistry() instead of non-existent getWorkers()

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-13 00:30:01 -07:00
Kelly
656b00332e fix: Show worker friendly names and calculate duration in Tasks page
- Fetch workers list to get friendly names
- Create workerNameMap lookup for friendly names
- Show friendly name (e.g., "Yuki") instead of pod suffix
- Calculate duration from started_at/completed_at when duration_sec is null
- Show elapsed time with "..." suffix for running tasks
- Also search by worker friendly name

🤖 Generated with [Claude Code](https://claude.com/claude-code)
2025-12-13 00:21:02 -07:00
Kelly
843f6ded75 fix: Remove deploy status, autorefresh, and refresh button from Orchestrator
- Remove DeployStatus component import and usage
- Remove autoRefresh checkbox and refreshing state
- Remove refresh button
- Simplify header layout

🤖 Generated with [Claude Code](https://claude.com/claude-code)
2025-12-13 00:19:03 -07:00
Kelly
0175a6817e fix: Fix chain badges and table styling in IntelligenceStores
- Add whitespace-nowrap to badges to prevent text wrapping
- Add border to chain badges for consistency
- Update table styling to match Workers/Tasks pages:
  - Uppercase headers with proper tracking
  - Consistent padding and text sizes
  - Right-align numeric columns (SKUs, Snapshots)
  - Purple Dashboard button matching Workers page style
- Increase table max-height for better visibility

🤖 Generated with [Claude Code](https://claude.com/claude-code)
2025-12-13 00:17:58 -07:00
Kelly
24dd301d84 fix: State stores endpoint returns only Dutchie stores with products
- Filter by menu_type = 'dutchie'
- Use INNER JOIN + HAVING to only return stores with products
- Stores without product discovery are excluded

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-13 00:15:23 -07:00
Kelly
1d6211db19 perf: Add store_intelligence_cache for fast /intelligence/stores
- Remove costly correlated subquery (snapshot_count) from /stores endpoint
- Add migration 092 for store_intelligence_cache table
- Update analytics_refresh to populate cache with pre-computed metrics
- Add /intelligence/stores/cached endpoint using cache table

Performance: O(n*m) → O(1) for snapshot counts, ~10x faster response

🤖 Generated with [Claude Code](https://claude.com/claude-code)
2025-12-13 00:13:41 -07:00
Kelly
e62f927218 feat: Auto-retry failed proxies after cooldown period
- Add last_failed_at column to track failure time
- Failed proxies auto-retry after 4 hours (configurable)
- Proxies permanently failed after 10 failures
- Add /retry-stats and /reenable-failed API endpoints
- markProxySuccess() re-enables recovered proxies

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-13 00:08:44 -07:00
Kelly
675f42841e feat: Import 500 Evomi residential proxies
- Update unique constraint to include username/password for session-based proxies
- All proxies imported as inactive (run Test All to verify and activate)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-13 00:05:21 -07:00
Kelly
472dbdf418 fix: Support http://host:port:user:pass proxy format in bulk import
Add regex pattern to parseProxyLine() for non-standard colon-separated
format used by some proxy providers (e.g., Evomi residential proxies).

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-12 23:58:24 -07:00
Kelly
5fcc03aff4 style: Use lowercase state codes in API URLs
/api/state/az/summary instead of /api/state/AZ/summary
Backend already handles case conversion

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-12 23:54:14 -07:00
Kelly
2d489e068b fix: Correct mv_state_metrics to use brand_name_raw
- Changed unique_brands from COUNT(brand_id) to COUNT(brand_name_raw)
- brand_id is often NULL, brand_name_raw has actual data
- AZ now correctly shows 462 brands (was 144)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-12 23:50:53 -07:00
Kelly
470097eb19 fix: Intelligence stores endpoint and UI consistency
- Fix stores endpoint to only show stores with actual products (INNER JOIN + HAVING)
- Update badge colors to match Workers/Tasks dashboard style
- Use emerald/amber/red/gray color scheme consistently
- Chain badge now uses purple (bg-purple-100)
- Add migration 092 to fix Trulieve store URLs

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-12 23:37:28 -07:00
Kelly
5af86edf83 feat: Update last_payload_at and last_store_discovery_at timestamps
- payload-storage.ts: Update dispensaries.last_payload_at when saving payload
- promotion.ts: Update dispensaries.last_store_discovery_at on INSERT/UPDATE

🤖 Generated with [Claude Code](https://claude.com/claude-code)
2025-12-12 23:30:57 -07:00
Kelly
55b26e9153 feat: Auto-healing entry_point_discovery with browser-first transport
- Rewrote entry_point_discovery with auto-healing scheme:
  1. Check dutchie_discovery_locations for existing platform_location_id
  2. Browser-based GraphQL with 5x network retries
  3. Mark as needs_investigation on hard failure
- Browser (Puppeteer) is now DEFAULT transport - curl only when explicit
- Added migration 091 for tracking columns:
  - last_store_discovery_at: When store_discovery updated record
  - last_payload_at: When last product payload was saved
- Updated CODEBASE_MAP.md with transport rules documentation

🤖 Generated with [Claude Code](https://claude.com/claude-code)
2025-12-12 22:55:21 -07:00
Kelly
97bfdb9618 fix: Show worker friendly names in Live Activity panel 2025-12-12 22:37:40 -07:00
Kelly
6f49c5e84a fix: Use database task counts for Completed/Failed stats on Workers page 2025-12-12 22:36:37 -07:00
Kelly
a6f09ee6e3 fix: Calculate stale task count from heartbeat age 2025-12-12 22:15:16 -07:00
Kelly
c62f8cbf06 feat: Parallelized store discovery, modification tracking, and task deduplication
Store Discovery Parallelization:
- Add store_discovery_state handler for per-state parallel discovery
- Add POST /api/tasks/batch/store-discovery endpoint
- 8 workers can now process states in parallel (~30-45 min vs 3+ hours)

Modification Tracking (Migration 090):
- Add last_modified_at, last_modified_by_task, last_modified_task_id to dispensaries
- Add same columns to store_products
- Update all handlers to set tracking info on modifications

Stale Task Recovery:
- Add periodic stale cleanup every 10 minutes (worker-0 only)
- Prevents orphaned tasks from blocking queue after worker crashes

Task Deduplication:
- createStaggeredTasks now skips if pending/active task exists for same role
- Skips if same role completed within last 4 hours
- API responses include skipped count

🤖 Generated with [Claude Code](https://claude.com/claude-code)
2025-12-12 22:15:04 -07:00
kelly
e4e8438d8b Merge pull request 'feat: Worker improvements and Run Now duplicate prevention' (#64) from feat/minio-payload-storage into master 2025-12-13 03:35:48 +00:00
Kelly
822d2b0609 feat: Idempotent entry_point_discovery with bulk endpoint
- Track id_resolution_status, attempts, and errors in handler
- Add POST /api/tasks/batch/entry-point-discovery endpoint
- Skip already-resolved stores, retry failed with force flag
2025-12-12 20:27:36 -07:00
Kelly
dfd36dacf8 fix: Show next run time correctly for schedules 2025-12-12 20:21:15 -07:00
Kelly
4ea7139ed5 feat: Add step reporting to all task handlers
Added updateStep() calls to:
- payload-fetch-curl: loading → preflight → fetching → saving
- product-refresh: loading → normalizing → upserting
- store-discovery-http: starting → preflight → navigating → fetching

This enables real-time visibility of worker progress in the dashboard.
2025-12-12 20:14:00 -07:00
Kelly
63023a4061 feat: Worker improvements and Run Now duplicate prevention
- Fix Run Now to prevent duplicate task creation
- Add loading state to Run Now button in UI
- Return early when no stores need refresh
- Worker dashboard improvements
- Browser pooling architecture updates
- K8s worker config updates (8 replicas, 3 concurrent tasks)
2025-12-12 20:11:31 -07:00
kelly
13a80e893e Merge pull request 'feat: Add MinIO/S3 support for payload storage' (#63) from feat/minio-payload-storage into master
Reviewed-on: https://code.cannabrands.app/Creationshop/dispensary-scraper/pulls/63
2025-12-12 19:00:29 +00:00
Kelly
c98c409f59 feat: Add MinIO/S3 support for payload storage
- Update payload-storage.ts to use MinIO when configured
- Payloads stored at: cannaiq/payloads/{year}/{month}/{day}/store_{id}_{ts}.json.gz
- Falls back to local filesystem when MINIO_* env vars not set
- Enables shared storage across all worker pods
- Fixes ephemeral storage issue where payloads were lost on pod restart

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-12 11:30:57 -07:00
kelly
6c8993f7bd Merge pull request 'fix(workers): Increase max concurrent tasks to 15' (#62) from feat/proxy-reload-and-bulk-import into master
Reviewed-on: https://code.cannabrands.app/Creationshop/dispensary-scraper/pulls/62
2025-12-12 18:19:04 +00:00
Kelly
92f88fdcd6 fix(workers): Increase max concurrent tasks to 15 and add K8s permission rule
- Change MAX_CONCURRENT_TASKS default from 3 to 15
- Add CLAUDE.md rule requiring explicit permission before kubectl commands

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-12 10:54:33 -07:00
kelly
fd4a9b1434 Merge pull request 'feat(scheduler): Immutable schedules and HTTP-only pipeline' (#61) from feat/proxy-reload-and-bulk-import into master
Reviewed-on: https://code.cannabrands.app/Creationshop/dispensary-scraper/pulls/61
2025-12-12 16:37:16 +00:00
Kelly
832ef1cf83 feat(scheduler): Immutable schedules and HTTP-only pipeline
## Changes
- **Migration 089**: Add is_immutable and method columns to task_schedules
  - Per-state product_discovery schedules (4h default)
  - Store discovery weekly (168h)
  - All schedules use HTTP transport (Puppeteer/browser)
- **Task Scheduler**: HTTP-only product discovery with per-state scheduling
  - Each state has its own immutable schedule
  - Schedules can be edited (interval/priority) but not deleted
- **TasksDashboard UI**: Full immutability support
  - Lock icon for immutable schedules
  - State and Method columns in schedules table
  - Disabled delete for immutable, restricted edit fields
- **Store Discovery HTTP**: Auto-queue product_discovery for new stores
- **Migration 088**: Discovery payloads storage schema

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-12 09:24:08 -07:00
kelly
b05eaceaf0 Merge pull request 'feat(tasks): Dual transport handlers and self-healing product_refresh' (#60) from feat/proxy-reload-and-bulk-import into master
Reviewed-on: https://code.cannabrands.app/Creationshop/dispensary-scraper/pulls/60
2025-12-12 10:33:13 +00:00
kelly
909470d3dc Merge pull request 'fix(proxy): Convert non-standard proxy URL format and simplify preflight' (#59) from feat/proxy-reload-and-bulk-import into master
Reviewed-on: https://code.cannabrands.app/Creationshop/dispensary-scraper/pulls/59
2025-12-12 10:03:14 +00:00
Kelly
9a24b4896c feat(tasks): Dual transport handlers and self-healing product_refresh
- Rename product-discovery.ts to product-discovery-curl.ts (axios-based)
- Rename payload-fetch.ts to payload-fetch-curl.ts
- Add product-discovery-http.ts (Puppeteer browser-based handler)
- Add method field to CreateTaskParams for transport selection
- Update task-service to insert method column on task creation
- Update task-worker with getHandlerForTask() for dual transport routing
- product_refresh now queues upstream tasks when no payload exists:
  - Has platform_dispensary_id → queues product_discovery (http)
  - No platform_dispensary_id → queues entry_point_discovery

This enables HTTP workers to pick up browser-based tasks while curl
workers handle axios-based tasks, and prevents product_refresh from
failing repeatedly when no crawl has been performed.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-12 03:02:56 -07:00
Kelly
dd8fce6e35 fix(proxy): Convert non-standard proxy URL format and simplify preflight
- CrawlRotator.getProxyUrl() now converts non-standard format (http://host:port:user:pass) to standard format (http://user:pass@host:port)
- Simplify puppeteer preflight to only use ipify.org for IP verification (much lighter than fingerprint.com)
- Remove heavy anti-detect site tests from preflight - not needed, trust stealth plugin
- Fixes 503 errors when using session-based residential proxies

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-12 02:13:51 -07:00
kelly
65b96d9cb9 Merge pull request 'feat(workers): Add proxy reload, staggered tasks, and bulk proxy import' (#58) from feat/proxy-reload-and-bulk-import into master
Reviewed-on: https://code.cannabrands.app/Creationshop/dispensary-scraper/pulls/58
2025-12-12 09:11:23 +00:00
Kelly
f82eed4dc3 feat(workers): Add proxy reload, staggered tasks, and bulk proxy import
- Periodic proxy reload: Workers now reload proxies every 60s to pick up changes
- Staggered task scheduling: New API endpoints for creating tasks with delays
- Bulk proxy import: Script supports multiple URL formats including host:port:user:pass
- Proxy URL column: Migration 086 adds proxy_url for non-standard formats

Key changes:
- crawl-rotator.ts: Added reloadIfStale(), isStale(), setReloadInterval()
- task-worker.ts: Calls reloadIfStale() in main loop
- task-service.ts: Added createStaggeredTasks() and createAZStoreTasks()
- tasks.ts: Added POST /batch/staggered and /batch/az-stores endpoints
- import-proxies.ts: New script for bulk proxy import
- CLAUDE.md: Documented staggered task workflow

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-12 01:53:15 -07:00
kelly
d997ec51a2 Merge pull request 'feat(tasks): Consolidate schedule management into task_schedules' (#57) from feat/task-schedules-consolidation into master
Reviewed-on: https://code.cannabrands.app/Creationshop/dispensary-scraper/pulls/57
2025-12-12 08:31:29 +00:00
Kelly
6490df9faf feat(tasks): Consolidate schedule management into task_schedules
- Add schedule CRUD endpoints to /api/tasks/schedules
- Add Schedules section to TasksDashboard with edit/delete/bulk actions
- Deprecate job_schedules table (entries disabled in DB)
- Mark CrawlSchedulePage as deprecated (removed from menu)
- Add deprecation comments to legacy schedule methods in api.ts
- Add migration comments to workers.ts explaining consolidation

Key changes:
- Schedule management now at /admin/tasks instead of /admin/schedule
- task_schedules uses interval_hours (simpler than base_interval_minutes + jitter)
- All schedule routes placed before /:id to avoid Express route conflicts

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-12 01:15:21 -07:00
kelly
d86190912f Merge pull request 'feat(api): Add payload query API and trusted origins management' (#51) from feat/query-api-and-trusted-origins into master
Reviewed-on: https://code.cannabrands.app/Creationshop/dispensary-scraper/pulls/51
2025-12-12 07:49:54 +00:00
kelly
a077f81c65 Merge pull request 'fix(preflight): Phase 2 - Correct parameter order and add IP/fingerprint reporting' (#56) from feat/preflight-phase2-reporting into master 2025-12-12 07:35:02 +00:00
Kelly
daab0ae9b2 feat(api): Add payload query API and trusted origins management
Query API:
- GET /api/payloads/store/:id/query - Filter products with flexible params
  (brand, category, price_min/max, thc_min/max, search, sort, pagination)
- GET /api/payloads/store/:id/aggregate - Group by brand/category with metrics
  (count, avg_price, min_price, max_price, avg_thc, in_stock_count)
- Documentation at docs/QUERY_API.md

Trusted Origins Admin:
- GET/POST/PUT/DELETE /api/admin/trusted-origins - Manage auth bypass list
- Trusted IPs, domains, and regex patterns stored in DB
- 5-minute cache with invalidation on admin updates
- Fallback to hardcoded defaults if DB unavailable
- Migration 085 creates table with seed data

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-11 23:28:05 -07:00
214 changed files with 43313 additions and 2248 deletions

7
.gitignore vendored
View File

@@ -51,3 +51,10 @@ coverage/
*.tmp *.tmp
*.temp *.temp
llm-scraper/ llm-scraper/
# Claude Code
.claude/
# Test/debug scripts
backend/scripts/test-*.ts
backend/scripts/run-*.ts

View File

@@ -3,7 +3,7 @@ steps:
# PR VALIDATION: Parallel type checks (PRs only) # PR VALIDATION: Parallel type checks (PRs only)
# =========================================== # ===========================================
typecheck-backend: typecheck-backend:
image: code.cannabrands.app/creationshop/node:20 image: node:22
commands: commands:
- cd backend - cd backend
- npm ci --prefer-offline - npm ci --prefer-offline
@@ -13,7 +13,7 @@ steps:
event: pull_request event: pull_request
typecheck-cannaiq: typecheck-cannaiq:
image: code.cannabrands.app/creationshop/node:20 image: node:22
commands: commands:
- cd cannaiq - cd cannaiq
- npm ci --prefer-offline - npm ci --prefer-offline
@@ -23,7 +23,7 @@ steps:
event: pull_request event: pull_request
typecheck-findadispo: typecheck-findadispo:
image: code.cannabrands.app/creationshop/node:20 image: node:22
commands: commands:
- cd findadispo/frontend - cd findadispo/frontend
- npm ci --prefer-offline - npm ci --prefer-offline
@@ -33,7 +33,7 @@ steps:
event: pull_request event: pull_request
typecheck-findagram: typecheck-findagram:
image: code.cannabrands.app/creationshop/node:20 image: node:22
commands: commands:
- cd findagram/frontend - cd findagram/frontend
- npm ci --prefer-offline - npm ci --prefer-offline
@@ -58,7 +58,7 @@ steps:
-H "Authorization: token $GITEA_TOKEN" \ -H "Authorization: token $GITEA_TOKEN" \
-H "Content-Type: application/json" \ -H "Content-Type: application/json" \
-d '{"Do":"merge"}' \ -d '{"Do":"merge"}' \
"https://code.cannabrands.app/api/v1/repos/Creationshop/dispensary-scraper/pulls/${CI_COMMIT_PULL_REQUEST}/merge" "https://git.spdy.io/api/v1/repos/Creationshop/cannaiq/pulls/${CI_COMMIT_PULL_REQUEST}/merge"
depends_on: depends_on:
- typecheck-backend - typecheck-backend
- typecheck-cannaiq - typecheck-cannaiq
@@ -68,127 +68,128 @@ steps:
event: pull_request event: pull_request
# =========================================== # ===========================================
# MASTER DEPLOY: Parallel Docker builds # DOCKER: Multi-stage builds with layer caching
# NOTE: cache_from/cache_to removed due to plugin bug splitting on commas
# =========================================== # ===========================================
docker-backend: docker-backend:
image: woodpeckerci/plugin-docker-buildx image: gcr.io/kaniko-project/executor:debug
settings: commands:
registry: code.cannabrands.app - /kaniko/executor
repo: code.cannabrands.app/creationshop/dispensary-scraper --context=/woodpecker/src/git.spdy.io/Creationshop/cannaiq/backend
tags: --dockerfile=/woodpecker/src/git.spdy.io/Creationshop/cannaiq/backend/Dockerfile
- latest --destination=10.100.9.70:5000/cannaiq/backend:latest
- ${CI_COMMIT_SHA:0:8} --destination=10.100.9.70:5000/cannaiq/backend:sha-${CI_COMMIT_SHA:0:8}
dockerfile: backend/Dockerfile --build-arg=APP_BUILD_VERSION=sha-${CI_COMMIT_SHA:0:8}
context: backend --build-arg=APP_GIT_SHA=${CI_COMMIT_SHA}
username: --build-arg=APP_BUILD_TIME=${CI_PIPELINE_CREATED}
from_secret: registry_username --registry-mirror=10.100.9.70:5000
password: --insecure-registry=10.100.9.70:5000
from_secret: registry_password --cache=true
platforms: linux/amd64 --cache-repo=10.100.9.70:5000/cannaiq/cache-backend
provenance: false --cache-ttl=168h
build_args:
APP_BUILD_VERSION: ${CI_COMMIT_SHA:0:8}
APP_GIT_SHA: ${CI_COMMIT_SHA}
APP_BUILD_TIME: ${CI_PIPELINE_CREATED}
CONTAINER_IMAGE_TAG: ${CI_COMMIT_SHA:0:8}
depends_on: [] depends_on: []
when: when:
branch: master branch: [master, develop]
event: push event: push
docker-cannaiq: docker-cannaiq:
image: woodpeckerci/plugin-docker-buildx image: gcr.io/kaniko-project/executor:debug
settings: commands:
registry: code.cannabrands.app - /kaniko/executor
repo: code.cannabrands.app/creationshop/cannaiq-frontend --context=/woodpecker/src/git.spdy.io/Creationshop/cannaiq/cannaiq
tags: --dockerfile=/woodpecker/src/git.spdy.io/Creationshop/cannaiq/cannaiq/Dockerfile
- latest --destination=10.100.9.70:5000/cannaiq/frontend:latest
- ${CI_COMMIT_SHA:0:8} --destination=10.100.9.70:5000/cannaiq/frontend:sha-${CI_COMMIT_SHA:0:8}
dockerfile: cannaiq/Dockerfile --registry-mirror=10.100.9.70:5000
context: cannaiq --insecure-registry=10.100.9.70:5000
username: --cache=true
from_secret: registry_username --cache-repo=10.100.9.70:5000/cannaiq/cache-cannaiq
password: --cache-ttl=168h
from_secret: registry_password
platforms: linux/amd64
provenance: false
depends_on: [] depends_on: []
when: when:
branch: master branch: [master, develop]
event: push event: push
docker-findadispo: docker-findadispo:
image: woodpeckerci/plugin-docker-buildx image: gcr.io/kaniko-project/executor:debug
settings: commands:
registry: code.cannabrands.app - /kaniko/executor
repo: code.cannabrands.app/creationshop/findadispo-frontend --context=/woodpecker/src/git.spdy.io/Creationshop/cannaiq/findadispo/frontend
tags: --dockerfile=/woodpecker/src/git.spdy.io/Creationshop/cannaiq/findadispo/frontend/Dockerfile
- latest --destination=10.100.9.70:5000/cannaiq/findadispo:latest
- ${CI_COMMIT_SHA:0:8} --destination=10.100.9.70:5000/cannaiq/findadispo:sha-${CI_COMMIT_SHA:0:8}
dockerfile: findadispo/frontend/Dockerfile --registry-mirror=10.100.9.70:5000
context: findadispo/frontend --insecure-registry=10.100.9.70:5000
username: --cache=true
from_secret: registry_username --cache-repo=10.100.9.70:5000/cannaiq/cache-findadispo
password: --cache-ttl=168h
from_secret: registry_password
platforms: linux/amd64
provenance: false
depends_on: [] depends_on: []
when: when:
branch: master branch: [master, develop]
event: push event: push
docker-findagram: docker-findagram:
image: woodpeckerci/plugin-docker-buildx image: gcr.io/kaniko-project/executor:debug
settings: commands:
registry: code.cannabrands.app - /kaniko/executor
repo: code.cannabrands.app/creationshop/findagram-frontend --context=/woodpecker/src/git.spdy.io/Creationshop/cannaiq/findagram/frontend
tags: --dockerfile=/woodpecker/src/git.spdy.io/Creationshop/cannaiq/findagram/frontend/Dockerfile
- latest --destination=10.100.9.70:5000/cannaiq/findagram:latest
- ${CI_COMMIT_SHA:0:8} --destination=10.100.9.70:5000/cannaiq/findagram:sha-${CI_COMMIT_SHA:0:8}
dockerfile: findagram/frontend/Dockerfile --registry-mirror=10.100.9.70:5000
context: findagram/frontend --insecure-registry=10.100.9.70:5000
username: --cache=true
from_secret: registry_username --cache-repo=10.100.9.70:5000/cannaiq/cache-findagram
password: --cache-ttl=168h
from_secret: registry_password
platforms: linux/amd64
provenance: false
depends_on: [] depends_on: []
when: when:
branch: master branch: [master, develop]
event: push event: push
# =========================================== # ===========================================
# STAGE 3: Deploy and Run Migrations # DEPLOY: Pull from local registry
# =========================================== # ===========================================
deploy: deploy:
image: bitnami/kubectl:latest image: bitnami/kubectl:latest
environment: environment:
KUBECONFIG_CONTENT: K8S_TOKEN:
from_secret: kubeconfig_data from_secret: k8s_token
commands: commands:
- mkdir -p ~/.kube - mkdir -p ~/.kube
- echo "$KUBECONFIG_CONTENT" | tr -d '[:space:]' | base64 -d > ~/.kube/config - |
cat > ~/.kube/config << KUBEEOF
apiVersion: v1
kind: Config
clusters:
- cluster:
certificate-authority-data: LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUJkakNDQVIyZ0F3SUJBZ0lCQURBS0JnZ3Foa2pPUFFRREFqQWpNU0V3SHdZRFZRUUREQmhyTTNNdGMyVnkKZG1WeUxXTmhRREUzTmpVM05UUTNPRE13SGhjTk1qVXhNakUwTWpNeU5qSXpXaGNOTXpVeE1qRXlNak15TmpJegpXakFqTVNFd0h3WURWUVFEREJock0zTXRjMlZ5ZG1WeUxXTmhRREUzTmpVM05UUTNPRE13V1RBVEJnY3Foa2pPClBRSUJCZ2dxaGtqT1BRTUJCd05DQUFRWDRNdFJRTW5lWVJVV0s2cjZ3VEV2WjAxNnV4T3NUR3JJZ013TXVnNGwKajQ1bHZ6ZkM1WE1NY1pESnUxZ0t1dVJhVGxlb0xVOVJnSERIUUI4TUwzNTJvMEl3UURBT0JnTlZIUThCQWY4RQpCQU1DQXFRd0R3WURWUjBUQVFIL0JBVXdBd0VCL3pBZEJnTlZIUTRFRmdRVXIzNDZpNE42TFhzaEZsREhvSlU0CjJ1RjZseGN3Q2dZSUtvWkl6ajBFQXdJRFJ3QXdSQUlnVUtqdWRFQWJyS1JDVHROVXZTc1Rmb3FEaHFSeDM5MkYKTFFSVWlKK0hCVElDSUJqOFIxbG1zSnFSRkRHMEpwMGN4OG5ZZnFCaElRQzh6WWdRdTdBZmR4L3IKLS0tLS1FTkQgQ0VSVElGSUNBVEUtLS0tLQo=
server: https://10.100.6.10:6443
name: spdy-k3s
contexts:
- context:
cluster: spdy-k3s
namespace: cannaiq
user: cannaiq-admin
name: cannaiq
current-context: cannaiq
users:
- name: cannaiq-admin
user:
token: $K8S_TOKEN
KUBEEOF
- chmod 600 ~/.kube/config - chmod 600 ~/.kube/config
# Deploy backend first - kubectl set image deployment/scraper scraper=10.100.9.70:5000/cannaiq/backend:sha-${CI_COMMIT_SHA:0:8} -n cannaiq
- kubectl set image deployment/scraper scraper=code.cannabrands.app/creationshop/dispensary-scraper:${CI_COMMIT_SHA:0:8} -n dispensary-scraper - kubectl rollout status deployment/scraper -n cannaiq --timeout=300s
- kubectl rollout status deployment/scraper -n dispensary-scraper --timeout=300s - REPLICAS=$(kubectl get deployment scraper-worker -n cannaiq -o jsonpath='{.spec.replicas}'); if [ "$REPLICAS" = "0" ]; then kubectl scale deployment/scraper-worker --replicas=5 -n cannaiq; fi
# Note: Migrations run automatically at startup via auto-migrate - kubectl set image deployment/scraper-worker worker=10.100.9.70:5000/cannaiq/backend:sha-${CI_COMMIT_SHA:0:8} -n cannaiq
# Deploy remaining services - kubectl set image deployment/cannaiq-frontend cannaiq-frontend=10.100.9.70:5000/cannaiq/frontend:sha-${CI_COMMIT_SHA:0:8} -n cannaiq
# Resilience: ensure workers are scaled up if at 0 - kubectl set image deployment/findadispo-frontend findadispo-frontend=10.100.9.70:5000/cannaiq/findadispo:sha-${CI_COMMIT_SHA:0:8} -n cannaiq
- REPLICAS=$(kubectl get deployment scraper-worker -n dispensary-scraper -o jsonpath='{.spec.replicas}'); if [ "$REPLICAS" = "0" ]; then echo "Scaling workers from 0 to 5"; kubectl scale deployment/scraper-worker --replicas=5 -n dispensary-scraper; fi - kubectl set image deployment/findagram-frontend findagram-frontend=10.100.9.70:5000/cannaiq/findagram:sha-${CI_COMMIT_SHA:0:8} -n cannaiq
- kubectl set image deployment/scraper-worker worker=code.cannabrands.app/creationshop/dispensary-scraper:${CI_COMMIT_SHA:0:8} -n dispensary-scraper - kubectl rollout status deployment/cannaiq-frontend -n cannaiq --timeout=120s
- kubectl set image deployment/cannaiq-frontend cannaiq-frontend=code.cannabrands.app/creationshop/cannaiq-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
- kubectl set image deployment/findadispo-frontend findadispo-frontend=code.cannabrands.app/creationshop/findadispo-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
- kubectl set image deployment/findagram-frontend findagram-frontend=code.cannabrands.app/creationshop/findagram-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
- kubectl rollout status deployment/cannaiq-frontend -n dispensary-scraper --timeout=120s
depends_on: depends_on:
- docker-backend - docker-backend
- docker-cannaiq - docker-cannaiq
- docker-findadispo - docker-findadispo
- docker-findagram - docker-findagram
when: when:
branch: master branch: [master, develop]
event: push event: push

View File

@@ -1,191 +0,0 @@
steps:
# ===========================================
# PR VALIDATION: Only typecheck changed projects
# ===========================================
typecheck-backend:
image: code.cannabrands.app/creationshop/node:20
commands:
- npm config set cache /npm-cache/backend --global
- cd backend
- npm ci --prefer-offline
- npx tsc --noEmit
volumes:
- npm-cache:/npm-cache
depends_on: []
when:
event: pull_request
path:
include: ['backend/**']
typecheck-cannaiq:
image: code.cannabrands.app/creationshop/node:20
commands:
- npm config set cache /npm-cache/cannaiq --global
- cd cannaiq
- npm ci --prefer-offline
- npx tsc --noEmit
volumes:
- npm-cache:/npm-cache
depends_on: []
when:
event: pull_request
path:
include: ['cannaiq/**']
# findadispo/findagram typechecks skipped - they have || true anyway
# ===========================================
# AUTO-MERGE: Merge PR after all checks pass
# ===========================================
auto-merge:
image: alpine:latest
environment:
GITEA_TOKEN:
from_secret: gitea_token
commands:
- apk add --no-cache curl
- |
echo "Merging PR #${CI_COMMIT_PULL_REQUEST}..."
curl -s -X POST \
-H "Authorization: token $GITEA_TOKEN" \
-H "Content-Type: application/json" \
-d '{"Do":"merge"}' \
"https://code.cannabrands.app/api/v1/repos/Creationshop/dispensary-scraper/pulls/${CI_COMMIT_PULL_REQUEST}/merge"
depends_on:
- typecheck-backend
- typecheck-cannaiq
when:
event: pull_request
# ===========================================
# MASTER DEPLOY: Parallel Docker builds
# ===========================================
docker-backend:
image: woodpeckerci/plugin-docker-buildx
settings:
registry: code.cannabrands.app
repo: code.cannabrands.app/creationshop/dispensary-scraper
tags:
- latest
- ${CI_COMMIT_SHA:0:8}
dockerfile: backend/Dockerfile
context: backend
username:
from_secret: registry_username
password:
from_secret: registry_password
platforms: linux/amd64
provenance: false
cache_from: type=registry,ref=code.cannabrands.app/creationshop/dispensary-scraper:cache
cache_to: type=registry,ref=code.cannabrands.app/creationshop/dispensary-scraper:cache,mode=max
build_args:
APP_BUILD_VERSION: ${CI_COMMIT_SHA:0:8}
APP_GIT_SHA: ${CI_COMMIT_SHA}
APP_BUILD_TIME: ${CI_PIPELINE_CREATED}
CONTAINER_IMAGE_TAG: ${CI_COMMIT_SHA:0:8}
depends_on: []
when:
branch: master
event: push
docker-cannaiq:
image: woodpeckerci/plugin-docker-buildx
settings:
registry: code.cannabrands.app
repo: code.cannabrands.app/creationshop/cannaiq-frontend
tags:
- latest
- ${CI_COMMIT_SHA:0:8}
dockerfile: cannaiq/Dockerfile
context: cannaiq
username:
from_secret: registry_username
password:
from_secret: registry_password
platforms: linux/amd64
provenance: false
cache_from: type=registry,ref=code.cannabrands.app/creationshop/cannaiq-frontend:cache
cache_to: type=registry,ref=code.cannabrands.app/creationshop/cannaiq-frontend:cache,mode=max
depends_on: []
when:
branch: master
event: push
docker-findadispo:
image: woodpeckerci/plugin-docker-buildx
settings:
registry: code.cannabrands.app
repo: code.cannabrands.app/creationshop/findadispo-frontend
tags:
- latest
- ${CI_COMMIT_SHA:0:8}
dockerfile: findadispo/frontend/Dockerfile
context: findadispo/frontend
username:
from_secret: registry_username
password:
from_secret: registry_password
platforms: linux/amd64
provenance: false
cache_from: type=registry,ref=code.cannabrands.app/creationshop/findadispo-frontend:cache
cache_to: type=registry,ref=code.cannabrands.app/creationshop/findadispo-frontend:cache,mode=max
depends_on: []
when:
branch: master
event: push
docker-findagram:
image: woodpeckerci/plugin-docker-buildx
settings:
registry: code.cannabrands.app
repo: code.cannabrands.app/creationshop/findagram-frontend
tags:
- latest
- ${CI_COMMIT_SHA:0:8}
dockerfile: findagram/frontend/Dockerfile
context: findagram/frontend
username:
from_secret: registry_username
password:
from_secret: registry_password
platforms: linux/amd64
provenance: false
cache_from: type=registry,ref=code.cannabrands.app/creationshop/findagram-frontend:cache
cache_to: type=registry,ref=code.cannabrands.app/creationshop/findagram-frontend:cache,mode=max
depends_on: []
when:
branch: master
event: push
# ===========================================
# STAGE 3: Deploy and Run Migrations
# ===========================================
deploy:
image: bitnami/kubectl:latest
environment:
KUBECONFIG_CONTENT:
from_secret: kubeconfig_data
commands:
- mkdir -p ~/.kube
- echo "$KUBECONFIG_CONTENT" | tr -d '[:space:]' | base64 -d > ~/.kube/config
- chmod 600 ~/.kube/config
# Deploy backend first
- kubectl set image deployment/scraper scraper=code.cannabrands.app/creationshop/dispensary-scraper:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
- kubectl rollout status deployment/scraper -n dispensary-scraper --timeout=300s
# Note: Migrations run automatically at startup via auto-migrate
# Deploy remaining services
# Resilience: ensure workers are scaled up if at 0
- REPLICAS=$(kubectl get deployment scraper-worker -n dispensary-scraper -o jsonpath='{.spec.replicas}'); if [ "$REPLICAS" = "0" ]; then echo "Scaling workers from 0 to 5"; kubectl scale deployment/scraper-worker --replicas=5 -n dispensary-scraper; fi
- kubectl set image deployment/scraper-worker worker=code.cannabrands.app/creationshop/dispensary-scraper:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
- kubectl set image deployment/cannaiq-frontend cannaiq-frontend=code.cannabrands.app/creationshop/cannaiq-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
- kubectl set image deployment/findadispo-frontend findadispo-frontend=code.cannabrands.app/creationshop/findadispo-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
- kubectl set image deployment/findagram-frontend findagram-frontend=code.cannabrands.app/creationshop/findagram-frontend:${CI_COMMIT_SHA:0:8} -n dispensary-scraper
- kubectl rollout status deployment/cannaiq-frontend -n dispensary-scraper --timeout=120s
depends_on:
- docker-backend
- docker-cannaiq
- docker-findadispo
- docker-findagram
when:
branch: master
event: push

168
CLAUDE.md
View File

@@ -1,5 +1,8 @@
# Claude Guidelines for CannaiQ # Claude Guidelines for CannaiQ
## CURRENT ENVIRONMENT: PRODUCTION
**We are working in PRODUCTION only.** All database queries and API calls should target the remote production environment, not localhost. Use kubectl port-forward or remote DB connections as needed.
## PERMANENT RULES (NEVER VIOLATE) ## PERMANENT RULES (NEVER VIOLATE)
### 1. NO DELETE ### 1. NO DELETE
@@ -17,6 +20,76 @@ Never deploy unless user explicitly says: "CLAUDE — DEPLOYMENT IS NOW AUTHORIZ
### 5. DB POOL ONLY ### 5. DB POOL ONLY
Never import `src/db/migrate.ts` at runtime. Use `src/db/pool.ts` for DB access. Never import `src/db/migrate.ts` at runtime. Use `src/db/pool.ts` for DB access.
### 6. CI/CD DEPLOYMENT — BATCH CHANGES, PUSH ONCE
**Never manually deploy or check deployment status.** The project uses Woodpecker CI.
**CRITICAL: Each CI build takes 30 minutes. NEVER push incrementally.**
**Workflow:**
1. Make ALL related code changes first
2. Test locally if possible (./setup-local.sh)
3. ONE commit with all changes
4. ONE push to master
5. **STOP** - CI handles the rest
6. Wait for user to confirm deployment worked
**DO NOT:**
- Push multiple small commits (each triggers 30-min build)
- Run `kubectl rollout status` to check deployment
- Run `kubectl logs` to verify new code is running
- Manually restart pods
- Check CI pipeline status
Batch everything, push once, wait for user feedback.
### 7. K8S POD LIMITS — CRITICAL
**EXACTLY 8 PODS** for `scraper-worker` deployment. NEVER CHANGE THIS.
**Replica Count is LOCKED:**
- Always 8 replicas — no more, no less
- NEVER scale down (even temporarily)
- NEVER scale up beyond 8
- If pods are not 8, restore to 8 immediately
**Pods vs Workers:**
- **Pod** = Kubernetes container instance (ALWAYS 8)
- **Worker** = Concurrent task runner INSIDE a pod (controlled by `MAX_CONCURRENT_TASKS` env var)
- Formula: `8 pods × MAX_CONCURRENT_TASKS` = total concurrent workers (e.g., `8 × 3 = 24` when `MAX_CONCURRENT_TASKS=3`)
**Browser Task Memory Limits:**
- Each Puppeteer/Chrome browser uses ~400 MB RAM
- Pod memory limit is 2 GB
- **MAX_CONCURRENT_TASKS=3** is the safe maximum for browser tasks
- More than 3 concurrent browsers per pod = OOM crash
| Browsers | RAM Used | Status |
|----------|----------|--------|
| 3 | ~1.3 GB | Safe (recommended) |
| 4 | ~1.7 GB | Risky |
| 5+ | >2 GB | OOM crash |
**To increase throughput:** Add more pods (up to 8), NOT more concurrent tasks per pod.
```bash
# CORRECT - scale pods (up to 8)
kubectl scale deployment/scraper-worker -n dispensary-scraper --replicas=8
# WRONG - will cause OOM crashes
kubectl set env deployment/scraper-worker -n dispensary-scraper MAX_CONCURRENT_TASKS=10
```
**If K8s API returns ServiceUnavailable:** STOP IMMEDIATELY. Do not retry. The cluster is overloaded.
### 8. K8S REQUIRES EXPLICIT PERMISSION
**NEVER run kubectl commands without explicit user permission.**
Before running ANY `kubectl` command (scale, rollout, set env, delete, apply, etc.):
1. Tell the user what you want to do
2. Wait for explicit approval
3. Only then execute the command
This applies to ALL kubectl operations - even read-only ones like `kubectl get pods`.
--- ---
## Quick Reference ## Quick Reference
@@ -203,6 +276,101 @@ All other browsers are filtered out. Uses `intoli/user-agents` library for reali
These binaries mimic real browser TLS fingerprints to avoid detection. These binaries mimic real browser TLS fingerprints to avoid detection.
### Evomi Residential Proxy API
Workers use Evomi's residential proxy API for geo-targeted proxies on-demand.
**Priority Order**:
1. Evomi API (if EVOMI_USER/EVOMI_PASS configured)
2. DB proxies (fallback if Evomi not configured)
**Environment Variables**:
| Variable | Description | Default |
|----------|-------------|---------|
| `EVOMI_USER` | API username | - |
| `EVOMI_PASS` | API key | - |
| `EVOMI_HOST` | Proxy host | `rpc.evomi.com` |
| `EVOMI_PORT` | Proxy port | `1000` |
**K8s Secret**: Credentials stored in `scraper-secrets`:
```bash
kubectl get secret scraper-secrets -n dispensary-scraper -o jsonpath='{.data.EVOMI_PASS}' | base64 -d
```
**Proxy URL Format**: `http://{user}_{session}_{geo}:{pass}@{host}:{port}`
- `session`: Worker ID for sticky sessions
- `geo`: Target state, given as a lowercase state name (e.g., `arizona`, `california`)
**Files**:
- `src/services/crawl-rotator.ts` - `getEvomiConfig()`, `buildEvomiProxyUrl()`
- `src/tasks/task-worker.ts` - Proxy initialization order
---
## Bulk Task Workflow (Updated 2025-12-13)
### Overview
Tasks are created with `scheduled_for = NOW()` by default. Worker-level controls handle pacing - no task-level staggering needed.
### How It Works
```
1. Task created with scheduled_for = NOW()
2. Worker claims task only when scheduled_for <= NOW()
3. Worker runs preflight on EVERY task claim (proxy health check)
4. If preflight passes, worker executes task
5. If preflight fails, task released back to pending for another worker
6. Worker finishes task, polls for next available task
7. Repeat - preflight runs on each new task claim
```
### Worker-Level Throttling
These controls pace task execution - no staggering at task creation time:
| Control | Purpose |
|---------|---------|
| `MAX_CONCURRENT_TASKS` | Limits concurrent tasks per pod (default: 3) |
| Working hours | Restricts when tasks run (configurable per schedule) |
| Preflight checks | Ensures proxy health before each task |
| Per-store locking | Only one active task per dispensary |
### Key Points
- **Preflight is per-task, not per-startup**: Each task claim triggers a new preflight check
- **Worker controls pacing**: Tasks scheduled for NOW() but claimed based on worker capacity
- **Optional staggering**: Pass `stagger_seconds > 0` if you need explicit delays
### API Endpoints
```bash
# Create bulk tasks for specific dispensary IDs
POST /api/tasks/batch/staggered
{
"dispensary_ids": [1, 2, 3, 4],
"role": "product_refresh", # or "product_discovery"
"stagger_seconds": 0, # default: 0 (all NOW)
"platform": "dutchie", # default: "dutchie"
"method": null # "curl" | "http" | null
}
# Create bulk tasks for all stores in a state
POST /api/tasks/crawl-state/:stateCode
{
"stagger_seconds": 0, # default: 0 (all NOW)
"method": "http" # default: "http"
}
```
### Example: Tasks for AZ Stores
```bash
curl -X POST http://localhost:3010/api/tasks/crawl-state/AZ \
-H "Content-Type: application/json"
```
### Related Files
| File | Purpose |
|------|---------|
| `src/tasks/task-service.ts` | `createStaggeredTasks()` method |
| `src/routes/tasks.ts` | API endpoints for batch task creation |
| `src/tasks/task-worker.ts` | Worker task claiming and preflight logic |
--- ---
## Documentation ## Documentation

View File

@@ -1,17 +1,33 @@
# Build stage # Build stage
# Image: code.cannabrands.app/creationshop/dispensary-scraper # Image: git.spdy.io/creationshop/dispensary-scraper
FROM code.cannabrands.app/creationshop/node:20-slim AS builder FROM node:22-slim AS builder
# Install build tools for native modules (bcrypt, sharp)
RUN apt-get update && apt-get install -y \
python3 \
build-essential \
--no-install-recommends \
&& rm -rf /var/lib/apt/lists/*
WORKDIR /app WORKDIR /app
COPY package*.json ./ COPY package*.json ./
RUN npm install
# Install dependencies with retry and fallback registry
RUN npm config set fetch-retries 3 && \
npm config set fetch-retry-mintimeout 20000 && \
npm config set fetch-retry-maxtimeout 120000 && \
npm install || \
(npm config set registry https://registry.npmmirror.com && npm install)
COPY . . COPY . .
RUN npm run build RUN npm run build
# Prune dev dependencies so only runtime packages remain in node_modules
# before it is copied into the production stage.
# `--omit=dev` is the current spelling; `--production` is deprecated in recent npm.
RUN npm prune --omit=dev
# Production stage # Production stage
FROM code.cannabrands.app/creationshop/node:20-slim FROM node:22-slim
# Build arguments for version info # Build arguments for version info
ARG APP_BUILD_VERSION=dev ARG APP_BUILD_VERSION=dev
@@ -44,8 +60,7 @@ ENV PUPPETEER_EXECUTABLE_PATH=/usr/bin/chromium
WORKDIR /app WORKDIR /app
COPY package*.json ./ COPY package*.json ./
RUN npm install --omit=dev COPY --from=builder /app/node_modules ./node_modules
COPY --from=builder /app/dist ./dist COPY --from=builder /app/dist ./dist
# Copy migrations for auto-migrate on startup # Copy migrations for auto-migrate on startup

View File

@@ -99,10 +99,60 @@ src/scraper-v2/*.ts # Entire directory deprecated
|------|---------|--------| |------|---------|--------|
| `src/tasks/handlers/payload-fetch.ts` | Fetch products from Dutchie | **PRIMARY** | | `src/tasks/handlers/payload-fetch.ts` | Fetch products from Dutchie | **PRIMARY** |
| `src/tasks/handlers/product-refresh.ts` | Process payload into DB | **PRIMARY** | | `src/tasks/handlers/product-refresh.ts` | Process payload into DB | **PRIMARY** |
| `src/tasks/handlers/entry-point-discovery.ts` | Resolve platform IDs (auto-healing) | **PRIMARY** |
| `src/tasks/handlers/menu-detection.ts` | Detect menu type | ACTIVE | | `src/tasks/handlers/menu-detection.ts` | Detect menu type | ACTIVE |
| `src/tasks/handlers/id-resolution.ts` | Resolve platform IDs | ACTIVE | | `src/tasks/handlers/id-resolution.ts` | Resolve platform IDs (legacy) | LEGACY |
| `src/tasks/handlers/image-download.ts` | Download product images | ACTIVE | | `src/tasks/handlers/image-download.ts` | Download product images | ACTIVE |
---
## Transport Rules (CRITICAL)
**Browser-based (Puppeteer) is the DEFAULT transport. curl is ONLY allowed when explicitly specified.**
### Transport Selection
| `task.method` | Transport Used | Notes |
|---------------|----------------|-------|
| `null` | Browser (Puppeteer) | DEFAULT - use this for most tasks |
| `'http'` | Browser (Puppeteer) | Explicit browser request |
| `'curl'` | curl-impersonate | ONLY when explicitly needed |
### Why Browser-First?
1. **Anti-detection**: Puppeteer with StealthPlugin evades bot detection
2. **Session cookies**: Browser maintains session state automatically
3. **Fingerprinting**: Real browser fingerprint (TLS, headers, etc.)
4. **Age gates**: Browser can click through age verification
### Entry Point Discovery Auto-Healing
The `entry_point_discovery` handler uses a healing strategy:
```
1. FIRST: Check dutchie_discovery_locations for existing platform_location_id
- By linked dutchie_discovery_id
- By slug match in discovery data
→ If found, NO network call needed
2. SECOND: Browser-based GraphQL (Puppeteer)
- 5x retries for network/proxy failures
- On HTTP 403: rotate proxy and retry
- On HTTP 404 after 2 attempts: mark as 'removed'
3. HARD FAILURE: After exhausting options → 'needs_investigation'
```
### DO NOT Use curl Unless:
- Task explicitly has `method = 'curl'`
- You're testing curl-impersonate binaries
- The API explicitly requires curl fingerprinting
### Files
| File | Transport | Purpose |
|------|-----------|---------|
| `src/services/puppeteer-preflight.ts` | Browser | Preflight check |
| `src/services/curl-preflight.ts` | curl | Preflight check |
| `src/tasks/handlers/entry-point-discovery.ts` | Browser | Platform ID resolution |
| `src/tasks/handlers/payload-fetch.ts` | Both | Product fetching |
### Database ### Database
| File | Purpose | Status | | File | Purpose | Status |
|------|---------|--------| |------|---------|--------|

343
backend/docs/QUERY_API.md Normal file
View File

@@ -0,0 +1,343 @@
# CannaiQ Query API
Query raw crawl payload data with flexible filters, sorting, and aggregation.
## Base URL
```
https://cannaiq.co/api/payloads
```
## Authentication
Include your API key in the header:
```
X-API-Key: your-api-key
```
---
## Endpoints
### 1. Query Products
Filter and search products from a store's latest crawl data.
```
GET /api/payloads/store/{dispensaryId}/query
```
#### Query Parameters
| Parameter | Type | Description |
|-----------|------|-------------|
| `brand` | string | Filter by brand name (partial match) |
| `category` | string | Filter by category (flower, vape, edible, etc.) |
| `subcategory` | string | Filter by subcategory |
| `strain_type` | string | Filter by strain (indica, sativa, hybrid, cbd) |
| `in_stock` | boolean | Filter by stock status (true/false) |
| `price_min` | number | Minimum price |
| `price_max` | number | Maximum price |
| `thc_min` | number | Minimum THC percentage |
| `thc_max` | number | Maximum THC percentage |
| `search` | string | Search product name (partial match) |
| `fields` | string | Comma-separated fields to return |
| `limit` | number | Max results (default 100, max 1000) |
| `offset` | number | Skip results for pagination |
| `sort` | string | Sort by: name, price, thc, brand |
| `order` | string | Sort order: asc, desc |
#### Available Fields
When using `fields` parameter, you can request:
- `id` - Product ID
- `name` - Product name
- `brand` - Brand name
- `category` - Product category
- `subcategory` - Product subcategory
- `strain_type` - Indica/Sativa/Hybrid/CBD
- `price` - Current price
- `price_med` - Medical price
- `price_rec` - Recreational price
- `thc` - THC percentage
- `cbd` - CBD percentage
- `weight` - Product weight/size
- `status` - Stock status
- `in_stock` - Boolean in-stock flag
- `image_url` - Product image
- `description` - Product description
#### Examples
**Get all flower products under $40:**
```
GET /api/payloads/store/112/query?category=flower&price_max=40
```
**Search for "Blue Dream" with high THC:**
```
GET /api/payloads/store/112/query?search=blue+dream&thc_min=20
```
**Get only name, price, and THC for Alien Labs products:**
```
GET /api/payloads/store/112/query?brand=Alien+Labs&fields=name,price,thc
```
**Get top 10 highest THC products:**
```
GET /api/payloads/store/112/query?sort=thc&order=desc&limit=10
```
**Paginate through in-stock products:**
```
GET /api/payloads/store/112/query?in_stock=true&limit=50&offset=0
GET /api/payloads/store/112/query?in_stock=true&limit=50&offset=50
```
#### Response
```json
{
"success": true,
"dispensaryId": 112,
"payloadId": 45,
"fetchedAt": "2025-12-11T10:30:00Z",
"query": {
"filters": {
"brand": "Alien Labs",
"category": null,
"price_max": null
},
"sort": "price",
"order": "asc",
"limit": 100,
"offset": 0
},
"pagination": {
"total": 15,
"returned": 15,
"limit": 100,
"offset": 0,
"has_more": false
},
"products": [
{
"id": "507f1f77bcf86cd799439011",
"name": "Alien Labs - Baklava 3.5g",
"brand": "Alien Labs",
"category": "flower",
"strain_type": "hybrid",
"price": 55,
"thc": "28.5",
"in_stock": true
}
]
}
```
---
### 2. Aggregate Data
Group products and calculate metrics.
```
GET /api/payloads/store/{dispensaryId}/aggregate
```
#### Query Parameters
| Parameter | Type | Description |
|-----------|------|-------------|
| `group_by` | string | **Required.** Field to group by: brand, category, subcategory, strain_type |
| `metrics` | string | Comma-separated metrics (default: count) |
#### Available Metrics
- `count` - Number of products
- `avg_price` - Average price
- `min_price` - Lowest price
- `max_price` - Highest price
- `avg_thc` - Average THC percentage
- `in_stock_count` - Number of in-stock products
#### Examples
**Count products by brand:**
```
GET /api/payloads/store/112/aggregate?group_by=brand
```
**Get price stats by category:**
```
GET /api/payloads/store/112/aggregate?group_by=category&metrics=count,avg_price,min_price,max_price
```
**Get THC averages by strain type:**
```
GET /api/payloads/store/112/aggregate?group_by=strain_type&metrics=count,avg_thc
```
**Brand analysis with stock info:**
```
GET /api/payloads/store/112/aggregate?group_by=brand&metrics=count,avg_price,in_stock_count
```
#### Response
```json
{
"success": true,
"dispensaryId": 112,
"payloadId": 45,
"fetchedAt": "2025-12-11T10:30:00Z",
"groupBy": "brand",
"metrics": ["count", "avg_price"],
"totalProducts": 450,
"groupCount": 85,
"aggregations": [
{
"brand": "Alien Labs",
"count": 15,
"avg_price": 52.33
},
{
"brand": "Connected",
"count": 12,
"avg_price": 48.50
}
]
}
```
---
### 3. Compare Stores (Price Comparison)
Query the same data from multiple stores and compare in your app:
```javascript
// Get flower prices from Store A
const storeA = await fetch('/api/payloads/store/112/query?category=flower&fields=name,brand,price');
// Get flower prices from Store B
const storeB = await fetch('/api/payloads/store/115/query?category=flower&fields=name,brand,price');
// Compare in your app
const dataA = await storeA.json();
const dataB = await storeB.json();
// Find matching products and compare prices
```
---
### 4. Price History
For historical price data, use the snapshots endpoint:
```
GET /api/v1/products/{productId}/history?days=30
```
Or compare payloads over time:
```
GET /api/payloads/store/{dispensaryId}/diff?from={payloadId1}&to={payloadId2}
```
The diff endpoint shows:
- Products added
- Products removed
- Price changes
- Stock changes
---
### 5. List Stores
Get available dispensaries to query:
```
GET /api/stores
```
Returns all stores with their IDs, names, and locations.
---
## Use Cases
### Price Comparison App
```javascript
// 1. Get stores in Arizona
const stores = await fetch('/api/stores?state=AZ').then(r => r.json());
// 2. Query flower prices from each store
const prices = await Promise.all(
stores.map(store =>
fetch(`/api/payloads/store/${store.id}/query?category=flower&fields=name,brand,price`)
.then(r => r.json())
)
);
// 3. Build comparison matrix in your app
```
### Brand Analytics Dashboard
```javascript
// Get brand presence across stores
const brandData = await Promise.all(
storeIds.map(id =>
fetch(`/api/payloads/store/${id}/aggregate?group_by=brand&metrics=count,avg_price`)
.then(r => r.json())
)
);
// Aggregate brand presence across all stores
```
### Deal Finder
```javascript
// Find high-THC flower under $30
const deals = await fetch(
'/api/payloads/store/112/query?category=flower&price_max=30&thc_min=20&in_stock=true&sort=thc&order=desc'
).then(r => r.json());
```
### Inventory Tracker
```javascript
// Get products that went out of stock
const diff = await fetch('/api/payloads/store/112/diff').then(r => r.json());
const outOfStock = diff.details.stockChanges.filter(
p => p.newStatus !== 'Active'
);
```
---
## Rate Limits
- Default: 100 requests/minute per API key
- Contact support for higher limits
## Error Responses
```json
{
"success": false,
"error": "Error message here"
}
```
Common errors:
- `404` - Store or payload not found
- `400` - Missing required parameter
- `401` - Invalid or missing API key
- `429` - Rate limit exceeded

View File

@@ -504,6 +504,103 @@ The Workers Dashboard shows:
| `src/routes/worker-registry.ts:148-195` | Heartbeat endpoint handling | | `src/routes/worker-registry.ts:148-195` | Heartbeat endpoint handling |
| `cannaiq/src/pages/WorkersDashboard.tsx:233-305` | UI components for resources | | `cannaiq/src/pages/WorkersDashboard.tsx:233-305` | UI components for resources |
## Browser Task Memory Limits (Updated 2025-12)
Browser-based tasks (Puppeteer/Chrome) have strict memory constraints that limit concurrency.
### Why Browser Tasks Are Different
Each browser task launches a Chrome process. Unlike I/O-bound API calls, browsers consume significant RAM:
| Component | RAM Usage |
|-----------|-----------|
| Node.js runtime | ~150 MB |
| Chrome browser (base) | ~200-250 MB |
| Dutchie menu page (loaded) | ~100-150 MB |
| **Per browser total** | **~350-450 MB** |
### Memory Math for Pod Limits
```
Pod memory limit: 2 GB (2000 MB)
Node.js runtime: -150 MB
Safety buffer: -100 MB
────────────────────────────────
Available for browsers: 1750 MB
Per browser + page: ~400 MB
Max browsers: 1750 ÷ 400 = ~4 browsers
Recommended: 3 browsers (leaves headroom for spikes)
```
### MAX_CONCURRENT_TASKS for Browser Tasks
| Browsers per Pod | RAM Used | Risk Level |
|------------------|----------|------------|
| 1 | ~500 MB | Very safe |
| 2 | ~900 MB | Safe |
| **3** | **~1.3 GB** | **Recommended** |
| 4 | ~1.7 GB | Tight (may OOM) |
| 5+ | >2 GB | Will OOM crash |
**CRITICAL**: `MAX_CONCURRENT_TASKS=3` is the maximum safe value for browser tasks with current pod limits.
### Scaling Strategy
Scale **horizontally** (more pods) rather than vertically (more concurrency per pod):
```
┌─────────────────────────────────────────────────────────────────────────┐
│ Cluster: 8 pods × 3 browsers = 24 concurrent tasks │
│ │
│ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │
│ │ Pod 0 │ │ Pod 1 │ │ Pod 2 │ │ Pod 3 │ │
│ │ 3 browsers │ │ 3 browsers │ │ 3 browsers │ │ 3 browsers │ │
│ └─────────────┘ └─────────────┘ └─────────────┘ └─────────────┘ │
│ │
│ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │
│ │ Pod 4 │ │ Pod 5 │ │ Pod 6 │ │ Pod 7 │ │
│ │ 3 browsers │ │ 3 browsers │ │ 3 browsers │ │ 3 browsers │ │
│ └─────────────┘ └─────────────┘ └─────────────┘ └─────────────┘ │
└─────────────────────────────────────────────────────────────────────────┘
```
### Browser Lifecycle Per Task
Each task gets a fresh browser with fresh IP/identity:
```
1. Claim task from queue
2. Get fresh proxy from pool
3. Launch browser with proxy
4. Run preflight (verify IP)
5. Execute scrape
6. Close browser
7. Repeat
```
This ensures:
- Fresh IP per task (proxy rotation)
- Fresh fingerprint per task (UA rotation)
- No cookie/session bleed between tasks
- Predictable memory usage
### Increasing Capacity
To handle more concurrent tasks:
1. **Add more pods** (up to 8 per CLAUDE.md limit)
2. **Increase pod memory** (allows 4 browsers per pod):
```yaml
resources:
limits:
memory: "2.5Gi" # from 2Gi
```
**DO NOT** simply increase `MAX_CONCURRENT_TASKS` without also increasing pod memory limits.
## Monitoring ## Monitoring
### Logs ### Logs

View File

@@ -2,7 +2,7 @@ apiVersion: v1
kind: Service kind: Service
metadata: metadata:
name: scraper-worker name: scraper-worker
namespace: dispensary-scraper namespace: cannaiq
labels: labels:
app: scraper-worker app: scraper-worker
spec: spec:
@@ -17,7 +17,7 @@ apiVersion: apps/v1
kind: StatefulSet kind: StatefulSet
metadata: metadata:
name: scraper-worker name: scraper-worker
namespace: dispensary-scraper namespace: cannaiq
spec: spec:
serviceName: scraper-worker serviceName: scraper-worker
replicas: 8 replicas: 8
@@ -37,7 +37,7 @@ spec:
- name: regcred - name: regcred
containers: containers:
- name: worker - name: worker
image: code.cannabrands.app/creationshop/dispensary-scraper:latest image: git.spdy.io/creationshop/cannaiq:latest
imagePullPolicy: Always imagePullPolicy: Always
command: ["node"] command: ["node"]
args: ["dist/tasks/task-worker.js"] args: ["dist/tasks/task-worker.js"]

View File

@@ -0,0 +1,59 @@
-- Migration 085: Trusted Origins Management
-- Allows admin to manage trusted IPs and domains via UI instead of hardcoded values
-- Trusted origins table (IPs and domains that bypass API key auth)
CREATE TABLE IF NOT EXISTS trusted_origins (
id SERIAL PRIMARY KEY,
-- Origin type: 'ip', 'domain', 'pattern'
origin_type VARCHAR(20) NOT NULL CHECK (origin_type IN ('ip', 'domain', 'pattern')),
-- The actual value
-- For ip: '127.0.0.1', '::1', '192.168.1.0/24'
-- For domain: 'cannaiq.co', 'findadispo.com'
-- For pattern: '^https://.*\.cannabrands\.app$' (regex)
origin_value VARCHAR(255) NOT NULL,
-- Description for admin reference
description TEXT,
-- Active flag
active BOOLEAN DEFAULT true,
-- Audit
created_at TIMESTAMPTZ DEFAULT NOW(),
created_by INTEGER REFERENCES users(id),
updated_at TIMESTAMPTZ DEFAULT NOW(),
UNIQUE(origin_type, origin_value)
);
-- Index for quick lookups
CREATE INDEX IF NOT EXISTS idx_trusted_origins_active ON trusted_origins(active) WHERE active = true;
CREATE INDEX IF NOT EXISTS idx_trusted_origins_type ON trusted_origins(origin_type, active);
-- Seed with current hardcoded values
INSERT INTO trusted_origins (origin_type, origin_value, description) VALUES
-- Trusted IPs (localhost)
('ip', '127.0.0.1', 'Localhost IPv4'),
('ip', '::1', 'Localhost IPv6'),
('ip', '::ffff:127.0.0.1', 'Localhost IPv4-mapped IPv6'),
-- Trusted domains
('domain', 'cannaiq.co', 'CannaiQ production'),
('domain', 'www.cannaiq.co', 'CannaiQ production (www)'),
('domain', 'findadispo.com', 'FindADispo production'),
('domain', 'www.findadispo.com', 'FindADispo production (www)'),
('domain', 'findagram.co', 'Findagram production'),
('domain', 'www.findagram.co', 'Findagram production (www)'),
('domain', 'localhost:3010', 'Local backend dev'),
('domain', 'localhost:8080', 'Local admin dev'),
('domain', 'localhost:5173', 'Local Vite dev'),
-- Pattern-based (regex)
('pattern', '^https://.*\.cannabrands\.app$', 'All cannabrands.app subdomains'),
('pattern', '^https://.*\.cannaiq\.co$', 'All cannaiq.co subdomains')
ON CONFLICT (origin_type, origin_value) DO NOTHING;
-- Add comment
COMMENT ON TABLE trusted_origins IS 'IPs and domains that bypass API key authentication. Managed via /admin.';

View File

@@ -0,0 +1,10 @@
-- Migration 086: Add proxy_url column for alternative URL formats
-- Some proxy providers use non-standard URL formats (e.g., host:port:user:pass)
-- This column allows storing the raw URL directly
-- Add proxy_url column - if set, used directly instead of constructing from parts
ALTER TABLE proxies
ADD COLUMN IF NOT EXISTS proxy_url TEXT;
-- Add comment
COMMENT ON COLUMN proxies.proxy_url IS 'Raw proxy URL (if provider uses non-standard format). Takes precedence over constructed URL from host/port/user/pass.';

View File

@@ -0,0 +1,30 @@
-- Migration 088: Extend raw_crawl_payloads for discovery payloads
--
-- Enables saving raw store data from Dutchie discovery crawls.
-- Store discovery returns raw dispensary objects - save them for historical analysis.
-- Add payload_type to distinguish product crawls from discovery crawls
ALTER TABLE raw_crawl_payloads
ADD COLUMN IF NOT EXISTS payload_type VARCHAR(32) NOT NULL DEFAULT 'product';
-- Add state_code for discovery payloads (null for product payloads)
ALTER TABLE raw_crawl_payloads
ADD COLUMN IF NOT EXISTS state_code VARCHAR(10);
-- Add store_count for discovery payloads (alternative to product_count)
ALTER TABLE raw_crawl_payloads
ADD COLUMN IF NOT EXISTS store_count INTEGER;
-- Make dispensary_id nullable for discovery payloads
ALTER TABLE raw_crawl_payloads
ALTER COLUMN dispensary_id DROP NOT NULL;
-- Add index for discovery payload queries
CREATE INDEX IF NOT EXISTS idx_raw_crawl_payloads_type_state
ON raw_crawl_payloads(payload_type, state_code)
WHERE payload_type = 'store_discovery';
-- Comments
COMMENT ON COLUMN raw_crawl_payloads.payload_type IS 'Type: product (default), store_discovery';
COMMENT ON COLUMN raw_crawl_payloads.state_code IS 'State code for discovery payloads (e.g., AZ, MI)';
COMMENT ON COLUMN raw_crawl_payloads.store_count IS 'Number of stores in discovery payload';

View File

@@ -0,0 +1,105 @@
-- Migration 089: Immutable Schedules with Per-State Product Discovery
--
-- Key changes:
-- 1. Add is_immutable column - schedules can be edited but not deleted
-- 2. Add method column - all tasks use 'http' (Puppeteer transport)
-- 3. Store discovery weekly (168h)
-- 4. Per-state product_discovery schedules (4h default)
-- 5. Remove old payload_fetch schedules
-- =====================================================
-- 1) Add new columns to task_schedules
-- =====================================================
ALTER TABLE task_schedules
ADD COLUMN IF NOT EXISTS is_immutable BOOLEAN DEFAULT FALSE;
ALTER TABLE task_schedules
ADD COLUMN IF NOT EXISTS method VARCHAR(10) DEFAULT 'http';
-- =====================================================
-- 2) Update store_discovery to weekly and immutable
-- =====================================================
-- Step 1: bring any existing store-discovery schedule (under either of the two
-- names listed in the WHERE clause) to the weekly, immutable, 'http' configuration.
UPDATE task_schedules
SET interval_hours = 168, -- 7 days
is_immutable = TRUE,
method = 'http',
description = 'Discover new Dutchie stores weekly (HTTP transport)'
WHERE name IN ('store_discovery_dutchie', 'Store Discovery');
-- Step 2: upsert the schedule named 'Store Discovery'. ON CONFLICT (name) relies on
-- a unique constraint/index on task_schedules.name. On conflict only interval_hours,
-- is_immutable, method and description are overwritten; role, priority, platform and
-- next_run_at of the existing row are left as they are.
-- NOTE(review): this upsert keys on 'Store Discovery' only. If the table held just a
-- 'store_discovery_dutchie' row, that row is updated above AND a new 'Store Discovery'
-- row is inserted here, leaving two store_discovery schedules - confirm this is intended.
INSERT INTO task_schedules (name, role, interval_hours, priority, description, is_immutable, method, platform, next_run_at)
VALUES ('Store Discovery', 'store_discovery', 168, 5, 'Discover new Dutchie stores weekly (HTTP transport)', TRUE, 'http', 'dutchie', NOW())
ON CONFLICT (name) DO UPDATE SET
interval_hours = 168,
is_immutable = TRUE,
method = 'http',
description = 'Discover new Dutchie stores weekly (HTTP transport)';
-- =====================================================
-- 3) Remove old payload_fetch and product_refresh_all schedules
-- =====================================================
DELETE FROM task_schedules WHERE name IN ('payload_fetch_all', 'product_refresh_all');
-- =====================================================
-- 4) Create per-state product_discovery schedules
-- =====================================================
-- One schedule per state that has at least one crawl-enabled dispensary
-- (see the EXISTS filter below). The state's is_active flag does not decide
-- whether a schedule is created; it only decides whether it starts enabled.
INSERT INTO task_schedules (name, role, state_code, interval_hours, priority, description, is_immutable, method, enabled, next_run_at)
SELECT
'product_discovery_' || lower(s.code) AS name,
'product_discovery' AS role,
s.code AS state_code,
4 AS interval_hours, -- 4 hours default, editable
10 AS priority,
'Product discovery for ' || s.name || ' dispensaries (HTTP transport)' AS description,
TRUE AS is_immutable, -- Can edit but not delete
'http' AS method,
-- Mirrors states.is_active; a NULL is_active falls through to FALSE
CASE WHEN s.is_active THEN TRUE ELSE FALSE END AS enabled,
-- Stagger start times: each state starts 5 minutes after the previous
-- (ordering is alphabetical by state code, over the filtered states only)
NOW() + (ROW_NUMBER() OVER (ORDER BY s.code) * INTERVAL '5 minutes') AS next_run_at
FROM states s
WHERE EXISTS (
SELECT 1 FROM dispensaries d
WHERE d.state_id = s.id AND d.crawl_enabled = true
)
-- Re-running is safe: an existing schedule keeps its interval_hours, priority,
-- enabled flag and next_run_at; only the three columns below are overwritten.
ON CONFLICT (name) DO UPDATE SET
is_immutable = TRUE,
method = 'http',
description = EXCLUDED.description;
-- Also create schedules for states that might have stores discovered later
INSERT INTO task_schedules (name, role, state_code, interval_hours, priority, description, is_immutable, method, enabled, next_run_at)
SELECT
'product_discovery_' || lower(s.code) AS name,
'product_discovery' AS role,
s.code AS state_code,
4 AS interval_hours,
10 AS priority,
'Product discovery for ' || s.name || ' dispensaries (HTTP transport)' AS description,
TRUE AS is_immutable,
'http' AS method,
FALSE AS enabled, -- Disabled until stores exist
NOW() + INTERVAL '1 hour'
FROM states s
WHERE NOT EXISTS (
SELECT 1 FROM task_schedules ts WHERE ts.name = 'product_discovery_' || lower(s.code)
)
ON CONFLICT (name) DO NOTHING;
-- =====================================================
-- 5) Make analytics_refresh immutable
-- =====================================================
UPDATE task_schedules
SET is_immutable = TRUE, method = 'http'
WHERE name = 'analytics_refresh';
-- =====================================================
-- 6) Add index for schedule lookups
-- =====================================================
CREATE INDEX IF NOT EXISTS idx_task_schedules_state_code
ON task_schedules(state_code)
WHERE state_code IS NOT NULL;
-- Comments
-- Catalog descriptions for the two columns added in section 1 of this migration.
-- NOTE(review): the text below describes 'curl' as axios, while the project's
-- transport documentation describes the 'curl' method as curl-impersonate -
-- confirm which wording is correct before relying on this description.
COMMENT ON COLUMN task_schedules.is_immutable IS 'If TRUE, schedule cannot be deleted (only edited)';
COMMENT ON COLUMN task_schedules.method IS 'Transport method: http (Puppeteer/browser) or curl (axios)';

View File

@@ -0,0 +1,66 @@
-- Migration 090: Add modification tracking columns
--
-- Tracks when records were last modified and by which task.
-- Enables debugging, auditing, and understanding data freshness.
--
-- Columns added:
-- last_modified_at - When the record was last modified by a task
-- last_modified_by_task - Which task role modified it (e.g., 'product_refresh')
-- last_modified_task_id - The specific task ID that modified it
-- ============================================================
-- dispensaries table
-- ============================================================
ALTER TABLE dispensaries
ADD COLUMN IF NOT EXISTS last_modified_at TIMESTAMPTZ;
ALTER TABLE dispensaries
ADD COLUMN IF NOT EXISTS last_modified_by_task VARCHAR(50);
ALTER TABLE dispensaries
ADD COLUMN IF NOT EXISTS last_modified_task_id INTEGER;
-- Index for querying recently modified records
CREATE INDEX IF NOT EXISTS idx_dispensaries_last_modified
ON dispensaries(last_modified_at DESC)
WHERE last_modified_at IS NOT NULL;
-- Index for querying by task type
CREATE INDEX IF NOT EXISTS idx_dispensaries_modified_by_task
ON dispensaries(last_modified_by_task)
WHERE last_modified_by_task IS NOT NULL;
COMMENT ON COLUMN dispensaries.last_modified_at IS 'Timestamp when this record was last modified by a task';
COMMENT ON COLUMN dispensaries.last_modified_by_task IS 'Task role that last modified this record (e.g., store_discovery_state, entry_point_discovery)';
COMMENT ON COLUMN dispensaries.last_modified_task_id IS 'ID of the worker_tasks record that last modified this';
-- ============================================================
-- store_products table
-- ============================================================
ALTER TABLE store_products
ADD COLUMN IF NOT EXISTS last_modified_at TIMESTAMPTZ;
ALTER TABLE store_products
ADD COLUMN IF NOT EXISTS last_modified_by_task VARCHAR(50);
ALTER TABLE store_products
ADD COLUMN IF NOT EXISTS last_modified_task_id INTEGER;
-- Index for querying recently modified products
CREATE INDEX IF NOT EXISTS idx_store_products_last_modified
ON store_products(last_modified_at DESC)
WHERE last_modified_at IS NOT NULL;
-- Index for querying by task type
CREATE INDEX IF NOT EXISTS idx_store_products_modified_by_task
ON store_products(last_modified_by_task)
WHERE last_modified_by_task IS NOT NULL;
-- Composite index on (dispensary_id, last_modified_at DESC): supports listing a
-- single dispensary's most recently modified products. Despite the index name it
-- does not include last_modified_by_task or last_modified_task_id, so it does not
-- narrow by task on its own. Rows never touched by a task (NULL timestamp) are excluded.
CREATE INDEX IF NOT EXISTS idx_store_products_task_modified
ON store_products(dispensary_id, last_modified_at DESC)
WHERE last_modified_at IS NOT NULL;
COMMENT ON COLUMN store_products.last_modified_at IS 'Timestamp when this record was last modified by a task';
COMMENT ON COLUMN store_products.last_modified_by_task IS 'Task role that last modified this record (e.g., product_refresh, product_discovery)';
COMMENT ON COLUMN store_products.last_modified_task_id IS 'ID of the worker_tasks record that last modified this';

View File

@@ -0,0 +1,26 @@
-- Migration 091: Add store discovery tracking columns
-- Per auto-healing scheme (2025-12-12):
-- Track when store_discovery last updated each dispensary
-- Track when last payload was saved
-- Add last_store_discovery_at to track when store_discovery updated this record
ALTER TABLE dispensaries
ADD COLUMN IF NOT EXISTS last_store_discovery_at TIMESTAMPTZ;
-- Add last_payload_at to track when last product payload was saved
-- (Complements last_fetch_at which tracks API fetch time)
ALTER TABLE dispensaries
ADD COLUMN IF NOT EXISTS last_payload_at TIMESTAMPTZ;
-- Add index for finding stale discovery data
CREATE INDEX IF NOT EXISTS idx_dispensaries_store_discovery_at
ON dispensaries (last_store_discovery_at DESC NULLS LAST)
WHERE crawl_enabled = true;
-- Add index for finding dispensaries without recent payloads
CREATE INDEX IF NOT EXISTS idx_dispensaries_payload_at
ON dispensaries (last_payload_at DESC NULLS LAST)
WHERE crawl_enabled = true;
COMMENT ON COLUMN dispensaries.last_store_discovery_at IS 'When store_discovery task last updated this record';
COMMENT ON COLUMN dispensaries.last_payload_at IS 'When last product payload was saved for this dispensary';

View File

@@ -0,0 +1,30 @@
-- Fix 3 Trulieve/Harvest stores with incorrect menu URLs.
-- These records have NULL or mismatched platform_dispensary_id, so the
-- store_discovery ON CONFLICT path cannot update them automatically.

-- Apply all three corrections in one statement: each (id, menu_url) pair in
-- the VALUES list is matched to its dispensary row by primary key.
UPDATE dispensaries AS d
SET
    menu_url   = fix.menu_url,
    updated_at = NOW()
FROM (
    VALUES
        (224, 'https://dutchie.com/dispensary/svaccha-llc-nirvana-center-apache-junction'),
        (76,  'https://dutchie.com/dispensary/trulieve-of-phoenix-tatum'),
        (403, 'https://dutchie.com/dispensary/harvest-of-havasu')
) AS fix (id, menu_url)
WHERE d.id = fix.id;

-- Queue entry_point_discovery tasks so platform_dispensary_id gets resolved.
-- method = 'http' ensures only workers that passed http preflight can claim these.
-- NOTE(review): ON CONFLICT DO NOTHING only suppresses duplicates if worker_tasks
-- has a unique constraint/index these rows would violate — confirm against the schema.
INSERT INTO worker_tasks
    (role, dispensary_id, priority, scheduled_for, method)
VALUES
    ('entry_point_discovery', 224, 5, NOW(), 'http'),
    ('entry_point_discovery', 76,  5, NOW(), 'http'),
    ('entry_point_discovery', 403, 5, NOW(), 'http')
ON CONFLICT DO NOTHING;

View File

@@ -0,0 +1,35 @@
-- Migration 092: Store Intelligence Cache
-- Holds pre-computed store intelligence data that the analytics_refresh task
-- refreshes, so the /intelligence/stores endpoint can avoid costly
-- aggregation queries.

CREATE TABLE IF NOT EXISTS store_intelligence_cache (
    -- One cache row per dispensary (see constraints at the bottom)
    dispensary_id   INTEGER,

    -- Basic counts
    sku_count       INTEGER       NOT NULL DEFAULT 0,
    brand_count     INTEGER       NOT NULL DEFAULT 0,
    snapshot_count  INTEGER       NOT NULL DEFAULT 0,

    -- Pricing
    avg_price_rec   NUMERIC(10,2),
    avg_price_med   NUMERIC(10,2),
    min_price       NUMERIC(10,2),
    max_price       NUMERIC(10,2),

    -- Category breakdown (JSONB for flexibility)
    category_counts JSONB         DEFAULT '{}',

    -- Timestamps
    last_crawl_at   TIMESTAMPTZ,
    last_refresh_at TIMESTAMPTZ   NOT NULL DEFAULT NOW(),

    -- Metadata
    created_at      TIMESTAMPTZ   NOT NULL DEFAULT NOW(),

    -- Constraints: the dispensary id is the primary key, and the cache row is
    -- removed automatically when its dispensary is deleted.
    PRIMARY KEY (dispensary_id),
    FOREIGN KEY (dispensary_id) REFERENCES dispensaries (id) ON DELETE CASCADE
);

-- Index ordered by most recent refresh first
CREATE INDEX IF NOT EXISTS idx_store_intelligence_cache_refresh
    ON store_intelligence_cache (last_refresh_at DESC);

COMMENT ON TABLE store_intelligence_cache
    IS 'Pre-computed store intelligence metrics, refreshed by analytics_refresh task';
COMMENT ON COLUMN store_intelligence_cache.category_counts
    IS 'JSON object mapping category_raw to product count';

View File

@@ -0,0 +1,43 @@
-- Migration: 093_fix_mv_state_metrics.sql
-- Purpose: Rebuild mv_state_metrics so it uses brand_name_raw and reports
--          correct store counts.
-- Issues fixed:
--   1. unique_brands used brand_id (often NULL); it now uses brand_name_raw
--   2. Added out_of_stock_products column
--   3. dispensary_count now correctly named

-- A materialized view's query cannot be altered in place, so drop and recreate it.
DROP MATERIALIZED VIEW IF EXISTS mv_state_metrics;

CREATE MATERIALIZED VIEW mv_state_metrics AS
SELECT
    d.state,
    s.name AS state_name,

    -- Store counts (DISTINCT because each dispensary row is repeated once per
    -- joined product)
    COUNT(DISTINCT d.id)                                          AS dispensary_count,
    COUNT(DISTINCT d.id) FILTER (WHERE d.menu_type = 'dutchie')   AS dutchie_stores,
    COUNT(DISTINCT d.id) FILTER (WHERE d.crawl_enabled = true)    AS active_stores,

    -- Product counts; a NULL is_in_stock is counted as in stock
    COUNT(sp.id)                                                  AS total_products,
    COUNT(sp.id) FILTER (WHERE COALESCE(sp.is_in_stock, true))    AS in_stock_products,
    COUNT(sp.id) FILTER (WHERE sp.is_in_stock = false)            AS out_of_stock_products,
    COUNT(sp.id) FILTER (WHERE sp.is_on_special)                  AS on_special_products,

    -- Distinct brands/categories, ignoring NULL (and empty-string brand) values
    COUNT(DISTINCT sp.brand_name_raw)
        FILTER (WHERE sp.brand_name_raw IS NOT NULL AND sp.brand_name_raw != '')
                                                                  AS unique_brands,
    COUNT(DISTINCT sp.category_raw)
        FILTER (WHERE sp.category_raw IS NOT NULL)                AS unique_categories,

    -- Recreational price statistics over strictly positive prices only
    ROUND(AVG(sp.price_rec) FILTER (WHERE sp.price_rec > 0)::NUMERIC, 2) AS avg_price_rec,
    MIN(sp.price_rec) FILTER (WHERE sp.price_rec > 0)             AS min_price_rec,
    MAX(sp.price_rec) FILTER (WHERE sp.price_rec > 0)             AS max_price_rec,

    -- Time at which this snapshot of the view was computed
    NOW() AS refreshed_at
FROM dispensaries AS d
    LEFT JOIN states AS s
        ON d.state = s.code
    LEFT JOIN store_products AS sp
        ON d.id = sp.dispensary_id
WHERE d.state IS NOT NULL
GROUP BY
    d.state,
    s.name;

-- REFRESH ... CONCURRENTLY requires a unique index on the materialized view.
CREATE UNIQUE INDEX idx_mv_state_metrics_state
    ON mv_state_metrics (state);

-- Refresh helper: rebuilds the view contents using a concurrent refresh.
CREATE OR REPLACE FUNCTION refresh_state_metrics()
RETURNS void
LANGUAGE plpgsql
AS $$
BEGIN
    REFRESH MATERIALIZED VIEW CONCURRENTLY mv_state_metrics;
END;
$$;

View File

@@ -0,0 +1,516 @@
-- Migration: Import 500 Evomi residential proxies
-- These are sticky-session rotating proxies where the password contains the session ID.
-- Active is set to false - run Test All to verify and activate.
-- NOTE(review): the INSERT below stores proxy credentials in plaintext in version
-- control; consider loading them from a secret store and rotating the exposed ones.

-- First, drop the old unique constraint that doesn't account for username/password.
ALTER TABLE proxies DROP CONSTRAINT IF EXISTS proxies_host_port_protocol_key;

-- Add the new unique constraint that includes username and password.
-- This allows multiple entries for the same host:port with different credentials
-- (sessions).
-- ADD CONSTRAINT has no IF NOT EXISTS form, so the add is guarded with a catalog
-- lookup; without it, re-running this migration fails with "constraint already exists".
-- NOTE(review): a plain UNIQUE constraint treats NULLs as distinct, so if username or
-- password can be NULL, credential-less rows for the same host/port/protocol are no
-- longer deduplicated — confirm this is acceptable.
DO $$
BEGIN
    IF NOT EXISTS (
        SELECT 1
        FROM pg_constraint
        WHERE conname  = 'proxies_host_port_protocol_username_password_key'
          AND conrelid = 'proxies'::regclass
    ) THEN
        ALTER TABLE proxies
            ADD CONSTRAINT proxies_host_port_protocol_username_password_key
            UNIQUE (host, port, protocol, username, password);
    END IF;
END
$$;
-- Now insert all 500 proxies
INSERT INTO proxies (host, port, protocol, username, password, active, max_connections)
VALUES
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-4XRRPF1UQ', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-5UNGX7N7K', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-9PSKYP1GU', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-GZBKKYL2S', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-YHJHM0XZU', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ESDYQ34CJ', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-GAXUMFKQI', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-2FF66K4CI', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-SUYM0R49B', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-A8VHZMEFP', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-WNRLH6NXR', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-SPSB3IUX6', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-85N76UU5Q', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-189P3LH2F', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-47DQOAGWY', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-IBT0QO7M2', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-UPXOUOH8X', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-BFQ1PH75D', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-KNTFKRY1J', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-5L8IG6DZX', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-9YE13X0BA', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-6KBHCHF0I', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-CETHHFHZ6', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-A06J8ST3I', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-YFS93P1YR', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-RB74B3R6C', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-2JW27O3EU', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-KCUX84BL0', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-1A2KSG6HO', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-4QW8ILV0E', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-0Q09GH2VL', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-16BRXBCYC', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-9W02B3R4L', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-CVAEH76YT', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-CATOG0Q5I', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-F81625L74', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-DO4AVTPK4', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-SBZPXORD5', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-JA1AWOX03', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-0FUJTRSYT', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-CM1R2RSTB', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-EHPJZCK1S', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ZYLKORNAF', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-05A8BUD25', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-RHM1Q6O4M', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ES5VPCE6Z', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-P0JEGLP4O', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-OC4AX88D0', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-3BN54IEBV', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ABSC7S550', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-LNIJU6R2V', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-OYGQPPCOV', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-32YBOHQWR', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-7KGEMK4SL', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-FAW8T2EBW', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-GPV69KI9T', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-JPBHSN8M2', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-VZ1JQOF15', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-7DJXXPK1E', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-JXKQ7JVZ1', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-88Q5UQX3B', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-HAI5K0JFO', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-65SUKG0QH', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-1XFJETX1F', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-7ZNUCVCBW', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-O1DCK15LA', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-WLTEA65WB', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-KCHAFNK2P', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-6ODSZ6CUT', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-SZ8R2EFH4', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-9EPPYQREC', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-MPCBES7UI', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-FCCPL0XWZ', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-GJ23UYEGI', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-RQT80689I', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-TDQO2AP5E', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-D5Q5SEUEO', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-DZN4ZTENM', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-4HVQ33VK9', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-F1HJ7GPHA', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-RM708QD2Y', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-K36N27GM5', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-O73TS0DAE', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-54QXRWEA8', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-1P6LP0365', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-WMZ2ST34E', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-175UYF58T', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-W0HTK6F28', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-D5275CTIM', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-IH2IWVZOH', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-C4VFW7GSA', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-O9XGULSNA', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-PJ1W1P5L9', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-MQQU30KPC', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-BNPIBZTYV', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-7BNRCH922', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-5AZLU117B', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-3PPJ49VJC', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-FMC8CQO74', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-VCHW23CXJ', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-1S4749PCB', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-0T9DJFZPK', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-L0RMV65W3', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-FZ1ZZUQNA', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-6IFJD23DI', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ZKUEP5XM0', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-Z8KU62CLT', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-LO77J78X1', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-27FBKYRJ4', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-0TDQTESGW', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-IMKI89WQ1', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ANS65MIJS', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-O3T2OTT0Y', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-MWW6Z1QVM', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-TT47MX0BB', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-59CFKTM14', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-DOD61TVZN', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-RH9Y298WS', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-X98AATJ7B', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-C3UMES1W8', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-8O3J7G3PT', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-3K4OH78OJ', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-N4A3JMVL1', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-HK1SRLAC9', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-Y9VLJJXVU', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-KTTH7R0EC', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-JKVX01E8T', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-HW2VPAHJO', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-7WZ9UHBH8', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-JTKFK0CP7', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-G3F27NXG5', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-K7I2JWYSP', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-CTUU8UQ0T', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ISHMAP6RQ', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-LVWNZ1LHP', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-N5CQ1YG2Z', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-XL2XY2SLZ', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-UCRZVFIV1', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-VLGQFYNEL', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-YPCDM9O5Y', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-R6VA2S25E', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-4W8X8BBUL', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-5INDC8M80', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-Q8RKKOF29', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-B5ED3EFBC', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-8IC5ZXAX1', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-KCGM25D75', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-1MO06IRID', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-4QWGUGN6W', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-5T9M5KEHT', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-9KG7W7NZF', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-NYGN5R2CL', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-H61OXFCJ2', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-30WSQ4EFH', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-J36NG6MY2', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-TZU34ZA7A', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-LPWNYL74G', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-DDJTXOS4Z', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-HFOS4S185', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-2MLGIFL1M', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-CI5AHX0TC', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-WSXVCH1WN', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-8F0C3D06T', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-3YZR0664F', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-1L2VMWTM0', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-KPMCB57O7', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-N6QXQDZV3', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-35FAYFWDP', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-TVZWE2JR8', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-0WK86IKLF', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-8WBU6ESHJ', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-XGU6UNM01', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-86CXNEQZC', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-NZ4LFCHE3', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ZKB6D72RF', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-BKXNG77NS', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-3MJ332POD', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-SL9VEYNJ0', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-LY8KO43Z8', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-8KGF1XR1L', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-WT6FB54HW', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-7UQ9JMG5E', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-KX3L2040U', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-HL809F9WU', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-T9GU40ERH', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-I5O2NX3G9', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-RVOUYU3NO', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-2T3ETNUKS', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-SW0B93DZZ', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-PQ55UF3K6', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-VNRWWHHJB', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-8Q26FZ7EP', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ZWD9FA90J', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-QSGMQX3RZ', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-83NZ9MEAC', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-Q9QQ4AL37', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-QBE9KD60Z', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-NRNUXUO44', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-8F0XKQ9P8', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-095JV1CJN', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-WRRSIRUTZ', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-DTUD7IDQI', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ASCEAI9LD', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-YOUM7BJZH', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-PEG2ZH9J3', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-WAUW31F78', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-GIBZ6U7AQ', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-63TD9LFBG', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-0MH1N9MJB', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-YFP9RNQIK', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-SW4N5162D', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-53MWFB2MP', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-QWLUKBMIN', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-JHS6QIX9G', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-6R04HZ5UD', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-OUJLT31VN', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-6BMKW933S', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-R4GG84E4Q', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-00XAP630X', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-AK97MC2A0', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-NBS2GKGO5', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-NVFEWK4S5', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-MTV3WSYS1', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-JS8RM4JGW', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-6NL4QR1XN', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-4BUUQVSN6', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-56WEAAU3M', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-WCA56PFTF', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-TK1QAZP0B', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-SYZ5ADFXP', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-S3VLOUW6G', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-V2K1V1JWJ', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-MZ6VHV5PQ', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-DRZDQDPN3', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-231VVRYYA', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-06G3MC88G', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-WS52I2ZVD', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-3QTNQD55U', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-EX7ALECU3', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-DQN8TVQY6', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-FJT54OQFI', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-BLTYUF7QR', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-8DL2JXDSO', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-KBAOXIJ4Y', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ZYL28R5UW', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-NCRDA8LYB', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-BQYKXQLXU', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-PSHCS65MR', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-90Y1WFVYZ', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-4GG33NUPW', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-5Y0A79GED', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-RMZHTAD6J', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-XBSOJ5I36', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-AAJW53VNE', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-9NYSPSEL6', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-94WMY337S', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-35Y3BJQFW', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-R7WY3TMRC', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-RXAQVH0F3', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-EFQ2AVFSB', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-XPOUJSAVD', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-RSHPF5NTT', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-Z9402336V', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-OI36C5WOJ', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-XEOGV1LVS', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-QIQDXG9NC', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-9IY242GGT', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-PQTEUT52E', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-18NKI3WPS', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-34U3QAA49', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-S05TYKBBF', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-B4J8WCWDD', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-HR377WC28', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-PNRR7S1T2', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-UNR0N0KJ9', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-NARQQANBE', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-8PUL1MYUU', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-KJPCT1FP3', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-XGC80N0AM', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-8Y1JN8DH3', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-Y56M31T07', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-NHYHXQSV1', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-V30RZVG7L', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-CR6V2GSOU', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-VSAF5O0LJ', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-4F4BF2LFH', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ERSMQHXNX', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-Q0TFLZQWS', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ZXCS6SMHD', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-JHXYAUGRA', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-IT2XYWES2', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-22UCD94OG', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-VGDLQ3K35', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-O8AFL8RGX', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-9RBIZ8G9X', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-9JIU0SVBV', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-PWRBG0GWU', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ZME1MX12T', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-A7LWRKSJP', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-5XISX0HD4', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-5T6EXKD3Z', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-10ILV351B', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-FDULBZDIY', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-SFVR6I980', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-FKV8DCZGT', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ECRK3M3IZ', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-WMKSLOF39', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-HGE60O6AL', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-RGCWDJOT8', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-DESWK5KVN', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-RD593HJ92', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-XWNCAO39B', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-AQ4XGDLX8', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-2ZOVEA1PL', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-JF4FUX83X', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-CQ228GK3B', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-XCTMU9I7U', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-M3F37T22W', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ASZUXM9M9', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-CJVHX24WW', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-KZT4T898V', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-RI128R5TE', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-HCAG6X9MJ', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-XOQENWBP7', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-1LTQGM497', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ZLVZT4O1G', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-FTIXTXCIA', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-O2YE6QNHY', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-0JPDDBF47', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-H1FP1IFJI', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-FYBPBMY5B', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-F7BWDVC97', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-MLENB1LQ4', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-FT9YNU8UP', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-5W21Q2O5L', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-YM61QWPR3', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-XXFQJJHZM', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-H52YKCM9X', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-NT56ZNZ54', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-DRJY7BMB5', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-P6886RPXX', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-PBXW2EY5K', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-5VQCJTM36', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-NMM3GGM1J', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-1JQQ0CDSA', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-R89YI91K4', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-7L7L9MXOT', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-50Z7MXKZS', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-EGADRZTIB', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-1DR7H46H6', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-O28QZL994', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-EYTRWVERM', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-HAJZAUWJV', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-AGYO3AB89', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-V224329ZM', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-4YTMSFWYK', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-QP40RL1N1', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-CB1BVAMAH', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-9VGXUY02O', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-BCPVVKCZ3', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-VDC3CWZX7', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-7HWLI21FA', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-5QWIUJEFM', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-4C3PBMAIZ', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-3QC7DM7PH', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-A6R5G3FWV', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-3A6WDE12Z', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-0F2LZA9RU', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-XGBJXMXRX', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-5YOGR8PQ1', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-LPBFBUF3N', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-TUSPGR2AY', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-G05I8M2FQ', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-H5NDXJIAQ', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-X8FJL8WQZ', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-KIB2FQRUP', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-VNV0OYWR7', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-GKBPM3PB2', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-XVPI30KE7', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-Y3PRMJP51', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-KEPP5SBML', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-0PDUZ6QEQ', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-1GHWWFLLE', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-149S2TO8O', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-1ZB6FSIGE', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-VCRQTXDZL', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-645JVC3XL', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-2HJ00JBSR', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-7FZDG2W65', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-HD6ANE3LN', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-2HS1B1J8V', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-IHOHYMDF5', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ZYZMAFEKF', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-JO85WX5JE', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-RURJDCURW', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-FZC3BLXPJ', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-B0YR2LOZ1', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-6ZFP58ZRK', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-UMZDLHQ78', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-8A2IHDXY3', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-EDYEPWUMT', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-X3TM99R12', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-DLV0UTQ72', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-SFU0ZYIM0', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-YAJ6A66NH', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-X8CFU41AU', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-CJ3Z4WP32', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-UJBLRQKXA', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-T78R8EBGH', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-DDIH55GNZ', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-F1SSD4NWF', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-4BE55FKRD', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-BG2DFBL46', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-MKVMNR7W4', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-C3Z4JUGU5', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-NVP8EEEGQ', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-MQFWP2LU7', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-BH873JG6H', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-3D76651SM', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-KZ7V6KWMP', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-CD8NEJFJN', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-PWXE9L30H', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-1RT95F5LR', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-Q7CEEROE5', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-Q08APOAEG', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-NNKREGLXE', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-YQEG33MKX', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-VRD9G7H5K', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-68R86GQ1G', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-BXZUKQL2M', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-QM13UD73C', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-I7OOGJLNS', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-GXDBO1IQJ', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-JJZPRFMWN', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-DBTDFITGW', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-VYHL6ASIJ', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-F61NNU332', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-6Z9H72KMC', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-WVOONDMA9', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-CXTSTBXN3', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-CSMZLC921', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-3FTBSARZJ', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ESHGKBXLY', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-E0YLXW5H4', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-3QFI6UMWE', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-23VOWHO88', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-02Q9U5QCH', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-3POMNSMB0', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-NTT8OWUFQ', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-MT5XEHJWX', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ILDOY0PCQ', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-MN9HU4DGO', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-1YOPU7GLL', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-ZC5BM5MYB', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-UD3FXK3I9', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-LMDJOV52Y', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-N45X16BSL', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-1CBY3Z7QC', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-F0D3AO9E6', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-YQA8GUOD1', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-2EE999233', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-D6GD5WT2Y', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-7DFBMLTMY', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-J6TJKC6VJ', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-2AWQ3ZRF4', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-4KOVIF5W3', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-3489SXI1U', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-F37VKUHVE', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-GHBMAVCE4', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-W64U46547', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-1GUJV1MGQ', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-M13IOZVI9', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-TX7EVZN1Z', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-2PTS2ML8J', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-VTG83RVX7', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-2IOE6BR66', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-I68XZMR23', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-Q940UN6MU', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-8Y9NFR0N0', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-MYP341DZ8', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-WJ68VGKAZ', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-819MSDR9H', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-27CGND4VG', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-YYDOD47BF', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-YU7F6J8G5', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-HMY16WTCA', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-FPWEBRLG2', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-FGE79X0DE', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-551LMZ84R', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-UWMBDCTX4', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-BNHQXW9HY', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-WB0P5LCN6', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-Z4P9E1SVG', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-UVW2G9IRN', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-OO93WVLB0', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-NTRIK82TG', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-8TXV42S74', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-Z74LKL50G', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-QQEXNIPTR', false, 1),
('core-residential.evomi.com', 1000, 'http', 'kl8', 'ogh9U1Xe7Gzxzozo4rmP_country-US_session-WGK2VD34L', false, 1)
ON CONFLICT DO NOTHING;

View File

@@ -0,0 +1,81 @@
-- Migration: retry failed proxies automatically once a cooldown has elapsed.
-- A proxy that failed becomes eligible again after a configurable interval.

-- Records when each proxy most recently failed.
ALTER TABLE proxies
    ADD COLUMN IF NOT EXISTS last_failed_at TIMESTAMP;

-- Seed the retry tunables; rows whose key already exists are left untouched.
INSERT INTO settings (key, value, description)
VALUES
    ('proxy_retry_interval_hours', '4', 'Hours to wait before retrying a failed proxy'),
    ('proxy_max_failures_before_permanent', '10', 'Max failures before proxy is permanently disabled')
ON CONFLICT (key) DO NOTHING;
-- get_eligible_proxy_ids()
-- Returns the ids of proxies that may be used right now:
--   * every active proxy, plus
--   * inactive proxies whose last failure is older than the retry interval
--     and whose failure_count is still below the permanent-failure limit.
-- Both thresholds are read from the settings table
-- ('proxy_retry_interval_hours', 'proxy_max_failures_before_permanent') and
-- fall back to 4 hours / 10 failures when the row is missing or its value is NULL.
-- Rows are ordered active-first, then by fewest failures, then randomly.
CREATE OR REPLACE FUNCTION get_eligible_proxy_ids()
RETURNS TABLE(proxy_id INT) AS $$
DECLARE
    retry_hours INT;
    max_failures INT;
BEGIN
    -- COALESCE has to wrap the whole sub-select: with "SELECT COALESCE(...) INTO",
    -- a missing settings row yields no row at all, the variable stays NULL and
    -- the default is never applied (which would make every cooldown test NULL).
    retry_hours := COALESCE(
        (SELECT s.value::int FROM settings s WHERE s.key = 'proxy_retry_interval_hours'),
        4
    );
    -- Use the configured limit instead of a hard-coded 10 so this function
    -- agrees with auto_reenable_proxies().
    max_failures := COALESCE(
        (SELECT s.value::int FROM settings s WHERE s.key = 'proxy_max_failures_before_permanent'),
        10
    );

    RETURN QUERY
    SELECT p.id
    FROM proxies p
    WHERE p.active = true
       OR (
            p.active = false
            AND p.last_failed_at IS NOT NULL
            AND p.last_failed_at < NOW() - (retry_hours * INTERVAL '1 hour')
            AND p.failure_count < max_failures  -- Don't retry if too many failures
          )
    ORDER BY
        p.active DESC,        -- Prefer active proxies
        p.failure_count ASC,  -- Then prefer proxies with fewer failures
        RANDOM();
END;
$$ LANGUAGE plpgsql;
-- auto_reenable_proxies()
-- Marks inactive proxies as active again once they are past the retry
-- interval and still below the permanent-failure limit. Intended to be
-- called periodically by an external scheduler (this migration does not
-- register any job itself).
-- Returns the number of proxies that were re-enabled.
CREATE OR REPLACE FUNCTION auto_reenable_proxies()
RETURNS INT AS $$
DECLARE
    retry_hours INT;
    max_failures INT;
    reenabled_count INT;
BEGIN
    -- Read settings with real defaults. COALESCE wraps the sub-select so the
    -- default also applies when the settings row does not exist; with
    -- "SELECT COALESCE(...) INTO" a missing row would leave the variable NULL
    -- and the UPDATE below would never match anything.
    retry_hours := COALESCE(
        (SELECT s.value::int FROM settings s WHERE s.key = 'proxy_retry_interval_hours'),
        4
    );
    max_failures := COALESCE(
        (SELECT s.value::int FROM settings s WHERE s.key = 'proxy_max_failures_before_permanent'),
        10
    );

    -- Re-enable proxies that have cooled down
    UPDATE proxies
    SET active = true,
        updated_at = NOW()
    WHERE active = false
      AND last_failed_at IS NOT NULL
      AND last_failed_at < NOW() - (retry_hours * INTERVAL '1 hour')
      AND failure_count < max_failures;

    GET DIAGNOSTICS reenabled_count = ROW_COUNT;

    IF reenabled_count > 0 THEN
        RAISE NOTICE 'Auto-reenabled % proxies after % hour cooldown', reenabled_count, retry_hours;
    END IF;

    RETURN reenabled_count;
END;
$$ LANGUAGE plpgsql;
-- Composite index covering the columns the retry predicates filter on.
CREATE INDEX IF NOT EXISTS idx_proxies_retry
    ON proxies (active, last_failed_at, failure_count);

-- Catalog documentation for the new column and the re-enable function.
COMMENT ON COLUMN proxies.last_failed_at
    IS 'Timestamp of last failure - used for auto-retry logic';
COMMENT ON FUNCTION auto_reenable_proxies()
    IS 'Call periodically to re-enable failed proxies that have cooled down';

View File

@@ -0,0 +1,20 @@
-- Migration: trigram indexes so ILIKE product searches can use an index.
-- Covers store_products.name_raw, brand_name_raw and description.
-- NOTE(review): CREATE INDEX CONCURRENTLY cannot run inside a transaction
-- block - confirm the migration runner executes this file outside one.

-- pg_trgm provides the gin_trgm_ops operator class used below.
CREATE EXTENSION IF NOT EXISTS pg_trgm;

-- One GIN trigram index per searchable text column.
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_store_products_name_trgm
    ON store_products
    USING gin (name_raw gin_trgm_ops);

CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_store_products_brand_name_trgm
    ON store_products
    USING gin (brand_name_raw gin_trgm_ops);

CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_store_products_description_trgm
    ON store_products
    USING gin (description gin_trgm_ops);

-- Describe each index in the catalog.
COMMENT ON INDEX idx_store_products_name_trgm
    IS 'Trigram index for fast ILIKE searches on product name';
COMMENT ON INDEX idx_store_products_brand_name_trgm
    IS 'Trigram index for fast ILIKE searches on brand name';
COMMENT ON INDEX idx_store_products_description_trgm
    IS 'Trigram index for fast ILIKE searches on description';

View File

@@ -0,0 +1,11 @@
-- Migration: indexes for dashboard performance.
-- Targets the task listing query, which joins worker_registry and sorts by
-- creation time.

-- Partial index for the worker_registry join; unassigned tasks are skipped.
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_worker_tasks_worker_id
    ON worker_tasks (worker_id)
    WHERE worker_id IS NOT NULL;

-- Supports ORDER BY created_at DESC in the dashboard listing.
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_worker_tasks_created_at_desc
    ON worker_tasks (created_at DESC);

View File

@@ -0,0 +1,13 @@
-- Migration: stage tracking columns on dispensaries.
-- Needed by the stage checkpoint feature in the task handlers.
ALTER TABLE dispensaries
    ADD COLUMN IF NOT EXISTS consecutive_successes INTEGER DEFAULT 0,
    ADD COLUMN IF NOT EXISTS consecutive_failures INTEGER DEFAULT 0,
    ADD COLUMN IF NOT EXISTS last_successful_crawl_at TIMESTAMPTZ;

-- Partial indexes: only stores with a non-zero streak are indexed.
CREATE INDEX IF NOT EXISTS idx_dispensaries_consecutive_successes
    ON dispensaries (consecutive_successes)
    WHERE consecutive_successes > 0;

CREATE INDEX IF NOT EXISTS idx_dispensaries_consecutive_failures
    ON dispensaries (consecutive_failures)
    WHERE consecutive_failures > 0;

View File

@@ -0,0 +1,68 @@
-- Migration: 099_working_hours.sql
-- Description: Working hours profiles for natural traffic pattern simulation
-- Created: 2024-12-13

-- working_hours: one row per named activity profile. Each profile carries
-- per-hour weights (and optional per-weekday weights) used to mimic natural
-- traffic.
CREATE TABLE IF NOT EXISTS working_hours (
    id           SERIAL PRIMARY KEY,
    name         VARCHAR(50) UNIQUE NOT NULL,
    description  TEXT,

    -- Per-hour weights keyed by hour-of-day, e.g. {"0": 15, "1": 5, ..., "18": 100, ...}.
    -- Each value is the percent chance (0-100) of triggering activity in that hour.
    hour_weights JSONB NOT NULL,

    -- Per-weekday multipliers keyed by day number (0=Sunday, 6=Saturday).
    -- Optional; lets weekends differ from weekdays.
    dow_weights  JSONB DEFAULT '{"0": 90, "1": 100, "2": 100, "3": 100, "4": 100, "5": 110, "6": 95}',

    timezone     VARCHAR(50) DEFAULT 'America/Phoenix',
    enabled      BOOLEAN DEFAULT true,
    created_at   TIMESTAMPTZ DEFAULT NOW(),
    updated_at   TIMESTAMPTZ DEFAULT NOW()
);
-- Seed: Natural traffic pattern based on internet usage research
-- Optimized for cannabis dispensary browsing (lunch + after-work peaks)
-- Upsert keyed on the unique profile name: re-running the migration refreshes
-- hour_weights, description and updated_at of an existing 'natural_traffic'
-- row. The existing row's timezone (and dow_weights) are not overwritten.
INSERT INTO working_hours (name, description, timezone, hour_weights) VALUES (
'natural_traffic',
'Mimics natural user browsing patterns - peaks at lunch and 5-7 PM',
'America/Phoenix',
'{
"0": 15,
"1": 5,
"2": 5,
"3": 5,
"4": 5,
"5": 10,
"6": 20,
"7": 30,
"8": 35,
"9": 45,
"10": 50,
"11": 60,
"12": 75,
"13": 65,
"14": 60,
"15": 70,
"16": 80,
"17": 95,
"18": 100,
"19": 100,
"20": 90,
"21": 70,
"22": 45,
"23": 25
}'::jsonb
) ON CONFLICT (name) DO UPDATE SET
hour_weights = EXCLUDED.hour_weights,
description = EXCLUDED.description,
updated_at = NOW();
-- Lookup indexes on profile name and enabled flag.
-- NOTE(review): a UNIQUE constraint on name normally already provides an
-- index - confirm whether idx_working_hours_name is still needed.
CREATE INDEX IF NOT EXISTS idx_working_hours_name
    ON working_hours (name);
CREATE INDEX IF NOT EXISTS idx_working_hours_enabled
    ON working_hours (enabled);

-- Catalog documentation for the table and its weight columns.
COMMENT ON TABLE working_hours
    IS 'Activity profiles for natural traffic simulation. Hour weights are percent chance (0-100) to trigger activity.';
COMMENT ON COLUMN working_hours.hour_weights
    IS 'JSON object mapping hour (0-23) to percent chance (0-100). 100 = always run, 0 = never run.';
COMMENT ON COLUMN working_hours.dow_weights
    IS 'Optional day-of-week multipliers. 0=Sunday. Applied as (hour_weight * dow_weight / 100).';

View File

@@ -0,0 +1,19 @@
-- Migration: 100_worker_timezone.sql
-- Description: Add timezone column to worker_registry for working hours support
-- Created: 2024-12-13

-- timezone:         filled in from preflight IP geolocation (e.g., 'America/New_York').
-- working_hours_id: optional link to a specific working_hours profile;
--                   NULL means the default 'natural_traffic' profile applies.
ALTER TABLE worker_registry
    ADD COLUMN IF NOT EXISTS timezone VARCHAR(50),
    ADD COLUMN IF NOT EXISTS working_hours_id INTEGER REFERENCES working_hours(id);

-- Lets workers be grouped by timezone (useful for capacity planning).
CREATE INDEX IF NOT EXISTS idx_worker_registry_timezone
    ON worker_registry (timezone);

COMMENT ON COLUMN worker_registry.timezone
    IS 'IANA timezone from preflight IP geolocation (e.g., America/New_York)';
COMMENT ON COLUMN worker_registry.working_hours_id
    IS 'Reference to working_hours profile. NULL uses default natural_traffic.';

View File

@@ -0,0 +1,78 @@
-- Migration: 101_worker_preflight_timezone.sql
-- Description: Update update_worker_preflight to extract timezone from fingerprint
-- Created: 2024-12-13

-- update_worker_preflight
-- Stores the outcome of one preflight check ('curl' or 'http' transport) on
-- the worker's worker_registry row, then recomputes the combined
-- preflight_status from both transports. For 'http' the fingerprint JSON is
-- saved as well and its "detectedTimezone" key, when present, replaces the
-- worker's timezone. Any other transport value skips the per-transport
-- update but still refreshes the combined status.
CREATE OR REPLACE FUNCTION public.update_worker_preflight(
    p_worker_id character varying,
    p_transport character varying,
    p_status character varying,
    p_ip character varying DEFAULT NULL,
    p_response_ms integer DEFAULT NULL,
    p_error text DEFAULT NULL,
    p_fingerprint jsonb DEFAULT NULL
)
RETURNS void
LANGUAGE plpgsql
AS $function$
DECLARE
    v_curl VARCHAR(20);
    v_http VARCHAR(20);
    v_combined VARCHAR(20);
    v_detected_tz VARCHAR(50);
BEGIN
    -- Step 1: persist the result for the transport that was just tested.
    CASE p_transport
        WHEN 'curl' THEN
            UPDATE worker_registry
            SET preflight_curl_status = p_status,
                preflight_curl_at     = NOW(),
                preflight_curl_ms     = p_response_ms,
                preflight_curl_error  = p_error,
                curl_ip               = p_ip,
                updated_at            = NOW()
            WHERE worker_id = p_worker_id;

        WHEN 'http' THEN
            -- NULL when no fingerprint was passed or the key is absent.
            v_detected_tz := p_fingerprint->>'detectedTimezone';

            UPDATE worker_registry
            SET preflight_http_status = p_status,
                preflight_http_at     = NOW(),
                preflight_http_ms     = p_response_ms,
                preflight_http_error  = p_error,
                http_ip               = p_ip,
                -- Keep the stored values when nothing new was supplied.
                fingerprint_data      = COALESCE(p_fingerprint, fingerprint_data),
                timezone              = COALESCE(v_detected_tz, timezone),
                updated_at            = NOW()
            WHERE worker_id = p_worker_id;

        ELSE
            NULL;  -- unknown or NULL transport: nothing transport-specific to store
    END CASE;

    -- Step 2: read back both per-transport statuses.
    SELECT preflight_curl_status, preflight_http_status
    INTO v_curl, v_http
    FROM worker_registry
    WHERE worker_id = p_worker_id;

    -- Step 3: fold them into one status.
    -- both passed -> passed; one passed -> partial; any failed -> failed; else pending.
    v_combined := CASE
        WHEN v_curl = 'passed' AND v_http = 'passed' THEN 'passed'
        WHEN v_curl = 'passed' OR v_http = 'passed' THEN 'partial'
        WHEN v_curl = 'failed' OR v_http = 'failed' THEN 'failed'
        ELSE 'pending'
    END;

    UPDATE worker_registry
    SET preflight_status = v_combined,
        preflight_at     = NOW()
    WHERE worker_id = p_worker_id;
END;
$function$;

COMMENT ON FUNCTION update_worker_preflight(varchar, varchar, varchar, varchar, integer, text, jsonb)
    IS 'Updates worker preflight status and extracts timezone from fingerprint for working hours';

View File

@@ -0,0 +1,114 @@
-- Migration: 102_check_working_hours.sql
-- Description: Function to check if worker should be available based on working hours
-- Created: 2024-12-13

-- check_working_hours(p_worker_id, p_profile_name)
-- Decides, by a random roll against the profile weight for the worker's
-- current local hour, whether the worker should work this cycle.
--
-- Parameters:
--   p_worker_id    worker_registry.worker_id of the worker being checked
--   p_profile_name working_hours.name of the profile to use
--
-- Returns exactly one row:
--   is_available    TRUE if the roll passed (or no enabled profile was found)
--   current_hour    hour of day (0-23) in the timezone that was used
--   hour_weight     final weight = hour weight * day-of-week weight / 100
--   worker_timezone timezone that was actually used for the calculation
--   roll            random integer 0-99 (0 when no profile was found)
--   reason          human-readable explanation of the outcome
CREATE OR REPLACE FUNCTION check_working_hours(
    p_worker_id VARCHAR,
    p_profile_name VARCHAR DEFAULT 'natural_traffic'
)
RETURNS TABLE (
    is_available BOOLEAN,
    current_hour INTEGER,
    hour_weight INTEGER,
    worker_timezone VARCHAR,
    roll INTEGER,
    reason TEXT
)
LANGUAGE plpgsql
AS $function$
DECLARE
    v_timezone VARCHAR(50);
    v_local_now TIMESTAMP;
    v_hour INTEGER;
    v_weight INTEGER;
    v_dow INTEGER;
    v_dow_weight INTEGER;
    v_final_weight INTEGER;
    v_roll INTEGER;
    v_hour_weights JSONB;
    v_dow_weights JSONB;
BEGIN
    -- Get worker's timezone (from preflight)
    SELECT wr.timezone INTO v_timezone
    FROM worker_registry wr
    WHERE wr.worker_id = p_worker_id;

    -- Default to America/Phoenix if no timezone set (or worker unknown)
    v_timezone := COALESCE(v_timezone, 'America/Phoenix');

    -- Resolve "now" in the worker's timezone. The stored value is derived
    -- from external fingerprint data, so an unrecognised zone name is possible;
    -- AT TIME ZONE would then raise on every call for that worker. Fall back
    -- to the default zone instead and report the zone actually used.
    BEGIN
        v_local_now := NOW() AT TIME ZONE v_timezone;
    EXCEPTION WHEN invalid_parameter_value THEN
        v_timezone := 'America/Phoenix';
        v_local_now := NOW() AT TIME ZONE v_timezone;
    END;

    -- Current hour and day of week (0=Sunday) in that timezone
    v_hour := EXTRACT(HOUR FROM v_local_now)::INTEGER;
    v_dow := EXTRACT(DOW FROM v_local_now)::INTEGER;

    -- Get working hours profile (enabled profiles only)
    SELECT wh.hour_weights, wh.dow_weights
    INTO v_hour_weights, v_dow_weights
    FROM working_hours wh
    WHERE wh.name = p_profile_name AND wh.enabled = true;

    -- If profile not found or disabled, always available
    IF v_hour_weights IS NULL THEN
        RETURN QUERY SELECT
            TRUE::BOOLEAN,
            v_hour,
            100::INTEGER,
            v_timezone,
            0::INTEGER,
            'Profile not found or disabled - defaulting to available'::TEXT;
        RETURN;
    END IF;

    -- Get hour weight (default to 50 if hour not specified)
    v_weight := COALESCE((v_hour_weights->>v_hour::TEXT)::INTEGER, 50);

    -- Get day-of-week weight (default to 100)
    v_dow_weight := COALESCE((v_dow_weights->>v_dow::TEXT)::INTEGER, 100);

    -- Calculate final weight (hour_weight * dow_weight / 100), integer division
    v_final_weight := (v_weight * v_dow_weight / 100);

    -- Roll the dice (0-99)
    v_roll := floor(random() * 100)::INTEGER;

    -- Return result: available when the roll is strictly below the weight
    RETURN QUERY SELECT
        (v_roll < v_final_weight)::BOOLEAN AS is_available,
        v_hour AS current_hour,
        v_final_weight AS hour_weight,
        v_timezone AS worker_timezone,
        v_roll AS roll,
        CASE
            WHEN v_roll < v_final_weight THEN
                format('Available: rolled %s < %s%% threshold', v_roll, v_final_weight)
            ELSE
                format('Sleeping: rolled %s >= %s%% threshold', v_roll, v_final_weight)
        END AS reason;
END;
$function$;
-- is_worker_available(p_worker_id, p_profile_name)
-- Boolean-only wrapper around check_working_hours(): returns just its
-- is_available flag, treating a missing/NULL result as available.
CREATE OR REPLACE FUNCTION is_worker_available(
    p_worker_id VARCHAR,
    p_profile_name VARCHAR DEFAULT 'natural_traffic'
)
RETURNS BOOLEAN
LANGUAGE plpgsql
AS $function$
DECLARE
    v_available BOOLEAN;
BEGIN
    SELECT cwh.is_available
    INTO v_available
    FROM check_working_hours(p_worker_id, p_profile_name) AS cwh;

    IF v_available IS NULL THEN
        RETURN TRUE;
    END IF;

    RETURN v_available;
END;
$function$;
-- Catalog documentation for the two working-hours functions.
COMMENT ON FUNCTION check_working_hours(VARCHAR, VARCHAR)
    IS 'Check if worker should be available based on working hours profile. Returns detailed info.';
COMMENT ON FUNCTION is_worker_available(VARCHAR, VARCHAR)
    IS 'Simple boolean check if worker passes working hours probability roll.';

View File

@@ -0,0 +1,12 @@
-- Migration: 103_schedule_dispensary_id.sql
-- Description: Add dispensary_id to task_schedules for per-store schedules
-- Created: 2025-12-13

-- Optional link from a schedule to one specific store.
ALTER TABLE task_schedules
    ADD COLUMN IF NOT EXISTS dispensary_id INTEGER REFERENCES dispensaries(id);

-- Supports lookups of schedules by store.
CREATE INDEX IF NOT EXISTS idx_task_schedules_dispensary_id
    ON task_schedules (dispensary_id);

COMMENT ON COLUMN task_schedules.dispensary_id
    IS 'For single-store schedules. If set, only this store is refreshed. If NULL, uses state_code for all stores in state.';

View File

@@ -0,0 +1,25 @@
-- Migration 104: Add source tracking to worker_tasks
-- Purpose: record WHERE each task was created from (schedule vs API endpoint).
--
-- All automated task creation is supposed to be visible in task_schedules;
-- these columns help spot "phantom" tasks created outside the schedule system.

-- source:             label of the creation path
-- source_schedule_id: schedule that produced the task, when there is one
-- source_metadata:    request details (IP, user agent) kept for debugging
ALTER TABLE worker_tasks
    ADD COLUMN IF NOT EXISTS source VARCHAR(100),
    ADD COLUMN IF NOT EXISTS source_schedule_id INTEGER REFERENCES task_schedules(id),
    ADD COLUMN IF NOT EXISTS source_metadata JSONB;

-- Supports filtering tasks by their source.
CREATE INDEX IF NOT EXISTS idx_worker_tasks_source
    ON worker_tasks (source);

-- Catalog documentation, including the known source values.
COMMENT ON COLUMN worker_tasks.source
    IS 'Task creation source: schedule, api_run_now, api_crawl_state, api_batch_staggered, api_batch_az_stores, task_chain, manual';
COMMENT ON COLUMN worker_tasks.source_schedule_id
    IS 'ID of the schedule that created this task (if source=schedule or source=api_run_now)';
COMMENT ON COLUMN worker_tasks.source_metadata
    IS 'Request metadata: {ip, user_agent, endpoint, timestamp}';

View File

@@ -0,0 +1,25 @@
-- Migration 105: Add indexes for dashboard performance
-- Purpose: speed up the /dashboard and /national/summary endpoints.
--
-- Slow queries this targets:
--   1. COUNT(*) FROM store_product_snapshots WHERE captured_at >= NOW() - INTERVAL '24 hours'
--   2. National summary aggregate queries

-- Time-ordered snapshots, for the dashboard's recent-snapshot counts.
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_store_product_snapshots_captured_at
    ON store_product_snapshots (captured_at DESC);

-- Crawl traces by start time plus outcome, used by the dashboard.
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_crawl_traces_started_success
    ON crawl_orchestration_traces (started_at DESC, success);

-- Failed crawls only; a smaller index for dashboard alerts.
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_crawl_traces_recent_failures
    ON crawl_orchestration_traces (started_at DESC)
    WHERE success = false;

-- Per-dispensary brand aggregation for the national summary state metrics;
-- rows without a brand are left out of the index.
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_store_products_dispensary_brand
    ON store_products (dispensary_id, brand_name_raw)
    WHERE brand_name_raw IS NOT NULL;

View File

@@ -0,0 +1,10 @@
-- Migration: 106_rename_store_discovery_schedule.sql
-- Description: Rename store_discovery_dutchie to 'Store Discovery'
-- Created: 2025-12-13

-- Give the schedule a display-friendly name. Only name and updated_at are
-- touched; the platform='dutchie' field stays as-is for the UI badge.
UPDATE task_schedules
SET
    name       = 'Store Discovery',
    updated_at = NOW()
WHERE name = 'store_discovery_dutchie';

View File

@@ -0,0 +1,23 @@
-- Migration: 107_proxy_tracking.sql
-- Description: Add proxy tracking columns to worker_tasks for geo-targeting visibility
-- Created: 2025-12-13

-- Which proxy served a task: its IP, the geo target requested, and where the
-- proxy came from.
ALTER TABLE worker_tasks
    ADD COLUMN IF NOT EXISTS proxy_ip VARCHAR(45),
    ADD COLUMN IF NOT EXISTS proxy_geo VARCHAR(100),
    ADD COLUMN IF NOT EXISTS proxy_source VARCHAR(10);

-- Catalog documentation
COMMENT ON COLUMN worker_tasks.proxy_ip
    IS 'IP address of proxy used for this task';
COMMENT ON COLUMN worker_tasks.proxy_geo
    IS 'Geo target used (e.g., "arizona", "phoenix, arizona")';
COMMENT ON COLUMN worker_tasks.proxy_source
    IS 'Source of proxy: "api" (Evomi dynamic) or "static" (fallback table)';

-- Partial index for proxy analysis; tasks without a recorded proxy are skipped.
CREATE INDEX IF NOT EXISTS idx_worker_tasks_proxy_ip
    ON worker_tasks (proxy_ip)
    WHERE proxy_ip IS NOT NULL;

View File

@@ -0,0 +1,231 @@
-- Migration: 108_worker_geo_sessions.sql
-- Description: Add geo session tracking to worker_registry for state-based task assignment
-- Created: 2025-12-13

-- Geo session state kept per worker. A worker first qualifies for a geo
-- (state, optionally city) and afterwards only claims tasks for that geo.
ALTER TABLE worker_registry
    ADD COLUMN IF NOT EXISTS current_state VARCHAR(2),
    ADD COLUMN IF NOT EXISTS current_city VARCHAR(100),
    ADD COLUMN IF NOT EXISTS geo_session_started_at TIMESTAMPTZ,
    ADD COLUMN IF NOT EXISTS session_task_count INT DEFAULT 0,
    ADD COLUMN IF NOT EXISTS session_max_tasks INT DEFAULT 7,
    ADD COLUMN IF NOT EXISTS proxy_geo VARCHAR(100);

-- Catalog documentation
COMMENT ON COLUMN worker_registry.current_state
    IS 'Worker''s current geo assignment (US state code, e.g., AZ)';
COMMENT ON COLUMN worker_registry.current_city
    IS 'Worker''s current city assignment (optional, e.g., phoenix)';
COMMENT ON COLUMN worker_registry.geo_session_started_at
    IS 'When worker''s current geo session started';
COMMENT ON COLUMN worker_registry.session_task_count
    IS 'Number of tasks completed in current geo session';
COMMENT ON COLUMN worker_registry.session_max_tasks
    IS 'Max tasks per geo session before re-qualification (default 7)';
COMMENT ON COLUMN worker_registry.proxy_geo
    IS 'Geo target string used for proxy (e.g., "arizona" or "phoenix, arizona")';

-- Partial index for finding the workers assigned to a state.
CREATE INDEX IF NOT EXISTS idx_worker_registry_current_state
    ON worker_registry (current_state)
    WHERE current_state IS NOT NULL;
-- ============================================================
-- UPDATED claim_task FUNCTION
-- Now filters by worker's geo session state
--
-- Atomically claims the highest-priority pending task of role p_role whose
-- dispensary is in the worker's current geo-session state.
--
-- Parameters:
--   p_role        task role to claim
--   p_worker_id   worker_registry.worker_id of the claiming worker
--   p_curl_passed whether the worker passed the curl preflight
--   p_http_passed whether the worker passed the http preflight
--
-- Returns the claimed worker_tasks row. Returns NULL when the worker has no
-- geo session, the session is older than 60 minutes, or the session task
-- quota is used up. When the session is valid but no task matches, a row
-- whose fields are all NULL comes back (check .id IS NOT NULL).
-- ============================================================
CREATE OR REPLACE FUNCTION claim_task(
    p_role VARCHAR(50),
    p_worker_id VARCHAR(100),
    p_curl_passed BOOLEAN DEFAULT TRUE,
    p_http_passed BOOLEAN DEFAULT FALSE
) RETURNS worker_tasks AS $$
DECLARE
    claimed_task worker_tasks;
    worker_state VARCHAR(2);
    session_valid BOOLEAN;
    session_tasks INT;
    max_tasks INT;
BEGIN
    -- Get worker's current geo session info
    SELECT
        current_state,
        session_task_count,
        session_max_tasks,
        (geo_session_started_at IS NOT NULL AND geo_session_started_at > NOW() - INTERVAL '60 minutes')
    INTO worker_state, session_tasks, max_tasks, session_valid
    FROM worker_registry
    WHERE worker_id = p_worker_id;

    -- If no valid geo session, or session exhausted, worker can't claim tasks
    -- Worker must re-qualify first.
    -- A NULL task count is treated as 0 so the quota comparison never goes NULL.
    IF worker_state IS NULL
       OR NOT session_valid
       OR COALESCE(session_tasks, 0) >= COALESCE(max_tasks, 7) THEN
        RETURN NULL;
    END IF;

    -- Claim task matching worker's state
    UPDATE worker_tasks
    SET
        status = 'claimed',
        worker_id = p_worker_id,
        claimed_at = NOW(),
        updated_at = NOW()
    WHERE id = (
        SELECT wt.id FROM worker_tasks wt
        JOIN dispensaries d ON wt.dispensary_id = d.id
        WHERE wt.role = p_role
          AND wt.status = 'pending'
          AND (wt.scheduled_for IS NULL OR wt.scheduled_for <= NOW())
          -- GEO FILTER: Task's dispensary must match worker's state
          AND d.state = worker_state
          -- Method compatibility: worker must have passed the required preflight
          AND (
              wt.method IS NULL -- No preference, any worker can claim
              OR (wt.method = 'curl' AND p_curl_passed = TRUE)
              OR (wt.method = 'http' AND p_http_passed = TRUE)
          )
          -- Exclude stores that already have an active task
          -- NOTE(review): because of the inner join on dispensaries, tasks with
          -- a NULL dispensary_id never reach this point - confirm that such
          -- tasks are meant to be unclaimable here.
          AND (wt.dispensary_id IS NULL OR wt.dispensary_id NOT IN (
              SELECT dispensary_id FROM worker_tasks
              WHERE status IN ('claimed', 'running')
                AND dispensary_id IS NOT NULL
          ))
        ORDER BY wt.priority DESC, wt.created_at ASC
        LIMIT 1
        -- Lock only the task row. A bare FOR UPDATE would also row-lock the
        -- joined dispensaries row, blocking unrelated updates to that store and
        -- making SKIP LOCKED skip tasks merely because the store row is locked.
        FOR UPDATE OF wt SKIP LOCKED
    )
    RETURNING * INTO claimed_task;

    -- If task claimed, increment session task count
    -- Note: Use claimed_task.id IS NOT NULL (not claimed_task IS NOT NULL)
    -- PostgreSQL composite type NULL check quirk
    IF claimed_task.id IS NOT NULL THEN
        UPDATE worker_registry
        SET session_task_count = COALESCE(session_task_count, 0) + 1
        WHERE worker_id = p_worker_id;
    END IF;

    RETURN claimed_task;
END;
$$ LANGUAGE plpgsql;
-- ============================================================
-- FUNCTION: assign_worker_geo
-- Assigns a geo session to a worker based on demand
-- Returns the assigned state, or NULL if no tasks available
--
-- The chosen state is the one with the most pending tasks; ties are broken
-- in favour of the state with the fewest active workers holding an unexpired
-- (60 minute) geo session. Assigning starts a fresh session: the city is
-- cleared, the start time is set to now and the task counter is reset to 0.
-- ============================================================
CREATE OR REPLACE FUNCTION assign_worker_geo(
    p_worker_id VARCHAR(100)
) RETURNS VARCHAR(2) AS $$
DECLARE
    assigned_state VARCHAR(2);
BEGIN
    -- Find state with highest demand (pending tasks) and lowest coverage (workers)
    SELECT d.state INTO assigned_state
    FROM dispensaries d
    JOIN worker_tasks wt ON wt.dispensary_id = d.id
    LEFT JOIN worker_registry wr ON wr.current_state = d.state
        AND wr.status = 'active'
        AND wr.geo_session_started_at > NOW() - INTERVAL '60 minutes'
    WHERE wt.status = 'pending'
      AND d.platform_dispensary_id IS NOT NULL
      -- A NULL state can never be assigned; without this filter a NULL group
      -- could rank first and hide real demand in other states.
      AND d.state IS NOT NULL
    GROUP BY d.state
    ORDER BY
        -- DISTINCT is required: the worker join repeats every task row once per
        -- matching worker, so a plain COUNT(wt.id) would be multiplied by the
        -- number of workers already covering the state.
        COUNT(DISTINCT wt.id) DESC,        -- Most pending tasks first
        COUNT(DISTINCT wr.worker_id) ASC   -- Fewest workers second
    LIMIT 1;

    -- If no pending tasks anywhere, return NULL
    IF assigned_state IS NULL THEN
        RETURN NULL;
    END IF;

    -- Assign the state to this worker
    UPDATE worker_registry
    SET
        current_state = assigned_state,
        current_city = NULL, -- City assigned later if available
        geo_session_started_at = NOW(),
        session_task_count = 0
    WHERE worker_id = p_worker_id;

    RETURN assigned_state;
END;
$$ LANGUAGE plpgsql;
-- ============================================================
-- FUNCTION: check_worker_geo_session
-- Reports the state of a worker's current geo session.
--
-- Yields one row for a known worker (none for an unknown worker_id):
--   current_state / current_city  the worker's geo assignment
--   session_valid                 TRUE while the session is under 60 minutes old
--   session_tasks_remaining       quota left in this session, never below 0
--   session_minutes_remaining     minutes until the session expires, never below 0
-- ============================================================
CREATE OR REPLACE FUNCTION check_worker_geo_session(
    p_worker_id VARCHAR(100)
) RETURNS TABLE (
    current_state VARCHAR(2),
    current_city VARCHAR(100),
    session_valid BOOLEAN,
    session_tasks_remaining INT,
    session_minutes_remaining INT
) AS $$
BEGIN
    RETURN QUERY
    SELECT
        wr.current_state,
        wr.current_city,
        -- A session that never started (NULL start time) counts as not valid.
        COALESCE(wr.geo_session_started_at > NOW() - INTERVAL '60 minutes', FALSE)
            AS session_valid,
        GREATEST(0, wr.session_max_tasks - wr.session_task_count)
            AS session_tasks_remaining,
        GREATEST(
            0,
            EXTRACT(EPOCH FROM (wr.geo_session_started_at + INTERVAL '60 minutes' - NOW())) / 60
        )::INT
            AS session_minutes_remaining
    FROM worker_registry AS wr
    WHERE wr.worker_id = p_worker_id;
END;
$$ LANGUAGE plpgsql;
-- worker_state_capacity: per-state comparison of pending work against the
-- remaining capacity of the workers currently assigned to that state.
-- The list of states is derived from dispensaries, so no separate states
-- lookup table is required.
CREATE OR REPLACE VIEW worker_state_capacity AS
WITH crawlable_states AS (
    -- Every distinct state that has a dispensary with a platform ID
    SELECT DISTINCT state AS code
    FROM dispensaries
    WHERE platform_dispensary_id IS NOT NULL
      AND state IS NOT NULL
),
pending_per_state AS (
    -- Pending tasks grouped by the state of their dispensary
    SELECT d.state, COUNT(*) AS pending
    FROM worker_tasks t
    JOIN dispensaries d ON d.id = t.dispensary_id
    WHERE d.state IS NOT NULL
      AND t.status = 'pending'
    GROUP BY d.state
),
workers_per_state AS (
    -- Active or idle workers that passed the http preflight and hold a geo
    -- session younger than 60 minutes, with their unused task quota summed
    SELECT
        current_state,
        COUNT(*) AS workers,
        SUM(GREATEST(0, session_max_tasks - session_task_count)) AS capacity
    FROM worker_registry
    WHERE current_state IS NOT NULL
      AND status IN ('active', 'idle')
      AND preflight_http_status = 'passed'
      AND geo_session_started_at > NOW() - INTERVAL '60 minutes'
    GROUP BY current_state
),
per_state AS (
    -- One row per state with missing figures defaulted to zero
    SELECT
        s.code,
        COALESCE(p.pending, 0)  AS pending,
        COALESCE(w.workers, 0)  AS workers,
        COALESCE(w.capacity, 0) AS capacity
    FROM crawlable_states s
    LEFT JOIN pending_per_state p ON p.state = s.code
    LEFT JOIN workers_per_state w ON w.current_state = s.code
)
SELECT
    code     AS state,
    code     AS state_name,  -- the code doubles as the name; there is no states lookup table
    pending  AS pending_tasks,
    workers  AS workers_on_state,
    capacity AS remaining_capacity,
    CASE
        WHEN capacity = 0 AND pending > 0 THEN 'no_coverage'
        WHEN capacity < pending THEN 'thin'
        ELSE 'ok'
    END      AS status
FROM per_state
ORDER BY pending DESC;

View File

@@ -0,0 +1,354 @@
-- Migration: 109_worker_identity_pool.sql
-- Description: Identity pool for diverse IP/fingerprint rotation
-- Created: 2025-12-14
--
-- Workers claim identities (IP + fingerprint) from pool.
-- Each identity used for 3-5 tasks, then cools down 2-3 hours.
-- This creates natural browsing patterns - same person doesn't hit 20 stores.
-- ============================================================
-- IDENTITY POOL TABLE
-- ============================================================
-- One row per reusable identity: a proxy session (which determines the IP)
-- paired with a browser fingerprint and pinned to a state/city.
-- claim_identity() marks a row active for a worker; release_identity() returns
-- it to the pool and sets cooldown_until.
CREATE TABLE IF NOT EXISTS worker_identities (
id SERIAL PRIMARY KEY,
-- Evomi session controls the IP
session_id VARCHAR(100) UNIQUE NOT NULL,
-- Detected IP from this session
ip_address INET,
-- Geo targeting
state_code VARCHAR(2) NOT NULL,
city VARCHAR(100), -- City-level targeting for diversity (NULL = state-level only)
-- Fingerprint data (UA, timezone, locale, device, etc.)
fingerprint JSONB NOT NULL,
-- Timestamps
created_at TIMESTAMPTZ DEFAULT NOW(),
last_used_at TIMESTAMPTZ, -- Set by claim_identity(); NULL until first claim
cooldown_until TIMESTAMPTZ, -- Can't reuse until this time (NULL = no cooldown)
-- Usage stats
total_tasks_completed INT DEFAULT 0,
total_sessions INT DEFAULT 1, -- How many times this identity has been used
-- Current state
is_active BOOLEAN DEFAULT FALSE, -- Currently claimed by a worker
active_worker_id VARCHAR(100), -- Which worker has it
-- Health tracking
consecutive_failures INT DEFAULT 0, -- Reset to 0 by release_identity() on a successful release
is_healthy BOOLEAN DEFAULT TRUE -- Set false if IP gets blocked
);
-- Indexes for efficient lookups
-- Geo lookup by state/city
CREATE INDEX IF NOT EXISTS idx_worker_identities_state_city
ON worker_identities(state_code, city);
-- Partial index over healthy identities; covers the columns filtered on when claiming
CREATE INDEX IF NOT EXISTS idx_worker_identities_available
ON worker_identities(state_code, is_active, cooldown_until)
WHERE is_healthy = TRUE;
-- Partial index over healthy, unclaimed identities ordered by cooldown expiry
CREATE INDEX IF NOT EXISTS idx_worker_identities_cooldown
ON worker_identities(cooldown_until)
WHERE is_healthy = TRUE AND is_active = FALSE;
-- ============================================================
-- METRO AREA MAPPING
-- For fallback when exact city not available
-- ============================================================
-- Maps each (state, city) pair to a named metro area.
-- A city belongs to at most one metro (enforced by the UNIQUE constraint).
CREATE TABLE IF NOT EXISTS metro_areas (
id SERIAL PRIMARY KEY,
metro_name VARCHAR(100) NOT NULL,
state_code VARCHAR(2) NOT NULL,
city VARCHAR(100) NOT NULL,
is_primary BOOLEAN DEFAULT FALSE, -- Primary city of the metro
UNIQUE(state_code, city)
);
-- Seed Arizona metro areas (idempotent: existing (state_code, city) rows are left untouched)
INSERT INTO metro_areas (metro_name, state_code, city, is_primary)
VALUES
    -- Phoenix Metro Area
    ('Phoenix Metro', 'AZ', 'Phoenix', TRUE),
    ('Phoenix Metro', 'AZ', 'Mesa', FALSE),
    ('Phoenix Metro', 'AZ', 'Glendale', FALSE),
    ('Phoenix Metro', 'AZ', 'Tempe', FALSE),
    ('Phoenix Metro', 'AZ', 'Scottsdale', FALSE),
    ('Phoenix Metro', 'AZ', 'Chandler', FALSE),
    ('Phoenix Metro', 'AZ', 'Peoria', FALSE),
    ('Phoenix Metro', 'AZ', 'El Mirage', FALSE),
    ('Phoenix Metro', 'AZ', 'Tolleson', FALSE),
    ('Phoenix Metro', 'AZ', 'Sun City', FALSE),
    ('Phoenix Metro', 'AZ', 'Apache Junction', FALSE),
    ('Phoenix Metro', 'AZ', 'Cave Creek', FALSE),
    ('Phoenix Metro', 'AZ', 'Gilbert', FALSE),
    ('Phoenix Metro', 'AZ', 'Surprise', FALSE),
    ('Phoenix Metro', 'AZ', 'Avondale', FALSE),
    ('Phoenix Metro', 'AZ', 'Goodyear', FALSE),
    ('Phoenix Metro', 'AZ', 'Buckeye', FALSE),
    ('Phoenix Metro', 'AZ', 'Queen Creek', FALSE),
    -- Tucson Metro Area
    ('Tucson Metro', 'AZ', 'Tucson', TRUE),
    ('Tucson Metro', 'AZ', 'Oro Valley', FALSE),
    ('Tucson Metro', 'AZ', 'Marana', FALSE),
    ('Tucson Metro', 'AZ', 'Sahuarita', FALSE),
    ('Tucson Metro', 'AZ', 'South Tucson', FALSE),
    -- Flagstaff Area
    ('Flagstaff Area', 'AZ', 'Flagstaff', TRUE),
    ('Flagstaff Area', 'AZ', 'Sedona', FALSE),
    -- Prescott Area
    ('Prescott Area', 'AZ', 'Prescott', TRUE),
    ('Prescott Area', 'AZ', 'Prescott Valley', FALSE)
ON CONFLICT (state_code, city) DO NOTHING;
-- ============================================================
-- FUNCTION: claim_identity
-- Claims an available identity for a worker.
-- Lookup order: exact city -> any city in the same metro area -> any identity in the state.
--
-- Parameters:
--   p_worker_id  - worker taking ownership (stored in active_worker_id)
--   p_state_code - state the identity must belong to
--   p_city       - optional city; when NULL only the state-wide lookup runs
--
-- Returns the claimed worker_identities row (is_active = TRUE, last_used_at = NOW()).
-- When nothing is available the returned row is NULL; this function does NOT
-- create identities, so the caller is expected to create a new one in that case.
--
-- An identity is "available" when it is not active, is healthy, and its
-- cooldown is unset or already expired. Candidates are taken least-recently
-- used first, and rows locked by concurrent claimers are skipped.
-- ============================================================
CREATE OR REPLACE FUNCTION claim_identity(
    p_worker_id VARCHAR(100),
    p_state_code VARCHAR(2),
    p_city VARCHAR(100) DEFAULT NULL
) RETURNS worker_identities AS $$
DECLARE
    claimed_identity worker_identities;
    metro_name_val VARCHAR(100);
BEGIN
    IF p_city IS NOT NULL THEN
        -- 1. Try exact city match
        UPDATE worker_identities
        SET is_active = TRUE,
            active_worker_id = p_worker_id,
            last_used_at = NOW()
        WHERE id = (
            SELECT id FROM worker_identities
            WHERE state_code = p_state_code
              AND city = p_city
              AND is_active = FALSE
              AND is_healthy = TRUE
              AND (cooldown_until IS NULL OR cooldown_until < NOW())
            ORDER BY last_used_at ASC NULLS FIRST
            LIMIT 1
            FOR UPDATE SKIP LOCKED
        )
        RETURNING * INTO claimed_identity;

        IF claimed_identity.id IS NOT NULL THEN
            RETURN claimed_identity;
        END IF;

        -- 2. Metro area fallback: find the metro this city belongs to
        SELECT ma.metro_name INTO metro_name_val
        FROM metro_areas ma
        WHERE ma.state_code = p_state_code AND ma.city = p_city;

        IF metro_name_val IS NOT NULL THEN
            -- Try any city in the same metro
            UPDATE worker_identities wi
            SET is_active = TRUE,
                active_worker_id = p_worker_id,
                last_used_at = NOW()
            WHERE wi.id = (
                SELECT wi2.id FROM worker_identities wi2
                JOIN metro_areas ma ON wi2.city = ma.city AND wi2.state_code = ma.state_code
                WHERE ma.metro_name = metro_name_val
                  AND wi2.is_active = FALSE
                  AND wi2.is_healthy = TRUE
                  AND (wi2.cooldown_until IS NULL OR wi2.cooldown_until < NOW())
                ORDER BY wi2.last_used_at ASC NULLS FIRST
                LIMIT 1
                -- Lock only the identity row. A bare FOR UPDATE would also lock the
                -- joined metro_areas row, and SKIP LOCKED would then make concurrent
                -- claimers skip every identity in that city.
                FOR UPDATE OF wi2 SKIP LOCKED
            )
            RETURNING * INTO claimed_identity;

            IF claimed_identity.id IS NOT NULL THEN
                RETURN claimed_identity;
            END IF;
        END IF;
    END IF;

    -- 3. Try any identity in the state
    UPDATE worker_identities
    SET is_active = TRUE,
        active_worker_id = p_worker_id,
        last_used_at = NOW()
    WHERE id = (
        SELECT id FROM worker_identities
        WHERE state_code = p_state_code
          AND is_active = FALSE
          AND is_healthy = TRUE
          AND (cooldown_until IS NULL OR cooldown_until < NOW())
        ORDER BY last_used_at ASC NULLS FIRST
        LIMIT 1
        FOR UPDATE SKIP LOCKED
    )
    RETURNING * INTO claimed_identity;

    -- Return whatever we got (NULL row if nothing available - caller should create new)
    RETURN claimed_identity;
END;
$$ LANGUAGE plpgsql;
-- ============================================================
-- FUNCTION: release_identity
-- Releases an identity back to the pool and starts its cooldown.
--
-- Parameters:
--   p_identity_id     - worker_identities.id to release
--   p_tasks_completed - tasks finished during this session (added to the running total)
--   p_failed          - TRUE when the session ended in failure
--
-- Effects on the row:
--   * is_active / active_worker_id are cleared
--   * cooldown_until is set to a random point 2-3 hours from now
--   * consecutive_failures is incremented on failure, reset to 0 on success
--   * is_healthy becomes FALSE once the UPDATED failure streak reaches 3
-- ============================================================
CREATE OR REPLACE FUNCTION release_identity(
    p_identity_id INT,
    p_tasks_completed INT DEFAULT 0,
    p_failed BOOLEAN DEFAULT FALSE
) RETURNS VOID AS $$
DECLARE
    cooldown_hours FLOAT;
BEGIN
    -- Random cooldown between 2-3 hours for diversity
    cooldown_hours := 2 + random(); -- 2.0 to 3.0 hours

    UPDATE worker_identities
    SET is_active = FALSE,
        active_worker_id = NULL,
        total_tasks_completed = COALESCE(total_tasks_completed, 0) + COALESCE(p_tasks_completed, 0),
        total_sessions = COALESCE(total_sessions, 0) + 1,
        cooldown_until = NOW() + cooldown_hours * INTERVAL '1 hour',
        consecutive_failures = CASE
            WHEN p_failed THEN COALESCE(consecutive_failures, 0) + 1
            ELSE 0
        END,
        -- Expressions in SET see the row's OLD values, so the new streak has to
        -- be recomputed here; testing the bare column would lag one failure behind.
        is_healthy = (CASE
            WHEN p_failed THEN COALESCE(consecutive_failures, 0) + 1
            ELSE 0
        END) < 3
    WHERE id = p_identity_id;
END;
$$ LANGUAGE plpgsql;
-- ============================================================
-- FUNCTION: get_pending_tasks_by_geo
-- Lists the state/city groups with the largest pending-task backlog, together
-- with the number of identities that could be claimed for each group right now.
--
-- Parameters:
--   p_limit - maximum number of state/city groups returned
--
-- An identity counts for a group when it is in the same state, its city either
-- matches or is NULL, and it is inactive, healthy and not cooling down.
-- ============================================================
CREATE OR REPLACE FUNCTION get_pending_tasks_by_geo(
    p_limit INT DEFAULT 10
) RETURNS TABLE (
    state_code VARCHAR(2),
    city VARCHAR(100),
    pending_count BIGINT,
    available_identities BIGINT
) AS $$
BEGIN
    RETURN QUERY
    WITH geo_backlog AS (
        -- Pending tasks grouped by the geo of their dispensary
        SELECT
            disp.state AS geo_state,
            disp.city AS geo_city,
            COUNT(wt.id) AS backlog
        FROM dispensaries disp
        JOIN worker_tasks wt ON wt.dispensary_id = disp.id
        WHERE disp.state IS NOT NULL
          AND wt.status = 'pending'
        GROUP BY disp.state, disp.city
    )
    SELECT
        gb.geo_state AS state_code,
        gb.geo_city,
        gb.backlog AS pending_count,
        (
            SELECT COUNT(*)
            FROM worker_identities wi
            WHERE wi.state_code = gb.geo_state
              AND (wi.city = gb.geo_city OR wi.city IS NULL)
              AND wi.is_active = FALSE
              AND wi.is_healthy = TRUE
              AND (wi.cooldown_until IS NULL OR wi.cooldown_until < NOW())
        ) AS available_identities
    FROM geo_backlog gb
    ORDER BY gb.backlog DESC
    LIMIT p_limit;
END;
$$ LANGUAGE plpgsql;
-- ============================================================
-- FUNCTION: get_tasks_for_identity
-- Lists pending tasks in a state that suit an identity located in p_city.
--
-- Matching rules:
--   * tasks whose dispensary is in p_city always qualify
--   * if p_city belongs to a metro area, dispensaries in any city of that metro qualify
--   * if p_city has no metro area mapping, every dispensary in the state qualifies
--
-- Ordering: exact-city tasks first, then by priority (highest first), then oldest first.
-- ============================================================
CREATE OR REPLACE FUNCTION get_tasks_for_identity(
    p_state_code VARCHAR(2),
    p_city VARCHAR(100),
    p_limit INT DEFAULT 5
) RETURNS TABLE (
    task_id INT,
    dispensary_id INT,
    dispensary_name VARCHAR(255),
    dispensary_city VARCHAR(100),
    role VARCHAR(50)
) AS $$
DECLARE
    v_metro VARCHAR(100);
BEGIN
    -- Resolve the metro area (if any) that contains the identity's city
    SELECT ma.metro_name
    INTO v_metro
    FROM metro_areas ma
    WHERE ma.city = p_city
      AND ma.state_code = p_state_code;

    RETURN QUERY
    SELECT
        wt.id AS task_id,
        disp.id AS dispensary_id,
        disp.name AS dispensary_name,
        disp.city AS dispensary_city,
        wt.role
    FROM dispensaries disp
    JOIN worker_tasks wt ON wt.dispensary_id = disp.id
    WHERE disp.state = p_state_code
      AND wt.status = 'pending'
      AND (
          -- No metro mapping: anything in the state
          v_metro IS NULL
          -- Exact city match
          OR disp.city = p_city
          -- Same metro area
          OR disp.city IN (
              SELECT ma.city FROM metro_areas ma WHERE ma.metro_name = v_metro
          )
      )
    ORDER BY
        (disp.city = p_city) IS NOT TRUE, -- FALSE sorts first, so exact-city rows lead
        wt.priority DESC,
        wt.created_at ASC
    LIMIT p_limit;
END;
$$ LANGUAGE plpgsql;
-- ============================================================
-- VIEW: identity_pool_status
-- Per state/city summary of the identity pool: how many identities exist,
-- how many are claimed, claimable, cooling down or unhealthy, plus task totals.
-- ============================================================
CREATE OR REPLACE VIEW identity_pool_status AS
SELECT
    wi.state_code,
    wi.city,
    COUNT(*) AS total_identities,
    COUNT(*) FILTER (WHERE wi.is_active = TRUE) AS active,
    COUNT(*) FILTER (
        WHERE wi.is_active = FALSE
          AND wi.is_healthy = TRUE
          AND COALESCE(wi.cooldown_until < NOW(), TRUE)
    ) AS available,
    COUNT(*) FILTER (
        WHERE wi.cooldown_until > NOW()
          AND wi.is_active = FALSE
    ) AS cooling_down,
    COUNT(*) FILTER (WHERE wi.is_healthy = FALSE) AS unhealthy,
    SUM(wi.total_tasks_completed) AS total_tasks,
    AVG(wi.total_tasks_completed)::INT AS avg_tasks_per_identity
FROM worker_identities wi
GROUP BY wi.state_code, wi.city
ORDER BY wi.state_code, wi.city;
-- ============================================================
-- Comments
-- ============================================================
-- Catalog descriptions (visible via \d+ / \df+ in psql) for the objects created above
COMMENT ON TABLE worker_identities IS 'Pool of IP/fingerprint identities for worker rotation';
COMMENT ON TABLE metro_areas IS 'City groupings for geographic fallback matching';
COMMENT ON FUNCTION claim_identity IS 'Claim an available identity: exact city -> metro -> state -> NULL (create new)';
COMMENT ON FUNCTION release_identity IS 'Release identity with 2-3 hour random cooldown';
COMMENT ON FUNCTION get_pending_tasks_by_geo IS 'Get pending task counts by state/city';
COMMENT ON FUNCTION get_tasks_for_identity IS 'Get tasks matching identity geo (city or metro area)';

View File

@@ -0,0 +1,92 @@
-- Migration: 110_trusted_origins.sql
-- Description: Trusted origins for API access without token
-- Created: 2025-12-14
--
-- Manages which domains, IPs, and patterns can access the API without a Bearer token.
-- Used by auth middleware to grant 'internal' role to trusted requests.
-- ============================================================
-- TRUSTED ORIGINS TABLE
-- ============================================================
-- One row per origin that may call the API without a Bearer token.
-- origin_type selects how origin_value is interpreted (see the CHECK constraint).
CREATE TABLE IF NOT EXISTS trusted_origins (
id SERIAL PRIMARY KEY,
-- Origin identification
name VARCHAR(100) NOT NULL, -- Friendly name (e.g., "CannaIQ Production")
origin_type VARCHAR(20) NOT NULL, -- 'domain', 'ip', or 'pattern'
origin_value VARCHAR(255) NOT NULL, -- The actual value to match
-- Metadata
description TEXT, -- Optional notes
active BOOLEAN DEFAULT TRUE, -- Inactive rows are kept but excluded by the partial index below
-- Tracking
created_at TIMESTAMPTZ DEFAULT NOW(),
updated_at TIMESTAMPTZ DEFAULT NOW(), -- Maintained by the trusted_origins_updated_at trigger
created_by INTEGER REFERENCES users(id),
-- Constraints
CONSTRAINT valid_origin_type CHECK (origin_type IN ('domain', 'ip', 'pattern')),
UNIQUE(origin_type, origin_value) -- Also the conflict target used by the seed inserts
);
-- Partial index containing only active rows (the set the auth middleware is said to read)
CREATE INDEX IF NOT EXISTS idx_trusted_origins_active
ON trusted_origins(active) WHERE active = TRUE;
-- Trigger function: stamps updated_at with the current time on the row being written
CREATE OR REPLACE FUNCTION update_trusted_origins_updated_at()
RETURNS TRIGGER
LANGUAGE plpgsql
AS $$
BEGIN
    NEW.updated_at := NOW();
    RETURN NEW;
END;
$$;
-- Recreate the trigger so the migration can be re-run safely
-- (CREATE TRIGGER alone would fail if the trigger already exists)
DROP TRIGGER IF EXISTS trusted_origins_updated_at ON trusted_origins;
CREATE TRIGGER trusted_origins_updated_at
BEFORE UPDATE ON trusted_origins
FOR EACH ROW
EXECUTE FUNCTION update_trusted_origins_updated_at();
-- ============================================================
-- SEED DEFAULT TRUSTED ORIGINS
-- These match the hardcoded fallbacks in middleware.ts
-- ============================================================
-- Production domains (origin_type 'domain': full origin URL including scheme)
-- Idempotent: rows that already exist for (origin_type, origin_value) are skipped
INSERT INTO trusted_origins (name, origin_type, origin_value, description) VALUES
('CannaIQ Production', 'domain', 'https://cannaiq.co', 'Main CannaIQ dashboard'),
('CannaIQ Production (www)', 'domain', 'https://www.cannaiq.co', 'Main CannaIQ dashboard with www'),
('FindADispo Production', 'domain', 'https://findadispo.com', 'Consumer dispensary finder'),
('FindADispo Production (www)', 'domain', 'https://www.findadispo.com', 'Consumer dispensary finder with www'),
('Findagram Production', 'domain', 'https://findagram.co', 'Instagram-style cannabis discovery'),
('Findagram Production (www)', 'domain', 'https://www.findagram.co', 'Instagram-style cannabis discovery with www')
ON CONFLICT (origin_type, origin_value) DO NOTHING;
-- Wildcard patterns (origin_type 'pattern': regular expression matched against the origin)
-- With standard_conforming_strings = on (the PostgreSQL default) a backslash inside a
-- plain '...' literal is stored verbatim, so a literal dot is escaped with ONE backslash.
-- Doubling it (as in a JS/TS string literal) would store "\\." which matches a
-- backslash followed by any character and never matches a real origin.
-- NOTE: databases already seeded with the doubled form keep those rows; ON CONFLICT
-- only skips exact duplicates, so the stale rows need a manual cleanup.
INSERT INTO trusted_origins (name, origin_type, origin_value, description) VALUES
('CannaBrands Subdomains', 'pattern', '^https://.*\.cannabrands\.app$', 'All *.cannabrands.app subdomains'),
('CannaIQ Subdomains', 'pattern', '^https://.*\.cannaiq\.co$', 'All *.cannaiq.co subdomains')
ON CONFLICT (origin_type, origin_value) DO NOTHING;
-- Local development
-- NOTE(review): these rows are seeded unconditionally, so localhost origins are
-- trusted in every environment this migration runs in - confirm that is intended.
INSERT INTO trusted_origins (name, origin_type, origin_value, description) VALUES
('Local API', 'domain', 'http://localhost:3010', 'Local backend API'),
('Local Admin', 'domain', 'http://localhost:8080', 'Local admin dashboard'),
('Local Vite Dev', 'domain', 'http://localhost:5173', 'Vite dev server')
ON CONFLICT (origin_type, origin_value) DO NOTHING;
-- Trusted IPs (localhost): IPv4, IPv6, and the IPv4-mapped IPv6 form
INSERT INTO trusted_origins (name, origin_type, origin_value, description) VALUES
('Localhost IPv4', 'ip', '127.0.0.1', 'Local machine'),
('Localhost IPv6', 'ip', '::1', 'Local machine IPv6'),
('Localhost IPv6 Mapped', 'ip', '::ffff:127.0.0.1', 'IPv6-mapped IPv4 localhost')
ON CONFLICT (origin_type, origin_value) DO NOTHING;
-- ============================================================
-- COMMENTS
-- ============================================================
-- Catalog descriptions for the table and the two columns whose meaning depends on each other
COMMENT ON TABLE trusted_origins IS 'Domains, IPs, and patterns that can access API without token';
COMMENT ON COLUMN trusted_origins.origin_type IS 'domain = exact URL match, ip = IP address, pattern = regex pattern';
COMMENT ON COLUMN trusted_origins.origin_value IS 'For domain: full URL. For ip: IP address. For pattern: regex string';

View File

@@ -0,0 +1,35 @@
-- Migration: 111_system_settings.sql
-- Description: System settings table for runtime configuration
-- Created: 2025-12-14
-- Key/value store for runtime configuration; every value is kept as text
CREATE TABLE IF NOT EXISTS system_settings (
key VARCHAR(100) PRIMARY KEY, -- Setting name; also the conflict target of the seed insert
value TEXT NOT NULL, -- Setting value serialized as a string (e.g. 'true')
description TEXT,
updated_at TIMESTAMPTZ DEFAULT NOW(), -- Maintained by the system_settings_updated_at trigger
updated_by INTEGER REFERENCES users(id)
);
-- Task pool gate - controls whether workers can claim tasks
-- Seeded as open ('true'); an existing value is never overwritten (ON CONFLICT DO NOTHING)
INSERT INTO system_settings (key, value, description) VALUES
('task_pool_open', 'true', 'When false, workers cannot claim new tasks from the pool')
ON CONFLICT (key) DO NOTHING;
-- Trigger function: stamps updated_at with the current time on the row being written
CREATE OR REPLACE FUNCTION update_system_settings_updated_at()
RETURNS TRIGGER
LANGUAGE plpgsql
AS $$
BEGIN
    NEW.updated_at := NOW();
    RETURN NEW;
END;
$$;
-- Recreate the trigger so the migration can be re-run safely
DROP TRIGGER IF EXISTS system_settings_updated_at ON system_settings;
CREATE TRIGGER system_settings_updated_at
BEFORE UPDATE ON system_settings
FOR EACH ROW
EXECUTE FUNCTION update_system_settings_updated_at();
-- Catalog descriptions
COMMENT ON TABLE system_settings IS 'Runtime configuration settings';
COMMENT ON COLUMN system_settings.key IS 'Setting name (e.g., task_pool_open)';
COMMENT ON COLUMN system_settings.value IS 'Setting value as string';

View File

@@ -0,0 +1,390 @@
-- Migration 112: Worker Session Pool
-- Tracks IP/fingerprint sessions with exclusive locks and cooldowns
-- Each worker claims up to 6 tasks, uses one IP/fingerprint for those tasks,
-- then retires the session (8hr cooldown before IP can be reused)
-- Drop old identity pool tables if they exist (replacing with simpler session model)
-- NOTE(review): CASCADE also drops objects that depend on these tables. For
-- worker_identities that presumably includes the identity_pool_status view and
-- claim_identity() (its return type is the table's row type) from migration 109.
-- Functions that only reference the table inside their body (e.g. release_identity)
-- are likely left behind and would fail when called - confirm and drop them explicitly.
DROP TABLE IF EXISTS worker_identity_claims CASCADE;
DROP TABLE IF EXISTS worker_identities CASCADE;
-- Worker sessions: tracks active and cooling down IP/fingerprint pairs
-- Lifecycle driven by the functions below:
--   lock_worker_session()      -> status 'active' (owned by worker_id)
--   retire_worker_session()    -> status 'cooldown' for 8 hours
--   release_expired_sessions() -> status 'available' once cooldown_until has passed
CREATE TABLE IF NOT EXISTS worker_sessions (
id SERIAL PRIMARY KEY,
-- IP and fingerprint for this session
ip_address VARCHAR(45) NOT NULL, -- 45 chars fits a textual IPv6 address
fingerprint_hash VARCHAR(64) NOT NULL,
fingerprint_data JSONB,
-- Geo this session is locked to
state_code VARCHAR(2) NOT NULL,
city VARCHAR(100),
-- Ownership
worker_id VARCHAR(255), -- NULL if in cooldown
-- Status: 'active' (locked to worker), 'cooldown' (8hr wait), 'available'
status VARCHAR(20) NOT NULL DEFAULT 'available',
-- Task tracking
tasks_claimed INTEGER NOT NULL DEFAULT 0,
tasks_completed INTEGER NOT NULL DEFAULT 0,
tasks_failed INTEGER NOT NULL DEFAULT 0,
max_tasks INTEGER NOT NULL DEFAULT 6,
-- Timestamps
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
locked_at TIMESTAMPTZ, -- When worker locked this session
retired_at TIMESTAMPTZ, -- When session was retired (cooldown starts)
cooldown_until TIMESTAMPTZ, -- When session becomes available again
-- Constraints
CONSTRAINT valid_status CHECK (status IN ('active', 'cooldown', 'available'))
);
-- Indexes for fast lookups
CREATE INDEX IF NOT EXISTS idx_worker_sessions_ip ON worker_sessions(ip_address);
CREATE INDEX IF NOT EXISTS idx_worker_sessions_status ON worker_sessions(status);
-- Partial: only rows currently owned by a worker
CREATE INDEX IF NOT EXISTS idx_worker_sessions_worker ON worker_sessions(worker_id) WHERE worker_id IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_worker_sessions_geo ON worker_sessions(state_code, city);
-- Partial: only cooling-down rows, ordered by expiry (used when releasing cooldowns)
CREATE INDEX IF NOT EXISTS idx_worker_sessions_cooldown ON worker_sessions(cooldown_until) WHERE status = 'cooldown';
-- Unique constraint: only one active session per IP
-- A second concurrent attempt to activate the same IP fails with unique_violation
CREATE UNIQUE INDEX IF NOT EXISTS idx_worker_sessions_active_ip
ON worker_sessions(ip_address)
WHERE status = 'active';
-- Function: is_ip_available
-- TRUE when no session for check_ip is currently active or still inside its cooldown window.
-- A 'cooldown' row whose cooldown_until has already passed does not block the IP.
CREATE OR REPLACE FUNCTION is_ip_available(check_ip VARCHAR(45))
RETURNS BOOLEAN AS $$
DECLARE
    v_in_use BOOLEAN;
BEGIN
    SELECT EXISTS (
        SELECT 1
        FROM worker_sessions ws
        WHERE ws.ip_address = check_ip
          AND (
              (ws.status = 'cooldown' AND ws.cooldown_until > NOW())
              OR ws.status = 'active'
          )
    )
    INTO v_in_use;

    RETURN NOT v_in_use;
END;
$$ LANGUAGE plpgsql;
-- Function: lock_worker_session
-- Locks an IP/fingerprint session to a worker.
--
-- Parameters:
--   p_worker_id        - worker taking the session
--   p_ip_address       - IP the session runs on
--   p_state_code       - state the session is locked to
--   p_city             - optional city
--   p_fingerprint_hash - optional; on reuse the stored hash is kept when NULL,
--                        on insert a random md5 is generated when NULL
--   p_fingerprint_data - optional fingerprint payload
--
-- Returns the active session row, or a NULL row when the IP is not available:
-- either is_ip_available() says so, or a concurrent caller activated the same IP
-- first (detected through the partial unique index on active IPs).
CREATE OR REPLACE FUNCTION lock_worker_session(
    p_worker_id VARCHAR(255),
    p_ip_address VARCHAR(45),
    p_state_code VARCHAR(2),
    p_city VARCHAR(100) DEFAULT NULL,
    p_fingerprint_hash VARCHAR(64) DEFAULT NULL,
    p_fingerprint_data JSONB DEFAULT NULL
) RETURNS worker_sessions AS $$
DECLARE
    v_session worker_sessions;
BEGIN
    -- First check if IP is available
    IF NOT is_ip_available(p_ip_address) THEN
        RETURN NULL;
    END IF;

    -- Try to find an existing available session for this IP
    SELECT * INTO v_session
    FROM worker_sessions
    WHERE ip_address = p_ip_address
      AND status = 'available'
    LIMIT 1
    FOR UPDATE SKIP LOCKED;

    -- The availability check above and the write below are not atomic. If another
    -- transaction activates this IP in between, the unique index on active IPs
    -- rejects our write; report that as "not available" instead of raising.
    BEGIN
        IF v_session.id IS NOT NULL THEN
            -- Reuse existing session (counters start over for the new owner)
            UPDATE worker_sessions SET
                worker_id = p_worker_id,
                status = 'active',
                state_code = p_state_code,
                city = p_city,
                fingerprint_hash = COALESCE(p_fingerprint_hash, fingerprint_hash),
                fingerprint_data = COALESCE(p_fingerprint_data, fingerprint_data),
                tasks_claimed = 0,
                tasks_completed = 0,
                tasks_failed = 0,
                locked_at = NOW(),
                retired_at = NULL,
                cooldown_until = NULL
            WHERE id = v_session.id
            RETURNING * INTO v_session;
        ELSE
            -- Create new session
            INSERT INTO worker_sessions (
                ip_address, fingerprint_hash, fingerprint_data,
                state_code, city, worker_id, status, locked_at
            ) VALUES (
                p_ip_address, COALESCE(p_fingerprint_hash, md5(random()::text)),
                p_fingerprint_data, p_state_code, p_city, p_worker_id, 'active', NOW()
            )
            RETURNING * INTO v_session;
        END IF;
    EXCEPTION
        WHEN unique_violation THEN
            RETURN NULL;
    END;

    RETURN v_session;
END;
$$ LANGUAGE plpgsql;
-- Function: retire_worker_session
-- Ends the worker's active session(s): ownership is cleared and an 8 hour
-- cooldown starts. Returns TRUE when at least one session was retired.
CREATE OR REPLACE FUNCTION retire_worker_session(p_worker_id VARCHAR(255))
RETURNS BOOLEAN AS $$
BEGIN
    UPDATE worker_sessions ws
    SET status = 'cooldown',
        worker_id = NULL,
        retired_at = NOW(),
        cooldown_until = NOW() + INTERVAL '8 hours'
    WHERE ws.status = 'active'
      AND ws.worker_id = p_worker_id;

    -- FOUND is TRUE when the UPDATE touched at least one row
    RETURN FOUND;
END;
$$ LANGUAGE plpgsql;
-- Function: release_expired_sessions
-- Moves every session whose cooldown has elapsed back to 'available'.
-- Returns the number of sessions released.
CREATE OR REPLACE FUNCTION release_expired_sessions()
RETURNS INTEGER AS $$
DECLARE
    v_freed INTEGER;
BEGIN
    WITH freed AS (
        UPDATE worker_sessions
        SET status = 'available'
        WHERE cooldown_until <= NOW()
          AND status = 'cooldown'
        RETURNING id
    )
    SELECT COUNT(*)::INTEGER INTO v_freed FROM freed;

    RETURN v_freed;
END;
$$ LANGUAGE plpgsql;
-- Function: get_worker_session
-- Returns the active session owned by the worker (a NULL row when there is none)
CREATE OR REPLACE FUNCTION get_worker_session(p_worker_id VARCHAR(255))
RETURNS worker_sessions
LANGUAGE sql
AS $$
    SELECT ws.*
    FROM worker_sessions ws
    WHERE ws.status = 'active'
      AND ws.worker_id = p_worker_id
    LIMIT 1;
$$;
-- Function: session_task_completed
-- Adds one to tasks_completed on the worker's active session.
-- Returns TRUE when an active session was updated.
CREATE OR REPLACE FUNCTION session_task_completed(p_worker_id VARCHAR(255))
RETURNS BOOLEAN AS $$
DECLARE
    v_rows INTEGER;
BEGIN
    UPDATE worker_sessions ws
    SET tasks_completed = ws.tasks_completed + 1
    WHERE ws.status = 'active'
      AND ws.worker_id = p_worker_id;

    GET DIAGNOSTICS v_rows = ROW_COUNT;
    RETURN v_rows > 0;
END;
$$ LANGUAGE plpgsql;
-- Function: session_task_failed
-- Adds one to tasks_failed on the worker's active session.
-- Returns TRUE when an active session was updated.
CREATE OR REPLACE FUNCTION session_task_failed(p_worker_id VARCHAR(255))
RETURNS BOOLEAN AS $$
DECLARE
    v_rows INTEGER;
BEGIN
    UPDATE worker_sessions ws
    SET tasks_failed = ws.tasks_failed + 1
    WHERE ws.status = 'active'
      AND ws.worker_id = p_worker_id;

    GET DIAGNOSTICS v_rows = ROW_COUNT;
    RETURN v_rows > 0;
END;
$$ LANGUAGE plpgsql;
-- Function: session_task_claimed
-- Adds p_count (default 1) to tasks_claimed on the worker's active session.
-- Returns TRUE when an active session was updated.
CREATE OR REPLACE FUNCTION session_task_claimed(p_worker_id VARCHAR(255), p_count INTEGER DEFAULT 1)
RETURNS BOOLEAN AS $$
DECLARE
    v_rows INTEGER;
BEGIN
    UPDATE worker_sessions ws
    SET tasks_claimed = ws.tasks_claimed + p_count
    WHERE ws.status = 'active'
      AND ws.worker_id = p_worker_id;

    GET DIAGNOSTICS v_rows = ROW_COUNT;
    RETURN v_rows > 0;
END;
$$ LANGUAGE plpgsql;
-- Scheduled job hint: Run release_expired_sessions() every 5 minutes
-- Nothing in this migration schedules the call; the hint is stored as the function's catalog comment
COMMENT ON FUNCTION release_expired_sessions() IS
'Run periodically to release sessions from cooldown. Suggest: every 5 minutes.';
-- =============================================================================
-- ATOMIC TASK CLAIMING
-- Worker claims up to 6 tasks for same geo in one transaction
-- =============================================================================
-- Function: claim_tasks_batch
-- Picks the state/city with the most pending tasks and claims up to p_max_tasks
-- of them for the worker in a single statement.
--
-- Parameters:
--   p_worker_id - worker the tasks are assigned to
--   p_max_tasks - upper bound on tasks claimed (default 6)
--   p_role      - optional role filter; NULL means any role
--
-- Returns one row per claimed task with its dispensary's geo info; no rows when
-- nothing is pending. Tasks are taken by priority (highest first), then oldest
-- first. Task rows locked by concurrent claimers are skipped rather than waited on.
-- When the chosen group has a NULL city, tasks from any city in that state qualify.
CREATE OR REPLACE FUNCTION claim_tasks_batch(
    p_worker_id VARCHAR(255),
    p_max_tasks INTEGER DEFAULT 6,
    p_role VARCHAR(50) DEFAULT NULL -- Optional role filter
) RETURNS TABLE (
    task_id INTEGER,
    role VARCHAR(50),
    dispensary_id INTEGER,
    dispensary_name VARCHAR(255),
    city VARCHAR(100),
    state_code VARCHAR(2),
    platform VARCHAR(50),
    method VARCHAR(20)
) AS $$
DECLARE
    v_target_state VARCHAR(2);
    v_target_city VARCHAR(100);
BEGIN
    -- First, find the geo with most pending tasks to target.
    -- Groups without a state are ignored: a NULL-state group winning here would
    -- otherwise be mistaken for "no pending tasks" by the check below.
    SELECT d.state, d.city INTO v_target_state, v_target_city
    FROM worker_tasks t
    JOIN dispensaries d ON t.dispensary_id = d.id
    WHERE t.status = 'pending'
      AND d.state IS NOT NULL
      AND (p_role IS NULL OR t.role = p_role)
    GROUP BY d.state, d.city
    ORDER BY COUNT(*) DESC
    LIMIT 1;

    -- No pending tasks
    IF v_target_state IS NULL THEN
        RETURN;
    END IF;

    -- Claim up to p_max_tasks for this geo
    RETURN QUERY
    WITH claimed AS (
        UPDATE worker_tasks t SET
            status = 'claimed',
            worker_id = p_worker_id,
            claimed_at = NOW()
        FROM (
            SELECT t2.id
            FROM worker_tasks t2
            JOIN dispensaries d ON t2.dispensary_id = d.id
            WHERE t2.status = 'pending'
              AND d.state = v_target_state
              AND (v_target_city IS NULL OR d.city = v_target_city)
              AND (p_role IS NULL OR t2.role = p_role)
            ORDER BY t2.priority DESC, t2.created_at ASC
            -- Lock only the task rows. A bare FOR UPDATE would also lock the joined
            -- dispensaries rows, making concurrent claimers skip every other task
            -- of the same dispensary.
            FOR UPDATE OF t2 SKIP LOCKED
            LIMIT p_max_tasks
        ) sub
        WHERE t.id = sub.id
        RETURNING t.id, t.role, t.dispensary_id, t.method
    )
    SELECT
        c.id AS task_id,
        c.role,
        c.dispensary_id,
        d.name AS dispensary_name,
        d.city,
        d.state AS state_code,
        d.platform,
        c.method
    FROM claimed c
    JOIN dispensaries d ON c.dispensary_id = d.id;
END;
$$ LANGUAGE plpgsql;
-- Function: release_claimed_tasks
-- Returns every task the worker holds ('claimed' or 'running') to 'pending'
-- (for failed worker or cleanup). Ownership and the claim/start timestamps are
-- cleared so the task looks untouched to the next claimer, matching what
-- fail_task() does when it re-queues a task.
-- Returns the number of tasks released.
CREATE OR REPLACE FUNCTION release_claimed_tasks(p_worker_id VARCHAR(255))
RETURNS INTEGER AS $$
DECLARE
    v_released INTEGER;
BEGIN
    UPDATE worker_tasks SET
        status = 'pending',
        worker_id = NULL,
        claimed_at = NULL,
        started_at = NULL -- 'running' tasks carry a start time from start_task()
    WHERE worker_id = p_worker_id
      AND status IN ('claimed', 'running');

    GET DIAGNOSTICS v_released = ROW_COUNT;
    RETURN v_released;
END;
$$ LANGUAGE plpgsql;
-- Function: start_task
-- Transitions a task from 'claimed' to 'running' and records the start time.
-- Only succeeds for the worker that holds the claim; returns TRUE on success.
CREATE OR REPLACE FUNCTION start_task(p_task_id INTEGER, p_worker_id VARCHAR(255))
RETURNS BOOLEAN AS $$
DECLARE
    v_rows INTEGER;
BEGIN
    UPDATE worker_tasks wt
    SET status = 'running',
        started_at = NOW()
    WHERE wt.id = p_task_id
      AND wt.status = 'claimed'
      AND wt.worker_id = p_worker_id;

    GET DIAGNOSTICS v_rows = ROW_COUNT;
    RETURN v_rows > 0;
END;
$$ LANGUAGE plpgsql;
-- Function: complete_task
-- Transitions a task from 'running' to 'completed' (it leaves the pool),
-- storing the completion time and the optional result payload.
-- Only succeeds for the worker running the task; returns TRUE on success.
CREATE OR REPLACE FUNCTION complete_task(
    p_task_id INTEGER,
    p_worker_id VARCHAR(255),
    p_result JSONB DEFAULT NULL
) RETURNS BOOLEAN AS $$
DECLARE
    v_rows INTEGER;
BEGIN
    UPDATE worker_tasks wt
    SET status = 'completed',
        completed_at = NOW(),
        result = p_result
    WHERE wt.id = p_task_id
      AND wt.status = 'running'
      AND wt.worker_id = p_worker_id;

    GET DIAGNOSTICS v_rows = ROW_COUNT;
    RETURN v_rows > 0;
END;
$$ LANGUAGE plpgsql;
-- Function: fail_task
-- Records a failure for a task held by the worker.
--
-- Parameters:
--   p_task_id     - task that failed
--   p_worker_id   - worker reporting the failure; must be the task's current owner
--   p_error       - optional error text stored in error_message
--   p_max_retries - retries allowed before the task is failed permanently (default 3)
--
-- While retry_count is below p_max_retries the task goes back to 'pending' with
-- ownership and timestamps cleared; otherwise it becomes 'failed' for good.
-- retry_count is incremented either way.
--
-- Returns TRUE when the task was updated, FALSE when the task does not exist,
-- belongs to another worker, or is not in progress ('claimed' / 'running').
-- The status guard matters because completed tasks keep their worker_id: without
-- it a late failure report could push a finished task back into the pool.
CREATE OR REPLACE FUNCTION fail_task(
    p_task_id INTEGER,
    p_worker_id VARCHAR(255),
    p_error TEXT DEFAULT NULL,
    p_max_retries INTEGER DEFAULT 3
) RETURNS BOOLEAN AS $$
DECLARE
    v_retry_count INTEGER;
BEGIN
    -- Read the retry count from the row we are allowed to change, and lock it so
    -- the count cannot move between this read and the update below
    SELECT COALESCE(retry_count, 0) INTO v_retry_count
    FROM worker_tasks
    WHERE id = p_task_id
      AND worker_id = p_worker_id
      AND status IN ('claimed', 'running')
    FOR UPDATE;

    IF NOT FOUND THEN
        RETURN FALSE;
    END IF;

    IF v_retry_count >= p_max_retries THEN
        -- Max retries exceeded - mark as permanently failed
        UPDATE worker_tasks SET
            status = 'failed',
            completed_at = NOW(),
            error_message = p_error,
            retry_count = v_retry_count + 1
        WHERE id = p_task_id;
    ELSE
        -- Return to pending for retry
        UPDATE worker_tasks SET
            status = 'pending',
            worker_id = NULL,
            claimed_at = NULL,
            started_at = NULL,
            error_message = p_error,
            retry_count = v_retry_count + 1
        WHERE id = p_task_id;
    END IF;

    RETURN FOUND;
END;
$$ LANGUAGE plpgsql;
-- Add the columns the task functions above rely on, if they are missing.
-- ADD COLUMN IF NOT EXISTS checks the target table itself, unlike an
-- information_schema lookup by table name alone, which can be fooled by a
-- same-named table in another schema.
ALTER TABLE worker_tasks
    ADD COLUMN IF NOT EXISTS retry_count INTEGER NOT NULL DEFAULT 0,
    ADD COLUMN IF NOT EXISTS claimed_at TIMESTAMPTZ;

View File

@@ -0,0 +1,381 @@
-- Task Pools: Group tasks by geo area for worker assignment
-- Workers claim a pool, get proxy for that geo, then pull tasks from pool
-- ============================================================================
-- TASK POOLS TABLE
-- ============================================================================
-- Each pool represents a metro area (e.g., Phoenix AZ = 100mi radius)
-- Dispensaries are assigned to pools based on location
-- Workers claim a pool, not individual tasks
-- NOTE: radius_miles and is_active carry defaults but are not declared
-- NOT NULL, so explicit NULLs are accepted by this schema.
CREATE TABLE IF NOT EXISTS task_pools (
id SERIAL PRIMARY KEY,
name VARCHAR(100) NOT NULL UNIQUE, -- e.g., 'phoenix_az'
display_name VARCHAR(100) NOT NULL, -- e.g., 'Phoenix, AZ'
state_code VARCHAR(2) NOT NULL, -- e.g., 'AZ'
city VARCHAR(100) NOT NULL, -- e.g., 'Phoenix'
latitude DECIMAL(10, 6) NOT NULL, -- pool center lat
longitude DECIMAL(10, 6) NOT NULL, -- pool center lng
radius_miles INTEGER DEFAULT 100, -- pool radius (100mi default)
timezone VARCHAR(50) NOT NULL, -- e.g., 'America/Phoenix'
is_active BOOLEAN DEFAULT true,
created_at TIMESTAMPTZ DEFAULT NOW(),
updated_at TIMESTAMPTZ DEFAULT NOW()
);
-- Index for active pools
-- Partial index: only rows with is_active = true are indexed.
CREATE INDEX IF NOT EXISTS idx_task_pools_active ON task_pools(is_active) WHERE is_active = true;
-- ============================================================================
-- LINK DISPENSARIES TO POOLS
-- ============================================================================
-- Add pool_id to dispensaries table
-- Nullable foreign key: a dispensary without a pool keeps pool_id = NULL.
ALTER TABLE dispensaries
ADD COLUMN IF NOT EXISTS pool_id INTEGER REFERENCES task_pools(id);
-- Index for pool membership
-- Partial index: dispensaries without a pool are not indexed.
CREATE INDEX IF NOT EXISTS idx_dispensaries_pool ON dispensaries(pool_id) WHERE pool_id IS NOT NULL;
-- ============================================================================
-- WORKER POOL ASSIGNMENT
-- ============================================================================
-- Track which pool a worker is currently assigned to
--   current_pool_id     : pool the worker holds (NULL = none)
--   pool_claimed_at     : when that pool was claimed
--   pool_stores_visited : stores counted against the current claim (default 0)
--   pool_max_stores     : per-claim store cap (default 6)
ALTER TABLE worker_registry
ADD COLUMN IF NOT EXISTS current_pool_id INTEGER REFERENCES task_pools(id),
ADD COLUMN IF NOT EXISTS pool_claimed_at TIMESTAMPTZ,
ADD COLUMN IF NOT EXISTS pool_stores_visited INTEGER DEFAULT 0,
ADD COLUMN IF NOT EXISTS pool_max_stores INTEGER DEFAULT 6;
-- ============================================================================
-- SEED INITIAL POOLS
-- ============================================================================
-- Major cannabis markets with approximate center coordinates
-- Tuple order follows the column list below:
--   name, display_name, state_code, city, latitude, longitude, timezone, radius_miles
-- Re-running is safe: rows whose name already exists are skipped by the
-- ON CONFLICT (name) DO NOTHING clause and are not updated.
INSERT INTO task_pools (name, display_name, state_code, city, latitude, longitude, timezone, radius_miles) VALUES
-- Arizona
('phoenix_az', 'Phoenix, AZ', 'AZ', 'Phoenix', 33.4484, -112.0740, 'America/Phoenix', 100),
('tucson_az', 'Tucson, AZ', 'AZ', 'Tucson', 32.2226, -110.9747, 'America/Phoenix', 75),
-- California
('los_angeles_ca', 'Los Angeles, CA', 'CA', 'Los Angeles', 34.0522, -118.2437, 'America/Los_Angeles', 100),
('san_francisco_ca', 'San Francisco, CA', 'CA', 'San Francisco', 37.7749, -122.4194, 'America/Los_Angeles', 75),
('san_diego_ca', 'San Diego, CA', 'CA', 'San Diego', 32.7157, -117.1611, 'America/Los_Angeles', 75),
('sacramento_ca', 'Sacramento, CA', 'CA', 'Sacramento', 38.5816, -121.4944, 'America/Los_Angeles', 75),
-- Colorado
('denver_co', 'Denver, CO', 'CO', 'Denver', 39.7392, -104.9903, 'America/Denver', 100),
-- Illinois
('chicago_il', 'Chicago, IL', 'IL', 'Chicago', 41.8781, -87.6298, 'America/Chicago', 100),
-- Massachusetts
('boston_ma', 'Boston, MA', 'MA', 'Boston', 42.3601, -71.0589, 'America/New_York', 75),
-- Michigan
('detroit_mi', 'Detroit, MI', 'MI', 'Detroit', 42.3314, -83.0458, 'America/Detroit', 100),
-- Nevada
('las_vegas_nv', 'Las Vegas, NV', 'NV', 'Las Vegas', 36.1699, -115.1398, 'America/Los_Angeles', 75),
('reno_nv', 'Reno, NV', 'NV', 'Reno', 39.5296, -119.8138, 'America/Los_Angeles', 50),
-- New Jersey
('newark_nj', 'Newark, NJ', 'NJ', 'Newark', 40.7357, -74.1724, 'America/New_York', 75),
-- New York
('new_york_ny', 'New York, NY', 'NY', 'New York', 40.7128, -74.0060, 'America/New_York', 75),
-- Oklahoma
('oklahoma_city_ok', 'Oklahoma City, OK', 'OK', 'Oklahoma City', 35.4676, -97.5164, 'America/Chicago', 100),
('tulsa_ok', 'Tulsa, OK', 'OK', 'Tulsa', 36.1540, -95.9928, 'America/Chicago', 75),
-- Oregon
('portland_or', 'Portland, OR', 'OR', 'Portland', 45.5152, -122.6784, 'America/Los_Angeles', 75),
-- Washington
('seattle_wa', 'Seattle, WA', 'WA', 'Seattle', 47.6062, -122.3321, 'America/Los_Angeles', 100)
ON CONFLICT (name) DO NOTHING;
-- ============================================================================
-- FUNCTION: Assign dispensary to nearest pool
-- ============================================================================
-- Looks up the dispensary's coordinates, finds the closest active pool whose
-- radius_miles covers it, and stores that pool in dispensaries.pool_id.
--   disp_id : dispensaries.id to assign
-- Returns the chosen task_pools.id, or NULL when the dispensary has no
-- coordinates (or does not exist) or no active pool is within range. In the
-- NULL case the dispensary row is left unchanged.
CREATE OR REPLACE FUNCTION assign_dispensary_to_pool(disp_id INTEGER)
RETURNS INTEGER AS $$
DECLARE
    disp_lat DECIMAL(10,6);
    disp_lng DECIMAL(10,6);
    nearest_pool_id INTEGER;
BEGIN
    -- Get dispensary coordinates
    SELECT d.latitude, d.longitude INTO disp_lat, disp_lng
    FROM dispensaries d
    WHERE d.id = disp_id;

    IF disp_lat IS NULL OR disp_lng IS NULL THEN
        RETURN NULL;
    END IF;

    -- Find nearest active pool within radius.
    -- Great-circle distance via the spherical law of cosines (3959 = Earth
    -- radius in miles). The cosine term is clamped to [-1, 1] because
    -- floating-point rounding can push it slightly past 1 when the dispensary
    -- sits on (or very near) a pool centre, and acos() raises
    -- "input is out of range" outside that interval.
    SELECT candidate.id INTO nearest_pool_id
    FROM (
        SELECT
            tp.id,
            tp.radius_miles,
            3959 * acos(
                LEAST(1.0::double precision, GREATEST(-1.0::double precision,
                    cos(radians(tp.latitude)) * cos(radians(disp_lat)) *
                    cos(radians(disp_lng) - radians(tp.longitude)) +
                    sin(radians(tp.latitude)) * sin(radians(disp_lat))
                ))
            ) AS distance_miles
        FROM task_pools tp
        WHERE tp.is_active = true
    ) AS candidate
    WHERE candidate.distance_miles <= candidate.radius_miles
    ORDER BY candidate.distance_miles
    LIMIT 1;

    -- Update dispensary
    IF nearest_pool_id IS NOT NULL THEN
        UPDATE dispensaries SET pool_id = nearest_pool_id WHERE id = disp_id;
    END IF;

    RETURN nearest_pool_id;
END;
$$ LANGUAGE plpgsql;
-- ============================================================================
-- FUNCTION: Assign all dispensaries to pools (batch)
-- ============================================================================
-- Calls assign_dispensary_to_pool() for every dispensary that has no pool yet
-- and has a latitude.
-- Returns a single row:
--   assigned   : dispensaries that received a pool
--   unassigned : dispensaries for which no pool was found
CREATE OR REPLACE FUNCTION assign_all_dispensaries_to_pools()
RETURNS TABLE(assigned INTEGER, unassigned INTEGER) AS $$
DECLARE
    assigned_count INTEGER := 0;
    unassigned_count INTEGER := 0;
    disp RECORD;
    -- Deliberately NOT named pool_id: a variable with the same name as the
    -- dispensaries.pool_id column makes the loop query below fail with
    -- "column reference is ambiguous" under PL/pgSQL's default conflict rule.
    v_assigned_pool_id INTEGER;
BEGIN
    FOR disp IN
        SELECT d.id
        FROM dispensaries d
        WHERE d.pool_id IS NULL
          AND d.latitude IS NOT NULL
    LOOP
        v_assigned_pool_id := assign_dispensary_to_pool(disp.id);
        IF v_assigned_pool_id IS NOT NULL THEN
            assigned_count := assigned_count + 1;
        ELSE
            unassigned_count := unassigned_count + 1;
        END IF;
    END LOOP;

    RETURN QUERY SELECT assigned_count, unassigned_count;
END;
$$ LANGUAGE plpgsql;
-- ============================================================================
-- FUNCTION: Get pools with pending tasks
-- ============================================================================
-- Lists every active pool that has at least one task in status 'pending'
-- attached to one of its dispensaries, busiest pool first.
--   pending_count : number of distinct pending tasks in the pool
--   store_count   : number of distinct dispensaries those tasks belong to
CREATE OR REPLACE FUNCTION get_pools_with_pending_tasks()
RETURNS TABLE(
    pool_id INTEGER,
    pool_name VARCHAR(100),
    display_name VARCHAR(100),
    state_code VARCHAR(2),
    city VARCHAR(100),
    timezone VARCHAR(50),
    pending_count BIGINT,
    store_count BIGINT
) AS $$
BEGIN
    RETURN QUERY
    SELECT
        tp.id,
        tp.name,
        tp.display_name,
        tp.state_code,
        tp.city,
        tp.timezone,
        backlog.n_pending,
        backlog.n_stores
    FROM task_pools tp
    JOIN (
        -- Per-pool backlog; pools with no pending task produce no row here
        -- and are therefore dropped by the inner join.
        SELECT
            d.pool_id AS backlog_pool_id,
            COUNT(DISTINCT t.id) AS n_pending,
            COUNT(DISTINCT d.id) AS n_stores
        FROM dispensaries d
        JOIN tasks t ON t.dispensary_id = d.id AND t.status = 'pending'
        GROUP BY d.pool_id
    ) AS backlog ON backlog.backlog_pool_id = tp.id
    WHERE tp.is_active = true
    ORDER BY backlog.n_pending DESC;
END;
$$ LANGUAGE plpgsql;
-- ============================================================================
-- FUNCTION: Worker claims a pool
-- ============================================================================
-- Assigns a pool to a worker and resets the worker's per-pool counters.
--   p_worker_id : worker_registry.worker_id of the claiming worker
--   p_pool_id   : pool to claim; when NULL the active pool with the most
--                 pending tasks is chosen
-- Returns one row describing the claimed pool. Returns no rows when there is
-- no pool to claim or when p_worker_id has no worker_registry row (nothing was
-- recorded, so no claim is reported).
CREATE OR REPLACE FUNCTION worker_claim_pool(
    p_worker_id VARCHAR(100),
    p_pool_id INTEGER DEFAULT NULL
)
RETURNS TABLE(
    pool_id INTEGER,
    pool_name VARCHAR(100),
    display_name VARCHAR(100),
    state_code VARCHAR(2),
    city VARCHAR(100),
    latitude DECIMAL(10,6),
    longitude DECIMAL(10,6),
    timezone VARCHAR(50)
) AS $$
DECLARE
    claimed_pool_id INTEGER;
BEGIN
    -- If no pool specified, pick the one with most pending tasks
    IF p_pool_id IS NULL THEN
        SELECT tp.id INTO claimed_pool_id
        FROM task_pools tp
        JOIN dispensaries d ON d.pool_id = tp.id
        JOIN tasks t ON t.dispensary_id = d.id AND t.status = 'pending'
        WHERE tp.is_active = true
        GROUP BY tp.id
        ORDER BY COUNT(DISTINCT t.id) DESC
        LIMIT 1;
    ELSE
        claimed_pool_id := p_pool_id;
    END IF;

    IF claimed_pool_id IS NULL THEN
        RETURN;
    END IF;

    -- Update worker registry with pool assignment
    UPDATE worker_registry
    SET
        current_pool_id = claimed_pool_id,
        pool_claimed_at = NOW(),
        pool_stores_visited = 0,
        pool_max_stores = 6,
        updated_at = NOW()
    WHERE worker_id = p_worker_id;

    -- No registry row was updated: the assignment was not recorded, so do not
    -- tell the caller it holds a pool.
    IF NOT FOUND THEN
        RETURN;
    END IF;

    -- Return pool info
    RETURN QUERY
    SELECT
        tp.id,
        tp.name,
        tp.display_name,
        tp.state_code,
        tp.city,
        tp.latitude,
        tp.longitude,
        tp.timezone
    FROM task_pools tp
    WHERE tp.id = claimed_pool_id;
END;
$$ LANGUAGE plpgsql;
-- ============================================================================
-- FUNCTION: Pull tasks from worker's pool (up to 6 stores)
-- ============================================================================
-- Claims pending, due tasks from the worker's current pool: at most one task
-- per dispensary, limited to the worker's remaining store capacity.
--   p_worker_id  : worker_registry.worker_id of the pulling worker
--   p_max_stores : upper bound on stores claimed by this call (default 6);
--                  also used as the cap when pool_max_stores is NULL
-- Returns one row per claimed task. Returns no rows when the worker has no
-- capacity left. Raises an exception when the worker has no pool assigned.
-- Side effects: claimed tasks get status 'claimed', claimed_by and claimed_at;
-- worker_registry.pool_stores_visited grows by the number of stores claimed
-- in this call.
CREATE OR REPLACE FUNCTION pull_tasks_from_pool(
    p_worker_id VARCHAR(100),
    p_max_stores INTEGER DEFAULT 6
)
RETURNS TABLE(
    task_id INTEGER,
    dispensary_id INTEGER,
    dispensary_name VARCHAR(255),
    role VARCHAR(50),
    platform VARCHAR(50),
    method VARCHAR(20)
) AS $$
DECLARE
    worker_pool_id INTEGER;
    stores_visited INTEGER;
    max_stores INTEGER;
    stores_remaining INTEGER;
    newly_claimed INTEGER;
BEGIN
    -- Get worker's current pool and store count
    SELECT wr.current_pool_id, wr.pool_stores_visited, wr.pool_max_stores
    INTO worker_pool_id, stores_visited, max_stores
    FROM worker_registry wr
    WHERE wr.worker_id = p_worker_id;

    IF worker_pool_id IS NULL THEN
        RAISE EXCEPTION 'Worker % has no pool assigned', p_worker_id;
    END IF;

    -- Remaining capacity for this claim, never more than p_max_stores per call.
    -- LEAST ignores NULL arguments; a fully NULL result is treated as exhausted.
    stores_remaining := LEAST(
        COALESCE(max_stores, p_max_stores) - COALESCE(stores_visited, 0),
        p_max_stores
    );
    IF stores_remaining IS NULL OR stores_remaining <= 0 THEN
        RETURN; -- Worker exhausted
    END IF;

    -- Claim tasks from pool (one task per store, up to remaining capacity).
    -- Every column reference is qualified and CTE columns use names distinct
    -- from the output columns above: inside this function task_id,
    -- dispensary_id, etc. are also PL/pgSQL variables, and an unqualified
    -- reference to them in a query is rejected as ambiguous.
    RETURN QUERY
    WITH available_stores AS (
        SELECT DISTINCT ON (d.id)
            t.id       AS picked_task_id,
            d.id       AS picked_dispensary_id,
            d.name     AS picked_dispensary_name,
            t.role     AS picked_role,
            t.platform AS picked_platform,
            t.method   AS picked_method
        FROM tasks t
        JOIN dispensaries d ON d.id = t.dispensary_id
        WHERE d.pool_id = worker_pool_id
          AND t.status = 'pending'
          AND t.scheduled_for <= NOW()
        ORDER BY d.id, t.priority DESC, t.created_at ASC
        LIMIT stores_remaining
    ),
    claimed AS (
        UPDATE tasks AS tk
        SET
            status = 'claimed',
            claimed_by = p_worker_id,
            claimed_at = NOW()
        WHERE tk.id IN (SELECT av.picked_task_id FROM available_stores av)
          -- Re-check status so a task claimed by a concurrent worker between
          -- the selection above and this update is skipped, not overwritten.
          AND tk.status = 'pending'
        RETURNING tk.id AS claimed_task_id
    )
    SELECT
        av.picked_task_id,
        av.picked_dispensary_id,
        av.picked_dispensary_name,
        av.picked_role,
        av.picked_platform,
        av.picked_method
    FROM available_stores av
    WHERE av.picked_task_id IN (SELECT c.claimed_task_id FROM claimed c);

    -- One task per store, so the rows just returned equal the stores claimed
    -- by this call. Counting only these avoids re-adding stores from earlier
    -- pulls whose tasks are still in status 'claimed'.
    GET DIAGNOSTICS newly_claimed = ROW_COUNT;

    -- Update worker store count
    UPDATE worker_registry AS wr
    SET
        pool_stores_visited = COALESCE(wr.pool_stores_visited, 0) + newly_claimed,
        updated_at = NOW()
    WHERE wr.worker_id = p_worker_id;
END;
$$ LANGUAGE plpgsql;
-- ============================================================================
-- FUNCTION: Worker releases pool (exhausted or done)
-- ============================================================================
-- Clears the worker's pool assignment: pool id, claim timestamp, visited-store
-- counter, and the current_state / current_city fields.
--   p_worker_id : worker_registry.worker_id of the releasing worker
-- Always returns TRUE, including when no worker_registry row matched.
CREATE OR REPLACE FUNCTION worker_release_pool(p_worker_id VARCHAR(100))
RETURNS BOOLEAN AS $$
BEGIN
    UPDATE worker_registry AS wr
    SET updated_at = NOW(),
        current_city = NULL,
        current_state = NULL,
        pool_stores_visited = 0,
        pool_claimed_at = NULL,
        current_pool_id = NULL
    WHERE wr.worker_id = p_worker_id;

    RETURN TRUE;
END;
$$ LANGUAGE plpgsql;
-- ============================================================================
-- RUN: Assign existing dispensaries to pools
-- ============================================================================
-- Executes the batch assignment as part of this migration; the result is a
-- single row with the assigned / unassigned counts.
SELECT * FROM assign_all_dispensaries_to_pools();

View File

@@ -0,0 +1,10 @@
-- Migration 114: Add pool_id to task_schedules
-- Allows schedules to target specific geo pools
-- The column is a nullable foreign key to task_pools(id).
ALTER TABLE task_schedules
ADD COLUMN IF NOT EXISTS pool_id INTEGER REFERENCES task_pools(id);
-- Index for pool-based schedule queries
-- Partial index: schedules without a pool filter are not indexed.
CREATE INDEX IF NOT EXISTS idx_task_schedules_pool ON task_schedules(pool_id) WHERE pool_id IS NOT NULL;
COMMENT ON COLUMN task_schedules.pool_id IS 'Optional geo pool filter. NULL = all pools/dispensaries matching state_code';

View File

@@ -0,0 +1,17 @@
-- Migration: Add proxy_ip tracking to worker_tasks
-- Purpose: Prevent same IP from hitting multiple stores on same platform simultaneously
--
-- Anti-detection measure: Dutchie/Jane may flag if same IP makes requests
-- for multiple different stores. This column lets us track and prevent that.
-- Add proxy_ip column to track which proxy IP is being used for each task
-- NOTE(review): VARCHAR(45) presumably sized for the longest textual IPv6
-- address - confirm.
ALTER TABLE worker_tasks ADD COLUMN IF NOT EXISTS proxy_ip VARCHAR(45);
-- Index for quick lookup of active tasks by proxy IP
-- Used to check: "Is this IP already hitting another store?"
-- Partial index: only tasks in status 'claimed' or 'running' that have a
-- proxy_ip are indexed.
CREATE INDEX IF NOT EXISTS idx_worker_tasks_proxy_ip_active
ON worker_tasks (proxy_ip, platform)
WHERE status IN ('claimed', 'running') AND proxy_ip IS NOT NULL;
-- Comment
COMMENT ON COLUMN worker_tasks.proxy_ip IS 'Proxy IP assigned to this task. Used to prevent same IP hitting multiple stores on same platform.';

View File

@@ -0,0 +1,16 @@
-- Migration: Add source tracking columns to worker_tasks
-- Purpose: Track where tasks originated from (schedule, API, manual)
-- Add source tracking columns
-- All three columns are nullable; source_schedule_id is a foreign key to
-- task_schedules(id).
ALTER TABLE worker_tasks ADD COLUMN IF NOT EXISTS source VARCHAR(50);
ALTER TABLE worker_tasks ADD COLUMN IF NOT EXISTS source_schedule_id INTEGER REFERENCES task_schedules(id);
ALTER TABLE worker_tasks ADD COLUMN IF NOT EXISTS source_metadata JSONB;
-- Index for tracking tasks by schedule
-- Partial index: tasks not created by a schedule are not indexed.
CREATE INDEX IF NOT EXISTS idx_worker_tasks_source_schedule
ON worker_tasks (source_schedule_id) WHERE source_schedule_id IS NOT NULL;
-- Comments
COMMENT ON COLUMN worker_tasks.source IS 'Origin of task: schedule, api, manual, chain';
COMMENT ON COLUMN worker_tasks.source_schedule_id IS 'ID of schedule that created this task';
COMMENT ON COLUMN worker_tasks.source_metadata IS 'Additional metadata about task origin';

View File

@@ -0,0 +1,32 @@
-- Migration 117: Per-store crawl interval scheduling
-- Adds columns for configurable per-store crawl intervals
-- Part of Real-Time Inventory Tracking feature
-- Every statement uses IF NOT EXISTS, so the migration can be re-run.
-- Per-store crawl interval (NULL = use state schedule default 4h)
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS crawl_interval_minutes INT DEFAULT NULL;
-- When this store should next be crawled (used by high-frequency scheduler)
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS next_crawl_at TIMESTAMPTZ DEFAULT NULL;
-- Track last request time to enforce minimum spacing
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS last_crawl_started_at TIMESTAMPTZ DEFAULT NULL;
-- Change tracking for optimization
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS last_inventory_hash TEXT DEFAULT NULL;
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS last_price_hash TEXT DEFAULT NULL;
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS inventory_changes_24h INT DEFAULT 0;
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS price_changes_24h INT DEFAULT 0;
-- Index for scheduler query: find stores due for high-frequency crawl
-- Partial index: only stores with a custom interval and crawl_enabled = TRUE.
-- Relies on dispensaries.crawl_enabled, which this migration does not create.
CREATE INDEX IF NOT EXISTS idx_dispensaries_next_crawl
ON dispensaries(next_crawl_at)
WHERE crawl_interval_minutes IS NOT NULL AND crawl_enabled = TRUE;
-- Comment for documentation
COMMENT ON COLUMN dispensaries.crawl_interval_minutes IS 'Custom crawl interval in minutes. NULL = use state schedule (4h default). Set to 15/30/60 for high-frequency tracking.';
COMMENT ON COLUMN dispensaries.next_crawl_at IS 'When this store should next be crawled. Updated after each crawl with interval + jitter.';
COMMENT ON COLUMN dispensaries.last_crawl_started_at IS 'When the last crawl task was created. Used to enforce minimum spacing.';
COMMENT ON COLUMN dispensaries.last_inventory_hash IS 'Hash of inventory state from last crawl. Used to detect changes and skip unchanged payloads.';
COMMENT ON COLUMN dispensaries.last_price_hash IS 'Hash of price state from last crawl. Used to detect price changes.';
COMMENT ON COLUMN dispensaries.inventory_changes_24h IS 'Number of inventory changes detected in last 24h. Indicates store volatility.';
COMMENT ON COLUMN dispensaries.price_changes_24h IS 'Number of price changes detected in last 24h.';

View File

@@ -0,0 +1,48 @@
-- Migration 118: Inventory snapshots table
-- Lightweight per-product tracking for sales velocity estimation
-- Part of Real-Time Inventory Tracking feature
-- One row per product per capture. Rows are deleted together with their
-- dispensary (ON DELETE CASCADE).
CREATE TABLE IF NOT EXISTS inventory_snapshots (
id BIGSERIAL PRIMARY KEY,
dispensary_id INT NOT NULL REFERENCES dispensaries(id) ON DELETE CASCADE,
product_id TEXT NOT NULL, -- provider_product_id (normalized across platforms)
captured_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
-- Platform (for debugging/filtering)
platform TEXT NOT NULL, -- 'dutchie' | 'jane' | 'treez'
-- Inventory fields (normalized from all platforms)
-- All inventory, price and denormalized fields below are nullable.
quantity_available INT, -- Dutchie: quantityAvailable, Jane: quantity, Treez: quantityAvailable
is_below_threshold BOOLEAN, -- Dutchie: isBelowThreshold, Jane: computed, Treez: lowInventory
status TEXT, -- Active/Inactive/available
-- Price fields (normalized)
price_rec NUMERIC(10,2), -- recreational price
price_med NUMERIC(10,2), -- medical price (if different)
-- Denormalized for fast queries
brand_name TEXT,
category TEXT,
product_name TEXT
);
-- Indexes for inventory_snapshots.
-- All use IF NOT EXISTS, like the table itself, so re-running this migration
-- does not fail with "relation already exists".
-- Primary query: get snapshots for a store over time
CREATE INDEX IF NOT EXISTS idx_inv_snap_store_time ON inventory_snapshots(dispensary_id, captured_at DESC);
-- Delta calculation: get consecutive snapshots for a product
CREATE INDEX IF NOT EXISTS idx_inv_snap_product_time ON inventory_snapshots(dispensary_id, product_id, captured_at DESC);
-- Brand-level analytics (partial: rows without a brand are not indexed)
CREATE INDEX IF NOT EXISTS idx_inv_snap_brand_time ON inventory_snapshots(brand_name, captured_at DESC) WHERE brand_name IS NOT NULL;
-- Platform filtering
CREATE INDEX IF NOT EXISTS idx_inv_snap_platform ON inventory_snapshots(platform, captured_at DESC);
-- Retention cleanup (30 days) - simple index, cleanup job handles the WHERE
CREATE INDEX IF NOT EXISTS idx_inv_snap_cleanup ON inventory_snapshots(captured_at);
-- Comments
COMMENT ON TABLE inventory_snapshots IS 'Lightweight inventory snapshots for sales velocity tracking. Retained 30 days.';
COMMENT ON COLUMN inventory_snapshots.product_id IS 'Provider product ID, normalized across platforms';
COMMENT ON COLUMN inventory_snapshots.platform IS 'Menu platform: dutchie, jane, or treez';
COMMENT ON COLUMN inventory_snapshots.quantity_available IS 'Current quantity in stock (Dutchie: quantityAvailable, Jane: quantity)';

View File

@@ -0,0 +1,53 @@
-- Migration 119: Product visibility events table
-- Tracks OOS, brand drops, and other notable events for alerts
-- Part of Real-Time Inventory Tracking feature
-- Rows are deleted together with their dispensary (ON DELETE CASCADE).
CREATE TABLE IF NOT EXISTS product_visibility_events (
id SERIAL PRIMARY KEY,
dispensary_id INT NOT NULL REFERENCES dispensaries(id) ON DELETE CASCADE,
-- Product identification (null for brand-level events)
product_id TEXT, -- provider_product_id
product_name TEXT, -- For display in alerts
-- Brand (expected to be populated, but the column is nullable and not enforced)
brand_name TEXT,
-- Event details
event_type TEXT NOT NULL, -- 'oos', 'back_in_stock', 'brand_dropped', 'brand_added', 'price_change'
detected_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
-- Context
previous_quantity INT, -- For OOS events: what quantity was before
previous_price NUMERIC(10,2), -- For price change events
new_price NUMERIC(10,2), -- For price change events
-- NOTE(review): NUMERIC(5,2) holds at most +/-999.99, so a change of 1000% or
-- more cannot be stored - confirm writers cap or guard this value.
price_change_pct NUMERIC(5,2), -- Percentage change (e.g., -15.5 for 15.5% decrease)
-- Platform
platform TEXT, -- 'dutchie' | 'jane' | 'treez'
-- Alert status
notified BOOLEAN DEFAULT FALSE, -- Has external system been notified?
acknowledged_at TIMESTAMPTZ, -- When user acknowledged the alert
acknowledged_by TEXT -- User who acknowledged
);
-- Indexes for product_visibility_events.
-- All use IF NOT EXISTS, like the table itself, so re-running this migration
-- does not fail with "relation already exists".
-- Primary query: recent events by store
CREATE INDEX IF NOT EXISTS idx_vis_events_store_time ON product_visibility_events(dispensary_id, detected_at DESC);
-- Alert queries: unnotified events (partial: only rows with notified = FALSE)
CREATE INDEX IF NOT EXISTS idx_vis_events_unnotified ON product_visibility_events(notified, detected_at DESC) WHERE notified = FALSE;
-- Event type filtering
CREATE INDEX IF NOT EXISTS idx_vis_events_type ON product_visibility_events(event_type, detected_at DESC);
-- Brand-level queries (partial: rows without a brand are not indexed)
CREATE INDEX IF NOT EXISTS idx_vis_events_brand ON product_visibility_events(brand_name, event_type, detected_at DESC) WHERE brand_name IS NOT NULL;
-- Cleanup (90 days retention) - simple index, cleanup job handles the WHERE
CREATE INDEX IF NOT EXISTS idx_vis_events_cleanup ON product_visibility_events(detected_at);
-- Comments
COMMENT ON TABLE product_visibility_events IS 'Notable inventory events for alerting. OOS, brand drops, significant price changes. Retained 90 days.';
COMMENT ON COLUMN product_visibility_events.event_type IS 'Event type: oos (out of stock), back_in_stock, brand_dropped, brand_added, price_change';
COMMENT ON COLUMN product_visibility_events.notified IS 'Whether external systems (other apps) have been notified of this event';

View File

@@ -0,0 +1,13 @@
-- Migration 120: Daily baseline tracking
-- Track when each store's daily baseline payload was last saved
-- Part of Real-Time Inventory Tracking feature
-- Add column to track last baseline save time
-- NULL means no baseline timestamp has been recorded for the store.
ALTER TABLE dispensaries ADD COLUMN IF NOT EXISTS last_baseline_at TIMESTAMPTZ DEFAULT NULL;
-- Index for finding stores that need baselines
-- Partial index: only stores with crawl_enabled = TRUE are indexed.
-- Relies on dispensaries.crawl_enabled, which this migration does not create.
CREATE INDEX IF NOT EXISTS idx_dispensaries_baseline ON dispensaries(last_baseline_at)
WHERE crawl_enabled = TRUE;
-- Comment
COMMENT ON COLUMN dispensaries.last_baseline_at IS 'Timestamp of last daily baseline payload save. Baselines saved once per day between 12:01 AM - 3:00 AM.';

View File

@@ -35,6 +35,8 @@
"puppeteer-extra-plugin-stealth": "^2.11.2", "puppeteer-extra-plugin-stealth": "^2.11.2",
"sharp": "^0.32.0", "sharp": "^0.32.0",
"socks-proxy-agent": "^8.0.2", "socks-proxy-agent": "^8.0.2",
"swagger-jsdoc": "^6.2.8",
"swagger-ui-express": "^5.0.1",
"user-agents": "^1.1.669", "user-agents": "^1.1.669",
"uuid": "^9.0.1", "uuid": "^9.0.1",
"zod": "^3.22.4" "zod": "^3.22.4"
@@ -47,11 +49,53 @@
"@types/node": "^20.10.5", "@types/node": "^20.10.5",
"@types/node-cron": "^3.0.11", "@types/node-cron": "^3.0.11",
"@types/pg": "^8.15.6", "@types/pg": "^8.15.6",
"@types/swagger-jsdoc": "^6.0.4",
"@types/swagger-ui-express": "^4.1.8",
"@types/uuid": "^9.0.7", "@types/uuid": "^9.0.7",
"tsx": "^4.7.0", "tsx": "^4.7.0",
"typescript": "^5.3.3" "typescript": "^5.3.3"
} }
}, },
"node_modules/@apidevtools/json-schema-ref-parser": {
"version": "9.1.2",
"resolved": "https://registry.npmjs.org/@apidevtools/json-schema-ref-parser/-/json-schema-ref-parser-9.1.2.tgz",
"integrity": "sha512-r1w81DpR+KyRWd3f+rk6TNqMgedmAxZP5v5KWlXQWlgMUUtyEJch0DKEci1SorPMiSeM8XPl7MZ3miJ60JIpQg==",
"dependencies": {
"@jsdevtools/ono": "^7.1.3",
"@types/json-schema": "^7.0.6",
"call-me-maybe": "^1.0.1",
"js-yaml": "^4.1.0"
}
},
"node_modules/@apidevtools/openapi-schemas": {
"version": "2.1.0",
"resolved": "https://registry.npmjs.org/@apidevtools/openapi-schemas/-/openapi-schemas-2.1.0.tgz",
"integrity": "sha512-Zc1AlqrJlX3SlpupFGpiLi2EbteyP7fXmUOGup6/DnkRgjP9bgMM/ag+n91rsv0U1Gpz0H3VILA/o3bW7Ua6BQ==",
"engines": {
"node": ">=10"
}
},
"node_modules/@apidevtools/swagger-methods": {
"version": "3.0.2",
"resolved": "https://registry.npmjs.org/@apidevtools/swagger-methods/-/swagger-methods-3.0.2.tgz",
"integrity": "sha512-QAkD5kK2b1WfjDS/UQn/qQkbwF31uqRjPTrsCs5ZG9BQGAkjwvqGFjjPqAuzac/IYzpPtRzjCP1WrTuAIjMrXg=="
},
"node_modules/@apidevtools/swagger-parser": {
"version": "10.0.3",
"resolved": "https://registry.npmjs.org/@apidevtools/swagger-parser/-/swagger-parser-10.0.3.tgz",
"integrity": "sha512-sNiLY51vZOmSPFZA5TF35KZ2HbgYklQnTSDnkghamzLb3EkNtcQnrBQEj5AOCxHpTtXpqMCRM1CrmV2rG6nw4g==",
"dependencies": {
"@apidevtools/json-schema-ref-parser": "^9.0.6",
"@apidevtools/openapi-schemas": "^2.0.4",
"@apidevtools/swagger-methods": "^3.0.2",
"@jsdevtools/ono": "^7.1.3",
"call-me-maybe": "^1.0.1",
"z-schema": "^5.0.1"
},
"peerDependencies": {
"openapi-types": ">=7"
}
},
"node_modules/@babel/code-frame": { "node_modules/@babel/code-frame": {
"version": "7.27.1", "version": "7.27.1",
"resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.27.1.tgz", "resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.27.1.tgz",
@@ -494,6 +538,11 @@
"resolved": "https://registry.npmjs.org/@ioredis/commands/-/commands-1.4.0.tgz", "resolved": "https://registry.npmjs.org/@ioredis/commands/-/commands-1.4.0.tgz",
"integrity": "sha512-aFT2yemJJo+TZCmieA7qnYGQooOS7QfNmYrzGtsYd3g9j5iDP8AimYYAesf79ohjbLG12XxC4nG5DyEnC88AsQ==" "integrity": "sha512-aFT2yemJJo+TZCmieA7qnYGQooOS7QfNmYrzGtsYd3g9j5iDP8AimYYAesf79ohjbLG12XxC4nG5DyEnC88AsQ=="
}, },
"node_modules/@jsdevtools/ono": {
"version": "7.1.3",
"resolved": "https://registry.npmjs.org/@jsdevtools/ono/-/ono-7.1.3.tgz",
"integrity": "sha512-4JQNk+3mVzK3xh2rqd6RB4J46qUR19azEHBneZyTZM+c456qOrbbM/5xcR8huNCCcbVt7+UmizG6GuUvPvKUYg=="
},
"node_modules/@jsep-plugin/assignment": { "node_modules/@jsep-plugin/assignment": {
"version": "1.3.0", "version": "1.3.0",
"resolved": "https://registry.npmjs.org/@jsep-plugin/assignment/-/assignment-1.3.0.tgz", "resolved": "https://registry.npmjs.org/@jsep-plugin/assignment/-/assignment-1.3.0.tgz",
@@ -761,6 +810,12 @@
"resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz", "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz",
"integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==" "integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w=="
}, },
"node_modules/@scarf/scarf": {
"version": "1.4.0",
"resolved": "https://registry.npmjs.org/@scarf/scarf/-/scarf-1.4.0.tgz",
"integrity": "sha512-xxeapPiUXdZAE3che6f3xogoJPeZgig6omHEy1rIY5WVsB3H2BHNnZH+gHG6x91SCWyQCzWGsuL2Hh3ClO5/qQ==",
"hasInstallScript": true
},
"node_modules/@tootallnate/quickjs-emscripten": { "node_modules/@tootallnate/quickjs-emscripten": {
"version": "0.23.0", "version": "0.23.0",
"resolved": "https://registry.npmjs.org/@tootallnate/quickjs-emscripten/-/quickjs-emscripten-0.23.0.tgz", "resolved": "https://registry.npmjs.org/@tootallnate/quickjs-emscripten/-/quickjs-emscripten-0.23.0.tgz",
@@ -855,6 +910,11 @@
"resolved": "https://registry.npmjs.org/@types/js-yaml/-/js-yaml-4.0.9.tgz", "resolved": "https://registry.npmjs.org/@types/js-yaml/-/js-yaml-4.0.9.tgz",
"integrity": "sha512-k4MGaQl5TGo/iipqb2UDG2UwjXziSWkh0uysQelTlJpX1qGlpUZYm8PnO4DxG1qBomtJUdYJ6qR6xdIah10JLg==" "integrity": "sha512-k4MGaQl5TGo/iipqb2UDG2UwjXziSWkh0uysQelTlJpX1qGlpUZYm8PnO4DxG1qBomtJUdYJ6qR6xdIah10JLg=="
}, },
"node_modules/@types/json-schema": {
"version": "7.0.15",
"resolved": "https://registry.npmjs.org/@types/json-schema/-/json-schema-7.0.15.tgz",
"integrity": "sha512-5+fP8P8MFNC+AyZCDxrB2pkZFPGzqQWUzpSeuuVLvm8VMcorNYavBqoFcxK8bQz4Qsbn4oUEEem4wDLfcysGHA=="
},
"node_modules/@types/jsonwebtoken": { "node_modules/@types/jsonwebtoken": {
"version": "9.0.10", "version": "9.0.10",
"resolved": "https://registry.npmjs.org/@types/jsonwebtoken/-/jsonwebtoken-9.0.10.tgz", "resolved": "https://registry.npmjs.org/@types/jsonwebtoken/-/jsonwebtoken-9.0.10.tgz",
@@ -960,6 +1020,22 @@
"@types/node": "*" "@types/node": "*"
} }
}, },
"node_modules/@types/swagger-jsdoc": {
"version": "6.0.4",
"resolved": "https://registry.npmjs.org/@types/swagger-jsdoc/-/swagger-jsdoc-6.0.4.tgz",
"integrity": "sha512-W+Xw5epcOZrF/AooUM/PccNMSAFOKWZA5dasNyMujTwsBkU74njSJBpvCCJhHAJ95XRMzQrrW844Btu0uoetwQ==",
"dev": true
},
"node_modules/@types/swagger-ui-express": {
"version": "4.1.8",
"resolved": "https://registry.npmjs.org/@types/swagger-ui-express/-/swagger-ui-express-4.1.8.tgz",
"integrity": "sha512-AhZV8/EIreHFmBV5wAs0gzJUNq9JbbSXgJLQubCC0jtIo6prnI9MIRRxnU4MZX9RB9yXxF1V4R7jtLl/Wcj31g==",
"dev": true,
"dependencies": {
"@types/express": "*",
"@types/serve-static": "*"
}
},
"node_modules/@types/uuid": { "node_modules/@types/uuid": {
"version": "9.0.8", "version": "9.0.8",
"resolved": "https://registry.npmjs.org/@types/uuid/-/uuid-9.0.8.tgz", "resolved": "https://registry.npmjs.org/@types/uuid/-/uuid-9.0.8.tgz",
@@ -1434,6 +1510,11 @@
"url": "https://github.com/sponsors/ljharb" "url": "https://github.com/sponsors/ljharb"
} }
}, },
"node_modules/call-me-maybe": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/call-me-maybe/-/call-me-maybe-1.0.2.tgz",
"integrity": "sha512-HpX65o1Hnr9HH25ojC1YGs7HCQLq0GCOibSaWER0eNpgJ/Z1MZv2mTc7+xh6WOPxbRVcmgbv4hGU+uSQ/2xFZQ=="
},
"node_modules/callsites": { "node_modules/callsites": {
"version": "3.1.0", "version": "3.1.0",
"resolved": "https://registry.npmjs.org/callsites/-/callsites-3.1.0.tgz", "resolved": "https://registry.npmjs.org/callsites/-/callsites-3.1.0.tgz",
@@ -1594,6 +1675,14 @@
"node": ">= 0.8" "node": ">= 0.8"
} }
}, },
"node_modules/commander": {
"version": "6.2.0",
"resolved": "https://registry.npmjs.org/commander/-/commander-6.2.0.tgz",
"integrity": "sha512-zP4jEKbe8SHzKJYQmq8Y9gYjtO/POJLgIdKgV7B9qNmABVFVc+ctqSX6iXh4mCpJfRBOabiZ2YKPg8ciDw6C+Q==",
"engines": {
"node": ">= 6"
}
},
"node_modules/concat-map": { "node_modules/concat-map": {
"version": "0.0.1", "version": "0.0.1",
"resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz", "resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz",
@@ -1863,6 +1952,17 @@
"resolved": "https://registry.npmjs.org/devtools-protocol/-/devtools-protocol-0.0.1232444.tgz", "resolved": "https://registry.npmjs.org/devtools-protocol/-/devtools-protocol-0.0.1232444.tgz",
"integrity": "sha512-pM27vqEfxSxRkTMnF+XCmxSEb6duO5R+t8A9DEEJgy4Wz2RVanje2mmj99B6A3zv2r/qGfYlOvYznUhuokizmg==" "integrity": "sha512-pM27vqEfxSxRkTMnF+XCmxSEb6duO5R+t8A9DEEJgy4Wz2RVanje2mmj99B6A3zv2r/qGfYlOvYznUhuokizmg=="
}, },
"node_modules/doctrine": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/doctrine/-/doctrine-3.0.0.tgz",
"integrity": "sha512-yS+Q5i3hBf7GBkd4KG8a7eBNNWNGLTaEwwYWUijIYM7zrlYDM0BFXHjjPWlWZ1Rg7UaddZeIDmi9jF3HmqiQ2w==",
"dependencies": {
"esutils": "^2.0.2"
},
"engines": {
"node": ">=6.0.0"
}
},
"node_modules/dom-serializer": { "node_modules/dom-serializer": {
"version": "2.0.0", "version": "2.0.0",
"resolved": "https://registry.npmjs.org/dom-serializer/-/dom-serializer-2.0.0.tgz", "resolved": "https://registry.npmjs.org/dom-serializer/-/dom-serializer-2.0.0.tgz",
@@ -3258,6 +3358,12 @@
"resolved": "https://registry.npmjs.org/lodash.defaults/-/lodash.defaults-4.2.0.tgz", "resolved": "https://registry.npmjs.org/lodash.defaults/-/lodash.defaults-4.2.0.tgz",
"integrity": "sha512-qjxPLHd3r5DnsdGacqOMU6pb/avJzdh9tFX2ymgoZE27BmjXrNy/y4LoaiTeAb+O3gL8AfpJGtqfX/ae2leYYQ==" "integrity": "sha512-qjxPLHd3r5DnsdGacqOMU6pb/avJzdh9tFX2ymgoZE27BmjXrNy/y4LoaiTeAb+O3gL8AfpJGtqfX/ae2leYYQ=="
}, },
"node_modules/lodash.get": {
"version": "4.4.2",
"resolved": "https://registry.npmjs.org/lodash.get/-/lodash.get-4.4.2.tgz",
"integrity": "sha512-z+Uw/vLuy6gQe8cfaFWD7p0wVv8fJl3mbzXh33RS+0oW2wvUqiRXiQ69gLWSLpgB5/6sU+r6BlQR0MBILadqTQ==",
"deprecated": "This package is deprecated. Use the optional chaining (?.) operator instead."
},
"node_modules/lodash.includes": { "node_modules/lodash.includes": {
"version": "4.3.0", "version": "4.3.0",
"resolved": "https://registry.npmjs.org/lodash.includes/-/lodash.includes-4.3.0.tgz", "resolved": "https://registry.npmjs.org/lodash.includes/-/lodash.includes-4.3.0.tgz",
@@ -3273,6 +3379,12 @@
"resolved": "https://registry.npmjs.org/lodash.isboolean/-/lodash.isboolean-3.0.3.tgz", "resolved": "https://registry.npmjs.org/lodash.isboolean/-/lodash.isboolean-3.0.3.tgz",
"integrity": "sha512-Bz5mupy2SVbPHURB98VAcw+aHh4vRV5IPNhILUCsOzRmsTmSQ17jIuqopAentWoehktxGd9e/hbIXq980/1QJg==" "integrity": "sha512-Bz5mupy2SVbPHURB98VAcw+aHh4vRV5IPNhILUCsOzRmsTmSQ17jIuqopAentWoehktxGd9e/hbIXq980/1QJg=="
}, },
"node_modules/lodash.isequal": {
"version": "4.5.0",
"resolved": "https://registry.npmjs.org/lodash.isequal/-/lodash.isequal-4.5.0.tgz",
"integrity": "sha512-pDo3lu8Jhfjqls6GkMgpahsF9kCyayhgykjyLMNFTKWrpVdAQtYyB4muAMWozBB4ig/dtWAmsMxLEI8wuz+DYQ==",
"deprecated": "This package is deprecated. Use require('node:util').isDeepStrictEqual instead."
},
"node_modules/lodash.isinteger": { "node_modules/lodash.isinteger": {
"version": "4.0.4", "version": "4.0.4",
"resolved": "https://registry.npmjs.org/lodash.isinteger/-/lodash.isinteger-4.0.4.tgz", "resolved": "https://registry.npmjs.org/lodash.isinteger/-/lodash.isinteger-4.0.4.tgz",
@@ -3293,6 +3405,11 @@
"resolved": "https://registry.npmjs.org/lodash.isstring/-/lodash.isstring-4.0.1.tgz", "resolved": "https://registry.npmjs.org/lodash.isstring/-/lodash.isstring-4.0.1.tgz",
"integrity": "sha512-0wJxfxH1wgO3GrbuP+dTTk7op+6L41QCXbGINEmD+ny/G/eCqGzxyCsh7159S+mgDDcoarnBw6PC1PS5+wUGgw==" "integrity": "sha512-0wJxfxH1wgO3GrbuP+dTTk7op+6L41QCXbGINEmD+ny/G/eCqGzxyCsh7159S+mgDDcoarnBw6PC1PS5+wUGgw=="
}, },
"node_modules/lodash.mergewith": {
"version": "4.6.2",
"resolved": "https://registry.npmjs.org/lodash.mergewith/-/lodash.mergewith-4.6.2.tgz",
"integrity": "sha512-GK3g5RPZWTRSeLSpgP8Xhra+pnjBC56q9FZYe1d5RN3TJ35dbkGy3YqBSMbyCrlbi+CM9Z3Jk5yTL7RCsqboyQ=="
},
"node_modules/lodash.once": { "node_modules/lodash.once": {
"version": "4.1.1", "version": "4.1.1",
"resolved": "https://registry.npmjs.org/lodash.once/-/lodash.once-4.1.1.tgz", "resolved": "https://registry.npmjs.org/lodash.once/-/lodash.once-4.1.1.tgz",
@@ -3748,6 +3865,12 @@
"wrappy": "1" "wrappy": "1"
} }
}, },
"node_modules/openapi-types": {
"version": "12.1.3",
"resolved": "https://registry.npmjs.org/openapi-types/-/openapi-types-12.1.3.tgz",
"integrity": "sha512-N4YtSYJqghVu4iek2ZUvcN/0aqH1kRDuNqzcycDxhOUpg7GdvLa2F3DgS6yBNhInhv2r/6I0Flkn7CqL8+nIcw==",
"peer": true
},
"node_modules/openid-client": { "node_modules/openid-client": {
"version": "6.8.1", "version": "6.8.1",
"resolved": "https://registry.npmjs.org/openid-client/-/openid-client-6.8.1.tgz", "resolved": "https://registry.npmjs.org/openid-client/-/openid-client-6.8.1.tgz",
@@ -5188,6 +5311,78 @@
} }
] ]
}, },
"node_modules/swagger-jsdoc": {
"version": "6.2.8",
"resolved": "https://registry.npmjs.org/swagger-jsdoc/-/swagger-jsdoc-6.2.8.tgz",
"integrity": "sha512-VPvil1+JRpmJ55CgAtn8DIcpBs0bL5L3q5bVQvF4tAW/k/9JYSj7dCpaYCAv5rufe0vcCbBRQXGvzpkWjvLklQ==",
"dependencies": {
"commander": "6.2.0",
"doctrine": "3.0.0",
"glob": "7.1.6",
"lodash.mergewith": "^4.6.2",
"swagger-parser": "^10.0.3",
"yaml": "2.0.0-1"
},
"bin": {
"swagger-jsdoc": "bin/swagger-jsdoc.js"
},
"engines": {
"node": ">=12.0.0"
}
},
"node_modules/swagger-jsdoc/node_modules/glob": {
"version": "7.1.6",
"resolved": "https://registry.npmjs.org/glob/-/glob-7.1.6.tgz",
"integrity": "sha512-LwaxwyZ72Lk7vZINtNNrywX0ZuLyStrdDtabefZKAY5ZGJhVtgdznluResxNmPitE0SAO+O26sWTHeKSI2wMBA==",
"deprecated": "Glob versions prior to v9 are no longer supported",
"dependencies": {
"fs.realpath": "^1.0.0",
"inflight": "^1.0.4",
"inherits": "2",
"minimatch": "^3.0.4",
"once": "^1.3.0",
"path-is-absolute": "^1.0.0"
},
"engines": {
"node": "*"
},
"funding": {
"url": "https://github.com/sponsors/isaacs"
}
},
"node_modules/swagger-parser": {
"version": "10.0.3",
"resolved": "https://registry.npmjs.org/swagger-parser/-/swagger-parser-10.0.3.tgz",
"integrity": "sha512-nF7oMeL4KypldrQhac8RyHerJeGPD1p2xDh900GPvc+Nk7nWP6jX2FcC7WmkinMoAmoO774+AFXcWsW8gMWEIg==",
"dependencies": {
"@apidevtools/swagger-parser": "10.0.3"
},
"engines": {
"node": ">=10"
}
},
"node_modules/swagger-ui-dist": {
"version": "5.31.0",
"resolved": "https://registry.npmjs.org/swagger-ui-dist/-/swagger-ui-dist-5.31.0.tgz",
"integrity": "sha512-zSUTIck02fSga6rc0RZP3b7J7wgHXwLea8ZjgLA3Vgnb8QeOl3Wou2/j5QkzSGeoz6HusP/coYuJl33aQxQZpg==",
"dependencies": {
"@scarf/scarf": "=1.4.0"
}
},
"node_modules/swagger-ui-express": {
"version": "5.0.1",
"resolved": "https://registry.npmjs.org/swagger-ui-express/-/swagger-ui-express-5.0.1.tgz",
"integrity": "sha512-SrNU3RiBGTLLmFU8GIJdOdanJTl4TOmT27tt3bWWHppqYmAZ6IDuEuBvMU6nZq0zLEe6b/1rACXCgLZqO6ZfrA==",
"dependencies": {
"swagger-ui-dist": ">=5.0.0"
},
"engines": {
"node": ">= v0.10.32"
},
"peerDependencies": {
"express": ">=4.0.0 || >=5.0.0-beta"
}
},
"node_modules/tar": { "node_modules/tar": {
"version": "6.2.1", "version": "6.2.1",
"resolved": "https://registry.npmjs.org/tar/-/tar-6.2.1.tgz", "resolved": "https://registry.npmjs.org/tar/-/tar-6.2.1.tgz",
@@ -5406,6 +5601,14 @@
"uuid": "dist/bin/uuid" "uuid": "dist/bin/uuid"
} }
}, },
"node_modules/validator": {
"version": "13.15.23",
"resolved": "https://registry.npmjs.org/validator/-/validator-13.15.23.tgz",
"integrity": "sha512-4yoz1kEWqUjzi5zsPbAS/903QXSYp0UOtHsPpp7p9rHAw/W+dkInskAE386Fat3oKRROwO98d9ZB0G4cObgUyw==",
"engines": {
"node": ">= 0.10"
}
},
"node_modules/vary": { "node_modules/vary": {
"version": "1.1.2", "version": "1.1.2",
"resolved": "https://registry.npmjs.org/vary/-/vary-1.1.2.tgz", "resolved": "https://registry.npmjs.org/vary/-/vary-1.1.2.tgz",
@@ -5584,6 +5787,14 @@
"resolved": "https://registry.npmjs.org/yallist/-/yallist-4.0.0.tgz", "resolved": "https://registry.npmjs.org/yallist/-/yallist-4.0.0.tgz",
"integrity": "sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A==" "integrity": "sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A=="
}, },
"node_modules/yaml": {
"version": "2.0.0-1",
"resolved": "https://registry.npmjs.org/yaml/-/yaml-2.0.0-1.tgz",
"integrity": "sha512-W7h5dEhywMKenDJh2iX/LABkbFnBxasD27oyXWDS/feDsxiw0dD5ncXdYXgkvAsXIY2MpW/ZKkr9IU30DBdMNQ==",
"engines": {
"node": ">= 6"
}
},
"node_modules/yargs": { "node_modules/yargs": {
"version": "17.7.2", "version": "17.7.2",
"resolved": "https://registry.npmjs.org/yargs/-/yargs-17.7.2.tgz", "resolved": "https://registry.npmjs.org/yargs/-/yargs-17.7.2.tgz",
@@ -5618,6 +5829,34 @@
"fd-slicer": "~1.1.0" "fd-slicer": "~1.1.0"
} }
}, },
"node_modules/z-schema": {
"version": "5.0.5",
"resolved": "https://registry.npmjs.org/z-schema/-/z-schema-5.0.5.tgz",
"integrity": "sha512-D7eujBWkLa3p2sIpJA0d1pr7es+a7m0vFAnZLlCEKq/Ij2k0MLi9Br2UPxoxdYystm5K1yeBGzub0FlYUEWj2Q==",
"dependencies": {
"lodash.get": "^4.4.2",
"lodash.isequal": "^4.5.0",
"validator": "^13.7.0"
},
"bin": {
"z-schema": "bin/z-schema"
},
"engines": {
"node": ">=8.0.0"
},
"optionalDependencies": {
"commander": "^9.4.1"
}
},
"node_modules/z-schema/node_modules/commander": {
"version": "9.5.0",
"resolved": "https://registry.npmjs.org/commander/-/commander-9.5.0.tgz",
"integrity": "sha512-KRs7WVDKg86PWiuAqhDrAQnTXZKraVcCc6vFdL14qrZ/DcWwuRo7VoiYXalXO7S5GKpqYiVEwCbgFDfxNHKJBQ==",
"optional": true,
"engines": {
"node": "^12.20.0 || >=14"
}
},
"node_modules/zod": { "node_modules/zod": {
"version": "3.25.76", "version": "3.25.76",
"resolved": "https://registry.npmjs.org/zod/-/zod-3.25.76.tgz", "resolved": "https://registry.npmjs.org/zod/-/zod-3.25.76.tgz",

View File

@@ -49,6 +49,8 @@
"puppeteer-extra-plugin-stealth": "^2.11.2", "puppeteer-extra-plugin-stealth": "^2.11.2",
"sharp": "^0.32.0", "sharp": "^0.32.0",
"socks-proxy-agent": "^8.0.2", "socks-proxy-agent": "^8.0.2",
"swagger-jsdoc": "^6.2.8",
"swagger-ui-express": "^5.0.1",
"user-agents": "^1.1.669", "user-agents": "^1.1.669",
"uuid": "^9.0.1", "uuid": "^9.0.1",
"zod": "^3.22.4" "zod": "^3.22.4"
@@ -61,6 +63,8 @@
"@types/node": "^20.10.5", "@types/node": "^20.10.5",
"@types/node-cron": "^3.0.11", "@types/node-cron": "^3.0.11",
"@types/pg": "^8.15.6", "@types/pg": "^8.15.6",
"@types/swagger-jsdoc": "^6.0.4",
"@types/swagger-ui-express": "^4.1.8",
"@types/uuid": "^9.0.7", "@types/uuid": "^9.0.7",
"tsx": "^4.7.0", "tsx": "^4.7.0",
"typescript": "^5.3.3" "typescript": "^5.3.3"

Binary file not shown.

View File

@@ -1 +1 @@
cannaiq-menus-1.6.0.zip cannaiq-menus-1.7.0.zip

View File

@@ -0,0 +1,130 @@
/**
* Count Jane stores - v2: Try Algolia store search
* Usage: npx ts-node scripts/count-jane-stores-v2.ts
*/
import puppeteer from 'puppeteer-extra';
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
puppeteer.use(StealthPlugin());
// Two-letter US state codes.
// NOTE(review): nothing in this script references STATES — confirm whether
// per-state crawling is still planned or whether this constant can be removed.
const STATES = [
'AZ', 'CA', 'CO', 'FL', 'IL', 'MA', 'MI', 'NV', 'NJ', 'NY', 'OH', 'PA', 'WA', 'OR'
];
/**
 * Loads the Jane store directory in a headless browser, collects store records
 * from JSON responses served by iheartjane.com, and prints the number of unique
 * stores overall, per state, and for Arizona.
 *
 * The browser is always closed, even when navigation or scraping throws; the
 * error then propagates to the caller.
 */
async function main() {
  console.log('Counting Jane stores by exploring state pages...\n');

  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  // Keyed by store id so a store that appears in several responses is counted once.
  const allStores: Map<number, any> = new Map();

  try {
    const page = await browser.newPage();

    // Abort heavy static assets; only documents and API traffic are needed.
    await page.setRequestInterception(true);
    page.on('request', (req) => {
      const type = req.resourceType();
      if (['image', 'font', 'media', 'stylesheet'].includes(type)) {
        req.abort();
      } else {
        req.continue();
      }
    });

    page.on('response', async (response) => {
      const url = response.url();
      const contentType = response.headers()['content-type'] || '';
      if (url.includes('iheartjane.com') && contentType.includes('json')) {
        try {
          const json = await response.json();
          // Look for stores in any response
          if (json.stores && Array.isArray(json.stores)) {
            for (const s of json.stores) {
              if (s.id) allStores.set(s.id, s);
            }
          }
          // Also check hits (Algolia format)
          if (json.hits && Array.isArray(json.hits)) {
            for (const s of json.hits) {
              if (s.id) allStores.set(s.id, s);
            }
          }
        } catch {
          // Best effort: a body that cannot be read or parsed as JSON is skipped.
        }
      }
    });

    // First visit the main stores page
    console.log('Visiting main stores page...');
    await page.goto('https://www.iheartjane.com/stores', {
      waitUntil: 'networkidle0',
      timeout: 60000,
    });
    await new Promise(r => setTimeout(r, 3000));

    // Try to scroll to load more stores
    console.log('Scrolling to load more...');
    for (let i = 0; i < 5; i++) {
      await page.evaluate(() => window.scrollBy(0, 1000));
      await new Promise(r => setTimeout(r, 1000));
    }

    // Try clicking "Load More" if it exists. The lookup uses plain DOM APIs in
    // the page: ':has-text()' is not a valid CSS selector, so passing it to
    // page.$() always threw and this step silently never ran.
    try {
      const clicked = await page.evaluate(() => {
        const candidates = Array.from(
          document.querySelectorAll<HTMLElement>('button, [class*="load-more"]')
        );
        const target = candidates.find(
          (el) =>
            el.matches('[class*="load-more"]') ||
            (el.textContent ?? '').toLowerCase().includes('load more')
        );
        if (!target) return false;
        target.click();
        return true;
      });
      if (clicked) {
        console.log('Clicking Load More...');
        await new Promise(r => setTimeout(r, 3000));
      }
    } catch (error: unknown) {
      // Optional step: report the problem but keep going with what was captured.
      const message = error instanceof Error ? error.message : String(error);
      console.log(`Load More step skipped: ${message}`);
    }

    // Extract stores from DOM as fallback
    const domStores = await page.evaluate(() => {
      const storeElements = document.querySelectorAll('[data-store-id], [class*="StoreCard"], [class*="store-card"]');
      return storeElements.length;
    });
    console.log(`\nStores from DOM elements: ${domStores}`);
  } finally {
    await browser.close();
  }

  // Count by state
  const byState: Record<string, number> = {};
  for (const store of allStores.values()) {
    const state = store.state || 'Unknown';
    byState[state] = (byState[state] || 0) + 1;
  }

  console.log('\n=== JANE STORE COUNTS ===\n');
  console.log(`Unique stores captured: ${allStores.size}`);

  if (allStores.size > 0) {
    console.log('\nBy State:');
    const sorted = Object.entries(byState).sort((a, b) => b[1] - a[1]);
    for (const [state, count] of sorted.slice(0, 20)) {
      console.log(` ${state}: ${count}`);
    }

    // Check Arizona specifically
    const azStores = Array.from(allStores.values()).filter(s =>
      s.state === 'Arizona' || s.state === 'AZ'
    );
    console.log(`\nArizona stores: ${azStores.length}`);
    if (azStores.length > 0) {
      console.log('AZ stores:');
      for (const s of azStores.slice(0, 10)) {
        console.log(` - ${s.name} (ID: ${s.id}) - ${s.city}`);
      }
    }
  }

  // Note about total
  console.log('\n--- Note ---');
  console.log('Jane uses server-side rendering. To get full store count,');
  console.log('you may need to check their public marketing materials or');
  console.log('iterate through known store IDs.');
}
main().catch(console.error);

View File

@@ -0,0 +1,98 @@
/**
* Count Jane stores by state
* Usage: npx ts-node scripts/count-jane-stores.ts
*/
import puppeteer from 'puppeteer-extra';
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
puppeteer.use(StealthPlugin());
/**
 * Loads the Jane store directory in a headless browser, collects the `stores`
 * arrays from iheartjane.com responses whose URL contains "stores", and prints
 * totals per state plus a sample of Arizona stores.
 *
 * The browser is always closed, even when navigation throws; the error then
 * propagates to the caller.
 */
async function main() {
  console.log('Counting Jane stores...\n');

  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  // Capture store data from API
  const stores: any[] = [];
  let pageStoreCount: number | null = null;

  try {
    const page = await browser.newPage();

    // Abort heavy static assets; only documents and API traffic are needed.
    await page.setRequestInterception(true);
    page.on('request', (req) => {
      const type = req.resourceType();
      if (['image', 'font', 'media', 'stylesheet'].includes(type)) {
        req.abort();
      } else {
        req.continue();
      }
    });

    page.on('response', async (response) => {
      const url = response.url();
      if (url.includes('iheartjane.com') && url.includes('stores')) {
        try {
          const json = await response.json();
          if (json.stores && Array.isArray(json.stores)) {
            stores.push(...json.stores);
          }
        } catch {
          // Best effort: a body that cannot be read or parsed as JSON is skipped.
        }
      }
    });

    // Visit the store directory
    console.log('Loading Jane store directory...');
    await page.goto('https://www.iheartjane.com/stores', {
      waitUntil: 'networkidle2',
      timeout: 60000,
    });

    // Wait for stores to load
    await new Promise(r => setTimeout(r, 5000));

    // Also try to get store count from page content
    pageStoreCount = await page.evaluate(() => {
      // Look for store count in page text
      const text = document.body.innerText;
      const match = text.match(/(\d+)\s*stores?/i);
      return match ? parseInt(match[1], 10) : null;
    });
  } finally {
    await browser.close();
  }

  // Count by state
  const byState: Record<string, number> = {};
  for (const store of stores) {
    const state = store.state || 'Unknown';
    byState[state] = (byState[state] || 0) + 1;
  }

  console.log('\n=== JANE STORE COUNTS ===\n');
  console.log(`Total stores captured from API: ${stores.length}`);
  // Compare against null so a page that literally says "0 stores" is still reported.
  if (pageStoreCount !== null) {
    console.log(`Page claims: ${pageStoreCount} stores`);
  }

  console.log('\nBy State:');
  const sorted = Object.entries(byState).sort((a, b) => b[1] - a[1]);
  for (const [state, count] of sorted) {
    console.log(` ${state}: ${count}`);
  }

  // Check Arizona specifically
  const azStores = stores.filter(s =>
    s.state === 'Arizona' || s.state === 'AZ'
  );
  console.log(`\nArizona stores: ${azStores.length}`);
  if (azStores.length > 0) {
    console.log('Sample AZ stores:');
    for (const s of azStores.slice(0, 5)) {
      console.log(` - ${s.name} (ID: ${s.id}) - ${s.city}`);
    }
  }
}
main().catch(console.error);

View File

@@ -0,0 +1,184 @@
/**
* Explore all Treez page URLs to find the full product catalog
*/
import puppeteer, { Page } from 'puppeteer';
const STORE_ID = 'best';
/** Pauses the caller for `ms` milliseconds using a timer-backed promise. */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
/**
 * Dismisses the age-gate modal if the page currently shows one.
 *
 * Looks up the modal by its `data-testid`; when present, clicks the submit
 * button (if found) and then waits two seconds. Does nothing when no modal
 * is found.
 */
async function bypassAgeGate(page: Page): Promise<void> {
  const modal = await page.$('[data-testid="age-gate-modal"]');
  if (!modal) {
    return;
  }

  console.log(' Age gate detected, bypassing...');
  const submitButton = await page.$('[data-testid="age-gate-submit-button"]');
  if (submitButton) {
    await submitButton.click();
  }
  await sleep(2000);
}
/**
 * Returns how many elements on the page have a class attribute containing
 * `product_product__`.
 */
async function countProducts(page: Page): Promise<number> {
  const total = await page.evaluate(() => {
    const matches = document.querySelectorAll('[class*="product_product__"]');
    return matches.length;
  });
  return total;
}
/**
 * Scrolls to the bottom of the page repeatedly, then counts product elements.
 *
 * Stops after `maxScrolls` scrolls, or as soon as the document height has been
 * unchanged for three consecutive checks.
 *
 * @param page - Page to scroll.
 * @param maxScrolls - Upper bound on the number of scrolls performed.
 * @returns The product count after scrolling and the number of scrolls done.
 */
async function scrollAndCount(page: Page, maxScrolls: number = 30): Promise<{ products: number; scrolls: number }> {
  let lastHeight = 0;
  let unchangedStreak = 0;
  let performed = 0;

  for (; performed < maxScrolls; performed += 1) {
    const height = await page.evaluate(() => document.body.scrollHeight);

    // Track how many checks in a row saw the same height as the previous one.
    unchangedStreak = height === lastHeight ? unchangedStreak + 1 : 0;
    if (unchangedStreak >= 3) {
      break;
    }

    await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
    await sleep(1500);
    lastHeight = height;
  }

  return { products: await countProducts(page), scrolls: performed };
}
/**
 * Opens one menu path on the store's Treez site and reports how many products
 * it shows.
 *
 * When products are present the page is scrolled to load more before counting.
 * When none are present, the number of brand/category/card-like elements is
 * logged instead, as a hint that the path is an index page.
 *
 * @param page - Page used for navigation.
 * @param path - Path (including query string) appended to the store origin.
 * @returns Product and scroll counts; `error` holds the message if navigation
 *          or scraping threw (counts are then zero).
 */
async function testUrl(page: Page, path: string): Promise<{ products: number; scrolls: number; error?: string }> {
  const url = `https://${STORE_ID}.treez.io${path}`;
  console.log(`\nTesting: ${url}`);

  try {
    await page.goto(url, { waitUntil: 'networkidle2', timeout: 30000 });
    await sleep(2000);
    await bypassAgeGate(page);
    await sleep(1000);

    const initialCount = await countProducts(page);
    console.log(` Initial products: ${initialCount}`);

    if (initialCount > 0) {
      const result = await scrollAndCount(page);
      console.log(` After scroll: ${result.products} products (${result.scrolls} scrolls)`);
      return result;
    }

    // Check for brand/category cards instead
    const cardCount = await page.evaluate(() => {
      // One combined selector list so an element matching several of these
      // patterns (e.g. class "brand-card") is counted once, not once per pattern.
      const selector = [
        '[class*="brand"]',
        '[class*="Brand"]',
        '[class*="category"]',
        '[class*="Category"]',
        '[class*="card"]',
        'a[href*="/brand/"]',
        'a[href*="/category/"]',
      ].join(', ');
      return document.querySelectorAll(selector).length;
    });
    console.log(` Cards/links found: ${cardCount}`);

    return { products: initialCount, scrolls: 0 };
  } catch (error: unknown) {
    const message = error instanceof Error ? error.message : String(error);
    console.log(` Error: ${message}`);
    return { products: 0, scrolls: 0, error: message };
  }
}
/**
 * Probes a fixed list of candidate menu paths on the store's Treez site,
 * prints the navigation links found on the homepage, and ends with a summary
 * of product counts per path (highest first).
 *
 * The browser is always closed, even when a navigation throws; the error then
 * propagates to the caller.
 */
async function main() {
  console.log('='.repeat(60));
  console.log('Exploring Treez Page URLs');
  console.log('='.repeat(60));

  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // Block images to speed up
    await page.setRequestInterception(true);
    page.on('request', (req) => {
      if (['image', 'font', 'media', 'stylesheet'].includes(req.resourceType())) {
        req.abort();
      } else {
        req.continue();
      }
    });

    const urlsToTest = [
      '/onlinemenu/?customerType=ADULT', // Homepage
      '/onlinemenu/brands?customerType=ADULT', // Brands page
      '/onlinemenu/shop?customerType=ADULT', // Shop page?
      '/onlinemenu/products?customerType=ADULT', // Products page?
      '/onlinemenu/menu?customerType=ADULT', // Menu page?
      '/onlinemenu/all?customerType=ADULT', // All products?
      '/onlinemenu/flower?customerType=ADULT', // Flower category
      '/onlinemenu/vapes?customerType=ADULT', // Vapes category
      '/onlinemenu/edibles?customerType=ADULT', // Edibles category
      '/onlinemenu/concentrates?customerType=ADULT', // Concentrates category
    ];

    // Paths are visited one at a time because they share a single page.
    const results: { path: string; products: number; scrolls: number }[] = [];
    for (const path of urlsToTest) {
      const result = await testUrl(page, path);
      results.push({ path, ...result });
    }

    // Look for navigation links on the main page
    console.log('\n' + '='.repeat(60));
    console.log('Checking navigation structure on homepage...');
    console.log('='.repeat(60));

    await page.goto(`https://${STORE_ID}.treez.io/onlinemenu/?customerType=ADULT`, {
      waitUntil: 'networkidle2',
      timeout: 30000,
    });
    await sleep(2000);
    await bypassAgeGate(page);
    await sleep(1000);

    const navLinks = await page.evaluate(() => {
      const links: { text: string; href: string }[] = [];
      document.querySelectorAll('a[href*="/onlinemenu/"]').forEach(el => {
        const text = el.textContent?.trim() || '';
        const href = el.getAttribute('href') || '';
        // Keep the first link seen for each distinct href.
        if (text && !links.some(l => l.href === href)) {
          links.push({ text: text.slice(0, 50), href });
        }
      });
      return links;
    });

    console.log('\nNavigation links found:');
    navLinks.forEach(l => console.log(` "${l.text}" → ${l.href}`));

    // Summary
    console.log('\n' + '='.repeat(60));
    console.log('Summary');
    console.log('='.repeat(60));
    results.sort((a, b) => b.products - a.products);
    results.forEach(r => {
      console.log(`${r.products.toString().padStart(4)} products | ${r.path}`);
    });
  } finally {
    await browser.close();
  }
}
main().catch(console.error);

View File

@@ -0,0 +1,247 @@
/**
* Explore Treez site structure to find full product catalog
*
* Usage: npx ts-node scripts/explore-treez-structure.ts
*/
import puppeteer from 'puppeteer';
const STORE_ID = 'best';
/** Waits `ms` milliseconds before resolving. */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((wake) => {
    setTimeout(wake, ms);
  });
}
/**
 * Explores the store's Treez online menu to find where the full product
 * catalog lives. Steps, each logged with a numbered prefix:
 *   1. open the base menu URL and dismiss the age gate,
 *   2. list navigation links,
 *   3. list category-like elements,
 *   4. click the first "Flower"/"All"-style control found and re-count products,
 *   5. scroll the current page and watch the product count,
 *   6. try a few direct URL patterns,
 *   7. save a full-page screenshot to /tmp/treez-explore.png.
 *
 * Errors are logged, not rethrown; the browser is always closed.
 */
async function main() {
  console.log('='.repeat(60));
  console.log('Exploring Treez Site Structure');
  console.log('='.repeat(60));

  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  try {
    // Page setup lives inside the try so a failure here still closes the browser.
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // Navigate to base menu URL
    const baseUrl = `https://${STORE_ID}.treez.io/onlinemenu/?customerType=ADULT`;
    console.log(`\n[1] Navigating to: ${baseUrl}`);
    await page.goto(baseUrl, { waitUntil: 'networkidle2', timeout: 60000 });
    await sleep(3000);

    // Bypass age gate if present
    const ageGate = await page.$('[data-testid="age-gate-modal"]');
    if (ageGate) {
      console.log('[1] Age gate detected, bypassing...');
      const btn = await page.$('[data-testid="age-gate-submit-button"]');
      if (btn) await btn.click();
      await sleep(2000);
    }

    // Get all navigation links
    console.log('\n[2] Extracting navigation structure...');
    const navInfo = await page.evaluate(() => {
      const links: { text: string; href: string }[] = [];

      // Look for nav links (deduplicated by href)
      document.querySelectorAll('nav a, [class*="nav"] a, [class*="menu"] a, header a').forEach(el => {
        const text = el.textContent?.trim() || '';
        const href = el.getAttribute('href') || '';
        if (text && href && !links.some(l => l.href === href)) {
          links.push({ text, href });
        }
      });

      // Look for category tabs/buttons (deduplicated by text)
      document.querySelectorAll('[class*="category"], [class*="tab"], [role="tab"]').forEach(el => {
        const text = el.textContent?.trim() || '';
        const href = el.getAttribute('href') || el.getAttribute('data-href') || '';
        if (text && !links.some(l => l.text === text)) {
          // Elements without a link target are recorded by class name instead.
          links.push({ text, href: href || `(click: ${el.className})` });
        }
      });

      // Get current URL
      const currentUrl = window.location.href;

      // Count products on page
      const productCount = document.querySelectorAll('[class*="product_product__"]').length;

      return { links, currentUrl, productCount };
    });

    console.log(`Current URL: ${navInfo.currentUrl}`);
    console.log(`Products on homepage: ${navInfo.productCount}`);
    console.log('\nNavigation links found:');
    navInfo.links.forEach(l => {
      console.log(` "${l.text}" → ${l.href}`);
    });

    // Look for category buttons/tabs specifically
    console.log('\n[3] Looking for category navigation...');
    const categories = await page.evaluate(() => {
      const cats: { text: string; className: string; tagName: string }[] = [];

      // Find all clickable elements that might be categories
      const selectors = [
        '[class*="CategoryNav"]',
        '[class*="category"]',
        '[class*="Category"]',
        '[class*="nav"] button',
        '[class*="tab"]',
        '[role="tablist"] *',
        '.MuiTab-root',
        '[class*="filter"]',
      ];

      selectors.forEach(sel => {
        document.querySelectorAll(sel).forEach(el => {
          const text = el.textContent?.trim() || '';
          // Short, not-yet-seen labels only; long text is likely a container.
          if (text && text.length < 50 && !cats.some(c => c.text === text)) {
            cats.push({
              text,
              className: el.className?.toString().slice(0, 80) || '',
              tagName: el.tagName,
            });
          }
        });
      });

      return cats;
    });

    console.log('Category-like elements:');
    categories.forEach(c => {
      console.log(` [${c.tagName}] "${c.text}" (class: ${c.className})`);
    });

    // Try clicking on "Flower" or "All" if found
    console.log('\n[4] Looking for "Flower" or "All Products" link...');
    const clickTargets = ['Flower', 'All', 'All Products', 'Shop All', 'View All'];

    for (const target of clickTargets) {
      const element = await page.evaluate((targetText) => {
        const els = Array.from(document.querySelectorAll('a, button, [role="tab"], [class*="category"]'));
        const match = els.find(el =>
          el.textContent?.trim().toLowerCase() === targetText.toLowerCase()
        );
        if (match) {
          return {
            found: true,
            text: match.textContent?.trim(),
            tag: match.tagName,
          };
        }
        return { found: false };
      }, target);

      if (element.found) {
        console.log(`Found "${element.text}" (${element.tag}), clicking...`);

        await page.evaluate((targetText) => {
          const els = Array.from(document.querySelectorAll('a, button, [role="tab"], [class*="category"]'));
          const match = els.find(el =>
            el.textContent?.trim().toLowerCase() === targetText.toLowerCase()
          );
          if (match) (match as HTMLElement).click();
        }, target);

        await sleep(3000);

        const newUrl = page.url();
        const newCount = await page.evaluate(() =>
          document.querySelectorAll('[class*="product_product__"]').length
        );

        console.log(` New URL: ${newUrl}`);
        console.log(` Products after click: ${newCount}`);

        if (newCount > navInfo.productCount) {
          // Separator added: the two counts used to be printed fused together.
          console.log(` ✓ Found more products! (${navInfo.productCount} → ${newCount})`);
        }

        // Only the first matching target is clicked.
        break;
      }
    }

    // Check page height and scroll behavior
    console.log('\n[5] Checking scroll behavior on current page...');
    let previousHeight = 0;
    let scrollCount = 0;
    let previousProductCount = await page.evaluate(() =>
      document.querySelectorAll('[class*="product_product__"]').length
    );

    while (scrollCount < 10) {
      const currentHeight = await page.evaluate(() => document.body.scrollHeight);
      if (currentHeight === previousHeight) {
        console.log(` Scroll ${scrollCount + 1}: No height change, stopping`);
        break;
      }

      await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
      await sleep(1500);

      const currentProductCount = await page.evaluate(() =>
        document.querySelectorAll('[class*="product_product__"]').length
      );
      console.log(` Scroll ${scrollCount + 1}: height=${currentHeight}, products=${currentProductCount}`);

      // Allow a few scrolls before treating a flat product count as "done".
      if (currentProductCount === previousProductCount && scrollCount > 2) {
        console.log(' No new products loading, stopping');
        break;
      }

      previousHeight = currentHeight;
      previousProductCount = currentProductCount;
      scrollCount++;
    }

    // Try direct URL patterns
    console.log('\n[6] Testing URL patterns...');
    const urlPatterns = [
      '/onlinemenu/flower?customerType=ADULT',
      '/onlinemenu/all?customerType=ADULT',
      '/onlinemenu?category=flower&customerType=ADULT',
      '/onlinemenu?view=all&customerType=ADULT',
    ];

    for (const pattern of urlPatterns) {
      const testUrl = `https://${STORE_ID}.treez.io${pattern}`;
      console.log(`\nTrying: ${testUrl}`);
      await page.goto(testUrl, { waitUntil: 'networkidle2', timeout: 30000 });
      await sleep(2000);

      // Bypass age gate again if needed
      const gate = await page.$('[data-testid="age-gate-modal"]');
      if (gate) {
        const btn = await page.$('[data-testid="age-gate-submit-button"]');
        if (btn) await btn.click();
        await sleep(2000);
      }

      const productCount = await page.evaluate(() =>
        document.querySelectorAll('[class*="product_product__"]').length
      );
      console.log(` Products found: ${productCount}`);
    }

    // Screenshot the final state
    await page.screenshot({ path: '/tmp/treez-explore.png', fullPage: true });
    console.log('\n[7] Screenshot saved to /tmp/treez-explore.png');
  } catch (error: unknown) {
    console.error('Error:', error instanceof Error ? error.message : String(error));
  } finally {
    await browser.close();
  }
}
main().catch(console.error);

View File

@@ -0,0 +1,138 @@
/**
* Run Jane product discovery for stores in database
* Usage: npx ts-node scripts/run-jane-product-discovery.ts [DISPENSARY_ID]
* Example: npx ts-node scripts/run-jane-product-discovery.ts 4220
* Or run for all Jane stores: npx ts-node scripts/run-jane-product-discovery.ts all
*/
import { Pool } from 'pg';
import { fetchProductsByStoreIdDirect } from '../src/platforms/jane';
import { saveRawPayload } from '../src/utils/payload-storage';
/**
 * Fetches Jane products for one or more dispensaries, stores the raw payload,
 * and updates each dispensary row with the new product count.
 *
 * The first CLI argument selects the targets:
 *   - `all`      every Jane dispensary that has a menu URL,
 *   - a number   the dispensary with that database id,
 *   - (omitted)  the first Jane dispensary that has a menu URL.
 *
 * A failure for one dispensary is logged and counted without stopping the run.
 * A fatal error sets a non-zero exit code; the pool is always closed.
 */
async function main() {
  const arg = process.argv[2];

  console.log('='.repeat(60));
  console.log('Jane Product Discovery');
  console.log('='.repeat(60));

  const pool = new Pool({
    connectionString: process.env.DATABASE_URL,
  });

  try {
    // Get dispensaries to process
    let dispensaries: any[];

    if (arg === 'all') {
      const result = await pool.query(
        `SELECT id, name, menu_url, platform_dispensary_id
         FROM dispensaries
         WHERE platform = 'jane' AND menu_url IS NOT NULL
         ORDER BY id`
      );
      dispensaries = result.rows;
    } else if (arg) {
      // Reject non-numeric ids up front instead of sending NaN to the database.
      const dispensaryId = Number.parseInt(arg, 10);
      if (Number.isNaN(dispensaryId)) {
        console.error(`Error: invalid dispensary ID "${arg}" (expected a number or "all")`);
        process.exitCode = 1;
        return;
      }
      const result = await pool.query(
        `SELECT id, name, menu_url, platform_dispensary_id
         FROM dispensaries
         WHERE id = $1`,
        [dispensaryId]
      );
      dispensaries = result.rows;
    } else {
      // Default: get first Jane store
      const result = await pool.query(
        `SELECT id, name, menu_url, platform_dispensary_id
         FROM dispensaries
         WHERE platform = 'jane' AND menu_url IS NOT NULL
         ORDER BY id LIMIT 1`
      );
      dispensaries = result.rows;
    }

    if (dispensaries.length === 0) {
      console.log('No Jane dispensaries found');
      return;
    }

    console.log(`Processing ${dispensaries.length} dispensary(ies)...\n`);

    let successCount = 0;
    let failCount = 0;

    for (const disp of dispensaries) {
      console.log(`\n${'─'.repeat(60)}`);
      console.log(`${disp.name} (ID: ${disp.id}, Jane ID: ${disp.platform_dispensary_id})`);
      console.log('─'.repeat(60));

      // The lookup by database id does not filter on platform id, so it may be missing.
      if (disp.platform_dispensary_id == null) {
        console.log(' ✗ No platform_dispensary_id set, skipping');
        failCount++;
        continue;
      }

      try {
        const result = await fetchProductsByStoreIdDirect(disp.platform_dispensary_id);

        if (result.products.length === 0) {
          console.log(' ✗ No products captured');
          failCount++;
          continue;
        }

        console.log(` ✓ Captured ${result.products.length} products`);

        // Build payload
        const rawPayload = {
          hits: result.products.map(p => p.raw),
          store: result.store?.raw || null,
          capturedAt: new Date().toISOString(),
          platform: 'jane',
          dispensaryId: disp.id,
          storeId: disp.platform_dispensary_id,
        };

        // Save payload
        const { id: payloadId, sizeBytes } = await saveRawPayload(
          pool,
          disp.id,
          rawPayload,
          null,
          result.products.length,
          'jane'
        );
        console.log(` ✓ Saved payload ${payloadId} (${Math.round(sizeBytes / 1024)}KB)`);

        // Update dispensary
        await pool.query(
          `UPDATE dispensaries
           SET stage = 'hydrating',
               last_fetch_at = NOW(),
               product_count = $2,
               consecutive_successes = consecutive_successes + 1,
               consecutive_failures = 0,
               updated_at = NOW()
           WHERE id = $1`,
          [disp.id, result.products.length]
        );
        console.log(` ✓ Updated dispensary (product_count: ${result.products.length})`);

        successCount++;
      } catch (error: unknown) {
        console.log(` ✗ Error: ${error instanceof Error ? error.message : String(error)}`);
        failCount++;
      }
    }

    console.log('\n' + '='.repeat(60));
    console.log('RESULTS');
    console.log('='.repeat(60));
    console.log(`Success: ${successCount}`);
    console.log(`Failed: ${failCount}`);
  } catch (error: unknown) {
    console.error('Error:', error instanceof Error ? error.message : String(error));
    // Set the exit code instead of calling process.exit(): exit() terminates
    // immediately, which would skip the finally block and leave the pool open.
    process.exitCode = 1;
  } finally {
    await pool.end();
  }
}
main();

View File

@@ -0,0 +1,137 @@
/**
* Run Jane store discovery and insert into database
* Usage: npx ts-node scripts/run-jane-store-discovery.ts [STATE_CODE]
* Example: npx ts-node scripts/run-jane-store-discovery.ts AZ
*/
import { Pool } from 'pg';
import { discoverStoresByState } from '../src/platforms/jane';
/**
 * Builds a URL slug from a store name: lowercase ASCII letters and digits
 * joined by single hyphens, with no leading or trailing hyphen.
 *
 * @example
 * generateSlug('Hana Meds - Phoenix (REC)') // 'hana-meds-phoenix-rec'
 */
function generateSlug(name: string): string {
  // Drop everything except a-z, 0-9, whitespace and hyphens (this also removes parentheses).
  const cleaned = name.toLowerCase().replace(/[^a-z0-9\s-]/g, '');

  // Any run of whitespace and/or hyphens separates two words; empty pieces
  // come from separators at the very start or end and are discarded.
  const words = cleaned.split(/[\s-]+/).filter((word) => word.length > 0);

  return words.join('-');
}
/**
 * Discovers Jane stores for one state and upserts them into `dispensaries`.
 *
 * The state code comes from the first CLI argument and defaults to `AZ`.
 * Rows are matched on `platform_dispensary_id`; existing rows are updated, new
 * rows are inserted with stage `discovered`. A failure for one store is logged
 * and counted without stopping the run. A fatal error sets a non-zero exit
 * code; the pool is always closed.
 */
async function main() {
  const stateCode = process.argv[2] || 'AZ';

  console.log('='.repeat(60));
  console.log(`Jane Store Discovery - ${stateCode}`);
  console.log('='.repeat(60));

  // Connect to database
  const pool = new Pool({
    connectionString: process.env.DATABASE_URL,
  });

  try {
    // Test connection
    const testResult = await pool.query('SELECT COUNT(*) FROM dispensaries WHERE platform = $1', ['jane']);
    console.log(`Current Jane stores in DB: ${testResult.rows[0].count}`);

    // Discover stores
    console.log(`\nDiscovering Jane stores in ${stateCode}...`);
    const stores = await discoverStoresByState(stateCode);

    if (stores.length === 0) {
      console.log(`No stores found in ${stateCode}`);
      return;
    }

    console.log(`\nFound ${stores.length} stores. Inserting into database...`);

    // Insert stores
    let inserted = 0;
    let updated = 0;
    let failed = 0;
    const newIds: number[] = [];

    for (const store of stores) {
      const menuUrl = `https://www.iheartjane.com/stores/${store.storeId}/${store.urlSlug || 'menu'}`;
      const slug = generateSlug(store.name);

      try {
        // `xmax = 0` is used to tell a fresh insert apart from a conflict update.
        const result = await pool.query(
          `INSERT INTO dispensaries (
             name, slug, address1, city, state, zipcode,
             latitude, longitude, menu_url, menu_type, platform,
             platform_dispensary_id, is_medical, is_recreational,
             stage, created_at, updated_at
           ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, NOW(), NOW())
           ON CONFLICT (platform_dispensary_id) WHERE platform_dispensary_id IS NOT NULL
           DO UPDATE SET
             name = EXCLUDED.name,
             slug = EXCLUDED.slug,
             address1 = EXCLUDED.address1,
             city = EXCLUDED.city,
             latitude = EXCLUDED.latitude,
             longitude = EXCLUDED.longitude,
             menu_url = EXCLUDED.menu_url,
             is_medical = EXCLUDED.is_medical,
             is_recreational = EXCLUDED.is_recreational,
             updated_at = NOW()
           RETURNING id, (xmax = 0) AS is_new`,
          [
            store.name,
            slug,
            store.address,
            store.city,
            stateCode,
            store.zip,
            store.lat,
            store.long,
            menuUrl,
            'embedded', // menu_type: how it's displayed
            'jane', // platform: who provides the menu
            store.storeId,
            store.medical,
            store.recreational,
            'discovered',
          ]
        );

        if (result.rows.length > 0) {
          const { id, is_new } = result.rows[0];
          if (is_new) {
            inserted++;
            newIds.push(id);
            console.log(` + Inserted: ${store.name} (DB ID: ${id}, Jane ID: ${store.storeId})`);
          } else {
            updated++;
            console.log(` ~ Updated: ${store.name} (DB ID: ${id})`);
          }
        }
      } catch (error: unknown) {
        failed++;
        const message = error instanceof Error ? error.message : String(error);
        console.error(` ! Error inserting ${store.name}: ${message}`);
      }
    }

    console.log('\n' + '='.repeat(60));
    console.log('RESULTS');
    console.log('='.repeat(60));
    console.log(`Stores discovered: ${stores.length}`);
    console.log(`New stores inserted: ${inserted}`);
    console.log(`Existing stores updated: ${updated}`);
    console.log(`Failed inserts: ${failed}`);
    console.log(`New dispensary IDs: ${newIds.join(', ') || '(none)'}`);

    // Show final count
    const finalResult = await pool.query('SELECT COUNT(*) FROM dispensaries WHERE platform = $1', ['jane']);
    console.log(`\nTotal Jane stores in DB: ${finalResult.rows[0].count}`);
  } catch (error: unknown) {
    console.error('Error:', error instanceof Error ? error.message : String(error));
    // Set the exit code instead of calling process.exit(): exit() terminates
    // immediately, which would skip the finally block and leave the pool open.
    process.exitCode = 1;
  } finally {
    await pool.end();
  }
}
main();

View File

@@ -0,0 +1,179 @@
import puppeteer from 'puppeteer';
/**
 * Pause the calling async flow.
 *
 * @param ms Delay in milliseconds handed to `setTimeout`.
 * @returns A promise that settles (with no value) once the timer fires.
 */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((wake) => {
    setTimeout(wake, ms);
  });
}
/**
 * Enumerate every brand on the bestdispensary.com brand index and count the
 * products shown on each brand page.
 *
 * Flow: launch headless Chromium, open the brands page, dismiss the age gate
 * when it is present, scroll until the number of distinct brand links stops
 * growing, print the brand list, then open each brand page and count distinct
 * product names. All results are written to the console.
 *
 * The browser is closed in a `finally` block so that a failed navigation or
 * evaluation does not leave a Chromium process running.
 */
async function main(): Promise<void> {
  console.log('Loading ALL brands from https://shop.bestdispensary.com/brands');
  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // Only the DOM matters here, so drop heavy assets to speed up page loads.
    await page.setRequestInterception(true);
    page.on('request', (req) => {
      if (['image', 'font', 'media'].includes(req.resourceType())) {
        req.abort();
      } else {
        req.continue();
      }
    });

    await page.goto('https://shop.bestdispensary.com/brands', {
      waitUntil: 'networkidle2',
      timeout: 60000,
    });
    await sleep(3000);

    // Bypass age gate
    const ageGate = await page.$('[data-testid="age-gate-modal"]');
    if (ageGate) {
      console.log('Age gate detected, bypassing...');
      const btn = await page.$('[data-testid="age-gate-submit-button"]');
      if (btn) await btn.click();
      await sleep(2000);
    }
    console.log('Current URL:', page.url());

    // Counts distinct brand hrefs currently in the DOM (runs in the page).
    const countBrandLinks = (): Promise<number> =>
      page.evaluate(() => {
        const seen = new Set<string>();
        document.querySelectorAll('a[href*="/brand/"]').forEach((a: Element) => {
          const href = a.getAttribute('href');
          if (href) seen.add(href);
        });
        return seen.size;
      });

    // Get initial brand count
    let brandCount = await countBrandLinks();
    console.log(`Initial brand count: ${brandCount}`);

    // Aggressive scrolling: at most 50 passes, stopping after 5 consecutive
    // passes in which the brand count did not change.
    console.log('\nScrolling to load ALL brands...');
    let previousCount = 0;
    let sameCount = 0;
    for (let i = 0; i < 50; i++) {
      await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
      await sleep(1000);
      brandCount = await countBrandLinks();
      if (brandCount === previousCount) {
        sameCount++;
        if (sameCount >= 5) {
          console.log(`  Scroll ${i+1}: ${brandCount} brands (stopping - no change)`);
          break;
        }
      } else {
        sameCount = 0;
        console.log(`  Scroll ${i+1}: ${brandCount} brands`);
      }
      previousCount = brandCount;
    }

    // Get all unique brands (deduplicated case-insensitively by href).
    const brands = await page.evaluate(() => {
      const results: { name: string; href: string }[] = [];
      const seen = new Set<string>();
      document.querySelectorAll('a[href*="/brand/"]').forEach((a: Element) => {
        const href = a.getAttribute('href') || '';
        const normalizedHref = href.toLowerCase();
        if (seen.has(normalizedHref)) return;
        seen.add(normalizedHref);

        // Name fallbacks: heading-like child, first line of link text, URL slug.
        let name = '';
        const heading = a.querySelector('h3, h4, h5, [class*="name"]');
        if (heading) {
          name = heading.textContent?.trim() || '';
        }
        if (!name) {
          name = a.textContent?.trim().split('\n')[0] || '';
        }
        if (!name) {
          name = href.split('/brand/')[1]?.replace(/-/g, ' ') || '';
        }
        results.push({ name: name.slice(0, 50), href });
      });
      return results.sort((a, b) => a.name.localeCompare(b.name));
    });

    console.log('\n' + '='.repeat(60));
    console.log('TOTAL BRANDS FOUND: ' + brands.length);
    console.log('='.repeat(60));
    brands.forEach((b, i) => {
      const num = (i + 1).toString().padStart(3, ' ');
      console.log(`${num}. ${b.name} (${b.href})`);
    });

    // Now visit each brand page and count products
    console.log('\n' + '='.repeat(60));
    console.log('PRODUCTS PER BRAND');
    console.log('='.repeat(60));
    const brandProducts: { brand: string; products: number }[] = [];
    for (let i = 0; i < brands.length; i++) {
      const brand = brands[i];
      try {
        const brandUrl = brand.href.startsWith('http')
          ? brand.href
          : `https://shop.bestdispensary.com${brand.href}`;
        await page.goto(brandUrl, { waitUntil: 'networkidle2', timeout: 30000 });
        await sleep(1500);

        // Scroll to load products
        for (let j = 0; j < 10; j++) {
          await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
          await sleep(800);
        }

        // Distinct products are keyed by image alt text, else link text.
        const productCount = await page.evaluate(() => {
          const seen = new Set<string>();
          document.querySelectorAll('a[href*="/product/"]').forEach((a: Element) => {
            const img = a.querySelector('img');
            const name = img?.getAttribute('alt') || a.textContent?.trim() || '';
            if (name) seen.add(name);
          });
          return seen.size;
        });
        brandProducts.push({ brand: brand.name, products: productCount });
        console.log(`${(i+1).toString().padStart(3)}. ${brand.name}: ${productCount} products`);
      } catch (err: unknown) {
        // A single failing brand page is recorded as 0 products; the crawl continues.
        const reason = err instanceof Error ? err.message : String(err);
        console.log(`${(i+1).toString().padStart(3)}. ${brand.name}: ERROR - ${reason.slice(0, 30)}`);
        brandProducts.push({ brand: brand.name, products: 0 });
      }
    }

    // Summary
    const totalProducts = brandProducts.reduce((sum, b) => sum + b.products, 0);
    console.log('\n' + '='.repeat(60));
    console.log('SUMMARY');
    console.log('='.repeat(60));
    console.log(`Total brands: ${brands.length}`);
    console.log(`Total products: ${totalProducts}`);
  } finally {
    await browser.close();
  }
}
main().catch(console.error);

View File

@@ -0,0 +1,92 @@
import puppeteer from 'puppeteer';
/**
 * Resolve after a fixed delay.
 *
 * @param ms Number of milliseconds to wait before the promise settles.
 */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
/**
 * Open the bestdispensary.com brands page, scroll until the page height stops
 * growing, list every distinct brand link, and save a full-page screenshot to
 * /tmp/bestdispensary-brands.png.
 *
 * The browser is closed in a `finally` block so a failed navigation does not
 * leave a Chromium process running.
 */
async function main(): Promise<void> {
  console.log('Navigating to https://shop.bestdispensary.com/brands');
  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // Abort image/font/media requests; they are not needed to read the links.
    await page.setRequestInterception(true);
    page.on('request', (req) => {
      if (['image', 'font', 'media'].includes(req.resourceType())) {
        req.abort();
      } else {
        req.continue();
      }
    });

    // Go directly to the brands page
    await page.goto('https://shop.bestdispensary.com/brands', {
      waitUntil: 'networkidle2',
      timeout: 60000,
    });
    await sleep(3000);

    // Bypass age gate if present
    const ageGate = await page.$('[data-testid="age-gate-modal"]');
    if (ageGate) {
      console.log('Age gate detected, bypassing...');
      const btn = await page.$('[data-testid="age-gate-submit-button"]');
      if (btn) await btn.click();
      await sleep(2000);
    }
    console.log('Current URL:', page.url());

    // Scroll to load all content: at most 20 passes, stopping as soon as the
    // document height is unchanged from the previous pass.
    console.log('\nScrolling to load all brands...');
    let previousHeight = 0;
    for (let i = 0; i < 20; i++) {
      await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
      await sleep(1500);
      const currentHeight = await page.evaluate(() => document.body.scrollHeight);
      if (currentHeight === previousHeight) {
        console.log(`  Scroll ${i+1}: No new content`);
        break;
      }
      previousHeight = currentHeight;
      const brandCount = await page.evaluate(() =>
        document.querySelectorAll('a[href*="/brand/"]').length
      );
      console.log(`  Scroll ${i+1}: height=${currentHeight}, brand links=${brandCount}`);
    }

    // Get all brand links, deduplicated by exact href.
    const brands = await page.evaluate(() => {
      const results: { name: string; href: string }[] = [];
      const seen = new Set<string>();
      document.querySelectorAll('a[href*="/brand/"]').forEach((a: Element) => {
        const href = a.getAttribute('href') || '';
        if (seen.has(href)) return;
        seen.add(href);
        // Fall back to the URL slug when the link has no text.
        const name = a.textContent?.trim() || href.split('/brand/')[1] || '';
        results.push({ name, href });
      });
      return results;
    });
    console.log(`\nFound ${brands.length} brands:`);
    brands.forEach(b => console.log(`  - ${b.name} (${b.href})`));

    // Take screenshot
    await page.screenshot({ path: '/tmp/bestdispensary-brands.png', fullPage: true });
    console.log('\nScreenshot saved to /tmp/bestdispensary-brands.png');
  } finally {
    await browser.close();
  }
}
main().catch(console.error);

View File

@@ -0,0 +1,108 @@
import puppeteer from 'puppeteer';
/**
 * Timer-backed delay helper.
 *
 * @param ms How long to wait, in milliseconds.
 * @returns A promise that settles with no value after the delay.
 */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((finish) => {
    setTimeout(finish, ms);
  });
}
/**
 * Inspect and exercise the "Load More" button on the bestdispensary.com
 * brands page.
 *
 * Prints the button's state (text, visibility, position, disabled flag, class),
 * then scrolls it into view and clicks it up to 10 times, logging the number
 * of brand links after each click, and finally prints every brand name found
 * inside `.brands-page__list`.
 *
 * The browser is closed in a `finally` block so a failure part-way through
 * does not leave a Chromium process running.
 */
async function main(): Promise<void> {
  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // Abort image/font/media requests; only the DOM is inspected.
    await page.setRequestInterception(true);
    page.on('request', (req) => {
      if (['image', 'font', 'media'].includes(req.resourceType())) {
        req.abort();
      } else {
        req.continue();
      }
    });

    await page.goto('https://shop.bestdispensary.com/brands', {
      waitUntil: 'networkidle2',
      timeout: 60000,
    });
    await sleep(3000);

    // Bypass age gate
    const ageGate = await page.$('[data-testid="age-gate-modal"]');
    if (ageGate) {
      const btn = await page.$('[data-testid="age-gate-submit-button"]');
      if (btn) await btn.click();
      await sleep(2000);
    }

    // Check Load More button
    const btnInfo = await page.evaluate(() => {
      const btn = document.querySelector('button.collection__load-more');
      if (!btn) return { found: false };
      const rect = btn.getBoundingClientRect();
      return {
        found: true,
        text: btn.textContent?.trim(),
        visible: rect.width > 0 && rect.height > 0,
        top: rect.top,
        disabled: (btn as HTMLButtonElement).disabled,
        class: btn.className,
      };
    });
    console.log('Load More button:', btnInfo);

    // Scroll to button and click
    console.log('\nScrolling to button and clicking...');
    for (let i = 0; i < 10; i++) {
      const btn = await page.$('button.collection__load-more');
      if (!btn) {
        console.log('Button not found');
        break;
      }

      // Scroll button into view
      await page.evaluate((b) => b.scrollIntoView({ behavior: 'smooth', block: 'center' }), btn);
      await sleep(500);

      // Re-query after the scroll delay: the button may have been removed
      // (or emptied) in the meantime.
      const stillThere = await page.evaluate(() => {
        const b = document.querySelector('button.collection__load-more');
        return b ? b.textContent?.trim() : null;
      });
      if (!stillThere) {
        console.log('Button disappeared - all loaded');
        break;
      }

      // Click it
      await btn.click();
      console.log(`Click ${i+1}...`);
      await sleep(2000);
      const count = await page.evaluate(() =>
        document.querySelectorAll('.brands-page__list a[href*="/brand/"]').length
      );
      console.log(`  Brands: ${count}`);
    }

    // Final count
    const brands = await page.evaluate(() => {
      const list: string[] = [];
      document.querySelectorAll('.brands-page__list a[href*="/brand/"]').forEach((a: Element) => {
        list.push(a.textContent?.trim() || '');
      });
      return list;
    });
    console.log(`\nTotal brands: ${brands.length}`);
    console.log(brands.join(', '));
  } finally {
    await browser.close();
  }
}
main().catch(console.error);

View File

@@ -0,0 +1,157 @@
import puppeteer from 'puppeteer';
/**
 * Wait for `ms` milliseconds.
 *
 * @param ms Delay passed straight to `setTimeout`.
 */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((release) => {
    setTimeout(release, ms);
  });
}
/**
 * Load the full bestdispensary.com brand list by clicking "Load More", then
 * visit every brand page and count its distinct product links.
 *
 * Prints per-brand counts, a summary, and the top 20 brands by product count.
 *
 * Changes from the straightforward version:
 * - the browser is closed in a `finally` block, so failures do not leak a
 *   Chromium process;
 * - absolute brand hrefs are used as-is instead of being prefixed with the
 *   site origin;
 * - a failing brand page reports the (truncated) failure reason;
 * - the ranking sorts a copy rather than reordering `results` in place.
 */
async function main(): Promise<void> {
  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // Abort image/font/media requests; only links are counted.
    await page.setRequestInterception(true);
    page.on('request', (req) => {
      if (['image', 'font', 'media'].includes(req.resourceType())) {
        req.abort();
      } else {
        req.continue();
      }
    });

    await page.goto('https://shop.bestdispensary.com/brands', {
      waitUntil: 'networkidle2',
      timeout: 60000,
    });
    await sleep(3000);

    // Bypass age gate
    const ageGate = await page.$('[data-testid="age-gate-modal"]');
    if (ageGate) {
      console.log('Bypassing age gate...');
      const btn = await page.$('[data-testid="age-gate-submit-button"]');
      if (btn) await btn.click();
      await sleep(2000);
    }

    // Click "LOAD MORE" until all brands are loaded
    console.log('Loading all brands...\n');
    let loadMoreClicks = 0;
    while (true) {
      const loadMoreBtn = await page.$('button.collection__load-more');
      if (!loadMoreBtn) {
        console.log('No more "Load More" button - all brands loaded!');
        break;
      }
      // A zero-sized button is treated as hidden, i.e. nothing left to load.
      const isVisible = await page.evaluate((btn) => {
        const rect = btn.getBoundingClientRect();
        return rect.width > 0 && rect.height > 0;
      }, loadMoreBtn);
      if (!isVisible) {
        console.log('Load More button not visible - all brands loaded!');
        break;
      }
      await loadMoreBtn.click();
      loadMoreClicks++;
      await sleep(1500);
      const brandCount = await page.evaluate(() =>
        document.querySelectorAll('.brands-page__list a[href*="/brand/"]').length
      );
      console.log(`  Click ${loadMoreClicks}: ${brandCount} brands loaded`);
      if (loadMoreClicks > 20) break; // Safety limit
    }

    // Get all brands
    const brands = await page.evaluate(() => {
      const results: { name: string; href: string }[] = [];
      document.querySelectorAll('.brands-page__list a[href*="/brand/"]').forEach((a: Element) => {
        const href = a.getAttribute('href') || '';
        const name = a.textContent?.trim() || '';
        if (name && href) {
          results.push({ name, href });
        }
      });
      return results;
    });
    console.log('\n' + '='.repeat(60));
    console.log(`TOTAL BRANDS: ${brands.length}`);
    console.log('='.repeat(60));

    // Visit each brand and count products
    console.log('\nCounting products per brand...\n');
    const results: { brand: string; products: number }[] = [];
    for (let i = 0; i < brands.length; i++) {
      const brand = brands[i];
      // Hrefs are normally site-relative; keep absolute ones untouched.
      const brandUrl = brand.href.startsWith('http')
        ? brand.href
        : `https://shop.bestdispensary.com${brand.href}`;
      try {
        await page.goto(brandUrl, { waitUntil: 'networkidle2', timeout: 30000 });
        await sleep(1000);

        // Click load more on brand page too (at most 10 times)
        for (let j = 0; j < 10; j++) {
          const loadMore = await page.$('button.collection__load-more');
          if (!loadMore) break;
          const isVisible = await page.evaluate((btn) => {
            const rect = btn.getBoundingClientRect();
            return rect.width > 0 && rect.height > 0;
          }, loadMore);
          if (!isVisible) break;
          await loadMore.click();
          await sleep(1000);
        }

        const productCount = await page.evaluate(() => {
          const seen = new Set<string>();
          document.querySelectorAll('a[href*="/product/"]').forEach((a: Element) => {
            const href = a.getAttribute('href');
            if (href) seen.add(href);
          });
          return seen.size;
        });
        results.push({ brand: brand.name, products: productCount });
        console.log(`${(i+1).toString().padStart(3)}. ${brand.name}: ${productCount} products`);
      } catch (err: unknown) {
        // Record the brand with 0 products and keep going.
        const reason = err instanceof Error ? err.message : String(err);
        console.log(`${(i+1).toString().padStart(3)}. ${brand.name}: ERROR - ${reason.slice(0, 30)}`);
        results.push({ brand: brand.name, products: 0 });
      }
    }

    // Summary
    const totalProducts = results.reduce((sum, r) => sum + r.products, 0);
    const brandsWithProducts = results.filter(r => r.products > 0).length;
    console.log('\n' + '='.repeat(60));
    console.log('SUMMARY');
    console.log('='.repeat(60));
    console.log(`Total brands: ${brands.length}`);
    console.log(`Brands with products: ${brandsWithProducts}`);
    console.log(`Total products: ${totalProducts}`);

    // Top brands by product count (sort a copy to leave `results` in visit order)
    console.log('\nTop 20 brands by product count:');
    [...results]
      .sort((a, b) => b.products - a.products)
      .slice(0, 20)
      .forEach((r, i) => console.log(`  ${i+1}. ${r.brand}: ${r.products}`));
  } finally {
    await browser.close();
  }
}
main().catch(console.error);

View File

@@ -0,0 +1,108 @@
import puppeteer from 'puppeteer';
/**
 * Suspend the caller for a number of milliseconds.
 *
 * @param ms Milliseconds to wait.
 * @returns A promise that settles with no value when the timer fires.
 */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((resume) => {
    setTimeout(resume, ms);
  });
}
/**
 * Load all brands on bestdispensary.com by clicking "Load More" from page
 * JavaScript, then visit each brand page and count distinct product links.
 *
 * Prints one line per brand and a final total.
 *
 * The browser is closed in a `finally` block so a failure does not leave a
 * Chromium process running; absolute brand hrefs are used as-is; and a failing
 * brand page reports the (truncated) failure reason.
 */
async function main(): Promise<void> {
  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });
    await page.goto('https://shop.bestdispensary.com/brands', {
      waitUntil: 'networkidle2',
      timeout: 60000,
    });
    await sleep(3000);

    // Bypass age gate
    const ageGate = await page.$('[data-testid="age-gate-modal"]');
    if (ageGate) {
      const btn = await page.$('[data-testid="age-gate-submit-button"]');
      if (btn) await btn.click();
      await sleep(2000);
    }

    // Clicks the Load More button from inside the page; resolves to false
    // once the button is no longer in the DOM.
    const clickLoadMore = (): Promise<boolean> =>
      page.evaluate(() => {
        const btn = document.querySelector('button.collection__load-more') as HTMLButtonElement;
        if (btn) { btn.click(); return true; }
        return false;
      });

    // Try clicking Load More multiple times with JS (at most 15)
    console.log('Loading all brands...');
    for (let i = 0; i < 15; i++) {
      const clicked = await clickLoadMore();
      if (!clicked) break;
      await sleep(2000);
    }

    // Get all brands
    const brands = await page.evaluate(() => {
      const list: { name: string; href: string }[] = [];
      document.querySelectorAll('.brands-page__list a[href*="/brand/"]').forEach((a: Element) => {
        list.push({
          name: a.textContent?.trim() || '',
          href: a.getAttribute('href') || '',
        });
      });
      return list;
    });
    console.log('Total brands found: ' + brands.length + '\n');
    console.log('PRODUCTS PER BRAND');
    console.log('==================\n');

    const results: { brand: string; products: number }[] = [];
    for (let i = 0; i < brands.length; i++) {
      const brand = brands[i];
      const num = (i + 1).toString().padStart(2, ' ');
      // Hrefs are normally site-relative; keep absolute ones untouched.
      const url = brand.href.startsWith('http')
        ? brand.href
        : 'https://shop.bestdispensary.com' + brand.href;
      try {
        await page.goto(url, { waitUntil: 'networkidle2', timeout: 30000 });
        await sleep(1000);

        // Click load more on brand page (at most 20)
        for (let j = 0; j < 20; j++) {
          const clicked = await clickLoadMore();
          if (!clicked) break;
          await sleep(1000);
        }

        const productCount = await page.evaluate(() => {
          const seen = new Set<string>();
          document.querySelectorAll('a[href*="/product/"]').forEach((a: Element) => {
            const href = a.getAttribute('href');
            if (href) seen.add(href);
          });
          return seen.size;
        });
        results.push({ brand: brand.name, products: productCount });
        console.log(num + '. ' + brand.name + ': ' + productCount);
      } catch (err: unknown) {
        // Record the brand with 0 products and keep going.
        const reason = err instanceof Error ? err.message : String(err);
        results.push({ brand: brand.name, products: 0 });
        console.log(num + '. ' + brand.name + ': ERROR - ' + reason.slice(0, 30));
      }
    }

    // Summary
    const total = results.reduce((s, r) => s + r.products, 0);
    console.log('\n==================');
    console.log('TOTAL: ' + brands.length + ' brands, ' + total + ' products');
    console.log('==================');
  } finally {
    await browser.close();
  }
}
main().catch(console.error);

View File

@@ -0,0 +1,130 @@
import puppeteer from 'puppeteer';
/**
 * Delay helper used between page interactions.
 *
 * @param ms Milliseconds to wait before the returned promise settles.
 */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((elapsed) => {
    setTimeout(elapsed, ms);
  });
}
/**
 * Explore the DOM structure of the bestdispensary.com brands page.
 *
 * Prints (1) a summary of each `<section>` inside `<main>`, (2) the first 30
 * links found in `main > section`, and (3) every grid-like container holding
 * more than five brand links together with up to 50 distinct brand names.
 *
 * Class names are read with `getAttribute('class')` rather than `className`:
 * on SVG elements `className` is an `SVGAnimatedString`, which has no `slice`
 * method, so `className?.slice(...)` would throw inside the page.
 *
 * The browser is closed in a `finally` block so a failure does not leave a
 * Chromium process running.
 */
async function main(): Promise<void> {
  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // Abort image/font/media requests; only the DOM structure is inspected.
    await page.setRequestInterception(true);
    page.on('request', (req) => {
      if (['image', 'font', 'media'].includes(req.resourceType())) {
        req.abort();
      } else {
        req.continue();
      }
    });

    await page.goto('https://shop.bestdispensary.com/brands', {
      waitUntil: 'networkidle2',
      timeout: 60000,
    });
    await sleep(3000);

    // Bypass age gate
    const ageGate = await page.$('[data-testid="age-gate-modal"]');
    if (ageGate) {
      const btn = await page.$('[data-testid="age-gate-submit-button"]');
      if (btn) await btn.click();
      await sleep(2000);
    }

    // Use the selector hint: /html/body/main/section
    console.log('Looking at main > section structure...\n');
    const sectionInfo = await page.evaluate(() => {
      const main = document.querySelector('main');
      if (!main) return { error: 'No main element' };

      // Safe for both HTML and SVG elements.
      const classOf = (el: Element, max: number): string =>
        (el.getAttribute('class') ?? '').slice(0, max);

      const results: {
        index: number;
        class: string;
        childCount: number;
        sampleChildren: string[];
      }[] = [];
      main.querySelectorAll('section').forEach((section, i) => {
        const children = section.children;
        const childInfo: string[] = [];
        // Sample at most the first 10 children of each section.
        for (let j = 0; j < Math.min(children.length, 10); j++) {
          const child = children[j];
          childInfo.push(child.tagName + '.' + classOf(child, 30));
        }
        results.push({
          index: i,
          class: classOf(section, 50),
          childCount: children.length,
          sampleChildren: childInfo,
        });
      });
      return results;
    });
    console.log('Sections in main:');
    console.log(JSON.stringify(sectionInfo, null, 2));

    // Look for brand cards within the section
    console.log('\nLooking for brand cards in main > section...');
    const brandCards = await page.evaluate(() => {
      const section = document.querySelector('main > section');
      if (!section) return [];
      // Get all child elements that might be brand cards
      const cards: { tag: string; text: string; href: string }[] = [];
      section.querySelectorAll('a').forEach((a: Element) => {
        const href = a.getAttribute('href') || '';
        const text = a.textContent?.trim().slice(0, 50) || '';
        cards.push({ tag: 'a', text, href });
      });
      return cards;
    });
    console.log(`Found ${brandCards.length} links in section:`);
    brandCards.slice(0, 30).forEach(c => console.log(`  ${c.text} -> ${c.href}`));

    // Get the grid of brand cards
    console.log('\nLooking for grid container...');
    const gridCards = await page.evaluate(() => {
      // Look for grid-like containers
      const grids = document.querySelectorAll('[class*="grid"], [class*="Grid"], main section > div');
      const results: { class: string; brandCount: number; brands: string[] }[] = [];
      grids.forEach((grid) => {
        const links = grid.querySelectorAll('a[href*="/brand/"]');
        if (links.length > 5) {
          // A Set keeps first-seen order while deduplicating names.
          const names = new Set<string>();
          links.forEach((a: Element) => {
            const text = a.textContent?.trim().split('\n')[0] || '';
            if (text) names.add(text);
          });
          const brands = [...names];
          results.push({
            class: (grid.getAttribute('class') ?? '').slice(0, 40),
            brandCount: brands.length,
            brands: brands.slice(0, 50),
          });
        }
      });
      return results;
    });
    console.log('Grid containers with brands:');
    gridCards.forEach(g => {
      console.log(`\n[${g.brandCount} brands] class="${g.class}"`);
      g.brands.forEach((b: string, i: number) => console.log(`  ${i+1}. ${b}`));
    });
  } finally {
    await browser.close();
  }
}
main().catch(console.error);

View File

@@ -0,0 +1,188 @@
/**
* One-off script to test iHeartJane scraping
* Mimics remote worker: Puppeteer + stealth + proxy
*
* Usage: npx ts-node scripts/test-iheartjane.ts
*/
import puppeteer from 'puppeteer-extra';
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
puppeteer.use(StealthPlugin());
const TARGET_URL = 'https://theflowershopusa.com/mesa/menu/';
const STORE_ID = 2788;
/**
 * One-off probe of an iHeartJane-powered menu page.
 *
 * Loads TARGET_URL in headless Chromium, records every JSON response whose URL
 * contains "iheartjane.com" or "algolia", fetches the store record for
 * STORE_ID from inside the page, tries to pull product hints out of the DOM,
 * prints a summary, writes all captured responses to
 * /tmp/iheartjane-data.json and saves a screenshot.
 *
 * Fixes over the earlier version:
 * - the products extracted from the page are now reported (they were computed
 *   and then discarded);
 * - a failure while taking the error screenshot no longer replaces the
 *   original error;
 * - a captured response whose JSON body is `null` no longer crashes the
 *   summary.
 */
async function main(): Promise<void> {
  console.log('[iHeartJane Test] Starting...');
  // No proxy for local testing
  const browser = await puppeteer.launch({
    headless: true,
    args: [
      '--no-sandbox',
      '--disable-setuid-sandbox',
      '--disable-dev-shm-usage',
      '--disable-blink-features=AutomationControlled',
    ],
  });
  const page = await browser.newPage();
  await page.setViewport({ width: 1920, height: 1080 });

  // Intercept network requests to capture API calls
  const apiResponses: any[] = [];
  await page.setRequestInterception(true);
  page.on('request', (req) => {
    // Block heavy resources
    const type = req.resourceType();
    if (['image', 'font', 'media', 'stylesheet'].includes(type)) {
      req.abort();
    } else {
      req.continue();
    }
  });
  page.on('response', async (response) => {
    const url = response.url();
    const contentType = response.headers()['content-type'] || '';
    // Capture any JSON response from iheartjane domains
    if ((url.includes('iheartjane.com') || url.includes('algolia')) && contentType.includes('json')) {
      try {
        const json = await response.json();
        // Label by the first matching URL fragment.
        const type = url.includes('store') ? 'STORE' :
          url.includes('product') ? 'PRODUCT' :
          url.includes('algolia') ? 'ALGOLIA' : 'API';
        apiResponses.push({ type, url, data: json });
        console.log(`[${type}] ${url.substring(0, 120)}...`);
      } catch {
        // Not JSON
      }
    }
  });

  console.log(`[iHeartJane Test] Navigating to ${TARGET_URL}`);
  try {
    await page.goto(TARGET_URL, {
      waitUntil: 'networkidle2',
      timeout: 60000,
    });
    console.log('[iHeartJane Test] Menu page loaded, waiting for data...');
    // Wait a bit for all API calls to complete
    await new Promise(r => setTimeout(r, 3000));

    // Also try to get store info by visiting the store page
    console.log('[iHeartJane Test] Fetching store info...');
    const storeInfoUrl = `https://api.iheartjane.com/v1/stores/${STORE_ID}`;
    // Try to fetch store info via page.evaluate (uses browser context)
    const storeInfo = await page.evaluate(async (storeId) => {
      try {
        const resp = await fetch(`https://api.iheartjane.com/v1/stores/${storeId}`);
        if (resp.ok) return await resp.json();
        return { error: resp.status };
      } catch (e: any) {
        return { error: e.message };
      }
    }, STORE_ID);
    if (storeInfo && !storeInfo.error) {
      apiResponses.push({ type: 'STORE_DIRECT', url: storeInfoUrl, data: storeInfo });
      console.log('[STORE_DIRECT] Got store info via fetch');
    } else {
      console.log(`[STORE_DIRECT] Failed: ${JSON.stringify(storeInfo)}`);
    }

    console.log('[iHeartJane Test] Processing results...');
    // Wait for products to load; a timeout here is reported, not fatal.
    await page.waitForSelector('[data-testid="product-card"], .product-card, [class*="ProductCard"]', {
      timeout: 30000,
    }).catch(() => console.log('[iHeartJane Test] No product cards found via selector'));

    // Try to extract product data from the page
    const products = await page.evaluate(() => {
      // Look for product data in various places
      const results: any[] = [];

      // Method 1: Look for __INITIAL_STATE__ or similar
      const scripts = Array.from(document.querySelectorAll('script'));
      for (const script of scripts) {
        const text = script.textContent || '';
        if (text.includes('products') && text.includes('price')) {
          // Try to find JSON object; only the first 500 characters are kept.
          const match = text.match(/\{[\s\S]*"products"[\s\S]*\}/);
          if (match) {
            results.push({ source: 'script', data: match[0].substring(0, 500) });
          }
        }
      }

      // Method 2: Look for product elements in DOM (first 5 only)
      const productElements = document.querySelectorAll('[data-testid="product-card"], .product-card, [class*="product"]');
      for (const el of Array.from(productElements).slice(0, 5)) {
        const name = el.querySelector('[class*="name"], h3, h4')?.textContent;
        const price = el.querySelector('[class*="price"]')?.textContent;
        if (name) {
          results.push({ source: 'dom', name, price });
        }
      }
      return results;
    });

    console.log('\n[iHeartJane Test] === RESULTS ===');
    console.log(`Total API responses captured: ${apiResponses.length}`);
    console.log(`Product hints extracted from page: ${products.length}`);
    for (const hint of products) {
      console.log(`  ${JSON.stringify(hint)}`);
    }

    // Group by type
    const byType: Record<string, any[]> = {};
    for (const r of apiResponses) {
      byType[r.type] = byType[r.type] || [];
      byType[r.type].push(r);
    }
    for (const [type, items] of Object.entries(byType)) {
      console.log(`\n--- ${type} (${items.length} responses) ---`);
      for (const item of items) {
        console.log(`URL: ${item.url}`);
        // Show structure; `data` may legitimately be null for a JSON `null` body.
        if (item.data?.hits) {
          console.log(`  Products: ${item.data.hits.length} hits`);
          if (item.data.hits[0]) {
            console.log(`  Fields: ${Object.keys(item.data.hits[0]).join(', ')}`);
          }
        } else if (item.data?.store) {
          console.log(`  Store: ${JSON.stringify(item.data.store, null, 2).substring(0, 1000)}`);
        } else {
          console.log(`  Keys: ${Object.keys(item.data ?? {}).join(', ')}`);
        }
      }
    }

    // Write full data to file
    const fs = await import('fs');
    fs.writeFileSync('/tmp/iheartjane-data.json', JSON.stringify(apiResponses, null, 2));
    console.log('\n[iHeartJane Test] Full data saved to /tmp/iheartjane-data.json');

    // Take screenshot
    await page.screenshot({ path: '/tmp/iheartjane-test.png', fullPage: false });
    console.log('[iHeartJane Test] Screenshot saved to /tmp/iheartjane-test.png');
  } catch (error: any) {
    console.error('[iHeartJane Test] Error:', error.message);
    // Best-effort: the page may already be unusable, so do not let a failed
    // screenshot hide the error reported above.
    await page.screenshot({ path: '/tmp/iheartjane-error.png' }).catch((shotError: unknown) => {
      const reason = shotError instanceof Error ? shotError.message : String(shotError);
      console.error('[iHeartJane Test] Could not save error screenshot:', reason);
    });
  } finally {
    await browser.close();
  }
  console.log('[iHeartJane Test] Done');
}
main().catch(console.error);

View File

@@ -0,0 +1,224 @@
/**
* Explore Jane API to understand data structure
* Usage: npx ts-node scripts/test-jane-api-explore.ts
*/
import puppeteer from 'puppeteer-extra';
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
puppeteer.use(StealthPlugin());
/**
 * Explore Jane (iheartjane.com) store data sources from a browser context.
 *
 * Captures JSON responses seen while loading the public stores page, looks for
 * store data embedded in the page (`__NEXT_DATA__`, `window.stores`,
 * `window.__stores`), then probes several candidate API endpoints and an
 * Algolia-style search endpoint with `fetch` from inside the page. Findings
 * are printed to the console only.
 *
 * Changes from the earlier version:
 * - the browser is closed in a `finally` block so a failed navigation does not
 *   leave a Chromium process running;
 * - the four copy-pasted GET probes share one helper;
 * - embedded-data reporting yields a store count for every source instead of
 *   printing `undefined` for the `window.*` sources.
 */
async function main(): Promise<void> {
  console.log('Exploring Jane API from browser context...\n');
  const browser = await puppeteer.launch({
    headless: 'new',
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  try {
    const page = await browser.newPage();

    // Intercept network requests to find store data API calls
    const capturedResponses: Array<{ url: string; data: any }> = [];
    await page.setRequestInterception(true);
    page.on('request', (req) => req.continue());
    page.on('response', async (response) => {
      const url = response.url();
      if (url.includes('iheartjane.com') &&
          (url.includes('/stores') || url.includes('/search') || url.includes('algolia'))) {
        try {
          const text = await response.text();
          // Cheap pre-check so only object/array bodies are parsed.
          if (text.startsWith('{') || text.startsWith('[')) {
            const data = JSON.parse(text);
            capturedResponses.push({ url, data });
            console.log(`Captured: ${url.substring(0, 100)}...`);
          }
        } catch {
          // Not JSON
        }
      }
    });

    // Visit Jane to establish session
    console.log('Visiting Jane stores page to capture network requests...');
    await page.goto('https://www.iheartjane.com/stores', {
      waitUntil: 'networkidle2',
      timeout: 60000,
    });

    console.log(`\nCaptured ${capturedResponses.length} API responses`);
    for (const resp of capturedResponses) {
      console.log(`\n--- ${resp.url.substring(0, 80)} ---`);
      const keys = Object.keys(resp.data);
      console.log('Keys:', keys);

      // Check for stores array
      if (resp.data.stores && Array.isArray(resp.data.stores)) {
        console.log(`Stores count: ${resp.data.stores.length}`);
        const firstStore = resp.data.stores[0];
        if (firstStore) {
          console.log('First store keys:', Object.keys(firstStore));
          console.log('Sample:', JSON.stringify(firstStore, null, 2).substring(0, 500));
        }
      }

      // Check for hits (Algolia)
      if (resp.data.hits && Array.isArray(resp.data.hits)) {
        console.log(`Hits count: ${resp.data.hits.length}`);
        const firstHit = resp.data.hits[0];
        if (firstHit) {
          console.log('First hit keys:', Object.keys(firstHit));
        }
      }
    }

    // Look for __NEXT_DATA__ or similar embedded data
    console.log('\n--- Checking for embedded page data ---');
    const pageData = await page.evaluate(() => {
      // Every source is summarised into the same shape.
      const summarize = (source: string, stores: any) => ({
        source,
        storeCount: Array.isArray(stores) ? stores.length : undefined,
        firstStore: Array.isArray(stores) ? stores[0] : undefined,
      });

      // Check for Next.js data
      const nextData = (window as any).__NEXT_DATA__;
      if (nextData?.props?.pageProps?.stores) {
        return summarize('__NEXT_DATA__', nextData.props.pageProps.stores);
      }

      // Check for any global store data
      const win = window as any;
      if (win.stores) return summarize('window.stores', win.stores);
      if (win.__stores) return summarize('window.__stores', win.__stores);
      return null;
    });
    if (pageData) {
      console.log('Found embedded data:', pageData.source);
      console.log('Store count:', pageData.storeCount ?? '(not an array)');
      if (pageData.firstStore) {
        console.log('First store keys:', Object.keys(pageData.firstStore));
        console.log('Sample:', JSON.stringify({
          id: pageData.firstStore.id,
          name: pageData.firstStore.name,
          city: pageData.firstStore.city,
          state: pageData.firstStore.state,
        }, null, 2));
      }
    } else {
      console.log('No embedded page data found');
    }

    // GET `url` from inside the page; resolves to the parsed JSON body, or
    // null on a non-OK status or any fetch/parse failure. Failures are
    // deliberately not raised: each probe is best-effort.
    const fetchJsonInPage = (url: string): Promise<any> =>
      page.evaluate(async (target: string) => {
        try {
          const res = await fetch(target);
          if (res.ok) return await res.json();
        } catch {
          // Treated the same as "endpoint not available".
        }
        return null;
      }, url);

    // Try alternative API endpoints from browser context
    console.log('\n--- Testing alternative API endpoints ---');

    // Try the map endpoint
    const mapData = await fetchJsonInPage('https://api.iheartjane.com/v1/stores/map?per_page=100');
    if (mapData) {
      console.log('\n/v1/stores/map response:');
      console.log('Keys:', Object.keys(mapData));
      if (mapData.stores?.[0]) {
        console.log('First store keys:', Object.keys(mapData.stores[0]));
      }
    }

    // Try index endpoint
    const indexData = await fetchJsonInPage('https://api.iheartjane.com/v1/stores/index?per_page=10');
    if (indexData) {
      console.log('\n/v1/stores/index response:');
      console.log('Keys:', Object.keys(indexData));
      if (indexData.stores?.[0]) {
        console.log('First store keys:', Object.keys(indexData.stores[0]));
      }
    }

    // Try with state parameter
    const stateData = await fetchJsonInPage('https://api.iheartjane.com/v1/stores?state=AZ&per_page=10');
    if (stateData) {
      console.log('\n/v1/stores?state=AZ response:');
      console.log('Keys:', Object.keys(stateData));
      console.log('Stores count:', stateData.stores?.length);
      if (stateData.stores?.[0]) {
        console.log('First store keys:', Object.keys(stateData.stores[0]));
        console.log('Sample:', JSON.stringify(stateData.stores[0], null, 2).substring(0, 300));
      }
    }

    // Try Algolia directly for stores
    console.log('\n--- Testing Algolia for stores ---');
    const algoliaStores = await page.evaluate(async () => {
      try {
        // Common Algolia search pattern
        // NOTE(review): application id and API key are hard-coded in source;
        // confirm this is a public search-only key before keeping it in the repo.
        const res = await fetch('https://search.iheartjane.com/1/indexes/stores-production/query', {
          method: 'POST',
          headers: {
            'Content-Type': 'application/json',
            'X-Algolia-Application-Id': 'HKXSXRD7RA',
            'X-Algolia-API-Key': 'YjZhYjQxZjU4ZTNjMTRhYzExZTk2YjU2MzliMGE4ZTE5YjJkMmZkZTI2ODllYTY2MThlMzQ3Y2QxOTFkMjI5Y3RhZ0ZpbHRlcnM9',
          },
          body: JSON.stringify({
            query: 'Arizona',
            hitsPerPage: 20,
          }),
        });
        if (res.ok) return await res.json();
      } catch {
        // Best-effort probe: treated the same as "endpoint not available".
      }
      return null;
    });
    if (algoliaStores) {
      console.log('Algolia stores-production response:');
      console.log('Keys:', Object.keys(algoliaStores));
      console.log('Hits count:', algoliaStores.hits?.length);
      if (algoliaStores.hits?.[0]) {
        console.log('First hit keys:', Object.keys(algoliaStores.hits[0]));
        console.log('Sample:', JSON.stringify(algoliaStores.hits[0], null, 2).substring(0, 500));
      }
    }

    // Check if there's a /v2 endpoint
    const v2Data = await fetchJsonInPage('https://api.iheartjane.com/v2/stores?per_page=10');
    if (v2Data) {
      console.log('\n/v2/stores response:');
      console.log('Keys:', Object.keys(v2Data));
      if (v2Data.stores?.[0]) {
        console.log('First store keys:', Object.keys(v2Data.stores[0]));
      }
    }
  } finally {
    await browser.close();
  }
  console.log('\nDone!');
}
main().catch(console.error);

View File

@@ -0,0 +1,126 @@
/**
* Test script for Jane platform client
* Tests the new Jane integration with The Flower Shop Mesa
*
* Usage: npx ts-node scripts/test-jane-client.ts
*/
import {
startSession,
endSession,
fetchProductsFromUrl,
resolveStoreFromUrl,
} from '../src/platforms/jane';
import { JaneNormalizer } from '../src/hydration/normalizers/jane';
const TEST_URL = 'https://theflowershopusa.com/mesa/menu/';
/**
 * Smoke-tests the Jane platform client against TEST_URL.
 *
 * Test 1 fetches the menu with `fetchProductsFromUrl` and prints the captured
 * store plus a sample of products. Test 2 wraps the raw product hits in a
 * stand-in payload row, runs it through `JaneNormalizer`, and prints the
 * normalized counts and one sample product.
 *
 * Any thrown error - including a fetch that captured zero products - is
 * reported as TEST FAILED and the process exits with status 1.
 */
async function main() {
  const divider = '='.repeat(60);
  console.log(divider);
  console.log('Jane Platform Client Test');
  console.log(divider);
  console.log(`Test URL: ${TEST_URL}`);
  console.log('');
  try {
    // Test 1: Fetch products from URL
    console.log('[Test 1] Fetching products from menu URL...');
    const result = await fetchProductsFromUrl(TEST_URL);
    console.log('');
    console.log('[Results]');
    console.log(` Store: ${result.store?.name || 'Not captured'}`);
    console.log(` Store ID: ${result.store?.id || 'N/A'}`);
    console.log(` Products captured: ${result.products.length}`);
    console.log(` API responses: ${result.responses.length}`);
    if (result.store) {
      console.log('');
      console.log('[Store Info]');
      console.log(` Address: ${result.store.address}, ${result.store.city}, ${result.store.state} ${result.store.zip}`);
      console.log(` Phone: ${result.store.phone}`);
      console.log(` Coordinates: ${result.store.lat}, ${result.store.long}`);
      console.log(` Medical: ${result.store.medical}, Recreational: ${result.store.recreational}`);
      console.log(` Rating: ${result.store.rating} (${result.store.reviews_count} reviews)`);
      console.log(` Product count (store): ${result.store.product_count}`);
    }
    // A run that captured nothing has exercised neither the client nor the
    // normalizer, so it must not fall through to "TEST PASSED".
    if (result.products.length === 0) {
      throw new Error(`No products captured from ${TEST_URL}`);
    }
    console.log('');
    console.log('[Sample Products (first 5)]');
    for (const p of result.products.slice(0, 5)) {
      const price = p.price_gram || p.price_each;
      // Only prefix the dollar sign when there is an actual price to show.
      const priceLabel = price ? `$${price}` : 'N/A';
      console.log(` - ${p.name} (${p.brand}) - ${priceLabel}`);
      console.log(` Kind: ${p.kind}, Category: ${p.category}, THC: ${p.percent_thc}%`);
    }
    // Test 2: Normalize products
    console.log('');
    console.log('[Test 2] Testing normalizer...');
    const normalizer = new JaneNormalizer();
    // Build a fake payload structure; only raw_json and product_count carry
    // data from the fetch, every other field is a placeholder value.
    const fakePayload = {
      id: 'test-payload',
      dispensary_id: 9999,
      crawl_run_id: null,
      platform: 'jane',
      payload_version: 1,
      raw_json: { hits: result.products.map(p => p.raw) },
      product_count: result.products.length,
      pricing_type: null,
      crawl_mode: null,
      fetched_at: new Date(),
      processed: false,
      normalized_at: null,
      hydration_error: null,
      hydration_attempts: 0,
      created_at: new Date(),
    };
    const normalized = normalizer.normalize(fakePayload);
    console.log(` Products normalized: ${normalized.products.length}`);
    console.log(` Brands extracted: ${normalized.brands.length}`);
    console.log(` Categories extracted: ${normalized.categories.length}`);
    console.log(` Errors: ${normalized.errors.length}`);
    if (normalized.products.length > 0) {
      console.log('');
      console.log('[Sample Normalized Product]');
      const np = normalized.products[0];
      console.log(` External ID: ${np.externalProductId}`);
      console.log(` Name: ${np.name}`);
      console.log(` Brand: ${np.brandName}`);
      console.log(` Category: ${np.category}`);
      console.log(` Type: ${np.type}`);
      console.log(` Strain: ${np.strainType}`);
      console.log(` THC: ${np.thcPercent}%`);
      console.log(` CBD: ${np.cbdPercent}%`);
      // Avoid printing "undefined..." when the product has no image.
      const imagePreview = np.primaryImageUrl ? `${np.primaryImageUrl.slice(0, 60)}...` : 'N/A';
      console.log(` Image: ${imagePreview}`);
      const pricing = normalized.pricing.get(np.externalProductId);
      if (pricing) {
        console.log(` Price (cents): ${pricing.priceRec}`);
        console.log(` On Special: ${pricing.isOnSpecial}`);
      }
    }
    console.log('');
    console.log(divider);
    console.log('TEST PASSED');
    console.log(divider);
  } catch (error: any) {
    console.error('');
    console.error(divider);
    console.error('TEST FAILED');
    console.error(divider);
    console.error(`Error: ${error.message}`);
    console.error(error.stack);
    process.exit(1);
  }
}
main().catch(console.error);

View File

@@ -0,0 +1,50 @@
/**
* Smoke test: Discover Jane stores in Arizona
* Usage: npx ts-node scripts/test-jane-discovery-az.ts
*/
import { discoverStoresByState } from '../src/platforms/jane';
/**
 * Arizona discovery smoke test: calls `discoverStoresByState('AZ')`, prints
 * the total plus up to ten sample stores, and exits with status 1 if the
 * discovery call (or the reporting) throws.
 */
async function main() {
  const rule = '='.repeat(60);
  const sampleSize = 10;

  console.log(rule);
  console.log('Jane Store Discovery - Arizona Smoke Test');
  console.log(rule);
  console.log('Using local IP (no proxy)\n');

  try {
    const stores = await discoverStoresByState('AZ');
    const total = stores.length;

    console.log(`\n${rule}`);
    console.log(`RESULTS: Found ${total} Jane stores in Arizona`);
    console.log(rule);

    if (total > 0) {
      console.log('\nSample stores:');
      stores.slice(0, sampleSize).forEach(entry => {
        const types = entry.storeTypes?.join(', ') || 'unknown';
        const products = entry.productCount || 'N/A';
        console.log(` - ${entry.name}`);
        console.log(` ID: ${entry.storeId} | ${entry.city}, AZ`);
        console.log(` Types: ${types}`);
        console.log(` Products: ${products}`);
        console.log('');
      });
      const remaining = total - sampleSize;
      if (remaining > 0) {
        console.log(` ... and ${remaining} more stores`);
      }
    }

    console.log('\n' + rule);
    console.log('SMOKE TEST PASSED');
    console.log(rule);
  } catch (error: any) {
    console.error('\n' + rule);
    console.error('SMOKE TEST FAILED');
    console.error(rule);
    console.error(`Error: ${error.message}`);
    console.error(error.stack);
    process.exit(1);
  }
}
main();

View File

@@ -0,0 +1,55 @@
/**
* Compare MED vs REC product menus for same location
*/
import puppeteer from 'puppeteer-extra';
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
puppeteer.use(StealthPlugin());
/**
 * Summarizes how two menus overlap, given the product IDs fetched for each.
 *
 * Counts are taken over distinct IDs. The verdict is IDENTICAL only when
 * neither side has an ID the other lacks, and INCONCLUSIVE when either side
 * is empty (an empty fetch says nothing about the real menus).
 *
 * @param recIds product IDs fetched for the REC store
 * @param medIds product IDs fetched for the MED store
 * @returns distinct-ID counts plus a human-readable verdict
 */
function summarizeMenuOverlap(
  recIds: number[],
  medIds: number[],
): { recOnly: number; medOnly: number; shared: number; verdict: string } {
  const recSet = new Set(recIds);
  const medSet = new Set(medIds);
  let shared = 0;
  recSet.forEach(id => {
    if (medSet.has(id)) shared++;
  });
  const recOnly = recSet.size - shared;
  const medOnly = medSet.size - shared;

  let verdict: string;
  if (recSet.size === 0 || medSet.size === 0) {
    verdict = 'INCONCLUSIVE (no products fetched for at least one menu)';
  } else if (shared === 0) {
    verdict = 'COMPLETELY DIFFERENT';
  } else if (recOnly === 0 && medOnly === 0) {
    verdict = 'IDENTICAL';
  } else {
    verdict = 'PARTIALLY OVERLAPPING';
  }
  return { recOnly, medOnly, shared, verdict };
}

/**
 * Fetches up to 100 product IDs for the REC store (3379) and the MED store
 * (4540) from inside a browser page and prints how the two menus overlap.
 */
async function main() {
  const browser = await puppeteer.launch({ headless: 'new', args: ['--no-sandbox'] });
  // Always close the browser: a failed navigation or fetch would otherwise
  // leave the Chromium process running after the error is logged.
  try {
    const page = await browser.newPage();
    await page.goto('https://www.iheartjane.com/stores', { waitUntil: 'domcontentloaded' });
    await new Promise(r => setTimeout(r, 2000));

    // The query runs in the page context, so the store id is passed in as an
    // argument rather than captured from this scope.
    const fetchProductIds = (storeId: number): Promise<number[]> =>
      page.evaluate(async (id: number) => {
        const res = await fetch('https://search.iheartjane.com/1/indexes/menu-products-production/query', {
          method: 'POST',
          headers: { 'Content-Type': 'application/json' },
          body: JSON.stringify({ query: '', hitsPerPage: 100, filters: `store_id=${id}` }),
        });
        if (!res.ok) {
          throw new Error(`Product query for store ${id} failed with HTTP ${res.status}`);
        }
        const data = await res.json();
        return data.hits?.map((h: any) => h.product_id) || [];
      }, storeId);

    // Fetch REC products (store 3379)
    const recProducts = await fetchProductIds(3379);
    // Fetch MED products (store 4540)
    const medProducts = await fetchProductIds(4540);

    const overlap = summarizeMenuOverlap(recProducts, medProducts);
    console.log('\nHana Phoenix - MED vs REC comparison (100 products each):');
    console.log(' REC products fetched:', recProducts.length);
    console.log(' MED products fetched:', medProducts.length);
    console.log(' REC-only:', overlap.recOnly);
    console.log(' MED-only:', overlap.medOnly);
    console.log(' Shared:', overlap.shared);
    console.log(' Menus are:', overlap.verdict);
  } finally {
    await browser.close();
  }
}
main().catch(console.error);

View File

@@ -0,0 +1,79 @@
/**
* Find ALL differing fields between MED and REC product payloads
*/
import puppeteer from 'puppeteer-extra';
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
puppeteer.use(StealthPlugin());
/**
 * Takes the first product hit from the REC store (3379), looks for the same
 * product_id among the first 100 hits of the MED store (4540), and prints
 * every payload field whose JSON value differs, followed by the fields whose
 * names look limit/allowance related.
 */
async function main() {
  const browser = await puppeteer.launch({ headless: 'new', args: ['--no-sandbox'] });
  // Always close the browser: without this a failed step leaves Chromium
  // running after the error has been logged.
  try {
    const page = await browser.newPage();
    await page.goto('https://www.iheartjane.com/stores', { waitUntil: 'domcontentloaded' });
    await new Promise(r => setTimeout(r, 2000));
    // Get full product payload from REC store
    const recProduct = await page.evaluate(async () => {
      const res = await fetch('https://search.iheartjane.com/1/indexes/menu-products-production/query', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ query: '', hitsPerPage: 1, filters: 'store_id=3379' }),
      });
      const data = await res.json();
      return data.hits?.[0];
    });
    if (!recProduct) {
      // Without a reference product there is nothing to look up or diff.
      console.log('No product returned for REC store 3379 - nothing to compare.');
      return;
    }
    const productId = recProduct.product_id;
    // Get same product from MED store
    const medProduct = await page.evaluate(async (pid: number) => {
      const res = await fetch('https://search.iheartjane.com/1/indexes/menu-products-production/query', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ query: '', hitsPerPage: 100, filters: 'store_id=4540' }),
      });
      const data = await res.json();
      return data.hits?.find((h: any) => h.product_id === pid);
    }, productId);
    console.log('Product:', recProduct?.name, '(ID:', productId, ')\n');
    if (!medProduct) {
      // Only the first 100 MED hits are searched, so a miss is possible; say
      // so up front instead of letting the diff look like real differences.
      console.log('WARNING: product not found in the first 100 MED hits; every field below will show MED as undefined.\n');
    }
    // Get all keys
    const allKeys = new Set([...Object.keys(recProduct || {}), ...Object.keys(medProduct || {})]);
    const sortedKeys = [...allKeys].sort();
    console.log('=== ALL KEYS IN PAYLOAD ===');
    console.log(sortedKeys.join(', '));
    console.log('\n=== FIELDS THAT DIFFER ===');
    let diffCount = 0;
    for (const key of sortedKeys) {
      // Compare by JSON text so nested objects and arrays compare by value.
      const recVal = JSON.stringify(recProduct?.[key]);
      const medVal = JSON.stringify(medProduct?.[key]);
      if (recVal !== medVal) {
        diffCount++;
        console.log(`${key}:`);
        console.log(` REC: ${recVal?.substring(0, 100)}`);
        console.log(` MED: ${medVal?.substring(0, 100)}`);
      }
    }
    if (diffCount === 0) {
      console.log('(none - payloads are identical)');
    }
    // Check for limit/allowance related fields
    console.log('\n=== LIMIT-RELATED FIELDS ===');
    const limitFields = sortedKeys.filter(k =>
      k.includes('limit') || k.includes('max') || k.includes('allow') ||
      k.includes('quantity') || k.includes('cart') || k.includes('medical') ||
      k.includes('rec') || k.includes('weight')
    );
    for (const key of limitFields) {
      console.log(`${key}: REC=${JSON.stringify(recProduct?.[key])} | MED=${JSON.stringify(medProduct?.[key])}`);
    }
  } finally {
    await browser.close();
  }
}
main().catch(console.error);

View File

@@ -0,0 +1,35 @@
/**
* Test script to capture and save full Jane payload
* Usage: npx ts-node scripts/test-jane-payload.ts
*/
import * as fs from 'fs';
import { fetchProductsFromUrl } from '../src/platforms/jane';
const TEST_URL = 'https://theflowershopusa.com/mesa/menu/';
const OUTPUT_FILE = '/tmp/jane-test-payload.json';
/**
 * Fetches the menu at TEST_URL, wraps the raw hits and store record in a
 * payload object, writes it as pretty-printed JSON to OUTPUT_FILE, and prints
 * the product count and the written file size in KB.
 */
async function main() {
  console.log('Fetching Jane payload...');
  const result = await fetchProductsFromUrl(TEST_URL);
  const productTotal = result.products.length;

  // Build payload structure matching what would be saved
  const rawHits = result.products.map(product => product.raw);
  const payload = {
    hits: rawHits,
    store: result.store?.raw || null,
    capturedAt: new Date().toISOString(),
    platform: 'jane',
    storeId: result.store?.id,
    productCount: productTotal,
    responseCount: result.responses.length,
  };

  // Save to file
  const serialized = JSON.stringify(payload, null, 2);
  fs.writeFileSync(OUTPUT_FILE, serialized);

  const sizeKb = Math.round(fs.statSync(OUTPUT_FILE).size / 1024);
  console.log(`\nPayload saved to: ${OUTPUT_FILE}`);
  console.log(`Products: ${productTotal}`);
  console.log(`Size: ${sizeKb}KB`);
}
main().catch(console.error);

View File

@@ -0,0 +1,138 @@
import puppeteer from 'puppeteer';
/** Resolves (with no value) once `ms` milliseconds have elapsed. */
async function sleep(ms: number): Promise<void> {
  return new Promise<void>(done => {
    setTimeout(done, ms);
  });
}
/**
 * Loads the shop page in headless Chrome while recording treez.io requests
 * and summarizing gapcommerceapi.com search responses, clicks "Load More" up
 * to five times, prints the distinct treez.io endpoints seen, then issues one
 * product search from inside the page and prints the field list and a sample
 * product.
 */
async function main() {
  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });
  // Always close the browser: a navigation timeout or click failure would
  // otherwise leave the Chromium process running.
  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });
    // Capture ALL requests to treez.io
    const treezRequests: { url: string; method: string }[] = [];
    page.on('request', (req) => {
      const url = req.url();
      if (url.includes('treez.io') && !url.includes('.js') && !url.includes('.css')) {
        treezRequests.push({
          url: url,
          method: req.method(),
        });
      }
    });
    // Also intercept and capture ES API responses
    page.on('response', async (res) => {
      const url = res.url();
      if (url.includes('gapcommerceapi.com') && res.status() === 200) {
        try {
          const json = await res.json();
          const total = json.hits?.total?.value;
          const count = json.hits?.hits?.length;
          if (total || count) {
            console.log('\nES Response: total=' + total + ', returned=' + count);
            if (json.hits?.hits?.[0]?._source) {
              const src = json.hits.hits[0]._source;
              console.log('First product fields: ' + Object.keys(src).slice(0, 20).join(', '));
            }
          }
        } catch {
          // Deliberately ignored: a response whose body cannot be read as
          // JSON is simply not reported.
        }
      }
    });
    console.log('Loading /shop page...\n');
    await page.goto('https://shop.bestdispensary.com/shop', {
      waitUntil: 'networkidle2',
      timeout: 60000
    });
    await sleep(3000);
    // Bypass age gate
    const ageGate = await page.$('[data-testid="age-gate-modal"]');
    if (ageGate) {
      const btn = await page.$('[data-testid="age-gate-submit-button"]');
      if (btn) await btn.click();
      await sleep(2000);
    }
    // Click load more several times
    console.log('\nClicking Load More...');
    for (let i = 0; i < 5; i++) {
      const btn = await page.$('button.collection__load-more');
      if (!btn) break;
      await btn.click();
      await sleep(2000);
    }
    console.log('\n=== TREEZ API ENDPOINTS CALLED ===\n');
    // Strip query strings so each endpoint is listed once.
    const uniqueUrls = [...new Set(treezRequests.map(r => r.url.split('?')[0]))];
    uniqueUrls.forEach(url => console.log(url));
    // Now intercept the ES response data by making a request from browser context
    console.log('\n=== FETCHING ALL PRODUCTS VIA BROWSER ===\n');
    const allProducts = await page.evaluate(async () => {
      // NOTE(review): hard-coded API key; presumably the public key the
      // storefront itself sends - confirm before keeping it in the repo.
      const apiKey = 'V3jHL9dFzi3Gj4UISM4lr38Nm0GSxcps5OBz1PbS';
      const url = 'https://search-kyrok9udlk.gapcommerceapi.com/product/search';
      const query = {
        from: 0,
        size: 1000,
        query: {
          bool: {
            must: [
              { bool: { filter: { range: { customMinPrice: { gte: 0.01, lte: 500000 }}}}},
              { bool: { should: [{ match: { isAboveThreshold: true }}]}},
              { bool: { should: [{ match: { isHideFromMenu: false }}]}}
            ]
          }
        }
      };
      try {
        const response = await fetch(url, {
          method: 'POST',
          headers: {
            'Content-Type': 'application/json',
            'x-api-key': apiKey,
          },
          body: JSON.stringify(query),
        });
        const data = await response.json();
        return {
          total: data.hits?.total?.value,
          count: data.hits?.hits?.length,
          sample: data.hits?.hits?.[0]?._source,
          allProducts: data.hits?.hits?.map((h: any) => h._source),
        };
      } catch (err: any) {
        // Errors are returned as data, so a failed fetch is reported by the
        // caller instead of rejecting the evaluate() call.
        return { error: err.message };
      }
    });
    if (allProducts.error) {
      console.log('Error: ' + allProducts.error);
    } else {
      console.log('Total products: ' + allProducts.total);
      console.log('Returned: ' + allProducts.count);
      if (allProducts.sample) {
        console.log('\n=== PRODUCT FIELDS ===\n');
        console.log(Object.keys(allProducts.sample).sort().join('\n'));
        console.log('\n=== SAMPLE PRODUCT ===\n');
        console.log(JSON.stringify(allProducts.sample, null, 2));
      }
    }
  } finally {
    await browser.close();
  }
}
main();

View File

@@ -0,0 +1,203 @@
/**
* Extract ALL product elements and find unique products
*/
import puppeteer, { Page } from 'puppeteer';
const STORE_ID = 'best';
/** Pauses the caller for `ms` milliseconds. */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>(wake => setTimeout(wake, ms));
}
/**
 * Dismisses the age-gate modal if one is present: clicks the submit button
 * when it exists, then waits two seconds. Does nothing when no modal is found.
 */
async function bypassAgeGate(page: Page): Promise<void> {
  const modal = await page.$('[data-testid="age-gate-modal"]');
  if (!modal) {
    return;
  }
  const submit = await page.$('[data-testid="age-gate-submit-button"]');
  if (submit) {
    await submit.click();
  }
  // The wait happens whether or not a button was found and clicked.
  await sleep(2000);
}
/**
 * Opens the store's /onlinemenu/brands page and reports, in three steps:
 * [1] how many elements carry a `product_product__` class, broken down by tag;
 * [2] how many elements and distinct names four selector strategies yield;
 * [3] the products extracted from product links, deduplicated by image alt.
 */
async function main() {
  console.log('='.repeat(60));
  console.log('Extracting ALL product elements');
  console.log('='.repeat(60));
  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });
  // Always close the browser: a navigation timeout or evaluate failure would
  // otherwise leave the Chromium process running.
  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });
    // Drop images, fonts and media requests; only the DOM is inspected.
    await page.setRequestInterception(true);
    page.on('request', (req) => {
      if (['image', 'font', 'media'].includes(req.resourceType())) {
        req.abort();
      } else {
        req.continue();
      }
    });
    const url = `https://${STORE_ID}.treez.io/onlinemenu/brands?customerType=ADULT`;
    await page.goto(url, { waitUntil: 'networkidle2', timeout: 60000 });
    await sleep(3000);
    await bypassAgeGate(page);
    await sleep(2000);
    // Get ALL elements with product_product__ class
    console.log('\n[1] Counting all product_product__ elements...');
    const elementAnalysis = await page.evaluate(() => {
      const all = document.querySelectorAll('[class*="product_product__"]');
      const byTag: Record<string, number> = {};
      const anchorHrefs: string[] = [];
      const imgAlts: string[] = [];
      all.forEach(el => {
        const tag = el.tagName;
        byTag[tag] = (byTag[tag] || 0) + 1;
        if (tag === 'A') {
          const href = el.getAttribute('href');
          if (href && href.includes('/product/')) {
            anchorHrefs.push(href);
          }
        }
        if (tag === 'IMG') {
          const alt = el.getAttribute('alt');
          if (alt) imgAlts.push(alt);
        }
      });
      // Only the first 20 samples are returned; the unique counts are
      // computed over the full lists before slicing.
      return {
        total: all.length,
        byTag,
        anchorHrefs: anchorHrefs.slice(0, 20),
        uniqueAnchors: new Set(anchorHrefs).size,
        imgAlts: imgAlts.slice(0, 20),
        uniqueImgAlts: new Set(imgAlts).size,
      };
    });
    console.log(`Total elements: ${elementAnalysis.total}`);
    console.log(`By tag:`, elementAnalysis.byTag);
    console.log(`Unique anchor hrefs: ${elementAnalysis.uniqueAnchors}`);
    console.log(`Unique image alts: ${elementAnalysis.uniqueImgAlts}`);
    console.log(`\nSample anchor hrefs:`, elementAnalysis.anchorHrefs.slice(0, 5));
    console.log(`Sample image alts:`, elementAnalysis.imgAlts.slice(0, 5));
    // Try to extract using different approaches
    console.log('\n[2] Testing extraction approaches...');
    const approaches = await page.evaluate(() => {
      const results: Record<string, { count: number; unique: number; sample: string[] }> = {};
      // Approach 1: Anchor elements with product links
      const anchors = document.querySelectorAll('a[href*="/product/"]');
      const anchorNames = new Set<string>();
      anchors.forEach(a => {
        const img = a.querySelector('img');
        const name = img?.getAttribute('alt') || a.textContent?.trim().split('\n')[0] || '';
        if (name) anchorNames.add(name);
      });
      results['a[href*="/product/"]'] = {
        count: anchors.length,
        unique: anchorNames.size,
        sample: Array.from(anchorNames).slice(0, 5),
      };
      // Approach 2: Images with alt text inside product areas
      const productImgs = document.querySelectorAll('[class*="product_product__"] img[alt]');
      const imgNames = new Set<string>();
      productImgs.forEach(img => {
        const alt = img.getAttribute('alt');
        if (alt && alt.length > 2) imgNames.add(alt);
      });
      results['[class*="product_product__"] img[alt]'] = {
        count: productImgs.length,
        unique: imgNames.size,
        sample: Array.from(imgNames).slice(0, 5),
      };
      // Approach 3: H5 elements (product names)
      const h5s = document.querySelectorAll('h5.product_product__name__JcEk0, h5[class*="product__name"]');
      const h5Names = new Set<string>();
      h5s.forEach(h5 => {
        const text = h5.textContent?.trim();
        if (text) h5Names.add(text);
      });
      results['h5[class*="product__name"]'] = {
        count: h5s.length,
        unique: h5Names.size,
        sample: Array.from(h5Names).slice(0, 5),
      };
      // Approach 4: Link class with product_product__
      const links = document.querySelectorAll('a.product_product__ERWtJ, a[class*="product_product__"][class*="link"]');
      const linkNames = new Set<string>();
      links.forEach(link => {
        const h5 = link.querySelector('h5');
        const img = link.querySelector('img');
        const name = h5?.textContent?.trim() || img?.getAttribute('alt') || '';
        if (name) linkNames.add(name);
      });
      results['a.product_product__ERWtJ'] = {
        count: links.length,
        unique: linkNames.size,
        sample: Array.from(linkNames).slice(0, 5),
      };
      return results;
    });
    Object.entries(approaches).forEach(([sel, data]) => {
      console.log(`\n${sel}:`);
      console.log(` Count: ${data.count}, Unique: ${data.unique}`);
      console.log(` Sample: ${data.sample.join(', ')}`);
    });
    // The best approach: use images with alt as the source of truth
    console.log('\n[3] Full product extraction using img[alt] approach...');
    const products = await page.evaluate(() => {
      const seen = new Set<string>();
      const products: { name: string; href: string; price: string }[] = [];
      // Get all product links
      document.querySelectorAll('a[href*="/product/"]').forEach(a => {
        const img = a.querySelector('img');
        const name = img?.getAttribute('alt') || '';
        // Links without an image alt, or repeating a name already seen, are skipped.
        if (!name || seen.has(name)) return;
        seen.add(name);
        const href = a.getAttribute('href') || '';
        // Get price from within the link or parent
        let price = '';
        const priceEl = a.querySelector('[class*="price"]');
        if (priceEl) {
          const priceMatch = priceEl.textContent?.match(/\$(\d+(?:\.\d{2})?)/);
          price = priceMatch ? priceMatch[1] : '';
        }
        products.push({ name, href, price });
      });
      return products;
    });
    console.log(`Extracted ${products.length} unique products`);
    console.log('\nSample products:');
    products.slice(0, 10).forEach(p => {
      console.log(` - ${p.name} | ${p.price ? '$' + p.price : 'N/A'} | ${p.href.slice(0, 40)}...`);
    });
  } finally {
    await browser.close();
  }
}
main().catch(console.error);

View File

@@ -0,0 +1,52 @@
import axios from 'axios';
/**
 * Posts a product search to the gapcommerce search endpoint without an
 * `x-api-key` header and prints the hit total, the number of products
 * returned, and the field list plus full JSON of the first product.
 * Request failures are logged (message, HTTP status, response body) rather
 * than rethrown.
 */
async function main() {
  const url = 'https://search-kyrok9udlk.gapcommerceapi.com/product/search';
  const query = {
    from: 0,
    size: 500,
    query: {
      bool: {
        must: [
          { bool: { filter: { range: { customMinPrice: { gte: 0.01, lte: 500000 }}}}},
          { bool: { should: [{ match: { isAboveThreshold: true }}]}},
          { bool: { should: [{ match: { isHideFromMenu: false }}]}}
        ]
      }
    }
  };
  console.log('Querying Treez Elasticsearch API...\n');
  try {
    const response = await axios.post(url, query, {
      headers: { 'Content-Type': 'application/json' },
      // Bound the wait so an unresponsive endpoint cannot hang the script.
      timeout: 30000,
    });
    const data = response.data;
    // hits.total is read either as an object with a `value` or as a bare
    // number. `??` keeps a legitimate value of 0 instead of falling through
    // to the object and printing "[object Object]".
    const total = data.hits?.total?.value ?? data.hits?.total;
    const products = data.hits?.hits || [];
    console.log('Total products: ' + total);
    console.log('Products returned: ' + products.length + '\n');
    if (products.length > 0) {
      const first = products[0]._source;
      console.log('=== PRODUCT FIELDS AVAILABLE ===\n');
      console.log(Object.keys(first).sort().join('\n'));
      console.log('\n=== SAMPLE PRODUCT ===\n');
      console.log(JSON.stringify(first, null, 2));
    }
  } catch (err: any) {
    console.log('Error: ' + err.message);
    // `response` is only present when the server actually answered.
    if (err.response) {
      console.log('Status: ' + err.response.status);
      console.log('Data: ' + JSON.stringify(err.response.data));
    }
  }
}
main();

View File

@@ -0,0 +1,97 @@
import axios from 'axios';
/**
 * Probes two endpoints used by the Best Dispensary storefront and prints what
 * each returns:
 *  1. the gapcommerce product search, sent with an `x-api-key` header;
 *  2. the Treez headless discounts endpoint, sent with client id/secret headers.
 * Each request has its own try/catch, so a failure of the first does not stop
 * the second.
 *
 * NOTE(review): the credentials below are hard-coded fallbacks; presumably
 * they are the public values the storefront itself sends - confirm, and
 * otherwise rotate them and supply them only through the environment.
 */
async function main() {
  // Test Elasticsearch API with API key
  console.log('=== ELASTICSEARCH API ===\n');
  const esUrl = 'https://search-kyrok9udlk.gapcommerceapi.com/product/search';
  const apiKey = process.env.TREEZ_ES_API_KEY ?? 'V3jHL9dFzi3Gj4UISM4lr38Nm0GSxcps5OBz1PbS';
  const query = {
    from: 0,
    size: 1000,
    query: {
      bool: {
        must: [
          { bool: { filter: { range: { customMinPrice: { gte: 0.01, lte: 500000 }}}}},
          { bool: { should: [{ match: { isAboveThreshold: true }}]}},
          { bool: { should: [{ match: { isHideFromMenu: false }}]}}
        ]
      }
    }
  };
  try {
    const response = await axios.post(esUrl, query, {
      headers: {
        'Content-Type': 'application/json',
        'x-api-key': apiKey,
        'Origin': 'https://shop.bestdispensary.com',
        'Referer': 'https://shop.bestdispensary.com/',
      },
      timeout: 30000,
    });
    const data = response.data;
    // `??` keeps a legitimate total of 0 instead of falling through to the
    // enclosing object and printing "[object Object]".
    const total = data.hits?.total?.value ?? data.hits?.total;
    const products = data.hits?.hits || [];
    console.log('Total products: ' + total);
    console.log('Products returned: ' + products.length);
    if (products.length > 0) {
      const first = products[0]._source;
      console.log('\n=== PRODUCT FIELDS ===\n');
      console.log(Object.keys(first).sort().join('\n'));
      console.log('\n=== SAMPLE PRODUCT ===\n');
      console.log(JSON.stringify(first, null, 2));
    }
  } catch (err: any) {
    console.log('Elasticsearch Error: ' + err.message);
    if (err.response) {
      console.log('Status: ' + err.response.status);
    }
  }
  // Test Treez Headless API
  console.log('\n\n=== TREEZ HEADLESS API ===\n');
  const treezUrl = 'https://headless.treez.io/v2.0/dispensary/best/ecommerce/discounts?excludeInactive=true&hideUnset=true&includeProdInfo=true';
  try {
    const response = await axios.get(treezUrl, {
      headers: {
        'client_id': process.env.TREEZ_CLIENT_ID ?? '29dce682258145c6b1cf71027282d083',
        'client_secret': process.env.TREEZ_CLIENT_SECRET ?? 'A57bB49AfD7F4233B1750a0B501B4E16',
        'cache-control': 'max-age=0, no-cache, must-revalidate, proxy-revalidate',
        'Origin': 'https://shop.bestdispensary.com',
        'Referer': 'https://shop.bestdispensary.com/',
      },
      timeout: 30000,
    });
    const data = response.data;
    console.log('Response type: ' + typeof data);
    if (Array.isArray(data)) {
      console.log('Array length: ' + data.length);
      if (data.length > 0) {
        console.log('First item: ' + JSON.stringify(data[0], null, 2).slice(0, 1000));
      }
    } else if (data !== null && typeof data === 'object') {
      console.log('Keys: ' + Object.keys(data).join(', '));
      console.log('Data: ' + JSON.stringify(data, null, 2).slice(0, 2000));
    } else {
      // A null or primitive body has no keys to list; Object.keys would throw
      // on null and produce character indexes for a string.
      console.log('Data: ' + String(data).slice(0, 2000));
    }
  } catch (err: any) {
    console.log('Treez Error: ' + err.message);
    if (err.response) {
      console.log('Status: ' + err.response.status);
      console.log('Data: ' + JSON.stringify(err.response.data).slice(0, 500));
    }
  }
}
main();

View File

@@ -0,0 +1,243 @@
/**
* Visit each brand page and extract products
*/
import puppeteer, { Page } from 'puppeteer';
const STORE_ID = 'best';
/** Returns a promise that settles after a delay of `ms` milliseconds. */
async function sleep(ms: number): Promise<void> {
  const elapsed = new Promise<void>(finish => {
    setTimeout(() => finish(), ms);
  });
  return elapsed;
}
/**
 * Clears the age gate when the page shows one. If the modal is present the
 * submit button is clicked (when found) and the function then waits two
 * seconds; with no modal it returns immediately.
 */
async function bypassAgeGate(page: Page): Promise<void> {
  const MODAL_SELECTOR = '[data-testid="age-gate-modal"]';
  const SUBMIT_SELECTOR = '[data-testid="age-gate-submit-button"]';

  const gate = await page.$(MODAL_SELECTOR);
  if (gate === null || gate === undefined) return;

  const confirmButton = await page.$(SUBMIT_SELECTOR);
  if (confirmButton) {
    await confirmButton.click();
  }
  await sleep(2000);
}
/**
 * Scrolls to the bottom of the page repeatedly so lazily loaded content can
 * appear. Stops after 30 passes, or as soon as the document height has
 * matched the previous pass's height three times in a row. Each pass waits
 * one second after scrolling.
 */
async function scrollToLoadAll(page: Page): Promise<void> {
  const maxPasses = 30;
  const stablePassesToStop = 3;
  let lastHeight = 0;
  let stablePasses = 0;
  let pass = 0;

  while (pass < maxPasses) {
    const height = await page.evaluate(() => document.body.scrollHeight);
    // Count consecutive passes with an unchanged height; any change resets.
    stablePasses = height === lastHeight ? stablePasses + 1 : 0;
    if (stablePasses >= stablePassesToStop) {
      return;
    }
    await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
    await sleep(1000);
    lastHeight = height;
    pass += 1;
  }
}
/**
 * Collects one entry per distinct product name from the `/product/` links on
 * the current page. The name is the link's image alt text, falling back to
 * the trimmed text of its h5; links with no name, or a name already seen, are
 * skipped. `price` is the first dollar amount found in an element whose class
 * contains "price" (without the `$`), or an empty string.
 */
async function extractProducts(page: Page): Promise<{ name: string; price: string; href: string }[]> {
  return page.evaluate(() => {
    const pricePattern = /\$(\d+(?:\.\d{2})?)/;
    const seenNames = new Set<string>();
    const collected: { name: string; price: string; href: string }[] = [];

    document.querySelectorAll('a[href*="/product/"]').forEach(anchor => {
      const altText = anchor.querySelector('img')?.getAttribute('alt');
      const headingText = anchor.querySelector('h5')?.textContent?.trim();
      const name = altText || headingText || '';
      if (name === '' || seenNames.has(name)) {
        return;
      }
      seenNames.add(name);

      const priceText = anchor.querySelector('[class*="price"]')?.textContent;
      const matched = priceText?.match(pricePattern);
      collected.push({
        name,
        price: matched ? matched[1] : '',
        href: anchor.getAttribute('href') || '',
      });
    });

    return collected;
  });
}
/**
 * Explores how brands are exposed on the store's /onlinemenu/brands page:
 * [1] derives candidate brand slugs from product URLs;
 * [2] looks for direct links to /brand/ pages;
 * [3] looks for brand section headers and a sample product under each;
 * [4] tries several brand-page URL patterns for the first section found;
 * [5] reloads the brands page and checks whether section headers are links.
 */
async function main() {
  console.log('='.repeat(60));
  console.log('Extracting Products from All Brands');
  console.log('='.repeat(60));
  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });
  // Always close the browser: a navigation timeout or evaluate failure would
  // otherwise leave the Chromium process running.
  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });
    // Drop images, fonts and media requests; only the DOM is inspected.
    await page.setRequestInterception(true);
    page.on('request', (req) => {
      if (['image', 'font', 'media'].includes(req.resourceType())) {
        req.abort();
      } else {
        req.continue();
      }
    });
    // Go to brands page and get all brand links
    const brandsUrl = `https://${STORE_ID}.treez.io/onlinemenu/brands?customerType=ADULT`;
    console.log(`\n[1] Getting brand list from ${brandsUrl}`);
    await page.goto(brandsUrl, { waitUntil: 'networkidle2', timeout: 60000 });
    await sleep(3000);
    await bypassAgeGate(page);
    await sleep(2000);
    // The 142 items on /brands ARE brands (shown as product cards with brand info)
    // Get the brand names from the product hrefs (they contain brand name in URL)
    const brandInfo = await page.evaluate(() => {
      const brands: { name: string; slug: string }[] = [];
      const seen = new Set<string>();
      // Extract brand info from product URLs
      // URL pattern: /product/{brand}-{product}-{details}
      document.querySelectorAll('a[href*="/product/"]').forEach(a => {
        const href = a.getAttribute('href') || '';
        // Try to extract brand from URL - first segment before product name
        // (captures one or two hyphen-separated words after /product/).
        const match = href.match(/\/product\/([^-]+(?:-[^-]+)?)-/);
        if (match) {
          const slug = match[1];
          if (!seen.has(slug)) {
            seen.add(slug);
            // Also look for brand text in the card
            const brandEl = a.querySelector('[class*="brand"], [class*="Brand"]');
            const name = brandEl?.textContent?.trim() || slug;
            brands.push({ name, slug });
          }
        }
      });
      return brands;
    });
    console.log(`Found ${brandInfo.length} potential brands from product URLs`);
    console.log('Sample:', brandInfo.slice(0, 5));
    // Actually, let's look for brand page links directly
    console.log('\n[2] Looking for brand page links...');
    const brandLinks = await page.evaluate(() => {
      const links: { name: string; href: string }[] = [];
      // Look for links to /brand/ pages
      document.querySelectorAll('a[href*="/brand/"]').forEach(a => {
        const href = a.getAttribute('href') || '';
        const text = a.textContent?.trim() || '';
        if (href && !links.some(l => l.href === href)) {
          links.push({ name: text, href });
        }
      });
      return links;
    });
    console.log(`Found ${brandLinks.length} brand page links`);
    if (brandLinks.length > 0) {
      console.log('Sample:', brandLinks.slice(0, 10));
    }
    // If no brand links, try to find them in section headers
    console.log('\n[3] Looking for brand sections...');
    const brandSections = await page.evaluate(() => {
      const sections: { brandName: string; sampleProduct: string }[] = [];
      document.querySelectorAll('[class*="products_product__section"]').forEach(section => {
        const header = section.querySelector('h2, h3, [class*="heading"]');
        const brandName = header?.textContent?.trim() || '';
        const firstProduct = section.querySelector('a[href*="/product/"]');
        const productName = firstProduct?.querySelector('h5')?.textContent?.trim() ||
          firstProduct?.querySelector('img')?.getAttribute('alt') || '';
        if (brandName) {
          sections.push({ brandName, sampleProduct: productName });
        }
      });
      return sections;
    });
    console.log(`Found ${brandSections.length} brand sections`);
    brandSections.slice(0, 10).forEach(s => {
      console.log(` - Brand: "${s.brandName}" | Sample: "${s.sampleProduct}"`);
    });
    // Try visiting a brand page directly using the section name
    if (brandSections.length > 0) {
      console.log('\n[4] Testing brand page URLs...');
      // Try different URL patterns for first brand
      const testBrand = brandSections[0].brandName;
      const testSlug = testBrand.toLowerCase().replace(/[^a-z0-9]+/g, '-');
      const urlPatterns = [
        `/onlinemenu/brand/${encodeURIComponent(testBrand)}`,
        `/onlinemenu/brand/${testSlug}`,
        `/brand/${encodeURIComponent(testBrand)}`,
        `/brand/${testSlug}`,
      ];
      for (const path of urlPatterns) {
        const testUrl = `https://${STORE_ID}.treez.io${path}?customerType=ADULT`;
        // A failure on one pattern is logged and the next pattern is tried.
        try {
          console.log(` Trying: ${testUrl}`);
          await page.goto(testUrl, { waitUntil: 'networkidle2', timeout: 15000 });
          await sleep(2000);
          const products = await extractProducts(page);
          console.log(` Products found: ${products.length}`);
          if (products.length > 0) {
            console.log(` ✓ Working URL pattern: ${path}`);
            break;
          }
        } catch (e: any) {
          console.log(` Error: ${e.message.slice(0, 50)}`);
        }
      }
    }
    // Check if clicking on a brand section leads to a brand page
    console.log('\n[5] Checking if brand sections have clickable headers...');
    await page.goto(brandsUrl, { waitUntil: 'networkidle2', timeout: 60000 });
    await sleep(3000);
    const clickableHeaders = await page.evaluate(() => {
      const results: { text: string; tag: string; href: string; clickable: boolean }[] = [];
      document.querySelectorAll('[class*="products_product__section"] h2, [class*="products_product__section"] h3').forEach(header => {
        // A header counts as clickable if it sits inside, or contains, an anchor.
        const link = header.closest('a') || header.querySelector('a');
        const text = header.textContent?.trim() || '';
        const href = link?.getAttribute('href') || '';
        results.push({
          text,
          tag: header.tagName,
          href,
          clickable: !!link,
        });
      });
      return results;
    });
    console.log('Section headers:');
    clickableHeaders.slice(0, 10).forEach(h => {
      console.log(` [${h.tag}] "${h.text}" - ${h.clickable ? `Link: ${h.href}` : 'Not clickable'}`);
    });
  } finally {
    await browser.close();
  }
}
main().catch(console.error);

View File

@@ -0,0 +1,183 @@
/**
* Detailed brand section analysis
*/
import puppeteer, { Page } from 'puppeteer';
// Treez store subdomain; interpolated into the https://<STORE_ID>.treez.io/... URL built in main().
const STORE_ID = 'best';
/**
 * Pause the caller for a fixed amount of time.
 *
 * @param ms Delay in milliseconds.
 * @returns A promise that resolves (with no value) once the timer fires.
 */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((wake) => {
    setTimeout(wake, ms);
  });
}
/**
 * Dismisses the age-gate modal when it is present on the page.
 *
 * Looks up the modal by its `data-testid`; when found, clicks the submit
 * button (if that exists) and then waits two seconds. Does nothing when the
 * modal is absent.
 *
 * @param page Puppeteer page to inspect.
 */
async function bypassAgeGate(page: Page): Promise<void> {
  const modal = await page.$('[data-testid="age-gate-modal"]');
  if (!modal) {
    return;
  }

  console.log(' Age gate detected, bypassing...');
  const submitButton = await page.$('[data-testid="age-gate-submit-button"]');
  if (submitButton) {
    await submitButton.click();
  }
  await sleep(2000);
}
/**
 * Loads the store's /brands page, scrolls until the document height stops
 * growing, then prints three diagnostics:
 *   - every h2/h3 heading on the page,
 *   - the number of product links inside each brand section,
 *   - a per-brand product count derived from each product card.
 *
 * The browser is closed in a `finally` block so that a navigation or
 * evaluation failure does not leave the launched browser running.
 */
async function main() {
  console.log('='.repeat(60));
  console.log('Detailed Brand Section Analysis');
  console.log('='.repeat(60));

  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // Abort image/font/media requests; only the DOM is inspected below.
    await page.setRequestInterception(true);
    page.on('request', (req) => {
      if (['image', 'font', 'media'].includes(req.resourceType())) {
        req.abort();
      } else {
        req.continue();
      }
    });

    const url = `https://${STORE_ID}.treez.io/onlinemenu/brands?customerType=ADULT`;
    console.log(`\nNavigating to ${url}`);
    await page.goto(url, { waitUntil: 'networkidle2', timeout: 60000 });
    await sleep(3000);
    await bypassAgeGate(page);
    await sleep(2000);

    // Scroll multiple times to load all content. Stop early once the page
    // height has been unchanged for three consecutive scrolls.
    console.log('\n[1] Scrolling to load all content...');
    let previousHeight = 0;
    let unchangedScrolls = 0;
    for (let i = 0; i < 30; i++) {
      await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
      await sleep(1500);
      const currentHeight = await page.evaluate(() => document.body.scrollHeight);
      const productCount = await page.evaluate(
        () => document.querySelectorAll('a[href*="/product/"]').length
      );
      console.log(` Scroll ${i + 1}: height=${currentHeight}, products=${productCount}`);
      if (currentHeight === previousHeight) {
        unchangedScrolls++;
        if (unchangedScrolls >= 3) break;
      } else {
        unchangedScrolls = 0;
      }
      previousHeight = currentHeight;
    }

    // Look at ALL h2/h3 headers on page
    console.log('\n[2] Finding ALL h2/h3 headers on page...');
    const headers = await page.evaluate(() => {
      const results: { tag: string; text: string; parentClass: string }[] = [];
      document.querySelectorAll('h2, h3').forEach((el: Element) => {
        results.push({
          tag: el.tagName,
          text: el.textContent?.trim().slice(0, 80) || '',
          parentClass: el.parentElement?.className?.slice(0, 50) || '',
        });
      });
      return results;
    });
    console.log(`Found ${headers.length} headers:`);
    for (const h of headers) {
      console.log(` [${h.tag}] "${h.text}"`);
    }

    // Get products grouped by their section heading
    console.log('\n[3] Getting products per section...');
    const sectionProducts = await page.evaluate(() => {
      const results: { heading: string; products: number }[] = [];
      document
        .querySelectorAll('[class*="products_product__section"]')
        .forEach((section: Element) => {
          const heading = section.querySelector('h2, h3');
          results.push({
            heading: heading?.textContent?.trim() || 'Unknown',
            products: section.querySelectorAll('a[href*="/product/"]').length,
          });
        });
      return results;
    });
    console.log(`Found ${sectionProducts.length} brand sections:`);
    let totalProducts = 0;
    for (const s of sectionProducts) {
      console.log(` ${s.heading}: ${s.products} products`);
      totalProducts += s.products;
    }
    console.log(`\nTotal products across all sections: ${totalProducts}`);

    // Also extract brand from each product's URL/card
    console.log('\n[4] Extracting brand from product URLs/cards...');
    const brandCounts = await page.evaluate(() => {
      const byBrand: Record<string, number> = {};
      const seen = new Set<string>();
      document.querySelectorAll('a[href*="/product/"]').forEach((a: Element) => {
        // Products are de-duplicated by the image alt text.
        const name = a.querySelector('img')?.getAttribute('alt') || '';
        if (!name || seen.has(name)) return;
        seen.add(name);

        // Brand guess: the first short span/p text in the card that is
        // neither the product name nor a price.
        let brand = '';
        a.querySelectorAll('span, p').forEach((el: Element) => {
          const text = el.textContent?.trim() || '';
          if (!brand && text && text.length < 50 && text !== name && !text.includes('$')) {
            brand = text;
          }
        });

        // Fallback: get brand from parent section heading
        if (!brand) {
          const section = a.closest('[class*="products_product__section"]');
          const heading = section?.querySelector('h2, h3');
          brand = heading?.textContent?.trim() || 'Unknown';
        }
        byBrand[brand] = (byBrand[brand] || 0) + 1;
      });
      return byBrand;
    });

    console.log('Products by brand:');
    Object.entries(brandCounts)
      .sort((a, b) => b[1] - a[1])
      .forEach(([brand, count]) => {
        console.log(` ${brand}: ${count}`);
      });
    const uniqueTotal = Object.values(brandCounts).reduce((sum, c) => sum + c, 0);
    console.log(`\nTotal unique products: ${uniqueTotal}`);
  } finally {
    // Always release the browser, including when a step above threw.
    await browser.close();
  }
}

main().catch(console.error);

View File

@@ -0,0 +1,257 @@
/**
* Test Treez brand-based product extraction
* 1. Load /brands page
* 2. Click "load more brands" to get all brands
* 3. Extract brand URLs
* 4. Visit each brand and extract products
*/
import puppeteer, { Page } from 'puppeteer';
// Treez store subdomain; interpolated into the https://<STORE_ID>.treez.io... URLs built in main().
const STORE_ID = 'best';
/**
 * Returns a promise that settles after `ms` milliseconds.
 *
 * @param ms How long to wait, in milliseconds.
 */
async function sleep(ms: number): Promise<void> {
  const timer = new Promise<void>((finish) => {
    setTimeout(finish, ms);
  });
  return timer;
}
/**
 * Clicks through the age-gate modal if the page is showing one.
 *
 * When the modal element exists, the submit button is clicked (if found) and
 * the function then waits two seconds. When it does not exist the function
 * returns immediately.
 *
 * @param page Puppeteer page to check.
 */
async function bypassAgeGate(page: Page): Promise<void> {
  const gate = await page.$('[data-testid="age-gate-modal"]');
  if (gate === null) {
    return;
  }

  console.log('[AgeGate] Detected, bypassing...');
  const confirm = await page.$('[data-testid="age-gate-submit-button"]');
  if (confirm) {
    await confirm.click();
  }
  await sleep(2000);
}
/**
 * Logs diagnostics about controls that might expand the brand list.
 *
 * Despite its name, this function does not click or select anything. It only
 * inspects the current DOM and prints:
 *   - every <select> element with its option labels,
 *   - buttons/links whose text mentions "load more", "show all" or "view all",
 *   - up to 10 elements whose class name contains "brand" or "select".
 *
 * @param page Puppeteer page already navigated to the brands listing.
 */
async function loadAllBrands(page: Page): Promise<void> {
  console.log('[Brands] Looking for "load more" option...');

  // Look for select/dropdown with "load more" or "all brands" option
  const selectInfo = await page.evaluate(() =>
    Array.from(document.querySelectorAll('select')).map((sel, i) => ({
      selector: `select:nth-of-type(${i + 1})`,
      options: Array.from(sel.options).map(o => o.text),
    }))
  );
  console.log('[Brands] Found selects:', JSON.stringify(selectInfo, null, 2));

  // Look for any button or link with "load more" or "show all"
  const loadMoreButtons = await page.evaluate(() => {
    const phrases = ['load more', 'show all', 'view all'];
    return Array.from(document.querySelectorAll('button, a, [role="button"]'))
      .filter(el => {
        const lowered = el.textContent?.toLowerCase() || '';
        return phrases.some(phrase => lowered.includes(phrase));
      })
      .map(el => ({ text: el.textContent?.trim() || '', tag: el.tagName }));
  });
  console.log('[Brands] Found load more buttons:', loadMoreButtons);

  // Try to find and interact with the brands dropdown
  // First, let's see all interactive elements with "brand" in them
  const brandElements = await page.evaluate(() => {
    const found: { tag: string; class: string; text: string }[] = [];
    for (const el of Array.from(document.querySelectorAll('*'))) {
      const className = el.className?.toString?.() || '';
      const lowered = className.toLowerCase();
      if (!lowered.includes('brand') && !lowered.includes('select')) {
        continue;
      }
      const text = el.textContent?.trim().slice(0, 100) || '';
      found.push({
        tag: el.tagName,
        class: className.slice(0, 100),
        text: text.slice(0, 50),
      });
    }
    return found.slice(0, 20);
  });
  console.log('[Brands] Brand-related elements:', JSON.stringify(brandElements.slice(0, 10), null, 2));
}
/**
 * Collects brand links from the current page.
 *
 * Four selectors are tried in order; every matched element that has both an
 * `href` attribute and non-empty trimmed text contributes one entry. Entries
 * are de-duplicated by `href`, keeping the first occurrence.
 *
 * @param page Puppeteer page to scan.
 * @returns The brand names and their (possibly relative) hrefs, in discovery order.
 */
async function extractBrandLinks(page: Page): Promise<{ name: string; url: string }[]> {
  return page.evaluate(() => {
    // Look for brand cards/links
    const candidateSelectors = [
      'a[href*="/brand/"]',
      'a[href*="/brands/"]',
      '[class*="brand"] a',
      '[class*="Brand"] a',
    ];
    const collected: { name: string; url: string }[] = [];
    const knownUrls = new Set<string>();

    for (const selector of candidateSelectors) {
      for (const el of Array.from(document.querySelectorAll(selector))) {
        const href = el.getAttribute('href');
        const name = el.textContent?.trim() || '';
        if (!href || !name || knownUrls.has(href)) {
          continue;
        }
        knownUrls.add(href);
        collected.push({ name, url: href });
      }
    }
    return collected;
  });
}
/**
 * Scrolls the current brand page to the bottom repeatedly, then scrapes the
 * product cards.
 *
 * Scrolling stops after 20 passes, or earlier once the document height has
 * been unchanged for three consecutive checks. Products are de-duplicated by
 * name. Each product has:
 *   - `productId`: the path segment after `/product/` in the card link, or a
 *     `treez_`-prefixed slug of the name when no such link is found,
 *   - `name`: trimmed text of the name element,
 *   - `price`: first `$` amount in the price element, or `null`.
 *
 * @param page Puppeteer page already navigated to a brand page.
 */
async function extractProductsFromBrandPage(page: Page): Promise<any[]> {
  // Scroll to load all products
  let lastHeight = 0;
  let stableChecks = 0;
  for (let pass = 0; pass < 20; pass++) {
    const height = await page.evaluate(() => document.body.scrollHeight);
    if (height !== lastHeight) {
      stableChecks = 0;
    } else {
      stableChecks += 1;
      if (stableChecks >= 3) {
        break;
      }
    }
    await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
    await sleep(1000);
    lastHeight = height;
  }

  // Extract products
  return page.evaluate(() => {
    const collected: any[] = [];
    const namesSeen = new Set<string>();

    for (const card of Array.from(document.querySelectorAll('[class*="product_product__"]'))) {
      const name =
        card.querySelector('[class*="product__name"], [class*="name__"]')?.textContent?.trim() || '';
      if (!name || namesSeen.has(name)) {
        continue;
      }
      namesSeen.add(name);

      const priceText = card.querySelector('[class*="price"]')?.textContent || '';
      const priceMatch = priceText.match(/\$(\d+(?:\.\d{2})?)/);
      let price: number | null = null;
      if (priceMatch) {
        price = parseFloat(priceMatch[1]);
      }

      let productId = '';
      const link = card.querySelector('a[href*="/product/"]');
      if (link) {
        const idMatch = (link.getAttribute('href') || '').match(/\/product\/([^\/?]+)/);
        if (idMatch) {
          productId = idMatch[1];
        }
      }

      collected.push({
        productId: productId || `treez_${name.replace(/\s+/g, '_').toLowerCase().slice(0, 30)}`,
        name,
        price,
      });
    }
    return collected;
  });
}
/**
 * Exercises brand-based extraction end to end: opens the brands page, takes a
 * screenshot, logs brand-selection diagnostics, extracts brand links, then
 * visits up to the first three brands and counts their products.
 *
 * The store-wide estimate is extrapolated from the number of brands actually
 * sampled (which is fewer than three when the page lists fewer brands).
 *
 * Errors are logged, and the browser is always closed.
 */
async function main() {
  console.log('='.repeat(60));
  console.log('Testing Treez Brand-Based Extraction');
  console.log('='.repeat(60));

  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  try {
    // Page setup lives inside the try so a failure here still closes the browser.
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // Block images
    await page.setRequestInterception(true);
    page.on('request', (req) => {
      if (['image', 'font', 'media'].includes(req.resourceType())) {
        req.abort();
      } else {
        req.continue();
      }
    });

    // Navigate to brands page
    const brandsUrl = `https://${STORE_ID}.treez.io/onlinemenu/brands?customerType=ADULT`;
    console.log(`\n[1] Navigating to ${brandsUrl}`);
    await page.goto(brandsUrl, { waitUntil: 'networkidle2', timeout: 60000 });
    await sleep(2000);
    await bypassAgeGate(page);
    await sleep(1000);

    // Screenshot to see what we're working with
    await page.screenshot({ path: '/tmp/treez-brands-page.png', fullPage: false });
    console.log('[1] Screenshot saved to /tmp/treez-brands-page.png');

    // Try to load all brands
    console.log('\n[2] Exploring brand selection options...');
    await loadAllBrands(page);

    // Extract brand links
    console.log('\n[3] Extracting brand links...');
    const brandLinks = await extractBrandLinks(page);
    console.log(`Found ${brandLinks.length} brand links:`);
    brandLinks.slice(0, 10).forEach(b => console.log(` - ${b.name}: ${b.url}`));

    // If we found brand links, visit a couple to test
    if (brandLinks.length > 0) {
      console.log('\n[4] Testing product extraction from first 3 brands...');
      const sampledBrands = brandLinks.slice(0, 3);
      let totalProducts = 0;

      for (const brand of sampledBrands) {
        // Brand hrefs may be relative; resolve them against the store host.
        const brandUrl = brand.url.startsWith('http')
          ? brand.url
          : `https://${STORE_ID}.treez.io${brand.url}`;
        console.log(`\n Visiting brand: ${brand.name}`);
        console.log(` URL: ${brandUrl}`);
        await page.goto(brandUrl, { waitUntil: 'networkidle2', timeout: 30000 });
        await sleep(2000);
        const products = await extractProductsFromBrandPage(page);
        console.log(` Products found: ${products.length}`);
        totalProducts += products.length;
      }

      // Extrapolate from the brands actually visited, not a fixed 3.
      const averagePerBrand = totalProducts / sampledBrands.length;
      console.log(`\n[5] Summary from ${sampledBrands.length} brands: ${totalProducts} products`);
      console.log(
        `Estimated total (${brandLinks.length} brands): ~${Math.round(averagePerBrand * brandLinks.length)} products`
      );
    }
  } catch (error: unknown) {
    console.error('Error:', error instanceof Error ? error.message : String(error));
  } finally {
    await browser.close();
  }
}

main().catch(console.error);

View File

@@ -0,0 +1,113 @@
import puppeteer from 'puppeteer';
/**
 * Timer-backed delay.
 *
 * @param ms Number of milliseconds to wait before the returned promise resolves.
 */
async function sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(() => done(), ms);
  });
}
/**
 * Loads the shop page while recording every outgoing request whose URL
 * contains "treez.io" or "gapcommerce", then prints:
 *   - for each recorded request, the headers whose names suggest credentials
 *     (name contains "auth", "token", "key" or "api"),
 *   - the full header set for requests to headless.treez.io,
 *   - selected Next.js data and any window globals collected from the page.
 *
 * NOTE: this intentionally prints credential-like header values to stdout;
 * treat the output as sensitive.
 *
 * The browser is always closed, even when a step throws.
 */
async function main() {
  type CapturedRequest = {
    url: string;
    method: string;
    headers: Record<string, string>;
    postData: string | undefined;
  };

  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // Capture request headers for API calls
    const apiRequests: CapturedRequest[] = [];
    page.on('request', (req) => {
      const url = req.url();
      if (url.includes('treez.io') || url.includes('gapcommerce')) {
        apiRequests.push({
          url,
          method: req.method(),
          headers: req.headers(),
          postData: req.postData(),
        });
      }
    });

    console.log('Loading page to capture API auth headers...\n');
    await page.goto('https://shop.bestdispensary.com/shop', {
      waitUntil: 'networkidle2',
      timeout: 60000,
    });
    await sleep(3000);

    // Bypass age gate
    const ageGate = await page.$('[data-testid="age-gate-modal"]');
    if (ageGate) {
      const btn = await page.$('[data-testid="age-gate-submit-button"]');
      if (btn) await btn.click();
      await sleep(2000);
    }

    console.log('=== API REQUESTS WITH HEADERS ===\n');
    // 'authorization' and 'x-api-key' are already matched by 'auth' / 'key'.
    const sensitiveFragments = ['auth', 'token', 'key', 'api'];
    apiRequests.forEach((req, i) => {
      console.log((i + 1) + '. ' + req.method + ' ' + req.url.slice(0, 100));
      console.log(' Headers:');
      for (const [k, v] of Object.entries(req.headers)) {
        const lowered = k.toLowerCase();
        if (sensitiveFragments.some(fragment => lowered.includes(fragment))) {
          console.log(' >>> ' + k + ': ' + v);
        }
      }
      // Show all headers for treez.io requests
      if (req.url.includes('headless.treez.io')) {
        console.log(' ALL HEADERS:');
        for (const [k, v] of Object.entries(req.headers)) {
          console.log(' ' + k + ': ' + String(v).slice(0, 80));
        }
      }
      console.log('');
    });

    // Also check for API keys in page scripts
    console.log('=== CHECKING FOR API KEYS IN PAGE ===\n');
    const pageData = await page.evaluate(() => {
      const data: any = {};
      // Check window object for API keys
      const win = window as any;
      if (win.__NEXT_DATA__) {
        data.nextData = win.__NEXT_DATA__;
      }
      // Check for any global config
      if (win.config || win.CONFIG) {
        data.config = win.config || win.CONFIG;
      }
      // Look for treez-related globals
      Object.keys(win).forEach(key => {
        const lowered = key.toLowerCase();
        if (lowered.includes('treez') || lowered.includes('api') || lowered.includes('config')) {
          try {
            data[key] = JSON.stringify(win[key]).slice(0, 500);
          } catch {
            // Best-effort: globals that cannot be serialized are skipped.
          }
        }
      });
      return data;
    });

    if (pageData.nextData?.props?.pageProps) {
      console.log('Next.js pageProps keys: ' + Object.keys(pageData.nextData.props.pageProps).join(', '));
    }
    if (pageData.nextData?.runtimeConfig) {
      console.log('Runtime config: ' + JSON.stringify(pageData.nextData.runtimeConfig).slice(0, 500));
    }

    // Report the globals gathered above; previously they were collected and discarded.
    for (const [key, value] of Object.entries(pageData)) {
      if (key === 'nextData') continue;
      const rendered = typeof value === 'string' ? value : JSON.stringify(value);
      console.log('window.' + key + ': ' + String(rendered).slice(0, 500));
    }
  } finally {
    await browser.close();
  }
}

// Surface failures instead of leaving an unhandled promise rejection.
main().catch(console.error);

View File

@@ -0,0 +1,100 @@
import puppeteer from 'puppeteer';
import fs from 'fs';
/**
 * Wait helper used between page interactions.
 *
 * @param ms Milliseconds to wait.
 * @returns Promise resolved with no value after the delay.
 */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((release) => setTimeout(release, ms));
}
/**
 * Captures product documents from the storefront's search API responses.
 *
 * Listens for HTTP 200 responses whose URL contains
 * `gapcommerceapi.com/product/search`, accumulates `hits.hits[*]._source`,
 * clicks the "Load More" button up to 50 times, then de-duplicates the
 * products (by `id`, `productId` or `name`), prints the field names and a
 * sample, and writes the unique list to /tmp/treez-products.json.
 *
 * The browser is always closed, even when a step throws.
 */
async function main() {
  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // Capture ES API responses
    let allProductData: any[] = [];
    page.on('response', async (res) => {
      const url = res.url();
      if (!url.includes('gapcommerceapi.com/product/search') || res.status() !== 200) {
        return;
      }
      try {
        const json = await res.json();
        const products = json.hits?.hits?.map((h: any) => h._source) || [];
        allProductData = allProductData.concat(products);
        console.log('Captured ' + products.length + ' products (total: ' + allProductData.length + ')');
      } catch (err: unknown) {
        // Still best-effort, but report the failure instead of hiding it.
        console.log('Parse error: ' + (err instanceof Error ? err.message : String(err)));
      }
    });

    console.log('Loading /shop page to capture product data...\n');
    await page.goto('https://shop.bestdispensary.com/shop', {
      waitUntil: 'networkidle2',
      timeout: 60000,
    });
    await sleep(3000);

    // Bypass age gate
    const ageGate = await page.$('[data-testid="age-gate-modal"]');
    if (ageGate) {
      const btn = await page.$('[data-testid="age-gate-submit-button"]');
      if (btn) await btn.click();
      await sleep(2000);
    }

    // Click load more many times to get all products
    console.log('\nClicking Load More to capture all products...');
    for (let i = 0; i < 50; i++) {
      const btn = await page.$('button.collection__load-more');
      if (!btn) {
        console.log('No more Load More button');
        break;
      }
      // A zero-sized bounding box is treated as "not visible".
      const isVisible = await page.evaluate((b) => {
        const rect = b.getBoundingClientRect();
        return rect.width > 0 && rect.height > 0;
      }, btn);
      if (!isVisible) {
        console.log('Load More not visible');
        break;
      }
      await btn.click();
      await sleep(1500);
      console.log('Click ' + (i + 1) + ': ' + allProductData.length + ' total products');
    }

    console.log('\n=== RESULTS ===\n');
    console.log('Total products captured: ' + allProductData.length);
    if (allProductData.length > 0) {
      // Dedupe by some ID
      const seen = new Set<unknown>();
      const unique = allProductData.filter(p => {
        const id = p.id || p.productId || p.name;
        if (seen.has(id)) return false;
        seen.add(id);
        return true;
      });
      console.log('Unique products: ' + unique.length);
      console.log('\n=== PRODUCT FIELDS ===\n');
      console.log(Object.keys(unique[0]).sort().join('\n'));
      console.log('\n=== SAMPLE PRODUCT ===\n');
      console.log(JSON.stringify(unique[0], null, 2));

      // Save to file
      fs.writeFileSync('/tmp/treez-products.json', JSON.stringify(unique, null, 2));
      console.log('\nSaved to /tmp/treez-products.json');
    }
  } finally {
    await browser.close();
  }
}

// Surface failures instead of leaving an unhandled promise rejection.
main().catch(console.error);

View File

@@ -0,0 +1,88 @@
import puppeteer from 'puppeteer';
import fs from 'fs';
/**
 * Delay execution of the awaiting caller.
 *
 * @param ms Duration of the delay in milliseconds.
 */
async function sleep(ms: number): Promise<void> {
  const delay = new Promise<void>((resume) => setTimeout(resume, ms));
  await delay;
}
/**
 * Captures product documents from the storefront's search API by reading each
 * matching response as text and parsing it.
 *
 * Logs the status of every response whose URL contains
 * `gapcommerceapi.com/product/search`; for HTTP 200 responses it accumulates
 * `hits.hits[*]._source`. After loading the page it scrolls to the bottom 20
 * times, attempting a "load more" click after each scroll, then prints the
 * field names and a sample and writes everything captured to
 * /tmp/treez-products.json.
 *
 * The browser is always closed, even when a step throws.
 */
async function main() {
  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // Capture ES API responses as text
    let allProducts: any[] = [];
    page.on('response', async (res) => {
      const url = res.url();
      if (!url.includes('gapcommerceapi.com/product/search')) {
        return;
      }
      console.log('ES Response: status=' + res.status());
      if (res.status() !== 200) {
        return;
      }
      try {
        const text = await res.text();
        console.log('Response length: ' + text.length);
        const json = JSON.parse(text);
        const products = json.hits?.hits?.map((h: any) => h._source) || [];
        allProducts = allProducts.concat(products);
        console.log('Got ' + products.length + ' products (total: ' + allProducts.length + ')');
      } catch (err: unknown) {
        console.log('Parse error: ' + (err instanceof Error ? err.message : String(err)));
      }
    });

    console.log('Loading page...\n');
    await page.goto('https://shop.bestdispensary.com/shop', {
      waitUntil: 'networkidle2',
      timeout: 60000,
    });
    await sleep(5000);

    // Bypass age gate
    const ageGate = await page.$('[data-testid="age-gate-modal"]');
    if (ageGate) {
      console.log('Bypassing age gate...');
      const btn = await page.$('[data-testid="age-gate-submit-button"]');
      if (btn) await btn.click();
      await sleep(3000);
    }

    // Wait for initial products to load
    await sleep(3000);
    console.log('\nInitial products captured: ' + allProducts.length);

    // Try scrolling to trigger more loads
    console.log('\nScrolling...');
    for (let i = 0; i < 20; i++) {
      await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
      await sleep(1500);
      // Also click load more if present
      try {
        await page.click('button.collection__load-more');
        console.log('Clicked load more');
      } catch {
        // Deliberately ignored: the click is a best-effort attempt each pass.
      }
    }

    console.log('\n=== FINAL RESULTS ===\n');
    console.log('Total products: ' + allProducts.length);
    if (allProducts.length > 0) {
      console.log('\nFields: ' + Object.keys(allProducts[0]).sort().join(', '));
      console.log('\nSample:\n' + JSON.stringify(allProducts[0], null, 2));
      fs.writeFileSync('/tmp/treez-products.json', JSON.stringify(allProducts, null, 2));
      console.log('\nSaved to /tmp/treez-products.json');
    }
  } finally {
    await browser.close();
  }
}

// Surface failures instead of leaving an unhandled promise rejection.
main().catch(console.error);

View File

@@ -0,0 +1,192 @@
/**
* Navigate to each category page and count products
*/
import puppeteer, { Page } from 'puppeteer';
// Treez store subdomain; interpolated into the https://<STORE_ID>.treez.io/... URLs built in main().
const STORE_ID = 'best';
/**
 * Promise-based wrapper around `setTimeout`.
 *
 * @param ms Milliseconds before the returned promise resolves.
 */
async function sleep(ms: number): Promise<void> {
  return new Promise<void>((resolveAfterDelay) => {
    setTimeout(resolveAfterDelay, ms);
  });
}
/**
 * Dismisses the age-gate modal if one is present.
 *
 * When the modal is found, clicks its submit button (if that exists) and
 * waits two seconds; otherwise returns without doing anything.
 *
 * @param page Puppeteer page to check.
 */
async function bypassAgeGate(page: Page): Promise<void> {
  const modal = await page.$('[data-testid="age-gate-modal"]');
  if (!modal) {
    return;
  }

  const submit = await page.$('[data-testid="age-gate-submit-button"]');
  if (submit) {
    await submit.click();
  }
  await sleep(2000);
}
/**
 * Repeatedly scrolls to the bottom of the page so lazily loaded content has a
 * chance to appear.
 *
 * Runs at most 50 passes, waiting 1.5 s after each scroll. Stops early once
 * the document height has matched the previous pass three times in a row.
 *
 * @param page Puppeteer page to scroll.
 */
async function scrollToLoadAll(page: Page): Promise<void> {
  const maxPasses = 50;
  const stableLimit = 3;
  let lastHeight = 0;
  let stablePasses = 0;

  for (let pass = 0; pass < maxPasses; pass++) {
    const height = await page.evaluate(() => document.body.scrollHeight);
    if (height !== lastHeight) {
      stablePasses = 0;
    } else {
      stablePasses += 1;
      if (stablePasses >= stableLimit) {
        break;
      }
    }
    await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
    await sleep(1500);
    lastHeight = height;
  }
}
/**
 * Counts distinct products currently present in the DOM.
 *
 * A product is any anchor whose href contains `/product/`. Its name is the
 * `alt` of the first image inside it, falling back to the trimmed text of an
 * `h5`. Anchors without a name are ignored, and names are counted once.
 *
 * @param page Puppeteer page to inspect.
 * @returns Number of distinct product names found.
 */
async function countProducts(page: Page): Promise<number> {
  return page.evaluate(() => {
    const names = Array.from(document.querySelectorAll('a[href*="/product/"]'))
      .map(anchor => {
        const altText = anchor.querySelector('img')?.getAttribute('alt');
        return altText || anchor.querySelector('h5')?.textContent?.trim() || '';
      })
      .filter(label => Boolean(label));
    return new Set<string>(names).size;
  });
}
/**
 * Probes category pages of the store and counts the products on each.
 *
 * For every category, three URL patterns are tried in order; the first one
 * that shows products is scrolled to the end and its final count recorded.
 * The main shop page is then counted the same way, and the navigation links
 * on the home page are listed.
 *
 * The summary total covers category pages only. The shop page count is printed
 * separately and left out of the total so it does not inflate a figure
 * labelled "across categories".
 *
 * The browser is always closed, even when a step throws.
 */
async function main() {
  console.log('='.repeat(60));
  console.log('Testing Treez Category Pages');
  console.log('='.repeat(60));

  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });
    await page.setRequestInterception(true);
    page.on('request', (req) => {
      if (['image', 'font', 'media'].includes(req.resourceType())) {
        req.abort();
      } else {
        req.continue();
      }
    });

    // Categories from the nav menu
    const categories = [
      'cartridges',
      'flower',
      'pre-rolls',
      'edibles',
      'extracts',
      'tinctures',
      'capsules',
      'topicals',
      'accessories',
      'drink',
    ];
    const results: { category: string; products: number }[] = [];
    let shopProducts: number | null = null;
    let ageGateBypassed = false;

    for (const category of categories) {
      // Try different URL patterns
      const urls = [
        `https://${STORE_ID}.treez.io/onlinemenu/${category}?customerType=ADULT`,
        `https://${STORE_ID}.treez.io/onlinemenu/category/${category}?customerType=ADULT`,
        `https://${STORE_ID}.treez.io/${category}?customerType=ADULT`,
      ];
      for (const url of urls) {
        try {
          console.log(`\nTrying: ${url}`);
          await page.goto(url, { waitUntil: 'networkidle2', timeout: 30000 });
          await sleep(2000);
          // The age gate is only handled on the first successful navigation.
          if (!ageGateBypassed) {
            await bypassAgeGate(page);
            ageGateBypassed = true;
            await sleep(1000);
          }
          const initialCount = await countProducts(page);
          if (initialCount > 0) {
            console.log(` Initial: ${initialCount} products`);
            await scrollToLoadAll(page);
            const finalCount = await countProducts(page);
            console.log(` After scroll: ${finalCount} products`);
            results.push({ category, products: finalCount });
            break; // Found working URL, move to next category
          } else {
            console.log(` No products found`);
          }
        } catch (error: unknown) {
          console.log(` Error: ${error instanceof Error ? error.message : String(error)}`);
        }
      }
    }

    // Also try the main shop page
    console.log('\nTrying main shop page...');
    try {
      const shopUrl = `https://${STORE_ID}.treez.io/onlinemenu/shop?customerType=ADULT`;
      await page.goto(shopUrl, { waitUntil: 'networkidle2', timeout: 30000 });
      await sleep(2000);
      const initialCount = await countProducts(page);
      console.log(`Shop page initial: ${initialCount} products`);
      if (initialCount > 0) {
        await scrollToLoadAll(page);
        const finalCount = await countProducts(page);
        console.log(`Shop page after scroll: ${finalCount} products`);
        shopProducts = finalCount;
      }
    } catch (error: unknown) {
      console.log(`Shop page error: ${error instanceof Error ? error.message : String(error)}`);
    }

    // Try to find and click on category links from the nav
    console.log('\n[Alternative] Trying to find nav category links...');
    const homeUrl = `https://${STORE_ID}.treez.io/onlinemenu/?customerType=ADULT`;
    await page.goto(homeUrl, { waitUntil: 'networkidle2', timeout: 30000 });
    await sleep(3000);
    await bypassAgeGate(page);
    await sleep(1000);
    const navLinks = await page.evaluate(() => {
      const links: { text: string; href: string }[] = [];
      document.querySelectorAll('nav a, [class*="nav"] a').forEach(a => {
        const text = a.textContent?.trim() || '';
        const href = a.getAttribute('href') || '';
        if (href && text && !links.some(l => l.href === href)) {
          links.push({ text, href });
        }
      });
      return links;
    });
    console.log('Nav links found:');
    navLinks.forEach(l => console.log(` - "${l.text}" → ${l.href}`));

    // Summary
    console.log('\n' + '='.repeat(60));
    console.log('Summary');
    console.log('='.repeat(60));
    let total = 0;
    for (const r of results) {
      console.log(`${r.category}: ${r.products} products`);
      total += r.products;
    }
    if (shopProducts !== null) {
      console.log(`shop: ${shopProducts} products (not included in total)`);
    }
    console.log(`\nTotal across categories: ${total} products`);
  } finally {
    await browser.close();
  }
}

main().catch(console.error);

View File

@@ -0,0 +1,178 @@
/**
* ============================================================
* TREEZ CLIENT TEST SCRIPT
* ============================================================
*
* Tests the Treez CDP interception client using Best Dispensary.
*
* This verifies:
* - Stealth plugin bypasses headless detection
* - CDP intercepts Elasticsearch API responses
* - Products are captured and normalized correctly
* - Inventory data is available
*
* Usage: npx ts-node scripts/test-treez-client.ts
*
* ============================================================
*/
import { fetchProductsFromUrl } from '../src/platforms/treez';
// Storefront URL that main() passes to fetchProductsFromUrl().
const TEST_URL = 'https://shop.bestdispensary.com/shop';
/**
 * Runs `fetchProductsFromUrl` against TEST_URL and prints a report: totals, a
 * sample raw and normalized product, brand and category breakdowns, inventory
 * statistics, and THC/CBD/image coverage.
 *
 * Exits with code 1 when no products are captured, when none are normalized,
 * or when any step throws.
 *
 * Coverage percentages are computed against the number of normalized products
 * (the same list the counts come from) and are reported as 0 when that list
 * is empty.
 */
async function main() {
  // Prints a section header: blank line, rule, title, rule.
  const banner = (title: string): void => {
    console.log('\n' + '='.repeat(60));
    console.log(title);
    console.log('='.repeat(60));
  };

  console.log('='.repeat(60));
  console.log('TREEZ CLIENT TEST - CDP INTERCEPTION');
  console.log('='.repeat(60));
  console.log(`URL: ${TEST_URL}`);
  console.log('Method: Puppeteer + Stealth + CDP response capture');
  console.log('');

  try {
    console.log('[Starting] Launching browser with Stealth plugin...\n');
    const result = await fetchProductsFromUrl(TEST_URL);

    banner('RESULTS');
    console.log(`Total products: ${result.totalCaptured}`);
    console.log(`Store ID: ${result.storeId || 'N/A (custom domain)'}`);
    console.log(`Source URL: ${result.sourceUrl}`);
    console.log(`Fetched at: ${result.fetchedAt.toISOString()}`);

    if (result.products.length === 0) {
      console.log('\n[WARNING] No products captured!');
      console.log('This could mean:');
      console.log(' - Stealth plugin is not bypassing detection');
      console.log(' - CDP is not intercepting the correct URLs');
      console.log(' - Page structure has changed');
      process.exit(1);
    }

    // Without this guard, reading fields of normalized[0] below would throw.
    const normalizedCount = result.normalized.length;
    if (normalizedCount === 0) {
      console.log('\n[WARNING] Products were captured but none were normalized!');
      process.exit(1);
    }
    const percentOf = (count: number): number =>
      normalizedCount > 0 ? Math.round((count / normalizedCount) * 100) : 0;

    // Show sample raw product
    banner('SAMPLE RAW PRODUCT (from Elasticsearch)');
    const raw = result.products[0];
    console.log(JSON.stringify({
      id: raw.id,
      name: raw.name,
      menuTitle: raw.menuTitle,
      brand: raw.brand,
      category: raw.category,
      subtype: raw.subtype,
      status: raw.status,
      availableUnits: raw.availableUnits,
      customMinPrice: raw.customMinPrice,
      customMaxPrice: raw.customMaxPrice,
      isActive: raw.isActive,
      isAboveThreshold: raw.isAboveThreshold,
    }, null, 2));

    // Show sample normalized product
    banner('SAMPLE NORMALIZED PRODUCT');
    const normalized = result.normalized[0];
    console.log(JSON.stringify({
      id: normalized.id,
      name: normalized.name,
      brand: normalized.brand,
      category: normalized.category,
      subtype: normalized.subtype,
      price: normalized.price,
      priceMin: normalized.priceMin,
      priceMax: normalized.priceMax,
      discountedPrice: normalized.discountedPrice,
      discountPercent: normalized.discountPercent,
      availableUnits: normalized.availableUnits,
      inStock: normalized.inStock,
      thcPercent: normalized.thcPercent,
      cbdPercent: normalized.cbdPercent,
      strainType: normalized.strainType,
      effects: normalized.effects,
      flavors: normalized.flavors,
      imageUrl: normalized.imageUrl,
      images: normalized.images?.slice(0, 2),
    }, null, 2));

    // Brand breakdown
    banner('BRANDS (top 15)');
    const brandCounts = new Map<string, number>();
    for (const p of result.normalized) {
      const brand = p.brand || 'Unknown';
      brandCounts.set(brand, (brandCounts.get(brand) || 0) + 1);
    }
    const sorted = [...brandCounts.entries()].sort((a, b) => b[1] - a[1]);
    console.log(`Total unique brands: ${sorted.length}\n`);
    sorted.slice(0, 15).forEach(([brand, count]) => {
      console.log(` ${brand}: ${count} products`);
    });

    // Category breakdown
    banner('CATEGORIES');
    const categoryCounts = new Map<string, number>();
    for (const p of result.normalized) {
      const cat = p.category || 'Unknown';
      categoryCounts.set(cat, (categoryCounts.get(cat) || 0) + 1);
    }
    const catSorted = [...categoryCounts.entries()].sort((a, b) => b[1] - a[1]);
    catSorted.forEach(([cat, count]) => {
      console.log(` ${cat}: ${count} products`);
    });

    // Inventory stats
    banner('INVENTORY STATS');
    const inStock = result.normalized.filter(p => p.inStock).length;
    const outOfStock = normalizedCount - inStock;
    const withInventory = result.normalized.filter(p => p.availableUnits > 0);
    console.log(`In stock: ${inStock}`);
    console.log(`Out of stock: ${outOfStock}`);
    console.log(`With inventory levels: ${withInventory.length}`);

    // Show inventory examples
    if (withInventory.length > 0) {
      console.log('\nSample inventory levels:');
      withInventory.slice(0, 5).forEach(p => {
        console.log(` ${p.name}: ${p.availableUnits} units`);
      });
    }

    // Check for THC/CBD data
    const hasThc = result.normalized.filter(p => p.thcPercent !== null).length;
    const hasCbd = result.normalized.filter(p => p.cbdPercent !== null).length;
    console.log(`\nWith THC data: ${hasThc} (${percentOf(hasThc)}%)`);
    console.log(`With CBD data: ${hasCbd} (${percentOf(hasCbd)}%)`);

    // Check for images
    const hasImages = result.normalized.filter(p => p.imageUrl).length;
    console.log(`With images: ${hasImages} (${percentOf(hasImages)}%)`);

    banner('TEST PASSED');
  } catch (error: unknown) {
    console.error('\n' + '='.repeat(60));
    console.error('TEST FAILED');
    console.error('='.repeat(60));
    if (error instanceof Error) {
      console.error(`Error: ${error.message}`);
      console.error(error.stack);
    } else {
      console.error(`Error: ${String(error)}`);
    }
    process.exit(1);
  }
}

main().catch(console.error);

View File

@@ -0,0 +1,160 @@
/**
* Find the correct product card container selector
*/
import puppeteer, { Page } from 'puppeteer';
// Treez store subdomain; interpolated into the https://<STORE_ID>.treez.io/... URL built in main().
const STORE_ID = 'best';
/**
 * Resolves after the given number of milliseconds.
 *
 * @param ms Delay length in milliseconds.
 */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((onElapsed) => {
    setTimeout(onElapsed, ms);
  });
}
/**
 * Gets past the age-gate modal when the page displays one.
 *
 * If the modal element is present, the submit button is clicked (when found)
 * and the function waits two seconds. If it is absent, nothing happens.
 *
 * @param page Puppeteer page to check.
 */
async function bypassAgeGate(page: Page): Promise<void> {
  const gateModal = await page.$('[data-testid="age-gate-modal"]');
  if (gateModal === null) {
    return;
  }

  const acceptButton = await page.$('[data-testid="age-gate-submit-button"]');
  if (acceptButton) {
    await acceptButton.click();
  }
  await sleep(2000);
}
/**
 * Discovery script: find the DOM container that wraps each Treez product card.
 *
 * Loads the store's brands page and runs three in-page probes, printing each result:
 *   1. walks up from every product-name <h5> to the nearest ancestor (at most
 *      10 levels) whose class contains "ProductCard";
 *   2. counts matches for a list of candidate container selectors and how many
 *      of those contain a name / a price element;
 *   3. lists every element that contains both a name and a price element.
 *
 * The browser is closed in a `finally`, so a navigation timeout or a failing
 * probe no longer leaves a Chromium process behind.
 */
async function main(): Promise<void> {
  console.log('Finding Treez product card containers...\n');
  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });
  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // Only the DOM matters here, so image/font/media requests are aborted.
    await page.setRequestInterception(true);
    page.on('request', (req) => {
      if (['image', 'font', 'media'].includes(req.resourceType())) {
        req.abort();
      } else {
        req.continue();
      }
    });

    const url = `https://${STORE_ID}.treez.io/onlinemenu/brands?customerType=ADULT`;
    await page.goto(url, { waitUntil: 'networkidle2', timeout: 60000 });
    await sleep(3000);
    await bypassAgeGate(page);
    await sleep(2000);

    // Probe 1: climb from each name element to its "ProductCard" ancestor.
    const analysis = await page.evaluate(() => {
      const nameElements = document.querySelectorAll('h5.product_product__name__JcEk0');
      const containers: Map<string, { count: number; sample: string }> = new Map();
      nameElements.forEach(nameEl => {
        let current = nameEl.parentElement;
        let depth = 0;
        while (current && depth < 10) {
          const className = current.className?.toString?.() || '';
          if (className.includes('ProductCard')) {
            // Group by the first 100 characters of the class attribute.
            const key = className.slice(0, 100);
            const existing = containers.get(key) || { count: 0, sample: '' };
            existing.count++;
            if (!existing.sample) {
              existing.sample = current.outerHTML.slice(0, 300);
            }
            containers.set(key, existing);
            break;
          }
          current = current.parentElement;
          depth++;
        }
      });
      return Array.from(containers.entries()).map(([cls, data]) => ({
        class: cls,
        count: data.count,
        sample: data.sample,
      }));
    });
    console.log('Product card containers found:');
    analysis.forEach(({ class: cls, count, sample }) => {
      console.log(`\n[${count}x] ${cls}`);
      console.log(`Sample: ${sample.slice(0, 200)}...`);
    });

    // Probe 2: score candidate container selectors.
    console.log('\n\n--- Testing container selectors ---');
    const selectorTests = await page.evaluate(() => {
      const tests: Record<string, { total: number; withName: number; withPrice: number }> = {};
      const selectors = [
        '[class*="ProductCardWithBtn"]',
        '[class*="ProductCard_product"]',
        '[class*="ProductCard__"]',
        'article[class*="product"]',
        'div[class*="ProductCard"]',
        'a[class*="ProductCard"]',
        '[class*="product_product__"][class*="link"]',
        'article',
      ];
      selectors.forEach(sel => {
        const elements = document.querySelectorAll(sel);
        let withName = 0;
        let withPrice = 0;
        elements.forEach(el => {
          if (el.querySelector('h5, [class*="product__name"]')) withName++;
          if (el.querySelector('[class*="price"]')) withPrice++;
        });
        tests[sel] = { total: elements.length, withName, withPrice };
      });
      return tests;
    });
    Object.entries(selectorTests).forEach(([sel, { total, withName, withPrice }]) => {
      console.log(`${sel}: ${total} total, ${withName} with name, ${withPrice} with price`);
    });

    // Probe 3: every element that has both a name and a price descendant.
    console.log('\n\n--- Finding exact product card class ---');
    const exactClasses = await page.evaluate(() => {
      const matches: { tag: string; class: string }[] = [];
      const reported = new Set<string>();
      document.querySelectorAll('*').forEach(el => {
        const hasName = el.querySelector('h5.product_product__name__JcEk0');
        const hasPrice = el.querySelector('[class*="price__ins"], [class*="price__"]');
        if (!hasName || !hasPrice) return;
        // Dedupe on the same truncated value that is reported; comparing the
        // untruncated class against stored truncated ones never matched for
        // class attributes longer than 150 characters.
        const shown = (el.className?.toString?.() || '').slice(0, 150);
        if (shown && !reported.has(shown)) {
          reported.add(shown);
          matches.push({ tag: el.tagName, class: shown });
        }
      });
      return matches;
    });
    console.log('Elements containing both name and price:');
    exactClasses.forEach(({ tag, class: cls }) => {
      console.log(` [${tag}] ${cls}`);
    });
  } finally {
    await browser.close();
  }
}
// Script entry point. main() can reject (e.g. on a navigation timeout); log the
// error and mark the run as failed so callers see a non-zero exit code.
main().catch((err: unknown) => {
  console.error(err);
  process.exitCode = 1;
});

View File

@@ -0,0 +1,559 @@
/**
* Treez Platform Smoke Test
*
* Discovers DOM structure and extracts products from Treez menu pages.
* Used to determine actual CSS selectors for the platform client.
*
* Usage: npx ts-node scripts/test-treez-discovery.ts
*/
import puppeteer, { Page } from 'puppeteer';
import puppeteerExtra from 'puppeteer-extra';
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
// Register stealth plugin (even though Treez doesn't use Cloudflare, good practice)
puppeteerExtra.use(StealthPlugin());
// Treez store slug (the subdomain of treez.io) targeted by this smoke test.
const STORE_ID = 'best';
// Menu URL derived from STORE_ID so the two values cannot drift apart.
const TEST_URL = `https://${STORE_ID}.treez.io/onlinemenu/?customerType=ADULT`;
/**
 * One product as scraped from a Treez menu page, before any normalization.
 * Fields that could not be read from the card are `null` (nullable fields)
 * or an empty string (string fields).
 */
interface TreezProductRaw {
  // --- identity ---
  /** Product identifier string. */
  productId: string;
  /** Display name of the product. */
  name: string;
  /** Brand label; empty when none was found. */
  brand: string;

  // --- classification ---
  /** Product category; empty when unknown. */
  category: string;
  /** Sub-classification of the product; empty when unknown. */
  subcategory: string;

  // --- potency ---
  /** THC value, or null when not found. */
  thcPercent: number | null;
  /** CBD value, or null when not found. */
  cbdPercent: number | null;

  // --- pricing / size ---
  /** Numeric price, or null when not found. */
  price: number | null;
  /** Unit label the price refers to; empty when unknown. */
  priceUnit: string;
  /** Weight label, or null when not found. */
  weight: string | null;

  // --- presentation / availability ---
  /** Image URL, or null when not found. */
  imageUrl: string | null;
  /** Whether the product is considered in stock. */
  inStock: boolean;
}
/**
 * Wait for the given number of milliseconds.
 *
 * @param ms - Delay in milliseconds.
 * @returns A promise that settles with no value after the delay.
 */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
/**
 * Drive an infinite-scroll page to the bottom until it stops growing.
 *
 * Each pass reads the document height, scrolls to the bottom, waits 1.5 s and
 * logs a rough product count. Scrolling stops after `maxScrolls` passes or once
 * the height has been unchanged for three consecutive reads.
 *
 * @param page - Puppeteer page to scroll.
 * @param maxScrolls - Upper bound on the number of scroll passes.
 * @returns The number of scroll passes actually performed.
 */
async function scrollToLoadAll(page: Page, maxScrolls = 30): Promise<number> {
  let lastHeight = 0;
  let unchangedReads = 0;
  let passes = 0;
  console.log('[Scroll] Starting infinite scroll...');
  for (; passes < maxScrolls; passes++) {
    const height = await page.evaluate(() => document.body.scrollHeight);
    unchangedReads = height === lastHeight ? unchangedReads + 1 : 0;
    if (unchangedReads >= 3) {
      console.log('[Scroll] No new content after 3 attempts, stopping');
      break;
    }
    await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
    await sleep(1500); // give newly requested products time to render
    lastHeight = height;

    // Rough progress indicator: the first candidate selector that matches
    // more than ten elements supplies the count.
    const productCount = await page.evaluate(() => {
      const candidates = [
        '[class*="product"]',
        '[class*="Product"]',
        '[data-product]',
        '.menu-item',
        '[class*="card"]',
        '[class*="Card"]',
      ];
      const sizes = candidates.map((sel) => document.querySelectorAll(sel).length);
      const firstLarge = sizes.find((size) => size > 10);
      return firstLarge === undefined ? 0 : firstLarge;
    });
    console.log(`[Scroll] Scroll ${passes + 1}: height=${height}, products~${productCount}`);
  }
  return passes;
}
/**
 * Collect up to 20 distinct class tokens on the page that contain `keyword`
 * (case-insensitive). Elements whose `className` is not a string are skipped.
 */
async function collectClassTokens(page: Page, keyword: string): Promise<string[]> {
  return page.evaluate((needle: string) => {
    const tokens = new Set<string>();
    document.querySelectorAll('*').forEach((el) => {
      const className = el.className;
      if (typeof className !== 'string') return;
      className.split(' ').forEach((token) => {
        if (token.toLowerCase().includes(needle)) {
          tokens.add(token);
        }
      });
    });
    return Array.from(tokens).slice(0, 20);
  }, keyword);
}

/**
 * Analyze DOM structure to find product selectors.
 *
 * Prints, in order: class tokens containing "product", class tokens containing
 * "card", the `data-*` attribute names in use (excluding ones containing
 * "reactid"), and a sample of the most likely product container.
 *
 * The container sample is taken from the first matching well-known selector;
 * if none match, it falls back to the `div` class attribute that repeats most
 * often (more than five times, on divs with more than two children).
 *
 * @param page - Puppeteer page that has already loaded the menu.
 */
async function analyzeDOM(page: Page): Promise<void> {
  type ContainerSample = {
    selector: string;
    html: string;
    childCount: number;
    /** Only set when the sample came from the repeating-structure fallback. */
    count?: number;
  };

  console.log('\n' + '='.repeat(60));
  console.log('DOM STRUCTURE ANALYSIS');
  console.log('='.repeat(60));

  const productClasses = await collectClassTokens(page, 'product');
  console.log('\n[Classes containing "product"]:');
  productClasses.forEach((c: string) => console.log(` .${c}`));

  const cardClasses = await collectClassTokens(page, 'card');
  console.log('\n[Classes containing "card"]:');
  cardClasses.forEach((c: string) => console.log(` .${c}`));

  // Find data attributes
  const dataAttrs = await page.evaluate(() => {
    const attrs = new Set<string>();
    document.querySelectorAll('*').forEach((el) => {
      Array.from(el.attributes).forEach((attr) => {
        if (attr.name.startsWith('data-') && !attr.name.includes('reactid')) {
          attrs.add(attr.name);
        }
      });
    });
    return Array.from(attrs).slice(0, 30);
  });
  console.log('\n[Data attributes found]:');
  dataAttrs.forEach((attr: string) => console.log(` ${attr}`));

  // Get sample HTML of potential product container
  const sampleHTML = await page.evaluate((): ContainerSample | null => {
    const selectors = [
      '[class*="ProductCard"]',
      '[class*="product-card"]',
      '[class*="menuItem"]',
      '[class*="menu-item"]',
      '[data-testid*="product"]',
    ];
    for (const sel of selectors) {
      const el = document.querySelector(sel);
      if (el) {
        return {
          selector: sel,
          html: el.outerHTML.slice(0, 2000),
          childCount: el.children.length,
        };
      }
    }

    // Fallback: find repeating structures
    const classCounts = new Map<string, number>();
    document.querySelectorAll('div[class]').forEach((el) => {
      if (el.children.length > 2 && el.className) {
        classCounts.set(el.className, (classCounts.get(el.className) || 0) + 1);
      }
    });

    // Find class that appears many times (likely product cards)
    let bestClass = '';
    let bestCount = 0;
    classCounts.forEach((count, className) => {
      if (count > bestCount && count > 5) {
        bestCount = count;
        bestClass = className;
      }
    });

    // Take the first non-empty token and escape it: a class attribute with
    // leading whitespace used to yield the invalid selector ".", and tokens
    // containing characters such as ":" or "/" are not valid unescaped.
    const firstToken = bestClass.split(/\s+/).find((token) => token.length > 0);
    if (firstToken) {
      const selector = `.${CSS.escape(firstToken)}`;
      const el = document.querySelector(selector);
      if (el) {
        return {
          selector,
          html: el.outerHTML.slice(0, 2000),
          childCount: el.children.length,
          count: bestCount,
        };
      }
    }
    return null;
  });

  if (sampleHTML) {
    console.log('\n[Sample Product Container]:');
    console.log(` Selector: ${sampleHTML.selector}`);
    console.log(` Children: ${sampleHTML.childCount}`);
    if (sampleHTML.count) {
      console.log(` Occurrences: ${sampleHTML.count}`);
    }
    console.log('\n[Sample HTML (first 1000 chars)]:');
    console.log(sampleHTML.html.slice(0, 1000));
  }
}
/**
 * Extract products using discovered selectors.
 * Based on DOM analysis of the Treez/GapCommerce React app.
 *
 * All scraping happens inside the page. Diagnostics gathered there (which
 * selector matched, per-product extraction errors) are returned alongside the
 * products and printed here, because `console.log` calls made inside
 * `page.evaluate` go to the browser console and never reach this terminal.
 *
 * @param page - Puppeteer page with the menu loaded and scrolled.
 * @returns One entry per distinct product name, in document order.
 */
async function extractProducts(page: Page): Promise<TreezProductRaw[]> {
  console.log('\n' + '='.repeat(60));
  console.log('PRODUCT EXTRACTION');
  console.log('='.repeat(60));

  const { products, matchedSelector, matchedCount, errors } = await page.evaluate(() => {
    const results: TreezProductRaw[] = [];
    const errors: string[] = [];

    // Treez uses classes like: product_product__ERWtJ
    const productSelectors = [
      '[class*="product_product__"]', // Main product container
      '[class*="ProductCard"]', // Alternative pattern
    ];

    let productElements: Element[] = [];
    let matchedSelector = '';
    for (const selector of productSelectors) {
      // Keep elements that contain a name element or a price element.
      // Elements without a readable name are dropped in the loop below.
      const filtered = Array.from(document.querySelectorAll(selector)).filter(el => {
        const hasName = el.querySelector('[class*="product__name"]') || el.querySelector('[class*="name__"]');
        const hasPrice = el.querySelector('[class*="price"]');
        return hasName || hasPrice;
      });
      if (filtered.length > 0) {
        productElements = filtered;
        matchedSelector = selector;
        break;
      }
    }

    const strainTypes = ['indica', 'sativa', 'hybrid'];
    const categoryPatterns = [
      { pattern: /flower|bud/i, category: 'flower' },
      { pattern: /vape|cart|pen/i, category: 'vape' },
      { pattern: /edible|gummy|chocolate/i, category: 'edible' },
      { pattern: /concentrate|dab|wax|shatter/i, category: 'concentrate' },
      { pattern: /pre.?roll|joint/i, category: 'pre-roll' },
      { pattern: /topical|balm|cream/i, category: 'topical' },
      { pattern: /tincture/i, category: 'tincture' },
    ];

    // Dedupe by name - nested elements of one card match the same selector.
    const seen = new Set<string>();

    for (const el of productElements) {
      try {
        const nameEl = el.querySelector('[class*="product__name"], [class*="name__"]');
        const name = nameEl?.textContent?.trim() || '';
        if (!name || seen.has(name)) continue;
        seen.add(name);

        // Product ID from the detail link, else a slug synthesized from the name.
        const linkEl = el.querySelector('a[href*="/product/"]');
        let productId = '';
        if (linkEl) {
          const href = linkEl.getAttribute('href') || '';
          const match = href.match(/\/product\/([^\/\?]+)/);
          productId = match ? match[1] : '';
        }
        if (!productId) {
          productId = `treez_${name.replace(/\s+/g, '_').toLowerCase().slice(0, 30)}`;
        }

        const brandEl = el.querySelector('[class*="brand"], [class*="Brand"]');
        const brand = brandEl?.textContent?.trim() || '';

        // Price: first "$<digits>[.dd]" in the price element's text.
        const priceEl = el.querySelector('[class*="price__ins"], [class*="price"]');
        const priceText = priceEl?.textContent || '';
        const priceMatch = priceText.match(/\$(\d+(?:\.\d{2})?)/);
        const price = priceMatch ? parseFloat(priceMatch[1]) : null;

        const imgEl = el.querySelector('img');
        let imageUrl = imgEl?.getAttribute('src') || null;
        // Next.js image optimizer URLs carry the real source in the `url` query parameter.
        if (imageUrl && imageUrl.includes('/_next/image')) {
          const urlMatch = imageUrl.match(/url=([^&]+)/);
          if (urlMatch) {
            imageUrl = decodeURIComponent(urlMatch[1]);
          }
        }

        const text = el.textContent || '';

        // THC/CBD: the value-first form ("25.5% THC") is tried before the
        // label-first form ("THC 25.5%").
        // NOTE(review): on label-first text such as "THC 25% CBD 1%" the
        // value-first CBD pattern matches "25% CBD"; confirm the card format.
        const thcMatch = text.match(/(?:THC[:\s]*)?(\d+(?:\.\d+)?)\s*%?\s*THC/i) ||
          text.match(/THC[:\s]*(\d+(?:\.\d+)?)\s*%?/i);
        const cbdMatch = text.match(/(?:CBD[:\s]*)?(\d+(?:\.\d+)?)\s*%?\s*CBD/i) ||
          text.match(/CBD[:\s]*(\d+(?:\.\d+)?)\s*%?/i);
        const thcPercent = thcMatch ? parseFloat(thcMatch[1]) : null;
        const cbdPercent = cbdMatch ? parseFloat(cbdMatch[1]) : null;

        // Weight from the name first, then from the card text (e.g. "3.5G", "1G").
        const weightMatch = name.match(/(\d+(?:\.\d+)?)\s*(G|g|MG|mg|OZ|oz)/i) ||
          text.match(/(\d+(?:\.\d+)?)\s*(G|g|MG|mg|OZ|oz)/i);
        const weight = weightMatch ? `${weightMatch[1]}${weightMatch[2].toLowerCase()}` : null;
        // The price unit is the weight label when one was found.
        const priceUnit = weight ? weight : '';

        const textLower = text.toLowerCase();
        const subcategory = strainTypes.find(strain => textLower.includes(strain)) || '';
        const categoryHit = categoryPatterns.find(({ pattern }) => pattern.test(text));
        const category = categoryHit ? categoryHit.category : '';

        const inStock = !textLower.includes('out of stock') && !textLower.includes('sold out');

        results.push({
          productId,
          name,
          brand,
          category,
          subcategory,
          thcPercent,
          cbdPercent,
          price,
          priceUnit,
          imageUrl,
          inStock,
          weight,
        });
      } catch (err) {
        // Collected and reported by the caller; logging here would be invisible.
        errors.push(err instanceof Error ? err.message : String(err));
      }
    }

    return {
      products: results,
      matchedSelector,
      matchedCount: productElements.length,
      errors,
    };
  });

  if (matchedSelector) {
    console.log(`Found ${matchedCount} products with selector: ${matchedSelector}`);
  }
  for (const message of errors) {
    console.log('Error extracting product:', message);
  }
  return products;
}
/**
 * Bypass age gate if present.
 *
 * Looks for the age-gate modal, clicks its submit button, waits two seconds,
 * then waits up to 10 s for the detected modal element to leave the DOM.
 * Waiting on the detected element itself keeps detection and dismissal in
 * agreement, whichever of the two detection selectors matched.
 *
 * @param page - Puppeteer page to inspect.
 * @returns `true` when a gate was found and its submit button clicked (even
 *   if the gate could not be confirmed gone); `false` when there was no gate,
 *   no button, or an error occurred.
 */
async function bypassAgeGate(page: Page): Promise<boolean> {
  console.log('[Age Gate] Checking for age gate...');
  try {
    const ageGate = await page.$('[data-testid="age-gate-modal"], [class*="AgeGate"]');
    if (!ageGate) {
      console.log('[Age Gate] No age gate detected');
      return false;
    }
    console.log('[Age Gate] Age gate detected, clicking confirm button...');

    const submitBtn = await page.$('[data-testid="age-gate-submit-button"], button[type="submit"]');
    if (!submitBtn) {
      console.log('[Age Gate] No submit button found');
      return false;
    }
    await submitBtn.click();
    console.log('[Age Gate] Clicked confirm button');

    // Give the click time to take effect before polling.
    await sleep(2000);
    try {
      await page.waitForFunction(
        (gate: Element) => !gate.isConnected,
        { timeout: 10000 },
        ageGate
      );
      console.log('[Age Gate] Age gate bypassed');
    } catch {
      // Best effort: report the doubt instead of claiming success.
      console.log('[Age Gate] Gate may still be visible, continuing anyway');
    }
    return true;
  } catch (err: unknown) {
    const message = err instanceof Error ? err.message : String(err);
    console.log(`[Age Gate] Error: ${message}`);
    return false;
  }
}
/**
 * Smoke-test driver: open the Treez menu, analyze its DOM, scroll to load all
 * products, extract them, and print data-quality statistics plus samples.
 *
 * Debug artifacts are written under /tmp: a screenshot always, the product
 * list as JSON when products were found, or the page HTML when none were.
 *
 * On failure the error is printed and `process.exitCode` is set to 1. Calling
 * `process.exit(1)` here would end the process before the `finally` block
 * runs and leave the browser open.
 */
async function main(): Promise<void> {
  console.log('='.repeat(60));
  console.log('TREEZ PLATFORM SMOKE TEST');
  console.log('='.repeat(60));
  console.log(`Store ID: ${STORE_ID}`);
  console.log(`URL: ${TEST_URL}`);
  console.log('');

  const browser = await puppeteerExtra.launch({
    headless: true,
    args: [
      '--no-sandbox',
      '--disable-setuid-sandbox',
      '--disable-dev-shm-usage',
      '--disable-blink-features=AutomationControlled',
    ],
  });

  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });
    await page.setUserAgent(
      'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36'
    );

    console.log('[Navigation] Going to Treez menu page...');
    await page.goto(TEST_URL, {
      waitUntil: 'networkidle2',
      timeout: 60000,
    });
    console.log('[Navigation] Page loaded, waiting for React app...');
    await sleep(2000);

    await bypassAgeGate(page);

    console.log('[Navigation] Waiting for menu content...');
    await sleep(3000);

    const pageTitle = await page.title();
    console.log(`[Navigation] Page title: ${pageTitle}`);

    // Screenshot of the initial viewport, for debugging.
    await page.screenshot({ path: '/tmp/treez-smoke-test.png', fullPage: false });
    console.log('[Debug] Screenshot saved to /tmp/treez-smoke-test.png');

    await analyzeDOM(page);
    await scrollToLoadAll(page);
    const products = await extractProducts(page);

    console.log('\n' + '='.repeat(60));
    console.log('RESULTS');
    console.log('='.repeat(60));
    console.log(`Total products extracted: ${products.length}`);

    if (products.length > 0) {
      const withPrice = products.filter((p) => p.price !== null).length;
      const withThc = products.filter((p) => p.thcPercent !== null).length;
      const withBrand = products.filter((p) => p.brand).length;
      const withImage = products.filter((p) => p.imageUrl).length;
      console.log(`\n[Data Quality]`);
      console.log(` With price: ${withPrice}/${products.length} (${Math.round((withPrice / products.length) * 100)}%)`);
      console.log(` With THC%: ${withThc}/${products.length} (${Math.round((withThc / products.length) * 100)}%)`);
      console.log(` With brand: ${withBrand}/${products.length} (${Math.round((withBrand / products.length) * 100)}%)`);
      console.log(` With image: ${withImage}/${products.length} (${Math.round((withImage / products.length) * 100)}%)`);

      console.log('\n[Sample Products (first 10)]:');
      for (const p of products.slice(0, 10)) {
        console.log(`\n ${p.name}`);
        console.log(` ID: ${p.productId}`);
        console.log(` Brand: ${p.brand || 'N/A'}`);
        console.log(` Category: ${p.category || 'N/A'} / ${p.subcategory || 'N/A'}`);
        console.log(` THC: ${p.thcPercent !== null ? p.thcPercent + '%' : 'N/A'}`);
        console.log(` CBD: ${p.cbdPercent !== null ? p.cbdPercent + '%' : 'N/A'}`);
        console.log(` Price: ${p.price !== null ? '$' + p.price : 'N/A'} ${p.priceUnit}`);
        console.log(` Weight: ${p.weight || 'N/A'}`);
        console.log(` Image: ${p.imageUrl?.slice(0, 60) || 'N/A'}...`);
        console.log(` In Stock: ${p.inStock}`);
      }

      // Save full results to file
      const fs = await import('fs');
      fs.writeFileSync('/tmp/treez-products.json', JSON.stringify(products, null, 2));
      console.log('\n[Debug] Full product list saved to /tmp/treez-products.json');
    } else {
      console.log('\n[WARNING] No products extracted!');
      console.log('Check /tmp/treez-smoke-test.png for page state');
      // Dump page HTML for debugging
      const html = await page.content();
      const fs = await import('fs');
      fs.writeFileSync('/tmp/treez-page.html', html);
      console.log('[Debug] Page HTML saved to /tmp/treez-page.html');
    }

    console.log('\n' + '='.repeat(60));
    console.log(products.length > 0 ? 'SMOKE TEST PASSED' : 'SMOKE TEST NEEDS ADJUSTMENT');
    console.log('='.repeat(60));
  } catch (error: unknown) {
    console.error('\n' + '='.repeat(60));
    console.error('SMOKE TEST FAILED');
    console.error('='.repeat(60));
    if (error instanceof Error) {
      console.error(`Error: ${error.message}`);
      console.error(error.stack);
    } else {
      console.error(`Error: ${String(error)}`);
    }
    // Record the failure without terminating, so the finally block can still close the browser.
    process.exitCode = 1;
  } finally {
    await browser.close();
  }
}
// Script entry point. The browser launch happens outside main()'s own
// try/catch, so a rejection can still reach here; log it and mark the run as
// failed instead of exiting with code 0.
main().catch((err: unknown) => {
  console.error(err);
  process.exitCode = 1;
});

View File

@@ -0,0 +1,191 @@
/**
* Find actual brand elements on /brands page
*/
import puppeteer, { Page } from 'puppeteer';
// Treez store slug; main() uses it as the subdomain of the brands-page URL.
const STORE_ID = 'best';
/**
 * Suspend for `ms` milliseconds.
 *
 * @param ms - Delay in milliseconds.
 * @returns A promise that settles with no value when the delay has elapsed.
 */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((release) => {
    setTimeout(release, ms);
  });
}
/**
 * Dismiss the age-verification modal when the page shows one.
 *
 * When the modal is present, a notice is logged, the submit button (if found)
 * is clicked, and the function waits two seconds. Without a modal it returns
 * immediately.
 *
 * @param page - Puppeteer page to inspect.
 */
async function bypassAgeGate(page: Page): Promise<void> {
  const modal = await page.$('[data-testid="age-gate-modal"]');
  if (!modal) {
    return;
  }
  console.log(' Age gate detected, bypassing...');
  const confirmButton = await page.$('[data-testid="age-gate-submit-button"]');
  if (confirmButton) {
    await confirmButton.click();
  }
  await sleep(2000);
}
/**
 * Discovery script: find how brands are represented on the store's /brands page.
 *
 * Prints brand-related anchors, navigation items, candidate brand-card
 * selectors and all distinct "/brand/" hrefs, saves a screenshot, then scrolls
 * ten times (reporting link counts each time) and prints the final href list.
 *
 * The browser is closed in a `finally`, so a failed navigation or probe does
 * not leave a Chromium process running.
 */
async function main(): Promise<void> {
  console.log('='.repeat(60));
  console.log('Finding Brand Elements on /brands page');
  console.log('='.repeat(60));
  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });
  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // Only the DOM matters here, so image/font/media requests are aborted.
    await page.setRequestInterception(true);
    page.on('request', (req) => {
      if (['image', 'font', 'media'].includes(req.resourceType())) {
        req.abort();
      } else {
        req.continue();
      }
    });

    // Distinct "/brand/" hrefs in document order; used before and after scrolling.
    const collectBrandHrefs = (): Promise<string[]> =>
      page.evaluate(() => {
        const unique = new Set<string>();
        document.querySelectorAll('a[href*="/brand/"]').forEach((a: Element) => {
          const href = a.getAttribute('href');
          if (href) unique.add(href);
        });
        return Array.from(unique);
      });

    const url = `https://${STORE_ID}.treez.io/onlinemenu/brands?customerType=ADULT`;
    console.log(`\nNavigating to ${url}`);
    await page.goto(url, { waitUntil: 'networkidle2', timeout: 60000 });
    await sleep(3000);
    await bypassAgeGate(page);
    await sleep(2000);

    // Check current URL
    const currentUrl = page.url();
    console.log(`\nCurrent URL: ${currentUrl}`);

    // Look for ANY links on the page that might be brand links
    console.log('\n[1] Looking for all anchor links with "brand" in href or class...');
    const brandLinks = await page.evaluate(() => {
      const links: { href: string; text: string }[] = [];
      document.querySelectorAll('a').forEach((a: Element) => {
        const href = a.getAttribute('href') || '';
        const text = a.textContent?.trim().slice(0, 50) || '';
        // Read the attribute instead of `className`: on SVG <a> elements
        // `className` is an SVGAnimatedString, so `.includes` would throw.
        const className = a.getAttribute('class') || '';
        if (href.includes('brand') || href.includes('Brand') ||
          className.includes('brand') || className.includes('Brand')) {
          links.push({ href, text });
        }
      });
      return links;
    });
    console.log(`Found ${brandLinks.length} brand-related links:`);
    brandLinks.slice(0, 30).forEach(l => console.log(` "${l.text}" → ${l.href}`));

    // Look for the navigation/dropdown
    console.log('\n[2] Looking at navigation structure...');
    const navItems = await page.evaluate(() => {
      const items: string[] = [];
      document.querySelectorAll('nav a, [class*="nav"] a, header a').forEach((a: Element) => {
        const text = a.textContent?.trim();
        const href = a.getAttribute('href') || '';
        if (text && text.length < 30) {
          items.push(`${text} (${href})`);
        }
      });
      return [...new Set(items)];
    });
    console.log('Navigation items:');
    navItems.forEach(item => console.log(` - ${item}`));

    // Look for grid containers that might hold brand cards
    console.log('\n[3] Looking for brand card containers...');
    const containers = await page.evaluate(() => {
      const results: { selector: string; count: number; sample: string }[] = [];
      const selectors = [
        '[class*="brand_brand"]',
        '[class*="brands_brand"]',
        '[class*="brand-card"]',
        '[class*="brandCard"]',
        '[class*="BrandCard"]',
        'a[href*="/brand/"]',
        '[data-testid*="brand"]',
      ];
      selectors.forEach(sel => {
        const els = document.querySelectorAll(sel);
        if (els.length > 0) {
          const first = els[0];
          results.push({
            selector: sel,
            count: els.length,
            sample: first.textContent?.trim().slice(0, 50) || '',
          });
        }
      });
      return results;
    });
    console.log('Brand containers found:');
    containers.forEach(c => console.log(` ${c.selector}: ${c.count} elements, sample: "${c.sample}"`));

    // Get ALL unique hrefs that contain /brand/
    console.log('\n[4] All links containing "/brand/" in href...');
    const brandHrefs = await collectBrandHrefs();
    console.log(`Found ${brandHrefs.length} unique brand hrefs:`);
    brandHrefs.forEach(href => console.log(` ${href}`));

    // Take screenshot
    await page.screenshot({ path: '/tmp/treez-brands-page.png', fullPage: false });
    console.log('\n[5] Screenshot saved to /tmp/treez-brands-page.png');

    // Scroll and see if more brands load
    console.log('\n[6] Scrolling to load more brands...');
    for (let i = 0; i < 10; i++) {
      await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
      await sleep(1500);
      const brandCount = await page.evaluate(() =>
        document.querySelectorAll('a[href*="/brand/"]').length
      );
      const productCount = await page.evaluate(() =>
        document.querySelectorAll('a[href*="/product/"]').length
      );
      console.log(` Scroll ${i + 1}: brand links=${brandCount}, product links=${productCount}`);
    }

    // Final brand href list
    const finalBrandHrefs = await collectBrandHrefs();
    console.log(`\n[7] Final brand href list (${finalBrandHrefs.length} brands):`);
    finalBrandHrefs.forEach(href => console.log(` ${href}`));
  } finally {
    await browser.close();
  }
}
// Script entry point. main() can reject (e.g. on a navigation timeout); log the
// error and mark the run as failed so callers see a non-zero exit code.
main().catch((err: unknown) => {
  console.error(err);
  process.exitCode = 1;
});

View File

@@ -0,0 +1,221 @@
/**
* Full crawl: Visit each brand page and aggregate all products
*/
import puppeteer, { Page } from 'puppeteer';
// Treez store slug; main() uses it as the subdomain of every URL it builds.
const STORE_ID = 'best';
/**
 * Delay helper for pacing browser interactions.
 *
 * @param ms - Delay in milliseconds.
 * @returns A promise that settles with no value once the delay has passed.
 */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((finish) => {
    setTimeout(finish, ms);
  });
}
/**
 * Dismiss the age-verification modal when the page shows one.
 *
 * If the modal is present, the submit button (when found) is clicked and the
 * function then waits two seconds. When no modal is present it returns
 * immediately.
 *
 * @param page - Puppeteer page to inspect.
 */
async function bypassAgeGate(page: Page): Promise<void> {
  const modal = await page.$('[data-testid="age-gate-modal"]');
  if (!modal) {
    return;
  }
  const confirmButton = await page.$('[data-testid="age-gate-submit-button"]');
  if (confirmButton) {
    await confirmButton.click();
  }
  await sleep(2000);
}
/**
 * Scroll to the bottom repeatedly so lazily loaded content gets rendered.
 *
 * Performs at most 30 passes with a one-second pause after each scroll, and
 * stops early once the document height has been unchanged for three
 * consecutive reads.
 *
 * @param page - Puppeteer page to scroll.
 */
async function scrollToLoadAll(page: Page): Promise<void> {
  let lastHeight = 0;
  let unchangedReads = 0;
  let pass = 0;
  while (pass < 30) {
    const height = await page.evaluate(() => document.body.scrollHeight);
    unchangedReads = height === lastHeight ? unchangedReads + 1 : 0;
    if (unchangedReads >= 3) {
      return;
    }
    await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
    await sleep(1000);
    lastHeight = height;
    pass += 1;
  }
}
/**
 * Read every product link on the current page.
 *
 * Each distinct `/product/` href yields one entry. The name comes from the
 * image alt text, falling back to the <h5> text; links without a name are
 * skipped. `brand` is the part of the product slug before its first hyphen,
 * and `price` is the first dollar amount in the price element, without the "$".
 *
 * @param page - Puppeteer page showing a product listing.
 * @returns Products in document order, one per unique href.
 */
async function extractProducts(page: Page): Promise<{ name: string; brand: string; price: string; href: string }[]> {
  return page.evaluate(() => {
    const byHref = new Map<string, { name: string; brand: string; price: string; href: string }>();
    const anchors = Array.from(document.querySelectorAll('a[href*="/product/"]'));
    for (const anchor of anchors) {
      const href = anchor.getAttribute('href') || '';
      const altText = anchor.querySelector('img')?.getAttribute('alt');
      const headingText = anchor.querySelector('h5')?.textContent?.trim();
      const name = altText || headingText || '';
      if (!name || byHref.has(href)) {
        continue;
      }
      // href pattern: /product/{brand}-{product}
      const slugMatch = /\/product\/([^\/]+)/.exec(href);
      const slug = slugMatch ? slugMatch[1] : '';
      const priceText = anchor.querySelector('[class*="price"]')?.textContent;
      const amountMatch = priceText?.match(/\$(\d+(?:\.\d{2})?)/);
      byHref.set(href, {
        name,
        brand: slug.split('-')[0] || '',
        price: amountMatch ? amountMatch[1] : '',
        href,
      });
    }
    return Array.from(byHref.values());
  });
}
/**
 * Full crawl: visit each brand page and aggregate all products.
 *
 * 1. Loads the brands page and collects every "/brand/" link.
 * 2. Derives extra brand slugs from the product links on that page.
 * 3. Builds the set of absolute brand URLs to visit.
 * 4. Visits each one, scrolls to load everything, and merges the products by href.
 * Finally prints totals, the 20 largest brands and ten sample products.
 *
 * A failure on one brand page is logged and the crawl continues. The browser
 * is closed in a `finally`, so it is not leaked when the initial navigation fails.
 */
async function main(): Promise<void> {
  console.log('='.repeat(60));
  console.log('Full Treez Crawl - All Brands');
  console.log('='.repeat(60));
  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });
  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // Only the DOM matters here, so image/font/media requests are aborted.
    await page.setRequestInterception(true);
    page.on('request', (req) => {
      if (['image', 'font', 'media'].includes(req.resourceType())) {
        req.abort();
      } else {
        req.continue();
      }
    });

    // Step 1: Go to brands page and extract all brand links
    const brandsUrl = `https://${STORE_ID}.treez.io/onlinemenu/brands?customerType=ADULT`;
    console.log(`\n[1] Getting brand list...`);
    await page.goto(brandsUrl, { waitUntil: 'networkidle2', timeout: 60000 });
    await sleep(3000);
    await bypassAgeGate(page);
    await sleep(2000);

    const brandLinks = await page.evaluate(() => {
      const seen = new Set<string>();
      document.querySelectorAll('a[href*="/brand/"]').forEach(a => {
        const href = a.getAttribute('href') || '';
        if (href) seen.add(href);
      });
      return Array.from(seen);
    });
    console.log(`Found ${brandLinks.length} brand links: ${brandLinks.join(', ')}`);

    // Step 2: Also extract unique brands from product URLs
    const productBrands = await page.evaluate(() => {
      const brands = new Set<string>();
      document.querySelectorAll('a[href*="/product/"]').forEach(a => {
        const href = a.getAttribute('href') || '';
        // Pattern: /product/{brand}-{product}-...
        // Takes the first one or two hyphen-separated words as the brand guess.
        const match = href.match(/\/product\/([a-z0-9]+(?:-[a-z0-9]+)?)-/i);
        if (match) {
          brands.add(match[1].toLowerCase());
        }
      });
      return Array.from(brands);
    });
    console.log(`Found ${productBrands.length} brands from product URLs`);

    // Step 3: Build full brand URL list
    const allBrandUrls = new Set<string>();
    for (const link of brandLinks) {
      try {
        // Resolve against the page URL so root-relative, path-relative and
        // absolute hrefs all become valid absolute URLs.
        allBrandUrls.add(new URL(link, brandsUrl).href);
      } catch {
        console.log(`Skipping unparseable brand link: ${link}`);
      }
    }
    productBrands.forEach(brand => {
      allBrandUrls.add(`https://${STORE_ID}.treez.io/brand/${encodeURIComponent(brand)}`);
    });
    console.log(`Total brand URLs to visit: ${allBrandUrls.size}`);

    // Step 4: Visit each brand page and collect products
    const allProducts = new Map<string, { name: string; brand: string; price: string; href: string }>();
    let visitedBrands = 0;
    for (const brandUrl of allBrandUrls) {
      try {
        // Add the customer type through the URL API so a link that already
        // carries a query string does not end up with a second "?".
        const target = new URL(brandUrl);
        if (!target.searchParams.has('customerType')) {
          target.searchParams.set('customerType', 'ADULT');
        }
        const fullUrl = target.href;
        console.log(`\n[${++visitedBrands}/${allBrandUrls.size}] Visiting: ${fullUrl}`);
        await page.goto(fullUrl, { waitUntil: 'networkidle2', timeout: 30000 });
        await sleep(1500);
        await scrollToLoadAll(page);
        const products = await extractProducts(page);
        console.log(` Found ${products.length} products`);
        products.forEach(p => {
          if (!allProducts.has(p.href)) {
            allProducts.set(p.href, p);
          }
        });
        console.log(` Total unique so far: ${allProducts.size}`);
      } catch (error: unknown) {
        // One bad brand page must not abort the crawl.
        const message = error instanceof Error ? error.message : String(error);
        console.log(` Error: ${message.slice(0, 50)}`);
      }
      // Small delay between requests
      await sleep(500);
    }

    // Summary
    console.log('\n' + '='.repeat(60));
    console.log('SUMMARY');
    console.log('='.repeat(60));
    console.log(`Brands visited: ${visitedBrands}`);
    console.log(`Total unique products: ${allProducts.size}`);

    // Count by brand
    const brandCounts: Record<string, number> = {};
    allProducts.forEach(p => {
      brandCounts[p.brand] = (brandCounts[p.brand] || 0) + 1;
    });
    console.log('\nProducts by brand:');
    Object.entries(brandCounts)
      .sort((a, b) => b[1] - a[1])
      .slice(0, 20)
      .forEach(([brand, count]) => {
        console.log(` ${brand}: ${count}`);
      });

    // Sample products
    console.log('\nSample products:');
    Array.from(allProducts.values()).slice(0, 10).forEach(p => {
      console.log(` - ${p.name} | ${p.brand} | $${p.price || 'N/A'}`);
    });
  } finally {
    await browser.close();
  }
}
// Script entry point. main() can reject (e.g. when the brands page fails to
// load); log the error and mark the run as failed so callers see a non-zero
// exit code.
main().catch((err: unknown) => {
  console.error(err);
  process.exitCode = 1;
});

View File

@@ -0,0 +1,57 @@
import axios from 'axios';
/**
 * Probe a handful of Treez headless-API endpoints for one store and print,
 * for each, the HTTP status and a short description of the response body.
 *
 * Request failures are reported per endpoint and never abort the loop.
 */
async function main(): Promise<void> {
  const storeId = 'best';
  const baseUrl = `https://headless.treez.io/v2.0/dispensary/${storeId}`;
  // Try various endpoints
  const endpoints = [
    '/ecommerce/discounts?excludeInactive=true&hideUnset=true&includeProdInfo=true',
    '/ecommerce/products',
    '/products',
    '/menu',
    '/inventory',
    '/catalog',
  ];
  console.log('Testing Treez Headless API endpoints...\n');
  for (const endpoint of endpoints) {
    const url = baseUrl + endpoint;
    console.log('GET ' + url);
    try {
      const response = await axios.get(url, {
        headers: {
          'Content-Type': 'application/json',
          'Accept': 'application/json',
        },
        timeout: 10000,
      });
      console.log(' Status: ' + response.status);
      const data: unknown = response.data;
      if (Array.isArray(data)) {
        console.log(' Array length: ' + data.length);
        if (data.length > 0) {
          const first: unknown = data[0];
          // Only list keys for object items; Object.keys(null) would throw.
          if (first !== null && typeof first === 'object') {
            console.log(' First item keys: ' + Object.keys(first).join(', '));
          }
          console.log(' Sample: ' + JSON.stringify(first).slice(0, 300));
        }
      } else if (data !== null && typeof data === 'object') {
        // `typeof null` is 'object'; without the null check a null body
        // threw here and was misreported below as a request error.
        console.log(' Keys: ' + Object.keys(data).join(', '));
        console.log(' Sample: ' + JSON.stringify(data).slice(0, 500));
      } else {
        // Null or primitive bodies (e.g. text/HTML) are shown instead of silently skipped.
        console.log(' Body (' + (data === null ? 'null' : typeof data) + '): ' + String(data).slice(0, 300));
      }
      console.log('');
    } catch (err: unknown) {
      // Narrow by shape rather than assuming `any`: HTTP errors carry a
      // `response`, other failures only a `message`.
      const failure = (typeof err === 'object' && err !== null ? err : {}) as {
        message?: string;
        response?: { status?: number; data?: unknown };
      };
      console.log(' Error: ' + (failure.response?.status || failure.message || String(err)));
      if (failure.response?.data) {
        console.log(' Data: ' + JSON.stringify(failure.response.data).slice(0, 200));
      }
      console.log('');
    }
  }
}
// Script entry point. Attach a rejection handler so an unexpected failure is
// logged and reflected in the exit code rather than left as a floating promise.
main().catch((err: unknown) => {
  console.error(err);
  process.exitCode = 1;
});

View File

@@ -0,0 +1,166 @@
import puppeteer from 'puppeteer';
/**
 * Wait for the given number of milliseconds before continuing.
 *
 * @param ms - Delay in milliseconds.
 * @returns A promise that settles with no value after the delay.
 */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((resume) => {
    setTimeout(resume, ms);
  });
}
/**
 * Open the first product linked from the "dime" brand page and print every
 * hint of stock/inventory information found on its detail page: matching
 * elements, add-to-cart button state, keyword presence in the body text,
 * data attributes, JSON-LD blocks and the Next.js `__NEXT_DATA__` payload.
 */
async function main() {
  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  // try/finally guarantees the browser is shut down even when a navigation
  // or page.evaluate call throws; otherwise the browser is left running.
  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // Start from a brand listing page; a product link is picked from it below
    await page.goto('https://shop.bestdispensary.com/brand/dime', {
      waitUntil: 'networkidle2',
      timeout: 60000
    });
    await sleep(3000);

    // Bypass age gate
    const ageGate = await page.$('[data-testid="age-gate-modal"]');
    if (ageGate) {
      const btn = await page.$('[data-testid="age-gate-submit-button"]');
      if (btn) await btn.click();
      await sleep(2000);
    }

    // Get first product URL
    const productUrl = await page.evaluate(() => {
      const a = document.querySelector('a[href*="/product/"]');
      return a ? 'https://shop.bestdispensary.com' + a.getAttribute('href') : null;
    });

    if (!productUrl) {
      console.log('No product found');
      return;
    }

    console.log('Checking product: ' + productUrl + '\n');
    await page.goto(productUrl, { waitUntil: 'networkidle2', timeout: 30000 });
    await sleep(2000);

    // Look for inventory/stock info (runs inside the page)
    const inventoryData = await page.evaluate(() => {
      const data: any = {};

      // Check for stock/inventory elements
      const stockSelectors = [
        '[class*="stock"]',
        '[class*="Stock"]',
        '[class*="inventory"]',
        '[class*="Inventory"]',
        '[class*="quantity"]',
        '[class*="Quantity"]',
        '[class*="available"]',
        '[class*="Available"]',
        '[class*="in-stock"]',
        '[class*="out-of-stock"]',
        '[data-stock]',
        '[data-quantity]',
        '[data-inventory]',
      ];
      data.stockElements = [];
      stockSelectors.forEach(sel => {
        document.querySelectorAll(sel).forEach(el => {
          data.stockElements.push({
            selector: sel,
            text: el.textContent?.trim().slice(0, 100),
            dataAttrs: Object.keys((el as HTMLElement).dataset || {}),
          });
        });
      });

      // Check for "Add to cart" button state (disabled = out of stock)
      const addToCartBtn = document.querySelector('button[class*="add"], button[class*="cart"]');
      data.addToCartBtn = {
        found: !!addToCartBtn,
        disabled: (addToCartBtn as HTMLButtonElement)?.disabled,
        text: addToCartBtn?.textContent?.trim(),
      };

      // Check visible page text for inventory keywords. Lower-casing once makes
      // the check case-insensitive, so labels like "IN STOCK" are also detected.
      const bodyText = document.body.innerText.toLowerCase();
      data.hasStockText = bodyText.includes('stock');
      data.hasInventoryText = bodyText.includes('inventory');
      data.hasQuantityText = bodyText.includes('quantity');
      data.hasAvailableText = bodyText.includes('available');

      // Collect the dataset of every element carrying a product-ish data attribute
      data.allDataAttrs = [];
      document.querySelectorAll('[data-product-id], [data-sku], [data-variant]').forEach(el => {
        const attrs: any = {};
        Object.entries((el as HTMLElement).dataset).forEach(([k, v]) => {
          attrs[k] = v;
        });
        if (Object.keys(attrs).length > 0) {
          data.allDataAttrs.push(attrs);
        }
      });

      // Check for JSON-LD or schema data
      const scripts = document.querySelectorAll('script[type="application/ld+json"]');
      data.jsonLd = [];
      scripts.forEach(s => {
        try {
          const json = JSON.parse(s.textContent || '');
          data.jsonLd.push(json);
        } catch {
          // Best effort: malformed JSON-LD blocks are skipped.
        }
      });

      // Check Next.js data
      const nextData = document.getElementById('__NEXT_DATA__');
      if (nextData) {
        try {
          const json = JSON.parse(nextData.textContent || '');
          data.hasNextData = true;
          data.nextDataKeys = Object.keys(json);
          // Look for product data in props
          if (json.props?.pageProps?.product) {
            data.productFromNext = json.props.pageProps.product;
          }
          if (json.props?.pageProps) {
            data.pagePropsKeys = Object.keys(json.props.pageProps);
          }
        } catch {
          // Best effort: an unparsable payload is treated as "no Next.js data".
        }
      }

      return data;
    });

    console.log('Inventory Analysis:\n');
    console.log('Stock elements found: ' + inventoryData.stockElements.length);
    inventoryData.stockElements.forEach((s: any) => {
      console.log(' - ' + s.selector + ': "' + s.text + '"');
    });

    console.log('\nAdd to Cart button: ' + JSON.stringify(inventoryData.addToCartBtn));

    console.log('\nText checks:');
    console.log(' Has "stock": ' + inventoryData.hasStockText);
    console.log(' Has "inventory": ' + inventoryData.hasInventoryText);
    console.log(' Has "quantity": ' + inventoryData.hasQuantityText);
    console.log(' Has "available": ' + inventoryData.hasAvailableText);

    console.log('\nData attributes: ' + JSON.stringify(inventoryData.allDataAttrs));
    console.log('\nJSON-LD: ' + JSON.stringify(inventoryData.jsonLd, null, 2));

    if (inventoryData.hasNextData) {
      console.log('\nNext.js data found!');
      console.log(' Keys: ' + inventoryData.nextDataKeys);
      console.log(' Page props keys: ' + inventoryData.pagePropsKeys);
      if (inventoryData.productFromNext) {
        console.log('\n Product data from Next.js:');
        console.log(JSON.stringify(inventoryData.productFromNext, null, 2));
      }
    }
  } finally {
    await browser.close();
  }
}

main().catch(console.error);

View File

@@ -0,0 +1,211 @@
/**
* Find and interact with "load more brands" selector
*/
import puppeteer, { Page } from 'puppeteer';
// Treez store identifier; used as the subdomain of the online-menu URL in main().
const STORE_ID = 'best';
/** Resolve after `ms` milliseconds; used to give the page time to settle. */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((done) => {
    setTimeout(() => done(), ms);
  });
}
/**
 * Dismiss the age-gate modal when it is present on `page`.
 *
 * Does nothing when the modal is absent. When it is present, the submit
 * button is clicked (if found) and the function waits two seconds.
 */
async function bypassAgeGate(page: Page): Promise<void> {
  const modal = await page.$('[data-testid="age-gate-modal"]');
  if (!modal) {
    return;
  }

  console.log(' Age gate detected, bypassing...');
  const submit = await page.$('[data-testid="age-gate-submit-button"]');
  if (submit) {
    await submit.click();
  }
  await sleep(2000);
}
/**
 * Collect the title and product-link count of every brand section that is
 * currently rendered in the page's DOM.
 */
async function collectBrandSections(page: Page): Promise<{ title: string; productCount: number }[]> {
  return page.evaluate(() => {
    // Look for brand section headers or containers
    const sections: { title: string; productCount: number }[] = [];
    document.querySelectorAll('[class*="products_product__section"]').forEach(section => {
      const header = section.querySelector('h2, h3, [class*="heading"]');
      const title = header?.textContent?.trim() || 'Unknown';
      const products = section.querySelectorAll('a[class*="product_product__"]');
      sections.push({ title, productCount: products.length });
    });
    return sections;
  });
}

/**
 * Explore the store's brands page looking for a "load more brands" control:
 * dumps native selects, dropdown-like elements and brand/more-labelled
 * controls, counts brand sections, saves a full-page screenshot, then scrolls
 * to the bottom repeatedly and reports the section/product totals afterwards.
 */
async function main() {
  console.log('='.repeat(60));
  console.log('Finding "Load More Brands" control');
  console.log('='.repeat(60));

  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  // try/finally guarantees the browser is closed even when a step throws.
  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // Don't block stylesheets - might affect layout
    await page.setRequestInterception(true);
    page.on('request', (req) => {
      if (['image', 'font', 'media'].includes(req.resourceType())) {
        req.abort();
      } else {
        req.continue();
      }
    });

    const url = `https://${STORE_ID}.treez.io/onlinemenu/brands?customerType=ADULT`;
    console.log(`\nNavigating to ${url}`);
    await page.goto(url, { waitUntil: 'networkidle2', timeout: 60000 });
    await sleep(3000);
    await bypassAgeGate(page);
    await sleep(2000);

    // Find all selects and dropdowns
    console.log('\n[1] Looking for select elements...');
    const selectInfo = await page.evaluate(() => {
      const results: any[] = [];
      // Native select elements
      document.querySelectorAll('select').forEach((sel, i) => {
        const options = Array.from(sel.options).map(o => ({ value: o.value, text: o.text }));
        results.push({
          type: 'select',
          id: sel.id || `select-${i}`,
          class: sel.className,
          options: options.slice(0, 10),
          totalOptions: sel.options.length,
        });
      });
      return results;
    });
    console.log('Native selects found:', JSON.stringify(selectInfo, null, 2));

    // Look for custom dropdown buttons
    console.log('\n[2] Looking for dropdown/button elements...');
    const dropdownInfo = await page.evaluate(() => {
      const results: any[] = [];
      // Look for common dropdown patterns
      const selectors = [
        '[class*="dropdown"]',
        '[class*="Dropdown"]',
        '[class*="select"]',
        '[class*="Select"]',
        '[class*="picker"]',
        '[class*="Picker"]',
        '[role="listbox"]',
        '[role="combobox"]',
        'button[aria-haspopup]',
        '[class*="brand"] button',
        '[class*="Brand"] button',
        '[class*="filter"]',
        '[class*="Filter"]',
      ];
      selectors.forEach(sel => {
        document.querySelectorAll(sel).forEach(el => {
          const text = el.textContent?.trim().slice(0, 100) || '';
          const className = el.className?.toString?.().slice(0, 100) || '';
          // Keep only elements whose label hints at a brand/"more"/"all" control
          const lowered = text.toLowerCase();
          if (lowered.includes('brand') || lowered.includes('more') || lowered.includes('all')) {
            results.push({
              selector: sel,
              tag: el.tagName,
              class: className,
              text: text.slice(0, 50),
            });
          }
        });
      });
      return results;
    });
    console.log('Dropdown-like elements:', JSON.stringify(dropdownInfo.slice(0, 10), null, 2));

    // Look for any element containing "brand" text
    console.log('\n[3] Looking for elements with "brand" or "more" text...');
    const brandTextElements = await page.evaluate(() => {
      const results: any[] = [];
      const textContent = ['brand', 'more', 'load', 'view all', 'show all'];
      document.querySelectorAll('button, a, [role="button"], select, [class*="select"]').forEach(el => {
        const text = el.textContent?.toLowerCase() || '';
        if (textContent.some(t => text.includes(t))) {
          results.push({
            tag: el.tagName,
            class: el.className?.toString?.().slice(0, 80) || '',
            text: el.textContent?.trim().slice(0, 100) || '',
            href: el.getAttribute('href') || '',
          });
        }
      });
      return results;
    });
    console.log('Elements with brand/more text:', JSON.stringify(brandTextElements.slice(0, 15), null, 2));

    // Count current brand sections
    console.log('\n[4] Counting brand sections...');
    const brandSections = await collectBrandSections(page);
    console.log(`Found ${brandSections.length} brand sections:`);
    brandSections.slice(0, 20).forEach(s => console.log(` - ${s.title}: ${s.productCount} products`));

    // Take a screenshot
    await page.screenshot({ path: '/tmp/treez-brands-full.png', fullPage: true });
    console.log('\n[5] Full page screenshot saved to /tmp/treez-brands-full.png');

    // Try scrolling to bottom to trigger any lazy loading
    console.log('\n[6] Scrolling to load more content...');
    let previousHeight = 0;
    for (let i = 0; i < 20; i++) {
      await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
      await sleep(1500);
      const currentHeight = await page.evaluate(() => document.body.scrollHeight);
      const sectionCount = await page.evaluate(() =>
        document.querySelectorAll('[class*="products_product__section"]').length
      );
      console.log(` Scroll ${i + 1}: height=${currentHeight}, sections=${sectionCount}`);
      // An unchanged page height means the last scroll loaded nothing new
      if (currentHeight === previousHeight) {
        console.log(' No new content, stopping');
        break;
      }
      previousHeight = currentHeight;
    }

    // Final count
    const finalSections = await collectBrandSections(page);
    console.log(`\n[7] After scrolling: ${finalSections.length} brand sections`);
    finalSections.forEach(s => console.log(` - ${s.title}: ${s.productCount} products`));
    const totalProducts = finalSections.reduce((sum, s) => sum + s.productCount, 0);
    console.log(`\nTotal products across all sections: ${totalProducts}`);
  } finally {
    await browser.close();
  }
}

main().catch(console.error);

View File

@@ -0,0 +1,104 @@
import puppeteer from 'puppeteer';
/** Wait for `ms` milliseconds before continuing. */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((finish) => {
    setTimeout(() => finish(), ms);
  });
}
/**
 * Load the brands page and one product page while recording API-looking
 * network traffic, then print the captured requests and JSON responses.
 */
async function main() {
  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  // try/finally guarantees the browser is closed even when a step throws.
  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // Capture all network requests
    const requests: any[] = [];
    const responses: any[] = [];

    page.on('request', (req) => {
      const url = req.url();
      if (url.includes('api') || url.includes('graphql') ||
          url.includes('product') || url.includes('menu') ||
          url.includes('treez') || url.includes('inventory')) {
        requests.push({
          url: url.slice(0, 150),
          method: req.method(),
          headers: req.headers(),
          postData: req.postData()?.slice(0, 500),
        });
      }
    });

    // NOTE(review): unlike the request filter, this one does not match 'treez' —
    // confirm whether that difference is intentional.
    page.on('response', async (res) => {
      const url = res.url();
      if (url.includes('api') || url.includes('graphql') ||
          url.includes('product') || url.includes('menu') ||
          url.includes('inventory')) {
        try {
          const contentType = res.headers()['content-type'] || '';
          if (contentType.includes('json')) {
            const body = await res.text();
            responses.push({
              url: url.slice(0, 150),
              status: res.status(),
              bodyPreview: body.slice(0, 1000),
            });
          }
        } catch {
          // Best effort: a response whose body cannot be read is skipped.
        }
      }
    });

    console.log('Loading page and capturing network requests...\n');
    await page.goto('https://shop.bestdispensary.com/brands', {
      waitUntil: 'networkidle2',
      timeout: 60000
    });
    await sleep(3000);

    // Bypass age gate
    const ageGate = await page.$('[data-testid="age-gate-modal"]');
    if (ageGate) {
      const btn = await page.$('[data-testid="age-gate-submit-button"]');
      if (btn) await btn.click();
      await sleep(2000);
    }

    // Click load more to trigger more API calls (at most three times)
    for (let i = 0; i < 3; i++) {
      const btn = await page.$('button.collection__load-more');
      if (!btn) {
        // No button left means nothing more to load; stop polling for it.
        break;
      }
      await btn.click();
      await sleep(2000);
    }

    // Also visit a product page
    console.log('\nVisiting a product page...\n');
    await page.goto('https://shop.bestdispensary.com/product/dime-sour-grapes-2g-disposable-cartridge-2-grams', {
      waitUntil: 'networkidle2',
      timeout: 30000
    });
    await sleep(2000);

    console.log('=== API REQUESTS FOUND ===\n');
    requests.forEach((r, i) => {
      console.log((i+1) + '. ' + r.method + ' ' + r.url);
      if (r.postData) {
        console.log(' POST data: ' + r.postData);
      }
    });

    console.log('\n=== JSON RESPONSES ===\n');
    responses.forEach((r, i) => {
      console.log((i+1) + '. ' + r.url);
      console.log(' Status: ' + r.status);
      console.log(' Body: ' + r.bodyPreview.slice(0, 300) + '...\n');
    });
  } finally {
    await browser.close();
  }
}

main().catch(console.error);

View File

@@ -0,0 +1,110 @@
import puppeteer from 'puppeteer';
import fs from 'fs';
/** Delay helper: the returned promise settles after `ms` milliseconds. */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((release) => {
    setTimeout(() => release(), ms);
  });
}
/**
 * Load the shop page to establish a browser session, then issue the product
 * search request from inside the page context and print the result summary.
 * When products are returned they are also written to
 * /tmp/treez-all-products.json.
 */
async function main() {
  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  // try/finally guarantees the browser is closed even when a step throws.
  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    console.log('Loading page first to establish session...\n');
    await page.goto('https://shop.bestdispensary.com/shop', {
      waitUntil: 'networkidle2',
      timeout: 60000
    });
    await sleep(3000);

    // Bypass age gate
    const ageGate = await page.$('[data-testid="age-gate-modal"]');
    if (ageGate) {
      console.log('Bypassing age gate...');
      const btn = await page.$('[data-testid="age-gate-submit-button"]');
      if (btn) await btn.click();
      await sleep(3000);
    }

    // Wait for page to fully load
    await sleep(2000);

    console.log('\nMaking fetch request from page context...\n');

    // Try to make the ES request from within page context
    const result = await page.evaluate(async () => {
      const url = 'https://search-kyrok9udlk.gapcommerceapi.com/product/search';
      // NOTE(review): API key is hard-coded and committed to source control;
      // consider passing it in from configuration instead.
      const apiKey = 'V3jHL9dFzi3Gj4UISM4lr38Nm0GSxcps5OBz1PbS';

      // Search body: one page starting at 0 with up to 1000 hits, filtered on
      // price range, isAboveThreshold and isHideFromMenu.
      const query = {
        from: 0,
        size: 1000,
        query: {
          bool: {
            must: [
              { bool: { filter: { range: { customMinPrice: { gte: 0.01, lte: 500000 }}}}},
              { bool: { should: [{ match: { isAboveThreshold: true }}]}},
              { bool: { should: [{ match: { isHideFromMenu: false }}]}}
            ]
          }
        }
      };

      try {
        const response = await fetch(url, {
          method: 'POST',
          headers: {
            'Content-Type': 'application/json',
            'x-api-key': apiKey,
          },
          body: JSON.stringify(query),
          credentials: 'include',
        });

        if (!response.ok) {
          return { error: 'HTTP ' + response.status, statusText: response.statusText };
        }

        const data = await response.json();
        return {
          total: data.hits?.total?.value,
          count: data.hits?.hits?.length,
          firstProduct: data.hits?.hits?.[0]?._source,
          products: data.hits?.hits?.map((h: any) => h._source),
        };
      } catch (err: any) {
        // Failures are returned as data so the Node side can report them.
        return { error: err.message };
      }
    });

    if (result.error) {
      console.log('Error: ' + result.error);
      if (result.statusText) console.log('Status: ' + result.statusText);
    } else {
      console.log('Total products in ES: ' + result.total);
      console.log('Products returned: ' + result.count);

      if (result.firstProduct) {
        console.log('\n=== PRODUCT FIELDS ===\n');
        console.log(Object.keys(result.firstProduct).sort().join('\n'));
        console.log('\n=== SAMPLE PRODUCT ===\n');
        console.log(JSON.stringify(result.firstProduct, null, 2));

        // Save all products
        if (result.products) {
          fs.writeFileSync('/tmp/treez-all-products.json', JSON.stringify(result.products, null, 2));
          console.log('\nSaved ' + result.products.length + ' products to /tmp/treez-all-products.json');
        }
      }
    }
  } finally {
    await browser.close();
  }
}

// Log unexpected failures instead of leaving an unhandled promise rejection.
main().catch(console.error);

View File

@@ -0,0 +1,171 @@
import puppeteer from 'puppeteer';
import fs from 'fs';
/** Suspend the caller for `ms` milliseconds. */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((resume) => {
    setTimeout(() => resume(), ms);
  });
}
/**
 * Inspect the shop page for embedded product data from three sources and
 * print what is found: the Next.js `__NEXT_DATA__` payload, data-like
 * globals on `window`, and product cards scraped from the DOM (with a small
 * data-quality summary).
 */
async function main() {
  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  // try/finally guarantees the browser is closed even when a step throws.
  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    console.log('Loading page...\n');
    await page.goto('https://shop.bestdispensary.com/shop', {
      waitUntil: 'networkidle2',
      timeout: 60000
    });
    await sleep(3000);

    // Bypass age gate
    const ageGate = await page.$('[data-testid="age-gate-modal"]');
    if (ageGate) {
      console.log('Bypassing age gate...');
      const btn = await page.$('[data-testid="age-gate-submit-button"]');
      if (btn) await btn.click();
      await sleep(3000);
    }

    // Extract __NEXT_DATA__
    console.log('\n=== NEXT.JS DATA ===\n');
    const nextData = await page.evaluate(() => {
      const script = document.getElementById('__NEXT_DATA__');
      if (script) {
        try {
          return JSON.parse(script.textContent || '');
        } catch { return null; }
      }
      return null;
    });

    if (nextData) {
      console.log('Top keys: ' + Object.keys(nextData).join(', '));
      if (nextData.props?.pageProps) {
        console.log('pageProps keys: ' + Object.keys(nextData.props.pageProps).join(', '));

        // Look for products
        const pp = nextData.props.pageProps;
        if (pp.products) {
          console.log('\nFound products: ' + pp.products.length);
          if (pp.products[0]) {
            console.log('Product fields: ' + Object.keys(pp.products[0]).join(', '));
            console.log('\nSample:\n' + JSON.stringify(pp.products[0], null, 2));
          }
        }
        if (pp.initialProducts) {
          console.log('\nFound initialProducts: ' + pp.initialProducts.length);
        }
        if (pp.data) {
          console.log('\nFound data: ' + (Array.isArray(pp.data) ? pp.data.length + ' items' : typeof pp.data));
        }
      }
    }

    // Also check window object
    console.log('\n=== WINDOW GLOBALS ===\n');
    const windowData = await page.evaluate(() => {
      const win = window as any;
      const result: any = {};
      // Common patterns for storing product data
      const patterns = ['products', 'items', 'data', 'state', 'store', 'redux', 'apollo'];
      Object.keys(win).forEach(key => {
        const lowerKey = key.toLowerCase();
        if (patterns.some(p => lowerKey.includes(p))) {
          try {
            const val = win[key];
            if (typeof val === 'object' && val !== null) {
              result[key] = {
                type: Array.isArray(val) ? 'array' : 'object',
                keys: Object.keys(val).slice(0, 10),
                length: Array.isArray(val) ? val.length : undefined,
              };
            }
          } catch {
            // Best effort: globals that throw on access are skipped.
          }
        }
      });
      return result;
    });

    console.log('Window globals with data-like names:');
    Object.entries(windowData).forEach(([k, v]: [string, any]) => {
      console.log(' ' + k + ': ' + v.type + (v.length ? ' (' + v.length + ')' : '') + ' - keys: ' + v.keys?.join(', '));
    });

    // Scrape product cards straight from the DOM
    console.log('\n=== EXTRACTING FROM DOM ===\n');
    const domProducts = await page.evaluate(() => {
      const products: any[] = [];
      document.querySelectorAll('a[href*="/product/"]').forEach((card: Element) => {
        const product: any = {};
        product.href = card.getAttribute('href');
        product.name = card.querySelector('h3, h4, h5')?.textContent?.trim();

        // Get all text
        const allText = card.textContent || '';

        // Extract THC % (first "<number>%" in the card text)
        const thcMatch = allText.match(/(\d+(?:\.\d+)?)\s*%/);
        if (thcMatch) product.thc = thcMatch[1];

        // Extract price (first "$<number>")
        const priceMatch = allText.match(/\$(\d+(?:\.\d+)?)/);
        if (priceMatch) product.price = priceMatch[1];

        // Extract weight (first "<number>g" / "<number>G")
        const weightMatch = allText.match(/(\d+(?:\.\d+)?)\s*[gG]/);
        if (weightMatch) product.weight = weightMatch[1] + 'g';

        // Get brand from card
        const brandEl = card.querySelector('[class*="brand"]');
        product.brand = brandEl?.textContent?.trim();

        // Get strain type; when several labels occur, the last one in this list wins
        const strainTypes = ['Indica', 'Sativa', 'Hybrid', 'I/S', 'S/I', 'CBD'];
        strainTypes.forEach(st => {
          if (allText.includes(st)) product.strainType = st;
        });

        // Get image
        const img = card.querySelector('img');
        product.image = img?.getAttribute('src');

        products.push(product);
      });
      return products;
    });

    console.log('Products from DOM: ' + domProducts.length);
    if (domProducts.length > 0) {
      console.log('\nSample:\n' + JSON.stringify(domProducts[0], null, 2));

      // Show how many cards yielded each field
      console.log('\n=== DATA QUALITY ===');
      const withThc = domProducts.filter(p => p.thc).length;
      const withPrice = domProducts.filter(p => p.price).length;
      const withBrand = domProducts.filter(p => p.brand).length;
      const withStrain = domProducts.filter(p => p.strainType).length;
      console.log('With THC%: ' + withThc + '/' + domProducts.length);
      console.log('With Price: ' + withPrice + '/' + domProducts.length);
      console.log('With Brand: ' + withBrand + '/' + domProducts.length);
      console.log('With Strain: ' + withStrain + '/' + domProducts.length);
    }
  } finally {
    await browser.close();
  }
}

// Log unexpected failures instead of leaving an unhandled promise rejection.
main().catch(console.error);

View File

@@ -0,0 +1,89 @@
import axios from 'axios';
/**
 * Probe a list of candidate Treez product endpoints using client credentials
 * sent as request headers, printing the status and a shape summary for each,
 * then fetch the discounts endpoint and report whether its entries carry
 * product information.
 */
async function main() {
  // NOTE(review): credentials are hard-coded and committed to source control;
  // consider rotating them and loading them from configuration instead.
  const clientId = '29dce682258145c6b1cf71027282d083';
  const clientSecret = 'A57bB49AfD7F4233B1750a0B501B4E16';
  const storeId = 'best';

  // Try various Treez API endpoints for products
  const endpoints = [
    'https://headless.treez.io/v2.0/dispensary/' + storeId + '/ecommerce/products',
    'https://headless.treez.io/v2.0/dispensary/' + storeId + '/ecommerce/menu',
    'https://headless.treez.io/v2.0/dispensary/' + storeId + '/ecommerce/inventory',
    'https://headless.treez.io/v2.0/dispensary/' + storeId + '/ecommerce/catalog',
    'https://headless.treez.io/v2.0/dispensary/' + storeId + '/menu',
    'https://headless.treez.io/v2.0/dispensary/' + storeId + '/products',
    'https://api.treez.io/v2.0/dispensary/' + storeId + '/ecommerce/products',
    'https://api.treez.io/v2.0/dispensary/' + storeId + '/products',
    'https://selltreez.com/api/dispensary/' + storeId + '/products',
  ];

  console.log('Testing Treez product endpoints...\n');

  for (const url of endpoints) {
    console.log('GET ' + url);
    try {
      const response = await axios.get(url, {
        headers: {
          'client_id': clientId,
          'client_secret': clientSecret,
          'Content-Type': 'application/json',
          'Accept': 'application/json',
        },
        timeout: 10000,
      });
      console.log(' Status: ' + response.status);

      const data = response.data;
      if (Array.isArray(data)) {
        console.log(' Array: ' + data.length + ' items');
        if (data[0]) console.log(' Keys: ' + Object.keys(data[0]).slice(0, 10).join(', '));
      } else if (data?.data && Array.isArray(data.data)) {
        console.log(' data[]: ' + data.data.length + ' items');
        if (data.data[0]) console.log(' Keys: ' + Object.keys(data.data[0]).slice(0, 10).join(', '));
      } else {
        console.log(' Type: ' + typeof data);
        // typeof null is 'object', so guard explicitly before Object.keys.
        const isKeyed = typeof data === 'object' && data !== null;
        console.log(' Keys: ' + (isKeyed ? Object.keys(data).join(', ') : 'N/A'));
      }
      console.log('');
    } catch (err: any) {
      // Show the HTTP status when the server answered; otherwise surface the
      // underlying message (timeout, DNS failure, ...) rather than a generic label.
      const detail = err.response?.status ?? err.message ?? 'network error';
      console.log(' Error: ' + detail + '\n');
    }
  }

  // Also check the working discounts endpoint for clues
  console.log('\n=== CHECKING DISCOUNTS FOR PRODUCT REFERENCES ===\n');
  try {
    const response = await axios.get(
      'https://headless.treez.io/v2.0/dispensary/' + storeId + '/ecommerce/discounts?excludeInactive=true&hideUnset=true&includeProdInfo=true',
      {
        headers: {
          'client_id': clientId,
          'client_secret': clientSecret,
        },
        // Same timeout as the probes above so a stalled connection cannot hang the script.
        timeout: 10000,
      }
    );

    // The payload may be wrapped in a `data` envelope or returned bare
    const data = response.data?.data || response.data;
    if (Array.isArray(data) && data.length > 0) {
      console.log('Discounts: ' + data.length);
      console.log('First discount keys: ' + Object.keys(data[0]).join(', '));

      // Check if it has product info
      if (data[0].products) {
        console.log('\nHas products array: ' + data[0].products.length);
        if (data[0].products[0]) {
          console.log('Product keys: ' + Object.keys(data[0].products[0]).join(', '));
        }
      }
    }
  } catch (err: any) {
    console.log('Error: ' + err.message);
  }
}

// Log unexpected failures instead of leaving an unhandled promise rejection.
main().catch(console.error);

View File

@@ -0,0 +1,174 @@
import puppeteer from 'puppeteer';
/** Hold execution for `ms` milliseconds. */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((proceed) => {
    setTimeout(() => proceed(), ms);
  });
}
/**
 * Analyse the product-card markup on the "best" brand page (printing the
 * first three cards in detail), then open the first product's detail page
 * and print the headings, prices, description, cannabinoid, type, weight and
 * image data found there.
 */
async function main() {
  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  // try/finally guarantees the browser is closed even when a step throws.
  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    // Go to a brand page with products
    await page.goto('https://shop.bestdispensary.com/brand/best', {
      waitUntil: 'networkidle2',
      timeout: 60000
    });
    await sleep(3000);

    // Bypass age gate
    const ageGate = await page.$('[data-testid="age-gate-modal"]');
    if (ageGate) {
      const btn = await page.$('[data-testid="age-gate-submit-button"]');
      if (btn) await btn.click();
      await sleep(2000);
    }

    // Get detailed product card structure
    console.log('Analyzing product card structure...\n');
    const productData = await page.evaluate(() => {
      const products: any[] = [];
      document.querySelectorAll('a[href*="/product/"]').forEach((card: Element) => {
        const product: any = {};

        // URL/slug
        product.href = card.getAttribute('href');
        product.slug = product.href?.split('/product/')[1];

        // Image
        const img = card.querySelector('img');
        product.imageUrl = img?.getAttribute('src');
        product.imageAlt = img?.getAttribute('alt');

        // Name (usually in h3 or similar)
        const nameEl = card.querySelector('h3, h4, h5, [class*="name"], [class*="title"]');
        product.name = nameEl?.textContent?.trim();

        // Brand
        const brandEl = card.querySelector('[class*="brand"], [class*="Brand"]');
        product.brand = brandEl?.textContent?.trim();

        // Price
        const priceEl = card.querySelector('[class*="price"], [class*="Price"]');
        product.priceText = priceEl?.textContent?.trim();

        // Category/Type badges
        const badges: string[] = [];
        card.querySelectorAll('[class*="badge"], [class*="tag"], [class*="label"]').forEach((b: Element) => {
          const text = b.textContent?.trim();
          if (text) badges.push(text);
        });
        product.badges = badges;

        // THC/CBD info
        const thcEl = card.querySelector('[class*="thc"], [class*="THC"]');
        const cbdEl = card.querySelector('[class*="cbd"], [class*="CBD"]');
        product.thc = thcEl?.textContent?.trim();
        product.cbd = cbdEl?.textContent?.trim();

        // Weight/size
        const weightEl = card.querySelector('[class*="weight"], [class*="size"], [class*="gram"]');
        product.weight = weightEl?.textContent?.trim();

        // Get all text content for analysis
        product.allText = card.textContent?.replace(/\s+/g, ' ').trim().slice(0, 200);

        // Get all classes on the card
        product.cardClasses = card.className;

        products.push(product);
      });
      return products;
    });

    console.log('Found ' + productData.length + ' products\n');
    console.log('Sample product data:\n');

    // Show first 3 products in detail
    productData.slice(0, 3).forEach((p: any, i: number) => {
      console.log('Product ' + (i+1) + ':');
      console.log(' Name: ' + p.name);
      console.log(' Brand: ' + p.brand);
      console.log(' Slug: ' + p.slug);
      console.log(' Price: ' + p.priceText);
      console.log(' THC: ' + p.thc);
      console.log(' CBD: ' + p.cbd);
      console.log(' Weight: ' + p.weight);
      console.log(' Badges: ' + JSON.stringify(p.badges));
      console.log(' Image: ' + (p.imageUrl ? p.imageUrl.slice(0, 60) + '...' : 'none'));
      console.log(' All Text: ' + p.allText);
      console.log('');
    });

    // Now visit a product detail page
    if (productData.length > 0) {
      const productUrl = 'https://shop.bestdispensary.com' + productData[0].href;
      console.log('\n=== PRODUCT DETAIL PAGE ===');
      console.log('Visiting: ' + productUrl + '\n');
      await page.goto(productUrl, { waitUntil: 'networkidle2', timeout: 30000 });
      await sleep(2000);

      const detailData = await page.evaluate(() => {
        const data: any = {};

        // Get all text elements
        data.h1 = document.querySelector('h1')?.textContent?.trim();
        data.h2s = Array.from(document.querySelectorAll('h2')).map(h => h.textContent?.trim());

        // Price
        const priceEls = document.querySelectorAll('[class*="price"], [class*="Price"]');
        data.prices = Array.from(priceEls).map(p => p.textContent?.trim());

        // Description (falls back to the first <p> on the page)
        const descEl = document.querySelector('[class*="description"], [class*="Description"], p');
        data.description = descEl?.textContent?.trim().slice(0, 300);

        // THC/CBD
        data.cannabinoids = [];
        document.querySelectorAll('[class*="thc"], [class*="THC"], [class*="cbd"], [class*="CBD"], [class*="cannabinoid"]').forEach(el => {
          data.cannabinoids.push(el.textContent?.trim());
        });

        // Category/strain type
        const typeEls = document.querySelectorAll('[class*="strain"], [class*="type"], [class*="category"]');
        data.types = Array.from(typeEls).map(t => t.textContent?.trim());

        // Weight options (texts of 30+ characters are filtered out)
        const weightEls = document.querySelectorAll('[class*="weight"], [class*="size"], [class*="option"]');
        data.weights = Array.from(weightEls).map(w => w.textContent?.trim()).filter(w => w && w.length < 30);

        // Images
        const imgs = document.querySelectorAll('img[src*="product"], img[src*="menu"]');
        data.images = Array.from(imgs).map(img => img.getAttribute('src')).slice(0, 3);

        // Get body text for analysis
        const main = document.querySelector('main');
        data.mainText = main?.textContent?.replace(/\s+/g, ' ').trim().slice(0, 500);

        return data;
      });

      console.log('Product Detail:');
      console.log(' H1: ' + detailData.h1);
      console.log(' H2s: ' + JSON.stringify(detailData.h2s));
      console.log(' Prices: ' + JSON.stringify(detailData.prices));
      console.log(' Description: ' + (detailData.description || 'none'));
      console.log(' Cannabinoids: ' + JSON.stringify(detailData.cannabinoids));
      console.log(' Types: ' + JSON.stringify(detailData.types));
      console.log(' Weights: ' + JSON.stringify(detailData.weights));
      console.log(' Images: ' + JSON.stringify(detailData.images));
      console.log('\n Main text sample: ' + detailData.mainText);
    }
  } finally {
    await browser.close();
  }
}

main().catch(console.error);

View File

@@ -0,0 +1,143 @@
/**
* Test aggressive scrolling to load all products
*/
import puppeteer from 'puppeteer-extra';
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
// Register the stealth plugin on the puppeteer-extra instance at module load,
// before main() launches a browser.
puppeteer.use(StealthPlugin());
/** Return a promise that settles after `ms` milliseconds. */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(() => done(), ms);
  });
}
/**
 * Load the online menu (URL from argv[2], defaulting to the best.treez.io
 * menu), capture every product-search response through a CDP session while
 * scrolling and clicking "Load More", then print totals, a per-category
 * breakdown and one sample FLOWER/VAPE product.
 */
async function main() {
  const browser = await puppeteer.launch({
    headless: 'new',
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  // try/finally guarantees the browser is closed even when a step throws.
  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    const capturedProducts: any[] = [];

    // CDP interception: read the body of every successful product-search response
    const client = await page.target().createCDPSession();
    await client.send('Network.enable');
    client.on('Network.responseReceived', async (event: any) => {
      if (event.response.url.includes('gapcommerceapi.com/product/search') && event.response.status === 200) {
        try {
          const response = await client.send('Network.getResponseBody', { requestId: event.requestId });
          const body = response.base64Encoded ? Buffer.from(response.body, 'base64').toString('utf8') : response.body;
          const json = JSON.parse(body);
          const products = json.hits?.hits?.map((h: any) => h._source) || [];
          capturedProducts.push(...products);
          console.log('Captured ' + products.length + ' (total: ' + capturedProducts.length + ')');
        } catch {
          // Best effort: a body that cannot be fetched or parsed is skipped.
        }
      }
    });

    // Try direct treez.io URL - may have more products
    const url = process.argv[2] || 'https://best.treez.io/onlinemenu/';
    console.log('Loading ' + url);
    try {
      await page.goto(url, { waitUntil: 'networkidle0', timeout: 60000 });
    } catch (e: any) {
      // A navigation error (for example a timeout) is only logged; the run continues.
      console.log('Navigation warning: ' + e.message);
    }
    await sleep(5000);
    console.log('Current URL: ' + page.url());

    // Age gate
    const ageGate = await page.$('[data-testid="age-gate-modal"]');
    if (ageGate) {
      console.log('Bypassing age gate...');
      const btn = await page.$('[data-testid="age-gate-submit-button"]');
      if (btn) await btn.click();
      await sleep(3000);
    }

    console.log('After initial load: ' + capturedProducts.length + ' products');

    // Aggressive scrolling and clicking
    let lastCount = 0;
    let staleCount = 0;
    for (let i = 0; i < 60; i++) {
      await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
      await sleep(800);

      try {
        const btn = await page.$('button.collection__load-more');
        if (btn) {
          // Only click when the button actually has a rendered box
          const visible = await page.evaluate((b: Element) => {
            const rect = b.getBoundingClientRect();
            return rect.width > 0 && rect.height > 0;
          }, btn);
          if (visible) {
            await page.evaluate((b: Element) => b.scrollIntoView({ block: 'center' }), btn);
            await sleep(300);
            await btn.click();
            await sleep(2000);
            console.log('Clicked Load More at scroll ' + (i+1) + ' - ' + capturedProducts.length + ' products');
          }
        }
      } catch {
        // Best effort: a failed click is ignored and the next iteration retries.
      }

      // Check for stale data: stop after 5 consecutive iterations without new products
      if (capturedProducts.length === lastCount) {
        staleCount++;
        if (staleCount >= 5) {
          console.log('No new products for 5 iterations, stopping');
          break;
        }
      } else {
        staleCount = 0;
      }
      lastCount = capturedProducts.length;
    }

    console.log('\nFinal count: ' + capturedProducts.length + ' products');

    // Dedupe by id (products without an id are dropped)
    const seen = new Set<string>();
    const unique = capturedProducts.filter(p => {
      if (!p.id || seen.has(p.id)) return false;
      seen.add(p.id);
      return true;
    });
    console.log('Unique: ' + unique.length);

    // Categories
    const cats: Record<string, number> = {};
    unique.forEach(p => {
      cats[p.category] = (cats[p.category] || 0) + 1;
    });
    console.log('\nCategories:');
    Object.entries(cats).sort((a, b) => b[1] - a[1]).forEach(([c, n]) => console.log(' ' + c + ': ' + n));

    // Sample cannabis product
    const cannabis = unique.find(p => p.category === 'FLOWER' || p.category === 'VAPE');
    if (cannabis) {
      console.log('\nSample cannabis product:');
      console.log(JSON.stringify({
        id: cannabis.id,
        name: cannabis.name,
        brand: cannabis.brand,
        category: cannabis.category,
        subtype: cannabis.subtype,
        availableUnits: cannabis.availableUnits,
        customMinPrice: cannabis.customMinPrice,
      }, null, 2));
    }
  } finally {
    await browser.close();
  }
}

// Log unexpected failures instead of leaving an unhandled promise rejection.
main().catch(console.error);

View File

@@ -0,0 +1,178 @@
/**
* Analyze all product element structures to find all selector patterns
*/
import puppeteer, { Page } from 'puppeteer';
const STORE_ID = 'best';
/** Resolve after roughly `ms` milliseconds. */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((wake) => {
    setTimeout(wake, ms);
  });
}
/**
 * Dismiss the age-gate modal when it is present.
 *
 * Does nothing if the modal is not in the DOM. Otherwise clicks the submit
 * button (if found) and waits two seconds.
 */
async function bypassAgeGate(page: Page): Promise<void> {
  const modal = await page.$('[data-testid="age-gate-modal"]');
  if (!modal) {
    return;
  }

  const submit = await page.$('[data-testid="age-gate-submit-button"]');
  if (submit) {
    await submit.click();
  }
  await sleep(2000);
}
/**
 * Open the Treez brands menu for STORE_ID, then report how many product
 * elements match the current name/price selectors and a set of alternative
 * selectors.
 *
 * Image, font and media requests are aborted. The browser is always closed,
 * even when navigation or evaluation throws.
 */
async function main() {
  console.log('Analyzing Treez product selectors...\n');
  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });
    await page.setRequestInterception(true);
    page.on('request', (req) => {
      if (['image', 'font', 'media'].includes(req.resourceType())) {
        req.abort();
      } else {
        req.continue();
      }
    });

    const url = `https://${STORE_ID}.treez.io/onlinemenu/brands?customerType=ADULT`;
    await page.goto(url, { waitUntil: 'networkidle2', timeout: 60000 });
    await sleep(3000);
    await bypassAgeGate(page);
    await sleep(2000);

    // Analyze product elements (this callback runs inside the page)
    const analysis = await page.evaluate(() => {
      const products = document.querySelectorAll('[class*="product_product__"]');
      // priceClasses and sampleWithNameHTML are returned but never filled in below.
      const results: {
        hasName: number;
        hasPrice: number;
        noName: number;
        noPrice: number;
        nameClasses: Record<string, number>;
        priceClasses: Record<string, number>;
        sampleNoNameHTML: string[];
        sampleWithNameHTML: string[];
        h5Count: number;
        h4Count: number;
        h3Count: number;
        allHeadingsWithName: number;
      } = {
        hasName: 0,
        hasPrice: 0,
        noName: 0,
        noPrice: 0,
        nameClasses: {},
        priceClasses: {},
        sampleNoNameHTML: [],
        sampleWithNameHTML: [],
        h5Count: 0,
        h4Count: 0,
        h3Count: 0,
        allHeadingsWithName: 0,
      };

      products.forEach((el) => {
        // Check current selectors
        const nameEl = el.querySelector('[class*="product__name"], [class*="name__"]');
        const priceEl = el.querySelector('[class*="price"]');

        if (nameEl) {
          results.hasName++;
          const cls = nameEl.className?.toString?.() || '';
          results.nameClasses[cls] = (results.nameClasses[cls] || 0) + 1;
        } else {
          results.noName++;
          // Keep at most three truncated samples for manual inspection
          if (results.sampleNoNameHTML.length < 3) {
            results.sampleNoNameHTML.push(el.innerHTML.slice(0, 500));
          }
        }

        if (priceEl) {
          results.hasPrice++;
        } else {
          results.noPrice++;
        }

        // Check for headings that might contain names
        if (el.querySelector('h5')) results.h5Count++;
        if (el.querySelector('h4')) results.h4Count++;
        if (el.querySelector('h3')) results.h3Count++;

        // Any heading with text
        const anyHeading = el.querySelector('h1, h2, h3, h4, h5, h6');
        if (anyHeading?.textContent?.trim()) {
          results.allHeadingsWithName++;
        }
      });

      return results;
    });

    console.log('Product Analysis:');
    console.log(`Total products: ${analysis.hasName + analysis.noName}`);
    console.log(`With name (current selector): ${analysis.hasName}`);
    console.log(`Without name (current selector): ${analysis.noName}`);
    console.log(`With price: ${analysis.hasPrice}`);
    console.log(`\nHeading counts:`);
    console.log(` H5: ${analysis.h5Count}`);
    console.log(` H4: ${analysis.h4Count}`);
    console.log(` H3: ${analysis.h3Count}`);
    console.log(` Any heading with text: ${analysis.allHeadingsWithName}`);

    console.log('\nName classes found:');
    Object.entries(analysis.nameClasses).forEach(([cls, count]) => {
      console.log(` (${count}x) ${cls.slice(0, 80)}`);
    });

    console.log('\n--- Sample products WITHOUT name selector ---');
    analysis.sampleNoNameHTML.forEach((html, i) => {
      console.log(`\n[Sample ${i + 1}]:`);
      console.log(html);
    });

    // Try different selectors
    console.log('\n\n--- Testing alternative selectors ---');
    const altResults = await page.evaluate(() => {
      const products = document.querySelectorAll('[class*="product_product__"]');
      const tests: Record<string, number> = {};
      const selectors = [
        'h5',
        'h4',
        'h3',
        '[class*="heading"]',
        '[class*="title"]',
        '[class*="name"]',
        'a[href*="/product/"]',
        '.product_product__name__JcEk0',
        '[class*="ProductCard"]',
      ];
      selectors.forEach(sel => {
        let count = 0;
        products.forEach(el => {
          if (el.querySelector(sel)) count++;
        });
        tests[sel] = count;
      });
      return tests;
    });
    Object.entries(altResults).forEach(([sel, count]) => {
      console.log(` ${sel}: ${count} products`);
    });
  } finally {
    // Always release the browser process, even if a step above threw.
    await browser.close();
  }
}
main().catch(console.error);

View File

@@ -0,0 +1,116 @@
import puppeteer from 'puppeteer-extra';
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
import fs from 'fs';
puppeteer.use(StealthPlugin());
/** Pause the caller for roughly `ms` milliseconds. */
async function sleep(ms: number): Promise<void> {
  const timer = new Promise<void>((done) => {
    setTimeout(done, ms);
  });
  return timer;
}
/**
 * Load the shop page with the stealth plugin enabled, capture product-search
 * responses through a CDP session, scroll / click "Load More" 30 times, then
 * print a summary and write the de-duplicated products to
 * /tmp/treez-products.json.
 *
 * The browser is always closed, even when a step throws.
 */
async function main() {
  const browser = await puppeteer.launch({
    headless: 'new',
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });

    const capturedProducts: any[] = [];

    // Use CDP to intercept responses
    const client = await page.target().createCDPSession();
    await client.send('Network.enable');
    client.on('Network.responseReceived', async (event) => {
      const url = event.response.url;
      if (!url.includes('gapcommerceapi.com/product/search') || event.response.status !== 200) {
        return;
      }
      try {
        const response = await client.send('Network.getResponseBody', {
          requestId: event.requestId,
        });
        const body = response.base64Encoded
          ? Buffer.from(response.body, 'base64').toString('utf8')
          : response.body;
        const json = JSON.parse(body);
        const products = json.hits?.hits?.map((h: any) => h._source) || [];
        capturedProducts.push(...products);
        console.log('Captured ' + products.length + ' products (total: ' + capturedProducts.length + ')');
      } catch (err: any) {
        // Capture is best-effort: report the failure instead of hiding it, then carry on.
        console.log('Could not get response body: ' + err.message);
      }
    });

    console.log('Loading page with Stealth plugin...\n');
    await page.goto('https://shop.bestdispensary.com/shop', {
      waitUntil: 'networkidle2',
      timeout: 60000
    });
    await sleep(3000);

    // Bypass age gate
    const ageGate = await page.$('[data-testid="age-gate-modal"]');
    if (ageGate) {
      console.log('Bypassing age gate...');
      const btn = await page.$('[data-testid="age-gate-submit-button"]');
      if (btn) await btn.click();
      await sleep(3000);
    }

    // Wait for API calls
    await sleep(5000);
    console.log('Initial capture: ' + capturedProducts.length + ' products');

    // Scroll and click load more
    console.log('\nScrolling and clicking Load More...');
    for (let i = 0; i < 30; i++) {
      await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
      await sleep(1000);
      try {
        const btn = await page.$('button.collection__load-more');
        if (btn) {
          await btn.click();
          await sleep(2000);
        }
      } catch {
        // Deliberately ignored: a failed click must not abort the loop.
      }
      if (i % 5 === 0) {
        console.log('Progress: ' + capturedProducts.length + ' products');
      }
    }

    console.log('\n=== RESULTS ===\n');
    if (capturedProducts.length > 0) {
      // Dedupe by id / productId; products with neither are dropped
      const seen = new Set();
      const unique = capturedProducts.filter(p => {
        const id = p.id || p.productId;
        if (!id || seen.has(id)) return false;
        seen.add(id);
        return true;
      });
      console.log('Total captured: ' + capturedProducts.length);
      console.log('Unique products: ' + unique.length);

      // Every captured product may lack an id, leaving nothing to inspect or save.
      if (unique.length > 0) {
        console.log('\nFields available:');
        console.log(Object.keys(unique[0]).sort().join('\n'));
        console.log('\nSample product:\n' + JSON.stringify(unique[0], null, 2));
        fs.writeFileSync('/tmp/treez-products.json', JSON.stringify(unique, null, 2));
        console.log('\nSaved to /tmp/treez-products.json');
      } else {
        console.log('\nNo captured product has an id or productId - nothing saved');
      }
    } else {
      console.log('No products captured - API still blocking');
    }
  } finally {
    // Always release the browser process, even if a step above threw.
    await browser.close();
  }
}
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});

View File

@@ -0,0 +1,108 @@
/**
* Test if blocking stylesheets affects product detection
*/
import puppeteer, { Page } from 'puppeteer';
const STORE_ID = 'best';
/** Wait roughly `ms` milliseconds before resolving. */
async function sleep(ms: number): Promise<void> {
  await new Promise<void>((release) => {
    setTimeout(release, ms);
  });
}
/**
 * Dismiss the age-gate modal when it is present, logging that it was seen.
 *
 * Does nothing if the modal is not in the DOM. Otherwise clicks the submit
 * button (if found) and waits two seconds.
 */
async function bypassAgeGate(page: Page): Promise<void> {
  const modal = await page.$('[data-testid="age-gate-modal"]');
  if (!modal) {
    return;
  }

  console.log(' Age gate detected, bypassing...');
  const submit = await page.$('[data-testid="age-gate-submit-button"]');
  if (submit) {
    await submit.click();
  }
  await sleep(2000);
}
/**
 * Count product elements on the page, and how many of them contain an
 * element matching the name selectors and the price selector.
 *
 * @returns total matched product elements, plus the with-name and with-price counts
 */
async function countProducts(page: Page): Promise<{ total: number; withName: number; withPrice: number }> {
  return page.evaluate(() => {
    // This callback runs inside the page, so it can only use DOM APIs.
    const cards = Array.from(document.querySelectorAll('[class*="product_product__"]'));
    const named = cards.filter(
      (card) => card.querySelector('[class*="product__name"]') || card.querySelector('[class*="name__"]')
    );
    const priced = cards.filter((card) => card.querySelector('[class*="price"]'));
    return { total: cards.length, withName: named.length, withPrice: priced.length };
  });
}
/**
 * Load the Treez brands menu for STORE_ID with or without stylesheet
 * requests blocked, then print product counts and the tag/class of the first
 * product element's descendants.
 *
 * Image, font and media requests are always aborted. The browser is always
 * closed, so a failure in one run does not leave a browser process behind.
 *
 * @param blockStylesheets when true, stylesheet requests are aborted as well
 */
async function testWithBlocking(blockStylesheets: boolean): Promise<void> {
  console.log(`\n${'='.repeat(50)}`);
  console.log(`Testing with ${blockStylesheets ? 'BLOCKED' : 'ALLOWED'} stylesheets`);
  console.log('='.repeat(50));

  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1920, height: 1080 });
    await page.setRequestInterception(true);
    page.on('request', (req) => {
      const type = req.resourceType();
      const alwaysBlocked = type === 'image' || type === 'font' || type === 'media';
      if (alwaysBlocked || (type === 'stylesheet' && blockStylesheets)) {
        req.abort();
      } else {
        req.continue();
      }
    });

    const url = `https://${STORE_ID}.treez.io/onlinemenu/brands?customerType=ADULT`;
    console.log(`Navigating to ${url}`);
    await page.goto(url, { waitUntil: 'networkidle2', timeout: 60000 });
    await sleep(3000);
    await bypassAgeGate(page);
    await sleep(2000);

    const counts = await countProducts(page);
    console.log(`Total product elements: ${counts.total}`);
    console.log(`With name selector: ${counts.withName}`);
    console.log(`With price selector: ${counts.withPrice}`);

    // Check what classes exist on product elements (first product, first 20 descendants)
    const sampleClasses = await page.evaluate(() => {
      const products = document.querySelectorAll('[class*="product_product__"]');
      const sample = products[0];
      if (!sample) return 'No products found';
      const children = Array.from(sample.querySelectorAll('*')).slice(0, 20);
      return children.map(el => ({
        tag: el.tagName,
        class: el.className?.toString?.().slice(0, 80) || '',
      }));
    });

    console.log('\nSample product children:');
    if (Array.isArray(sampleClasses)) {
      sampleClasses.forEach(c => console.log(` [${c.tag}] ${c.class}`));
    } else {
      console.log(` ${sampleClasses}`);
    }
  } finally {
    // Always release the browser process, even if a step above threw.
    await browser.close();
  }
}
/**
 * Run the product-detection test twice, one after the other: first with
 * stylesheets blocked, then with stylesheets allowed.
 */
async function main() {
  console.log('Testing stylesheet impact on Treez product detection');
  // Order matters for the log output: blocked run first, allowed run second.
  for (const blockStylesheets of [true, false]) {
    await testWithBlocking(blockStylesheets);
  }
}
main().catch(console.error);

View File

@@ -0,0 +1,117 @@
import puppeteer from 'puppeteer';
import fs from 'fs';
/** Resolve once roughly `ms` milliseconds have elapsed. */
async function sleep(ms: number): Promise<void> {
  const elapsed = new Promise<void>((finish) => {
    setTimeout(finish, ms);
  });
  await elapsed;
}
/**
 * Load the shop page, capture product-search responses through a CDP
 * session, click "Load More" while the button is present and visible (at
 * most 30 times), then print a summary and write the de-duplicated products
 * to /tmp/treez-products.json.
 *
 * The browser is always closed, even when a step throws.
 */
async function main() {
  const browser = await puppeteer.launch({
    headless: true,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });

  try {
    const page = await browser.newPage();

    // Enable request interception but don't modify requests
    await page.setRequestInterception(true);
    const capturedProducts: any[] = [];
    page.on('request', (request) => {
      request.continue();
    });

    // Use CDP to intercept responses
    const client = await page.target().createCDPSession();
    await client.send('Network.enable');
    client.on('Network.responseReceived', async (event) => {
      const url = event.response.url;
      if (!url.includes('gapcommerceapi.com/product/search')) {
        return;
      }
      console.log('ES Response detected: ' + event.response.status);
      try {
        const response = await client.send('Network.getResponseBody', {
          requestId: event.requestId,
        });
        const body = response.base64Encoded
          ? Buffer.from(response.body, 'base64').toString('utf8')
          : response.body;
        const json = JSON.parse(body);
        const products = json.hits?.hits?.map((h: any) => h._source) || [];
        capturedProducts.push(...products);
        console.log('Captured ' + products.length + ' products (total: ' + capturedProducts.length + ')');
      } catch (err: any) {
        console.log('Could not get response body: ' + err.message);
      }
    });

    console.log('Loading page with CDP interception...\n');
    await page.goto('https://shop.bestdispensary.com/shop', {
      waitUntil: 'networkidle2',
      timeout: 60000
    });
    await sleep(3000);

    // Bypass age gate
    const ageGate = await page.$('[data-testid="age-gate-modal"]');
    if (ageGate) {
      console.log('Bypassing age gate...');
      const btn = await page.$('[data-testid="age-gate-submit-button"]');
      if (btn) await btn.click();
      await sleep(3000);
    }

    // Click load more many times; stop as soon as the button is gone, hidden, or a click fails
    console.log('\nClicking Load More...');
    for (let i = 0; i < 30; i++) {
      try {
        const btn = await page.$('button.collection__load-more');
        if (!btn) break;
        const visible = await page.evaluate((b) => {
          const rect = b.getBoundingClientRect();
          return rect.width > 0 && rect.height > 0;
        }, btn);
        if (!visible) break;
        await btn.click();
        await sleep(1500);
        console.log('Click ' + (i + 1) + ': ' + capturedProducts.length + ' products');
      } catch {
        break;
      }
    }

    console.log('\n=== RESULTS ===\n');
    console.log('Total captured: ' + capturedProducts.length);
    if (capturedProducts.length > 0) {
      // Dedupe by id / productId; products with neither are dropped
      const seen = new Set();
      const unique = capturedProducts.filter(p => {
        const id = p.id || p.productId;
        if (!id || seen.has(id)) return false;
        seen.add(id);
        return true;
      });
      console.log('Unique products: ' + unique.length);

      // Every captured product may lack an id, leaving nothing to inspect or save.
      if (unique.length > 0) {
        console.log('\nFields: ' + Object.keys(unique[0]).sort().join('\n'));
        console.log('\nSample:\n' + JSON.stringify(unique[0], null, 2));
        fs.writeFileSync('/tmp/treez-products.json', JSON.stringify(unique, null, 2));
        console.log('\nSaved to /tmp/treez-products.json');
      } else {
        console.log('\nNo captured product has an id or productId - nothing saved');
      }
    }
  } finally {
    // Always release the browser process, even if a step above threw.
    await browser.close();
  }
}
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});

View File

@@ -7,6 +7,7 @@
* *
* NO username/password auth in API. Use tokens only. * NO username/password auth in API. Use tokens only.
* *
* Trusted origins are managed via /admin and stored in the trusted_origins table.
* Localhost bypass: curl from 127.0.0.1 gets automatic admin access. * Localhost bypass: curl from 127.0.0.1 gets automatic admin access.
*/ */
import { Request, Response, NextFunction } from 'express'; import { Request, Response, NextFunction } from 'express';
@@ -16,8 +17,8 @@ import { pool } from '../db/pool';
const JWT_SECRET = process.env.JWT_SECRET || 'change_this_in_production'; const JWT_SECRET = process.env.JWT_SECRET || 'change_this_in_production';
// Trusted origins that bypass auth for internal/same-origin requests // Fallback trusted origins (used if DB unavailable)
const TRUSTED_ORIGINS = [ const FALLBACK_TRUSTED_ORIGINS = [
'https://cannaiq.co', 'https://cannaiq.co',
'https://www.cannaiq.co', 'https://www.cannaiq.co',
'https://findadispo.com', 'https://findadispo.com',
@@ -29,31 +30,108 @@ const TRUSTED_ORIGINS = [
'http://localhost:5173', 'http://localhost:5173',
]; ];
// Pattern-based trusted origins (wildcards) const FALLBACK_TRUSTED_PATTERNS = [
const TRUSTED_ORIGIN_PATTERNS = [ /^https:\/\/.*\.cannabrands\.app$/,
/^https:\/\/.*\.cannabrands\.app$/, // *.cannabrands.app /^https:\/\/.*\.cannaiq\.co$/,
/^https:\/\/.*\.cannaiq\.co$/, // *.cannaiq.co
]; ];
// Trusted IPs for internal pod-to-pod communication const FALLBACK_TRUSTED_IPS = [
const TRUSTED_IPS = [
'127.0.0.1', '127.0.0.1',
'::1', '::1',
'::ffff:127.0.0.1', '::ffff:127.0.0.1',
]; ];
// Cache for DB-backed trusted origins. Null until the first successful load,
// and again after clearTrustedOriginsCache() resets it.
let trustedOriginsCache: {
  ips: Set<string>;
  domains: Set<string>;
  patterns: RegExp[];
  loadedAt: Date;
} | null = null;

/**
 * Load trusted origins from DB with caching (5 min TTL)
 *
 * Active rows of `trusted_origins` are sorted by `origin_type`:
 * - `ip`      -> added verbatim to `ips`
 * - `domain`  -> stored as a full origin; a value without an explicit
 *                http:// or https:// scheme is added under both schemes
 * - `pattern` -> compiled to a RegExp; invalid patterns are logged and skipped
 *
 * Rows whose `origin_value` is not a non-empty string are skipped so one bad
 * row cannot discard the whole result.
 *
 * @returns the cached entry while it is younger than 5 minutes, otherwise a
 *          freshly loaded one. If the query throws, the hard-coded FALLBACK_*
 *          lists are returned and NOT cached, so the next call retries the DB.
 */
async function loadTrustedOrigins(): Promise<{
  ips: Set<string>;
  domains: Set<string>;
  patterns: RegExp[];
}> {
  // Return cached if fresh
  if (trustedOriginsCache) {
    const age = Date.now() - trustedOriginsCache.loadedAt.getTime();
    if (age < 5 * 60 * 1000) {
      return trustedOriginsCache;
    }
  }

  try {
    const result = await pool.query(`
      SELECT origin_type, origin_value
      FROM trusted_origins
      WHERE active = true
    `);

    const ips = new Set<string>();
    const domains = new Set<string>();
    const patterns: RegExp[] = [];

    for (const row of result.rows) {
      const value = row.origin_value;
      if (typeof value !== 'string' || value === '') {
        continue;
      }

      switch (row.origin_type) {
        case 'ip':
          ips.add(value);
          break;
        case 'domain':
          // Store as full origin for comparison. Test for an actual scheme
          // prefix: a bare hostname such as "httpbin.org" also starts with
          // "http" but still needs both schemes added.
          if (/^https?:\/\//i.test(value)) {
            domains.add(value);
          } else {
            domains.add(`https://${value}`);
            domains.add(`http://${value}`);
          }
          break;
        case 'pattern':
          try {
            patterns.push(new RegExp(value));
          } catch {
            console.warn(`[Auth] Invalid trusted origin pattern: ${value}`);
          }
          break;
      }
    }

    trustedOriginsCache = { ips, domains, patterns, loadedAt: new Date() };
    return trustedOriginsCache;
  } catch (error) {
    // DB not available or table doesn't exist - use fallbacks
    return {
      ips: new Set(FALLBACK_TRUSTED_IPS),
      domains: new Set(FALLBACK_TRUSTED_ORIGINS),
      patterns: FALLBACK_TRUSTED_PATTERNS,
    };
  }
}
/**
 * Drop the in-memory trusted-origins cache by resetting it to null
 * (intended to be called when an admin updates origins).
 */
export function clearTrustedOriginsCache(): void {
  trustedOriginsCache = null;
}
/** /**
* Check if request is from a trusted origin/IP * Check if request is from a trusted origin/IP
*/ */
function isTrustedRequest(req: Request): boolean { async function isTrustedRequest(req: Request): Promise<boolean> {
const { ips, domains, patterns } = await loadTrustedOrigins();
// Check origin header // Check origin header
const origin = req.headers.origin; const origin = req.headers.origin;
if (origin) { if (origin) {
if (TRUSTED_ORIGINS.includes(origin)) { if (domains.has(origin)) {
return true; return true;
} }
// Check pattern-based origins (wildcards like *.cannabrands.app) for (const pattern of patterns) {
for (const pattern of TRUSTED_ORIGIN_PATTERNS) {
if (pattern.test(origin)) { if (pattern.test(origin)) {
return true; return true;
} }
@@ -63,16 +141,15 @@ function isTrustedRequest(req: Request): boolean {
// Check referer header (for same-origin requests without CORS) // Check referer header (for same-origin requests without CORS)
const referer = req.headers.referer; const referer = req.headers.referer;
if (referer) { if (referer) {
for (const trusted of TRUSTED_ORIGINS) { for (const trusted of domains) {
if (referer.startsWith(trusted)) { if (referer.startsWith(trusted)) {
return true; return true;
} }
} }
// Check pattern-based referers
try { try {
const refererUrl = new URL(referer); const refererUrl = new URL(referer);
const refererOrigin = refererUrl.origin; const refererOrigin = refererUrl.origin;
for (const pattern of TRUSTED_ORIGIN_PATTERNS) { for (const pattern of patterns) {
if (pattern.test(refererOrigin)) { if (pattern.test(refererOrigin)) {
return true; return true;
} }
@@ -84,7 +161,7 @@ function isTrustedRequest(req: Request): boolean {
// Check IP for internal requests (pod-to-pod, localhost) // Check IP for internal requests (pod-to-pod, localhost)
const clientIp = req.ip || req.socket.remoteAddress || ''; const clientIp = req.ip || req.socket.remoteAddress || '';
if (TRUSTED_IPS.includes(clientIp)) { if (ips.has(clientIp)) {
return true; return true;
} }
@@ -200,7 +277,7 @@ export async function authMiddleware(req: AuthRequest, res: Response, next: Next
} }
// No token provided - check trusted origins for API access (WordPress, etc.) // No token provided - check trusted origins for API access (WordPress, etc.)
if (isTrustedRequest(req)) { if (await isTrustedRequest(req)) {
req.user = { req.user = {
id: 0, id: 0,
email: 'internal@system', email: 'internal@system',

View File

@@ -131,6 +131,14 @@ export interface PromotionSummary {
newDispensaryIds: number[]; newDispensaryIds: number[];
} }
/**
 * Task tracking info for modification audit trail
 *
 * Passed optionally to promoteLocation / promoteDiscoveredLocations. When
 * present, its fields are written to the dispensary row's last_modified_*
 * columns; when absent, those columns receive null.
 */
export interface TaskTrackingInfo {
// Stored in the last_modified_task_id column.
taskId: number;
// Stored in the last_modified_by_task column.
taskRole: string;
}
/** /**
* Generate a URL-safe slug from name and city * Generate a URL-safe slug from name and city
*/ */
@@ -143,6 +151,19 @@ function generateSlug(name: string, city: string, state: string): string {
return base; return base;
} }
/**
 * Classify a platform menu URL as 'standalone', 'embedded' or 'unknown'.
 *
 * Path fragments are checked first, in a fixed order. A URL that matches none
 * of them is 'embedded' when it does not mention dutchie.com (custom domain =
 * embedded widget on the store's site) and 'unknown' otherwise. A null or
 * empty URL is 'unknown'.
 */
function deriveMenuType(url: string | null): string {
  if (!url) {
    return 'unknown';
  }

  // Ordered rules: the first fragment found in the URL decides the type.
  const pathRules: Array<[string, string]> = [
    ['/dispensary/', 'standalone'],
    ['/embedded-menu/', 'embedded'],
    ['/stores/', 'standalone'],
  ];
  for (const [fragment, menuType] of pathRules) {
    if (url.includes(fragment)) {
      return menuType;
    }
  }

  return url.includes('dutchie.com') ? 'unknown' : 'embedded';
}
/** /**
* Log a promotion action to dutchie_promotion_log * Log a promotion action to dutchie_promotion_log
*/ */
@@ -283,7 +304,8 @@ async function ensureCrawlerProfile(
* Idempotent: uses ON CONFLICT on platform_dispensary_id * Idempotent: uses ON CONFLICT on platform_dispensary_id
*/ */
async function promoteLocation( async function promoteLocation(
loc: DiscoveryLocationRow loc: DiscoveryLocationRow,
taskTracking?: TaskTrackingInfo
): Promise<PromotionResult> { ): Promise<PromotionResult> {
const slug = loc.platform_slug || generateSlug(loc.name, loc.city || '', loc.state_code || ''); const slug = loc.platform_slug || generateSlug(loc.name, loc.city || '', loc.state_code || '');
@@ -325,13 +347,17 @@ async function promoteLocation(
dutchie_verified, dutchie_verified,
dutchie_verified_at, dutchie_verified_at,
dutchie_discovery_id, dutchie_discovery_id,
last_modified_at,
last_modified_by_task,
last_modified_task_id,
last_store_discovery_at,
created_at, created_at,
updated_at updated_at
) VALUES ( ) VALUES (
$1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $1, $2, $3, $4, $5, $6, $7, $8, $9, $10,
$11, $12, $13, $14, $15, $16, $17, $18, $19, $20, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20,
$21, $22, $23, $24, $25, $26, $27, $28, $29, $30, $21, $22, $23, $24, $25, $26, $27, $28, $29, $30,
$31, $32, $33, $34, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP $31, $32, $33, $34, $35, $36, $37, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP
) )
ON CONFLICT (platform_dispensary_id) WHERE platform_dispensary_id IS NOT NULL ON CONFLICT (platform_dispensary_id) WHERE platform_dispensary_id IS NOT NULL
DO UPDATE SET DO UPDATE SET
@@ -362,6 +388,13 @@ async function promoteLocation(
country = EXCLUDED.country, country = EXCLUDED.country,
status = EXCLUDED.status, status = EXCLUDED.status,
dutchie_discovery_id = EXCLUDED.dutchie_discovery_id, dutchie_discovery_id = EXCLUDED.dutchie_discovery_id,
dutchie_verified = TRUE,
dutchie_verified_at = COALESCE(dispensaries.dutchie_verified_at, CURRENT_TIMESTAMP),
crawl_enabled = COALESCE(dispensaries.crawl_enabled, TRUE),
last_modified_at = EXCLUDED.last_modified_at,
last_modified_by_task = EXCLUDED.last_modified_by_task,
last_modified_task_id = EXCLUDED.last_modified_task_id,
last_store_discovery_at = CURRENT_TIMESTAMP,
updated_at = CURRENT_TIMESTAMP updated_at = CURRENT_TIMESTAMP
RETURNING id, (xmax = 0) AS inserted RETURNING id, (xmax = 0) AS inserted
`, [ `, [
@@ -382,7 +415,7 @@ async function promoteLocation(
loc.timezone, // $15 timezone loc.timezone, // $15 timezone
loc.platform_location_id, // $16 platform_dispensary_id loc.platform_location_id, // $16 platform_dispensary_id
loc.platform_menu_url, // $17 menu_url loc.platform_menu_url, // $17 menu_url
'dutchie', // $18 menu_type deriveMenuType(loc.platform_menu_url), // $18 menu_type
loc.description, // $19 description loc.description, // $19 description
loc.logo_image, // $20 logo_image loc.logo_image, // $20 logo_image
loc.banner_image, // $21 banner_image loc.banner_image, // $21 banner_image
@@ -399,6 +432,9 @@ async function promoteLocation(
true, // $32 dutchie_verified true, // $32 dutchie_verified
new Date(), // $33 dutchie_verified_at new Date(), // $33 dutchie_verified_at
loc.id, // $34 dutchie_discovery_id loc.id, // $34 dutchie_discovery_id
taskTracking ? new Date() : null, // $35 last_modified_at
taskTracking?.taskRole || null, // $36 last_modified_by_task
taskTracking?.taskId || null, // $37 last_modified_task_id
]); ]);
const dispensaryId = upsertResult.rows[0].id; const dispensaryId = upsertResult.rows[0].id;
@@ -446,10 +482,12 @@ async function promoteLocation(
* *
* @param stateCode Optional filter by state (e.g., 'CA', 'AZ') * @param stateCode Optional filter by state (e.g., 'CA', 'AZ')
* @param dryRun If true, only validate without making changes * @param dryRun If true, only validate without making changes
* @param taskTracking Optional task info for modification audit trail
*/ */
export async function promoteDiscoveredLocations( export async function promoteDiscoveredLocations(
stateCode?: string, stateCode?: string,
dryRun = false dryRun = false,
taskTracking?: TaskTrackingInfo
): Promise<PromotionSummary> { ): Promise<PromotionSummary> {
const startTime = Date.now(); const startTime = Date.now();
@@ -524,7 +562,7 @@ export async function promoteDiscoveredLocations(
} }
try { try {
const promotionResult = await promoteLocation(loc); const promotionResult = await promoteLocation(loc, taskTracking);
results.push(promotionResult); results.push(promotionResult);
if (promotionResult.action === 'created') { if (promotionResult.action === 'created') {

View File

@@ -0,0 +1,250 @@
/**
* iHeartJane Platform Normalizer
*
* Normalizes raw Jane/Algolia product responses to canonical format.
*
* Jane uses Algolia for product search. Key differences from Dutchie:
* - Product ID is numeric (not MongoDB ObjectId)
* - Prices are per-weight (price_gram, price_eighth_ounce, etc.)
* - Category = strain type (hybrid, indica, sativa)
* - Kind = product type (vape, flower, edible, etc.)
*/
import { BaseNormalizer } from './base';
import {
NormalizedProduct,
NormalizedPricing,
NormalizedAvailability,
NormalizedBrand,
NormalizedCategory,
} from '../types';
export class JaneNormalizer extends BaseNormalizer {
readonly platform = 'jane';
readonly supportedVersions = [1];
// ============================================================
// EXTRACTION
// ============================================================
extractProducts(rawJson: any): any[] {
  // Candidate locations of the product array, in priority order:
  //   1. Algolia response format: { hits: [...] }
  //   2. Direct array of products
  //   3. Products array wrapper: { products: [...] }
  //   4. Nested response: { data: { hits: [...] } }
  const candidates = [rawJson?.hits, rawJson, rawJson?.products, rawJson?.data?.hits];

  const found = candidates.find((candidate) => Array.isArray(candidate));
  if (found) {
    return found;
  }

  console.warn('[JaneNormalizer] Could not extract products from payload');
  return [];
}
validatePayload(rawJson: any): { valid: boolean; errors: string[] } {
  // A missing payload is reported on its own; nothing else can be checked.
  if (!rawJson) {
    return { valid: false, errors: ['Payload is null or undefined'] };
  }

  const errors: string[] = [];

  if (this.extractProducts(rawJson).length === 0) {
    errors.push('No products found in payload');
  }

  // Check for Algolia errors
  if (rawJson?.message) {
    errors.push(`Algolia error: ${rawJson.message}`);
  }

  return { valid: errors.length === 0, errors };
}
// ============================================================
// NORMALIZATION
// ============================================================
/**
 * Map one raw Jane product to the canonical product shape.
 *
 * @param rawProduct   one product hit from the Jane payload
 * @param dispensaryId internal dispensary id copied onto the result
 * @returns the normalized product, or null (with a warning) when the raw
 *          product has no id (product_id / objectID) or no name
 */
protected normalizeProduct(rawProduct: any, dispensaryId: number): NormalizedProduct | null {
  const externalId = rawProduct.product_id || rawProduct.objectID;
  if (!externalId) {
    console.warn('[JaneNormalizer] Product missing ID, skipping');
    return null;
  }

  const name = rawProduct.name;
  if (!name) {
    console.warn(`[JaneNormalizer] Product ${externalId} missing name, skipping`);
    return null;
  }

  // Coerce to real booleans: when store_types is absent the optional chain
  // yields undefined, which would otherwise leak into medicalOnly / recOnly.
  const soldMedical = Boolean(rawProduct.store_types?.includes('medical'));
  const soldRecreational = Boolean(rawProduct.store_types?.includes('recreational'));

  return {
    externalProductId: String(externalId),
    dispensaryId,
    platform: 'jane',
    platformDispensaryId: '', // Will be set by handler

    // Core fields
    name,
    brandName: rawProduct.brand || null,
    brandId: rawProduct.product_brand_id ? String(rawProduct.product_brand_id) : null,
    category: rawProduct.kind || null, // Jane's "kind" = product type (vape, flower, etc.)
    subcategory: rawProduct.kind_subtype || rawProduct.root_subtype || null,
    type: rawProduct.kind || null,
    strainType: rawProduct.category || null, // Jane's "category" = strain (hybrid, indica, sativa)

    // Potency (the same percent fields feed both the *Percent and *Content outputs)
    thcPercent: rawProduct.percent_thc ?? null,
    cbdPercent: rawProduct.percent_cbd ?? null,
    thcContent: rawProduct.percent_thc ?? null,
    cbdContent: rawProduct.percent_cbd ?? null,

    // Status - Jane products in search are always active
    status: 'Active',
    isActive: true,
    medicalOnly: soldMedical && !soldRecreational,
    recOnly: soldRecreational && !soldMedical,

    // Images
    primaryImageUrl: rawProduct.image_urls?.[0] || null,
    images: (rawProduct.image_urls || []).map((url: string, i: number) => ({
      url,
      position: i,
    })),

    // Raw reference
    rawProduct,
  };
}
/**
 * Build canonical pricing from Jane's per-weight price fields.
 *
 * Returns null when the raw product has no id. Only recreational price
 * fields are filled in; all medical price fields are null.
 */
protected normalizePricing(rawProduct: any): NormalizedPricing | null {
  const externalId = rawProduct.product_id || rawProduct.objectID;
  if (!externalId) {
    return null;
  }

  // Jane has multiple price fields by weight; keep only the truthy ones.
  const collectTruthy = (fields: string[]): number[] =>
    fields.map((field) => rawProduct[field]).filter((value) => value);

  // Regular prices
  const prices = collectTruthy([
    'price_gram',
    'price_each',
    'price_half_gram',
    'price_eighth_ounce',
    'price_quarter_ounce',
    'price_half_ounce',
    'price_ounce',
    'price_two_gram',
  ]);

  // Special/discounted prices
  const specialPrices = collectTruthy([
    'special_price_gram',
    'special_price_each',
    'discounted_price_gram',
    'discounted_price_each',
  ]);

  // bucket_price is added only when it is not already among the regular prices
  const bucketPrice = rawProduct.bucket_price;
  if (bucketPrice && !prices.includes(bucketPrice)) {
    prices.push(bucketPrice);
  }

  // On special when any special price exists or a brand discount is flagged
  const isOnSpecial = specialPrices.length > 0 || rawProduct.has_brand_discount === true;

  // Discount percent compares the cheapest special price with the cheapest regular price
  let discountPercent: number | null = null;
  if (isOnSpecial && prices.length > 0 && specialPrices.length > 0) {
    const cheapestRegular = Math.min(...prices);
    const cheapestSpecial = Math.min(...specialPrices);
    if (cheapestRegular > 0 && cheapestSpecial < cheapestRegular) {
      discountPercent = Math.round(((cheapestRegular - cheapestSpecial) / cheapestRegular) * 100);
    }
  }

  // Special name: title of the first entry in brand_special_prices, if any
  const firstSpecial: any = rawProduct.brand_special_prices
    ? Object.values(rawProduct.brand_special_prices)[0]
    : undefined;
  const specialName: string | null = firstSpecial?.title ? firstSpecial.title : null;

  return {
    externalProductId: String(externalId),

    // Use minimum price for display
    priceRec: this.toCents(this.getMin(prices)),
    priceRecMin: this.toCents(this.getMin(prices)),
    priceRecMax: this.toCents(this.getMax(prices)),
    priceRecSpecial: this.toCents(this.getMin(specialPrices)),

    // Jane doesn't distinguish med pricing in Algolia response
    priceMed: null,
    priceMedMin: null,
    priceMedMax: null,
    priceMedSpecial: null,

    isOnSpecial,
    specialName,
    discountPercent,
  };
}
/**
 * Build the normalized availability record for a single raw Jane product.
 *
 * @param rawProduct - Raw product payload as returned by the source.
 * @returns The availability record, or `null` when neither `product_id` nor
 *   `objectID` is set on the payload.
 */
protected normalizeAvailability(rawProduct: any): NormalizedAvailability | null {
  const productKey = rawProduct.product_id || rawProduct.objectID;
  if (!productKey) {
    return null;
  }

  // A missing pickup flag defaults to available; a missing delivery flag defaults to unavailable.
  const { available_for_pickup: pickupFlag, available_for_delivery: deliveryFlag } = rawProduct;
  const inStock = (pickupFlag ?? true) || (deliveryFlag ?? false);

  // No stock-count field is read; max_cart_quantity stands in for quantity (null when falsy).
  const cartLimit = rawProduct.max_cart_quantity || null;

  return {
    externalProductId: String(productKey),
    inStock,
    stockStatus: inStock ? 'in_stock' : 'out_of_stock',
    quantity: cartLimit,
    quantityAvailable: cartLimit,
    // Threshold flags are always reported as false here.
    isBelowThreshold: false,
    optionsBelowThreshold: false,
  };
}
/**
 * Pull brand details out of a raw product payload.
 *
 * @param rawProduct - Raw product payload as returned by the source.
 * @returns The brand record, or `null` when the payload has no `brand` value.
 */
protected extractBrand(rawProduct: any): NormalizedBrand | null {
  const label = rawProduct.brand;
  if (!label) {
    return null;
  }

  // The brand id is optional upstream; stringify it only when present.
  const brandId = rawProduct.product_brand_id;
  const externalBrandId = brandId ? String(brandId) : null;
  const slug = this.slugify(label);
  const logoUrl = rawProduct.brand_logo_url || null;

  return { externalBrandId, name: label, slug, logoUrl };
}
/**
 * Derive the category record from a raw product payload.
 *
 * The payload's `kind` field (vape, flower, edible, etc.) is used as the
 * category name; no parent category is assigned.
 *
 * @param rawProduct - Raw product payload as returned by the source.
 * @returns The category record, or `null` when `kind` is missing or empty.
 */
protected extractCategory(rawProduct: any): NormalizedCategory | null {
  const kind = rawProduct.kind;
  if (kind) {
    return {
      name: kind,
      slug: this.slugify(kind),
      parentCategory: null,
    };
  }
  return null;
}
}

Some files were not shown because too many files have changed in this diff Show More