fix: Add curl to Docker, add active flag to worker_tasks
- Install curl in Docker container for Dutchie HTTP requests - Add 'active' column to worker_tasks (default false) to prevent accidental task execution on startup - Update task-service to only claim tasks where active=true 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -25,8 +25,9 @@ ENV APP_GIT_SHA=${APP_GIT_SHA}
|
|||||||
ENV APP_BUILD_TIME=${APP_BUILD_TIME}
|
ENV APP_BUILD_TIME=${APP_BUILD_TIME}
|
||||||
ENV CONTAINER_IMAGE_TAG=${CONTAINER_IMAGE_TAG}
|
ENV CONTAINER_IMAGE_TAG=${CONTAINER_IMAGE_TAG}
|
||||||
|
|
||||||
# Install Chromium dependencies
|
# Install Chromium dependencies and curl for HTTP requests
|
||||||
RUN apt-get update && apt-get install -y \
|
RUN apt-get update && apt-get install -y \
|
||||||
|
curl \
|
||||||
chromium \
|
chromium \
|
||||||
fonts-liberation \
|
fonts-liberation \
|
||||||
libnss3 \
|
libnss3 \
|
||||||
|
|||||||
286
backend/node_modules/.package-lock.json
generated
vendored
286
backend/node_modules/.package-lock.json
generated
vendored
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "dutchie-menus-backend",
|
"name": "dutchie-menus-backend",
|
||||||
"version": "1.5.1",
|
"version": "1.6.0",
|
||||||
"lockfileVersion": 3,
|
"lockfileVersion": 3,
|
||||||
"requires": true,
|
"requires": true,
|
||||||
"packages": {
|
"packages": {
|
||||||
@@ -46,6 +46,97 @@
|
|||||||
"resolved": "https://registry.npmjs.org/@ioredis/commands/-/commands-1.4.0.tgz",
|
"resolved": "https://registry.npmjs.org/@ioredis/commands/-/commands-1.4.0.tgz",
|
||||||
"integrity": "sha512-aFT2yemJJo+TZCmieA7qnYGQooOS7QfNmYrzGtsYd3g9j5iDP8AimYYAesf79ohjbLG12XxC4nG5DyEnC88AsQ=="
|
"integrity": "sha512-aFT2yemJJo+TZCmieA7qnYGQooOS7QfNmYrzGtsYd3g9j5iDP8AimYYAesf79ohjbLG12XxC4nG5DyEnC88AsQ=="
|
||||||
},
|
},
|
||||||
|
"node_modules/@jsep-plugin/assignment": {
|
||||||
|
"version": "1.3.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/@jsep-plugin/assignment/-/assignment-1.3.0.tgz",
|
||||||
|
"integrity": "sha512-VVgV+CXrhbMI3aSusQyclHkenWSAm95WaiKrMxRFam3JSUiIaQjoMIw2sEs/OX4XifnqeQUN4DYbJjlA8EfktQ==",
|
||||||
|
"engines": {
|
||||||
|
"node": ">= 10.16.0"
|
||||||
|
},
|
||||||
|
"peerDependencies": {
|
||||||
|
"jsep": "^0.4.0||^1.0.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/@jsep-plugin/regex": {
|
||||||
|
"version": "1.0.4",
|
||||||
|
"resolved": "https://registry.npmjs.org/@jsep-plugin/regex/-/regex-1.0.4.tgz",
|
||||||
|
"integrity": "sha512-q7qL4Mgjs1vByCaTnDFcBnV9HS7GVPJX5vyVoCgZHNSC9rjwIlmbXG5sUuorR5ndfHAIlJ8pVStxvjXHbNvtUg==",
|
||||||
|
"engines": {
|
||||||
|
"node": ">= 10.16.0"
|
||||||
|
},
|
||||||
|
"peerDependencies": {
|
||||||
|
"jsep": "^0.4.0||^1.0.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/@kubernetes/client-node": {
|
||||||
|
"version": "1.4.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/@kubernetes/client-node/-/client-node-1.4.0.tgz",
|
||||||
|
"integrity": "sha512-Zge3YvF7DJi264dU1b3wb/GmzR99JhUpqTvp+VGHfwZT+g7EOOYNScDJNZwXy9cszyIGPIs0VHr+kk8e95qqrA==",
|
||||||
|
"dependencies": {
|
||||||
|
"@types/js-yaml": "^4.0.1",
|
||||||
|
"@types/node": "^24.0.0",
|
||||||
|
"@types/node-fetch": "^2.6.13",
|
||||||
|
"@types/stream-buffers": "^3.0.3",
|
||||||
|
"form-data": "^4.0.0",
|
||||||
|
"hpagent": "^1.2.0",
|
||||||
|
"isomorphic-ws": "^5.0.0",
|
||||||
|
"js-yaml": "^4.1.0",
|
||||||
|
"jsonpath-plus": "^10.3.0",
|
||||||
|
"node-fetch": "^2.7.0",
|
||||||
|
"openid-client": "^6.1.3",
|
||||||
|
"rfc4648": "^1.3.0",
|
||||||
|
"socks-proxy-agent": "^8.0.4",
|
||||||
|
"stream-buffers": "^3.0.2",
|
||||||
|
"tar-fs": "^3.0.9",
|
||||||
|
"ws": "^8.18.2"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/@kubernetes/client-node/node_modules/@types/node": {
|
||||||
|
"version": "24.10.3",
|
||||||
|
"resolved": "https://registry.npmjs.org/@types/node/-/node-24.10.3.tgz",
|
||||||
|
"integrity": "sha512-gqkrWUsS8hcm0r44yn7/xZeV1ERva/nLgrLxFRUGb7aoNMIJfZJ3AC261zDQuOAKC7MiXai1WCpYc48jAHoShQ==",
|
||||||
|
"dependencies": {
|
||||||
|
"undici-types": "~7.16.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/@kubernetes/client-node/node_modules/tar-fs": {
|
||||||
|
"version": "3.1.1",
|
||||||
|
"resolved": "https://registry.npmjs.org/tar-fs/-/tar-fs-3.1.1.tgz",
|
||||||
|
"integrity": "sha512-LZA0oaPOc2fVo82Txf3gw+AkEd38szODlptMYejQUhndHMLQ9M059uXR+AfS7DNo0NpINvSqDsvyaCrBVkptWg==",
|
||||||
|
"dependencies": {
|
||||||
|
"pump": "^3.0.0",
|
||||||
|
"tar-stream": "^3.1.5"
|
||||||
|
},
|
||||||
|
"optionalDependencies": {
|
||||||
|
"bare-fs": "^4.0.1",
|
||||||
|
"bare-path": "^3.0.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/@kubernetes/client-node/node_modules/undici-types": {
|
||||||
|
"version": "7.16.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.16.0.tgz",
|
||||||
|
"integrity": "sha512-Zz+aZWSj8LE6zoxD+xrjh4VfkIG8Ya6LvYkZqtUQGJPZjYl53ypCaUwWqo7eI0x66KBGeRo+mlBEkMSeSZ38Nw=="
|
||||||
|
},
|
||||||
|
"node_modules/@kubernetes/client-node/node_modules/ws": {
|
||||||
|
"version": "8.18.3",
|
||||||
|
"resolved": "https://registry.npmjs.org/ws/-/ws-8.18.3.tgz",
|
||||||
|
"integrity": "sha512-PEIGCY5tSlUt50cqyMXfCzX+oOPqN0vuGqWzbcJ2xvnkzkq46oOpz7dQaTDBdfICb4N14+GARUDw2XV2N4tvzg==",
|
||||||
|
"engines": {
|
||||||
|
"node": ">=10.0.0"
|
||||||
|
},
|
||||||
|
"peerDependencies": {
|
||||||
|
"bufferutil": "^4.0.1",
|
||||||
|
"utf-8-validate": ">=5.0.2"
|
||||||
|
},
|
||||||
|
"peerDependenciesMeta": {
|
||||||
|
"bufferutil": {
|
||||||
|
"optional": true
|
||||||
|
},
|
||||||
|
"utf-8-validate": {
|
||||||
|
"optional": true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/@mapbox/node-pre-gyp": {
|
"node_modules/@mapbox/node-pre-gyp": {
|
||||||
"version": "1.0.11",
|
"version": "1.0.11",
|
||||||
"resolved": "https://registry.npmjs.org/@mapbox/node-pre-gyp/-/node-pre-gyp-1.0.11.tgz",
|
"resolved": "https://registry.npmjs.org/@mapbox/node-pre-gyp/-/node-pre-gyp-1.0.11.tgz",
|
||||||
@@ -251,6 +342,11 @@
|
|||||||
"integrity": "sha512-r8Tayk8HJnX0FztbZN7oVqGccWgw98T/0neJphO91KkmOzug1KkofZURD4UaD5uH8AqcFLfdPErnBod0u71/qg==",
|
"integrity": "sha512-r8Tayk8HJnX0FztbZN7oVqGccWgw98T/0neJphO91KkmOzug1KkofZURD4UaD5uH8AqcFLfdPErnBod0u71/qg==",
|
||||||
"dev": true
|
"dev": true
|
||||||
},
|
},
|
||||||
|
"node_modules/@types/js-yaml": {
|
||||||
|
"version": "4.0.9",
|
||||||
|
"resolved": "https://registry.npmjs.org/@types/js-yaml/-/js-yaml-4.0.9.tgz",
|
||||||
|
"integrity": "sha512-k4MGaQl5TGo/iipqb2UDG2UwjXziSWkh0uysQelTlJpX1qGlpUZYm8PnO4DxG1qBomtJUdYJ6qR6xdIah10JLg=="
|
||||||
|
},
|
||||||
"node_modules/@types/jsonwebtoken": {
|
"node_modules/@types/jsonwebtoken": {
|
||||||
"version": "9.0.10",
|
"version": "9.0.10",
|
||||||
"resolved": "https://registry.npmjs.org/@types/jsonwebtoken/-/jsonwebtoken-9.0.10.tgz",
|
"resolved": "https://registry.npmjs.org/@types/jsonwebtoken/-/jsonwebtoken-9.0.10.tgz",
|
||||||
@@ -276,7 +372,6 @@
|
|||||||
"version": "20.19.25",
|
"version": "20.19.25",
|
||||||
"resolved": "https://registry.npmjs.org/@types/node/-/node-20.19.25.tgz",
|
"resolved": "https://registry.npmjs.org/@types/node/-/node-20.19.25.tgz",
|
||||||
"integrity": "sha512-ZsJzA5thDQMSQO788d7IocwwQbI8B5OPzmqNvpf3NY/+MHDAS759Wo0gd2WQeXYt5AAAQjzcrTVC6SKCuYgoCQ==",
|
"integrity": "sha512-ZsJzA5thDQMSQO788d7IocwwQbI8B5OPzmqNvpf3NY/+MHDAS759Wo0gd2WQeXYt5AAAQjzcrTVC6SKCuYgoCQ==",
|
||||||
"devOptional": true,
|
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"undici-types": "~6.21.0"
|
"undici-types": "~6.21.0"
|
||||||
}
|
}
|
||||||
@@ -287,6 +382,15 @@
|
|||||||
"integrity": "sha512-0ikrnug3/IyneSHqCBeslAhlK2aBfYek1fGo4bP4QnZPmiqSGRK+Oy7ZMisLWkesffJvQ1cqAcBnJC+8+nxIAg==",
|
"integrity": "sha512-0ikrnug3/IyneSHqCBeslAhlK2aBfYek1fGo4bP4QnZPmiqSGRK+Oy7ZMisLWkesffJvQ1cqAcBnJC+8+nxIAg==",
|
||||||
"dev": true
|
"dev": true
|
||||||
},
|
},
|
||||||
|
"node_modules/@types/node-fetch": {
|
||||||
|
"version": "2.6.13",
|
||||||
|
"resolved": "https://registry.npmjs.org/@types/node-fetch/-/node-fetch-2.6.13.tgz",
|
||||||
|
"integrity": "sha512-QGpRVpzSaUs30JBSGPjOg4Uveu384erbHBoT1zeONvyCfwQxIkUshLAOqN/k9EjGviPRmWTTe6aH2qySWKTVSw==",
|
||||||
|
"dependencies": {
|
||||||
|
"@types/node": "*",
|
||||||
|
"form-data": "^4.0.4"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/@types/pg": {
|
"node_modules/@types/pg": {
|
||||||
"version": "8.15.6",
|
"version": "8.15.6",
|
||||||
"resolved": "https://registry.npmjs.org/@types/pg/-/pg-8.15.6.tgz",
|
"resolved": "https://registry.npmjs.org/@types/pg/-/pg-8.15.6.tgz",
|
||||||
@@ -340,6 +444,14 @@
|
|||||||
"@types/node": "*"
|
"@types/node": "*"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/@types/stream-buffers": {
|
||||||
|
"version": "3.0.8",
|
||||||
|
"resolved": "https://registry.npmjs.org/@types/stream-buffers/-/stream-buffers-3.0.8.tgz",
|
||||||
|
"integrity": "sha512-J+7VaHKNvlNPJPEJXX/fKa9DZtR/xPMwuIbe+yNOwp1YB+ApUOBv2aUpEoBJEi8nJgbgs1x8e73ttg0r1rSUdw==",
|
||||||
|
"dependencies": {
|
||||||
|
"@types/node": "*"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/@types/uuid": {
|
"node_modules/@types/uuid": {
|
||||||
"version": "9.0.8",
|
"version": "9.0.8",
|
||||||
"resolved": "https://registry.npmjs.org/@types/uuid/-/uuid-9.0.8.tgz",
|
"resolved": "https://registry.npmjs.org/@types/uuid/-/uuid-9.0.8.tgz",
|
||||||
@@ -520,6 +632,78 @@
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/bare-fs": {
|
||||||
|
"version": "4.5.2",
|
||||||
|
"resolved": "https://registry.npmjs.org/bare-fs/-/bare-fs-4.5.2.tgz",
|
||||||
|
"integrity": "sha512-veTnRzkb6aPHOvSKIOy60KzURfBdUflr5VReI+NSaPL6xf+XLdONQgZgpYvUuZLVQ8dCqxpBAudaOM1+KpAUxw==",
|
||||||
|
"optional": true,
|
||||||
|
"dependencies": {
|
||||||
|
"bare-events": "^2.5.4",
|
||||||
|
"bare-path": "^3.0.0",
|
||||||
|
"bare-stream": "^2.6.4",
|
||||||
|
"bare-url": "^2.2.2",
|
||||||
|
"fast-fifo": "^1.3.2"
|
||||||
|
},
|
||||||
|
"engines": {
|
||||||
|
"bare": ">=1.16.0"
|
||||||
|
},
|
||||||
|
"peerDependencies": {
|
||||||
|
"bare-buffer": "*"
|
||||||
|
},
|
||||||
|
"peerDependenciesMeta": {
|
||||||
|
"bare-buffer": {
|
||||||
|
"optional": true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/bare-os": {
|
||||||
|
"version": "3.6.2",
|
||||||
|
"resolved": "https://registry.npmjs.org/bare-os/-/bare-os-3.6.2.tgz",
|
||||||
|
"integrity": "sha512-T+V1+1srU2qYNBmJCXZkUY5vQ0B4FSlL3QDROnKQYOqeiQR8UbjNHlPa+TIbM4cuidiN9GaTaOZgSEgsvPbh5A==",
|
||||||
|
"optional": true,
|
||||||
|
"engines": {
|
||||||
|
"bare": ">=1.14.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/bare-path": {
|
||||||
|
"version": "3.0.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/bare-path/-/bare-path-3.0.0.tgz",
|
||||||
|
"integrity": "sha512-tyfW2cQcB5NN8Saijrhqn0Zh7AnFNsnczRcuWODH0eYAXBsJ5gVxAUuNr7tsHSC6IZ77cA0SitzT+s47kot8Mw==",
|
||||||
|
"optional": true,
|
||||||
|
"dependencies": {
|
||||||
|
"bare-os": "^3.0.1"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/bare-stream": {
|
||||||
|
"version": "2.7.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/bare-stream/-/bare-stream-2.7.0.tgz",
|
||||||
|
"integrity": "sha512-oyXQNicV1y8nc2aKffH+BUHFRXmx6VrPzlnaEvMhram0nPBrKcEdcyBg5r08D0i8VxngHFAiVyn1QKXpSG0B8A==",
|
||||||
|
"optional": true,
|
||||||
|
"dependencies": {
|
||||||
|
"streamx": "^2.21.0"
|
||||||
|
},
|
||||||
|
"peerDependencies": {
|
||||||
|
"bare-buffer": "*",
|
||||||
|
"bare-events": "*"
|
||||||
|
},
|
||||||
|
"peerDependenciesMeta": {
|
||||||
|
"bare-buffer": {
|
||||||
|
"optional": true
|
||||||
|
},
|
||||||
|
"bare-events": {
|
||||||
|
"optional": true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/bare-url": {
|
||||||
|
"version": "2.3.2",
|
||||||
|
"resolved": "https://registry.npmjs.org/bare-url/-/bare-url-2.3.2.tgz",
|
||||||
|
"integrity": "sha512-ZMq4gd9ngV5aTMa5p9+UfY0b3skwhHELaDkhEHetMdX0LRkW9kzaym4oo/Eh+Ghm0CCDuMTsRIGM/ytUc1ZYmw==",
|
||||||
|
"optional": true,
|
||||||
|
"dependencies": {
|
||||||
|
"bare-path": "^3.0.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/base64-js": {
|
"node_modules/base64-js": {
|
||||||
"version": "1.5.1",
|
"version": "1.5.1",
|
||||||
"resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz",
|
"resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz",
|
||||||
@@ -2019,6 +2203,14 @@
|
|||||||
"node": ">=16.0.0"
|
"node": ">=16.0.0"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/hpagent": {
|
||||||
|
"version": "1.2.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/hpagent/-/hpagent-1.2.0.tgz",
|
||||||
|
"integrity": "sha512-A91dYTeIB6NoXG+PxTQpCCDDnfHsW9kc06Lvpu1TEe9gnd6ZFeiBoRO9JvzEv6xK7EX97/dUE8g/vBMTqTS3CA==",
|
||||||
|
"engines": {
|
||||||
|
"node": ">=14"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/htmlparser2": {
|
"node_modules/htmlparser2": {
|
||||||
"version": "10.0.0",
|
"version": "10.0.0",
|
||||||
"resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-10.0.0.tgz",
|
"resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-10.0.0.tgz",
|
||||||
@@ -2382,6 +2574,22 @@
|
|||||||
"node": ">=0.10.0"
|
"node": ">=0.10.0"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/isomorphic-ws": {
|
||||||
|
"version": "5.0.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/isomorphic-ws/-/isomorphic-ws-5.0.0.tgz",
|
||||||
|
"integrity": "sha512-muId7Zzn9ywDsyXgTIafTry2sV3nySZeUDe6YedVd1Hvuuep5AsIlqK+XefWpYTyJG5e503F2xIuT2lcU6rCSw==",
|
||||||
|
"peerDependencies": {
|
||||||
|
"ws": "*"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/jose": {
|
||||||
|
"version": "6.1.3",
|
||||||
|
"resolved": "https://registry.npmjs.org/jose/-/jose-6.1.3.tgz",
|
||||||
|
"integrity": "sha512-0TpaTfihd4QMNwrz/ob2Bp7X04yuxJkjRGi4aKmOqwhov54i6u79oCv7T+C7lo70MKH6BesI3vscD1yb/yzKXQ==",
|
||||||
|
"funding": {
|
||||||
|
"url": "https://github.com/sponsors/panva"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/js-tokens": {
|
"node_modules/js-tokens": {
|
||||||
"version": "4.0.0",
|
"version": "4.0.0",
|
||||||
"resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz",
|
"resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz",
|
||||||
@@ -2398,6 +2606,14 @@
|
|||||||
"js-yaml": "bin/js-yaml.js"
|
"js-yaml": "bin/js-yaml.js"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/jsep": {
|
||||||
|
"version": "1.4.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/jsep/-/jsep-1.4.0.tgz",
|
||||||
|
"integrity": "sha512-B7qPcEVE3NVkmSJbaYxvv4cHkVW7DQsZz13pUMrfS8z8Q/BuShN+gcTXrUlPiGqM2/t/EEaI030bpxMqY8gMlw==",
|
||||||
|
"engines": {
|
||||||
|
"node": ">= 10.16.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/json-parse-even-better-errors": {
|
"node_modules/json-parse-even-better-errors": {
|
||||||
"version": "2.3.1",
|
"version": "2.3.1",
|
||||||
"resolved": "https://registry.npmjs.org/json-parse-even-better-errors/-/json-parse-even-better-errors-2.3.1.tgz",
|
"resolved": "https://registry.npmjs.org/json-parse-even-better-errors/-/json-parse-even-better-errors-2.3.1.tgz",
|
||||||
@@ -2419,6 +2635,23 @@
|
|||||||
"graceful-fs": "^4.1.6"
|
"graceful-fs": "^4.1.6"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/jsonpath-plus": {
|
||||||
|
"version": "10.3.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/jsonpath-plus/-/jsonpath-plus-10.3.0.tgz",
|
||||||
|
"integrity": "sha512-8TNmfeTCk2Le33A3vRRwtuworG/L5RrgMvdjhKZxvyShO+mBu2fP50OWUjRLNtvw344DdDarFh9buFAZs5ujeA==",
|
||||||
|
"dependencies": {
|
||||||
|
"@jsep-plugin/assignment": "^1.3.0",
|
||||||
|
"@jsep-plugin/regex": "^1.0.4",
|
||||||
|
"jsep": "^1.4.0"
|
||||||
|
},
|
||||||
|
"bin": {
|
||||||
|
"jsonpath": "bin/jsonpath-cli.js",
|
||||||
|
"jsonpath-plus": "bin/jsonpath-cli.js"
|
||||||
|
},
|
||||||
|
"engines": {
|
||||||
|
"node": ">=18.0.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/jsonwebtoken": {
|
"node_modules/jsonwebtoken": {
|
||||||
"version": "9.0.2",
|
"version": "9.0.2",
|
||||||
"resolved": "https://registry.npmjs.org/jsonwebtoken/-/jsonwebtoken-9.0.2.tgz",
|
"resolved": "https://registry.npmjs.org/jsonwebtoken/-/jsonwebtoken-9.0.2.tgz",
|
||||||
@@ -2493,6 +2726,11 @@
|
|||||||
"resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz",
|
"resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz",
|
||||||
"integrity": "sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg=="
|
"integrity": "sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg=="
|
||||||
},
|
},
|
||||||
|
"node_modules/lodash.clonedeep": {
|
||||||
|
"version": "4.5.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/lodash.clonedeep/-/lodash.clonedeep-4.5.0.tgz",
|
||||||
|
"integrity": "sha512-H5ZhCF25riFd9uB5UCkVKo61m3S/xZk1x4wA6yp/L3RFP6Z/eHH1ymQcGLo7J3GMPfm0V/7m1tryHuGVxpqEBQ=="
|
||||||
|
},
|
||||||
"node_modules/lodash.defaults": {
|
"node_modules/lodash.defaults": {
|
||||||
"version": "4.2.0",
|
"version": "4.2.0",
|
||||||
"resolved": "https://registry.npmjs.org/lodash.defaults/-/lodash.defaults-4.2.0.tgz",
|
"resolved": "https://registry.npmjs.org/lodash.defaults/-/lodash.defaults-4.2.0.tgz",
|
||||||
@@ -2942,6 +3180,14 @@
|
|||||||
"url": "https://github.com/fb55/nth-check?sponsor=1"
|
"url": "https://github.com/fb55/nth-check?sponsor=1"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/oauth4webapi": {
|
||||||
|
"version": "3.8.3",
|
||||||
|
"resolved": "https://registry.npmjs.org/oauth4webapi/-/oauth4webapi-3.8.3.tgz",
|
||||||
|
"integrity": "sha512-pQ5BsX3QRTgnt5HxgHwgunIRaDXBdkT23tf8dfzmtTIL2LTpdmxgbpbBm0VgFWAIDlezQvQCTgnVIUmHupXHxw==",
|
||||||
|
"funding": {
|
||||||
|
"url": "https://github.com/sponsors/panva"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/object-assign": {
|
"node_modules/object-assign": {
|
||||||
"version": "4.1.1",
|
"version": "4.1.1",
|
||||||
"resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz",
|
"resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz",
|
||||||
@@ -2980,6 +3226,18 @@
|
|||||||
"wrappy": "1"
|
"wrappy": "1"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/openid-client": {
|
||||||
|
"version": "6.8.1",
|
||||||
|
"resolved": "https://registry.npmjs.org/openid-client/-/openid-client-6.8.1.tgz",
|
||||||
|
"integrity": "sha512-VoYT6enBo6Vj2j3Q5Ec0AezS+9YGzQo1f5Xc42lreMGlfP4ljiXPKVDvCADh+XHCV/bqPu/wWSiCVXbJKvrODw==",
|
||||||
|
"dependencies": {
|
||||||
|
"jose": "^6.1.0",
|
||||||
|
"oauth4webapi": "^3.8.2"
|
||||||
|
},
|
||||||
|
"funding": {
|
||||||
|
"url": "https://github.com/sponsors/panva"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/pac-proxy-agent": {
|
"node_modules/pac-proxy-agent": {
|
||||||
"version": "7.2.0",
|
"version": "7.2.0",
|
||||||
"resolved": "https://registry.npmjs.org/pac-proxy-agent/-/pac-proxy-agent-7.2.0.tgz",
|
"resolved": "https://registry.npmjs.org/pac-proxy-agent/-/pac-proxy-agent-7.2.0.tgz",
|
||||||
@@ -3883,6 +4141,11 @@
|
|||||||
"url": "https://github.com/privatenumber/resolve-pkg-maps?sponsor=1"
|
"url": "https://github.com/privatenumber/resolve-pkg-maps?sponsor=1"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/rfc4648": {
|
||||||
|
"version": "1.5.4",
|
||||||
|
"resolved": "https://registry.npmjs.org/rfc4648/-/rfc4648-1.5.4.tgz",
|
||||||
|
"integrity": "sha512-rRg/6Lb+IGfJqO05HZkN50UtY7K/JhxJag1kP23+zyMfrvoB0B7RWv06MbOzoc79RgCdNTiUaNsTT1AJZ7Z+cg=="
|
||||||
|
},
|
||||||
"node_modules/rimraf": {
|
"node_modules/rimraf": {
|
||||||
"version": "3.0.2",
|
"version": "3.0.2",
|
||||||
"resolved": "https://registry.npmjs.org/rimraf/-/rimraf-3.0.2.tgz",
|
"resolved": "https://registry.npmjs.org/rimraf/-/rimraf-3.0.2.tgz",
|
||||||
@@ -4313,6 +4576,14 @@
|
|||||||
"node": ">= 0.8"
|
"node": ">= 0.8"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/stream-buffers": {
|
||||||
|
"version": "3.0.3",
|
||||||
|
"resolved": "https://registry.npmjs.org/stream-buffers/-/stream-buffers-3.0.3.tgz",
|
||||||
|
"integrity": "sha512-pqMqwQCso0PBJt2PQmDO0cFj0lyqmiwOMiMSkVtRokl7e+ZTRYgDHKnuZNbqjiJXgsg4nuqtD/zxuo9KqTp0Yw==",
|
||||||
|
"engines": {
|
||||||
|
"node": ">= 0.10.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/streamx": {
|
"node_modules/streamx": {
|
||||||
"version": "2.23.0",
|
"version": "2.23.0",
|
||||||
"resolved": "https://registry.npmjs.org/streamx/-/streamx-2.23.0.tgz",
|
"resolved": "https://registry.npmjs.org/streamx/-/streamx-2.23.0.tgz",
|
||||||
@@ -4532,8 +4803,7 @@
|
|||||||
"node_modules/undici-types": {
|
"node_modules/undici-types": {
|
||||||
"version": "6.21.0",
|
"version": "6.21.0",
|
||||||
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.21.0.tgz",
|
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.21.0.tgz",
|
||||||
"integrity": "sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ==",
|
"integrity": "sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ=="
|
||||||
"devOptional": true
|
|
||||||
},
|
},
|
||||||
"node_modules/universalify": {
|
"node_modules/universalify": {
|
||||||
"version": "2.0.1",
|
"version": "2.0.1",
|
||||||
@@ -4556,6 +4826,14 @@
|
|||||||
"resolved": "https://registry.npmjs.org/urlpattern-polyfill/-/urlpattern-polyfill-10.0.0.tgz",
|
"resolved": "https://registry.npmjs.org/urlpattern-polyfill/-/urlpattern-polyfill-10.0.0.tgz",
|
||||||
"integrity": "sha512-H/A06tKD7sS1O1X2SshBVeA5FLycRpjqiBeqGKmBwBDBy28EnRjORxTNe269KSSr5un5qyWi1iL61wLxpd+ZOg=="
|
"integrity": "sha512-H/A06tKD7sS1O1X2SshBVeA5FLycRpjqiBeqGKmBwBDBy28EnRjORxTNe269KSSr5un5qyWi1iL61wLxpd+ZOg=="
|
||||||
},
|
},
|
||||||
|
"node_modules/user-agents": {
|
||||||
|
"version": "1.1.669",
|
||||||
|
"resolved": "https://registry.npmjs.org/user-agents/-/user-agents-1.1.669.tgz",
|
||||||
|
"integrity": "sha512-pbIzG+AOqCaIpySKJ4IAm1l0VyE4jMnK4y1thV8lm8PYxI+7X5uWcppOK7zY79TCKKTAnJH3/4gaVIZHsjrmJA==",
|
||||||
|
"dependencies": {
|
||||||
|
"lodash.clonedeep": "^4.5.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/util": {
|
"node_modules/util": {
|
||||||
"version": "0.12.5",
|
"version": "0.12.5",
|
||||||
"resolved": "https://registry.npmjs.org/util/-/util-0.12.5.tgz",
|
"resolved": "https://registry.npmjs.org/util/-/util-0.12.5.tgz",
|
||||||
|
|||||||
@@ -170,6 +170,7 @@ class TaskService {
|
|||||||
WHERE id = (
|
WHERE id = (
|
||||||
SELECT id FROM worker_tasks
|
SELECT id FROM worker_tasks
|
||||||
WHERE status = 'pending'
|
WHERE status = 'pending'
|
||||||
|
AND active = true
|
||||||
AND (scheduled_for IS NULL OR scheduled_for <= NOW())
|
AND (scheduled_for IS NULL OR scheduled_for <= NOW())
|
||||||
-- Exclude stores that already have an active task
|
-- Exclude stores that already have an active task
|
||||||
AND (dispensary_id IS NULL OR dispensary_id NOT IN (
|
AND (dispensary_id IS NULL OR dispensary_id NOT IN (
|
||||||
|
|||||||
365
workflow-12102025.md
Normal file
365
workflow-12102025.md
Normal file
@@ -0,0 +1,365 @@
|
|||||||
|
# Workflow Documentation - December 10, 2025
|
||||||
|
|
||||||
|
## Purpose
|
||||||
|
|
||||||
|
This document captures the intended behavior for the CannaiQ crawl system, specifically around proxy rotation, fingerprinting, and anti-detection.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Stealth & Anti-Detection Requirements
|
||||||
|
|
||||||
|
### 1. Task Determines Work, Proxy Determines Identity
|
||||||
|
|
||||||
|
The task payload contains:
|
||||||
|
- `dispensary_id` - which store to crawl
|
||||||
|
- `role` - what type of work (product_resync, entry_point_discovery, etc.)
|
||||||
|
|
||||||
|
The **proxy** determines the session identity:
|
||||||
|
- Proxy location (city, state, timezone) → sets the session's timezone and the headers built from it (including `Accept-Language`)
|
||||||
|
- Language is always English (`en-US`)
|
||||||
|
|
||||||
|
**Flow:**
|
||||||
|
```
|
||||||
|
Task claimed
|
||||||
|
│
|
||||||
|
└─► Get proxy from rotation
|
||||||
|
│
|
||||||
|
└─► Proxy has location (city, state, timezone)
|
||||||
|
│
|
||||||
|
└─► Build headers using proxy's timezone
|
||||||
|
- Accept-Language: en-US,en;q=0.9
|
||||||
|
- Timezone-consistent behavior
|
||||||
|
```
|
||||||
|
|
||||||
|
### 2. On 403 Block - Immediate Backoff
|
||||||
|
|
||||||
|
When a 403 is received:
|
||||||
|
|
||||||
|
1. **Immediately** stop using current IP
|
||||||
|
2. Get a new proxy (new IP)
|
||||||
|
3. Get a new UA/fingerprint
|
||||||
|
4. Retry the request
|
||||||
|
|
||||||
|
**Per-proxy failure tracking:**
|
||||||
|
- Track UA rotation attempts per proxy
|
||||||
|
- After 3 UA/fingerprint rotations on the same proxy → disable that proxy
|
||||||
|
- This means: if we rotate UA 3 times and still get 403, the proxy is burned
|
||||||
|
|
||||||
|
### 3. Fingerprint Rotation Rules
|
||||||
|
|
||||||
|
Each request uses:
|
||||||
|
- Proxy (IP)
|
||||||
|
- User-Agent
|
||||||
|
- sec-ch-ua headers (Client Hints)
|
||||||
|
- Accept-Language (always `en-US,en;q=0.9`; built with the proxy-derived session headers)
|
||||||
|
|
||||||
|
On 403:
|
||||||
|
1. Record failure on current proxy
|
||||||
|
2. Rotate to new proxy
|
||||||
|
3. Pick new random fingerprint
|
||||||
|
4. If the same proxy fails 3 times with different fingerprints → disable the proxy
|
||||||
|
|
||||||
|
### 4. Proxy Table Schema
|
||||||
|
|
||||||
|
```sql
|
||||||
|
CREATE TABLE proxies (
|
||||||
|
id SERIAL PRIMARY KEY,
|
||||||
|
host VARCHAR(255) NOT NULL,
|
||||||
|
port INTEGER NOT NULL,
|
||||||
|
username VARCHAR(100),
|
||||||
|
password VARCHAR(100),
|
||||||
|
protocol VARCHAR(10) DEFAULT 'http',
|
||||||
|
active BOOLEAN DEFAULT true,
|
||||||
|
|
||||||
|
-- Location (determines session headers)
|
||||||
|
city VARCHAR(100),
|
||||||
|
state VARCHAR(50),
|
||||||
|
country VARCHAR(100),
|
||||||
|
country_code VARCHAR(10),
|
||||||
|
timezone VARCHAR(50),
|
||||||
|
|
||||||
|
-- Health tracking
|
||||||
|
failure_count INTEGER DEFAULT 0,
|
||||||
|
consecutive_403_count INTEGER DEFAULT 0, -- Track 403s specifically
|
||||||
|
last_used_at TIMESTAMPTZ,
|
||||||
|
last_failure_at TIMESTAMPTZ,
|
||||||
|
last_error TEXT,
|
||||||
|
|
||||||
|
-- Performance
|
||||||
|
response_time_ms INTEGER,
|
||||||
|
max_connections INTEGER DEFAULT 1
|
||||||
|
);
|
||||||
|
```
|
||||||
|
|
||||||
|
### 5. Failure Threshold
|
||||||
|
|
||||||
|
- **3 consecutive 403s** with different fingerprints → disable proxy
|
||||||
|
- Reset `consecutive_403_count` to 0 on successful request
|
||||||
|
- General `failure_count` tracks all errors (timeouts, connection errors, etc.)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Implementation Status
|
||||||
|
|
||||||
|
### COMPLETED - December 10, 2025
|
||||||
|
|
||||||
|
All code changes have been implemented per this specification:
|
||||||
|
|
||||||
|
#### 1. crawl-rotator.ts ✅
|
||||||
|
|
||||||
|
- [x] Added `consecutive403Count` to Proxy interface
|
||||||
|
- [x] Added `markBlocked()` method that increments `consecutive_403_count` and disables proxy at 3
|
||||||
|
- [x] Added `getProxyTimezone()` to return current proxy's timezone
|
||||||
|
- [x] `markSuccess()` now resets `consecutive_403_count` to 0
|
||||||
|
- [x] Replaced hardcoded UA list with `intoli/user-agents` library for realistic fingerprints
|
||||||
|
- [x] `BrowserFingerprint` interface includes full fingerprint data (UA, platform, screen size, viewport, sec-ch-ua headers)
|
||||||
|
|
||||||
|
#### 2. client.ts ✅
|
||||||
|
|
||||||
|
- [x] `startSession()` no longer takes state/timezone params
|
||||||
|
- [x] `startSession()` gets identity from proxy via `crawlRotator.getProxyLocation()`
|
||||||
|
- [x] Added `handle403Block()` that:
|
||||||
|
- Calls `crawlRotator.recordBlock()` (tracks consecutive 403s)
|
||||||
|
- Immediately rotates both proxy and fingerprint via `rotateBoth()`
|
||||||
|
- Returns false if no more proxies available
|
||||||
|
- [x] `executeGraphQL()` calls `handle403Block()` on 403 (not `rotateProxyOn403`)
|
||||||
|
- [x] `fetchPage()` uses same 403 handling
|
||||||
|
- [x] 500ms backoff after rotation (not linear delay)
|
||||||
|
|
||||||
|
#### 3. Task Handlers ✅
|
||||||
|
|
||||||
|
- [x] `entry-point-discovery.ts`: `startSession()` called with no params
|
||||||
|
- [x] `product-refresh.ts`: `startSession()` called with no params
|
||||||
|
|
||||||
|
#### 4. Dependencies ✅
|
||||||
|
|
||||||
|
- [x] Added `user-agents` npm package for realistic UA generation
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Files Changed
|
||||||
|
|
||||||
|
| File | Changes |
|
||||||
|
|------|---------|
|
||||||
|
| `backend/src/services/crawl-rotator.ts` | Complete rewrite with `consecutive403Count`, `markBlocked()`, `intoli/user-agents` |
|
||||||
|
| `backend/src/platforms/dutchie/client.ts` | `startSession()` uses proxy location, `handle403Block()` for 403 handling |
|
||||||
|
| `backend/src/tasks/handlers/entry-point-discovery.ts` | `startSession()` called with no params |
|
||||||
|
| `backend/src/tasks/handlers/product-refresh.ts` | `startSession()` called with no params |
|
||||||
|
| `backend/package.json` | Added `user-agents` dependency |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Migration Required
|
||||||
|
|
||||||
|
The `proxies` table needs a `consecutive_403_count` column; add it if it is not already present:
|
||||||
|
|
||||||
|
```sql
|
||||||
|
ALTER TABLE proxies ADD COLUMN IF NOT EXISTS consecutive_403_count INTEGER DEFAULT 0;
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Key Behaviors Summary
|
||||||
|
|
||||||
|
| Behavior | Implementation |
|
||||||
|
|----------|----------------|
|
||||||
|
| Session identity | From proxy location (`getProxyLocation()`) |
|
||||||
|
| Language | Always `en-US,en;q=0.9` |
|
||||||
|
| 403 handling | `handle403Block()` → `recordBlock()` → `rotateBoth()` |
|
||||||
|
| Proxy disable | After 3 consecutive 403s (`consecutive403Count >= 3`) |
|
||||||
|
| Success reset | `markSuccess()` resets `consecutive403Count` to 0 |
|
||||||
|
| UA generation | `intoli/user-agents` library (updated daily, realistic fingerprints) |
|
||||||
|
| Fingerprint data | Full: UA, platform, screen size, viewport, sec-ch-ua headers |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## User-Agent Generation
|
||||||
|
|
||||||
|
### Data Source
|
||||||
|
|
||||||
|
The `user-agents` npm package (published from the `intoli/user-agents` repository) provides daily-updated market share data collected from Intoli's residential proxy network (millions of real users). New versions of the package are released to npm automatically every day.
|
||||||
|
|
||||||
|
### Device Category Distribution (hardcoded)
|
||||||
|
|
||||||
|
| Category | Share |
|
||||||
|
|----------|-------|
|
||||||
|
| Mobile | 62% |
|
||||||
|
| Desktop | 36% |
|
||||||
|
| Tablet | 2% |
|
||||||
|
|
||||||
|
### Browser Filter (whitelist only)
|
||||||
|
|
||||||
|
Only these browsers are allowed:
|
||||||
|
- Chrome (67%)
|
||||||
|
- Safari (20%)
|
||||||
|
- Edge (6%)
|
||||||
|
- Firefox (3%)
|
||||||
|
|
||||||
|
Samsung Internet, Opera, and other niche browsers are filtered out.
|
||||||
|
|
||||||
|
### Desktop OS Distribution (from library)
|
||||||
|
|
||||||
|
| OS | Share |
|
||||||
|
|----|-------|
|
||||||
|
| Windows | 72% |
|
||||||
|
| macOS | 17% |
|
||||||
|
| Linux | 4% |
|
||||||
|
|
||||||
|
### UA Lifecycle
|
||||||
|
|
||||||
|
1. **Session start** (new proxy IP obtained) → Roll device category (62/36/2) → Generate UA filtered to device + top 4 browsers → Store on session
|
||||||
|
2. **UA sticks** until IP rotates (403 block or manual rotation)
|
||||||
|
3. **IP rotation** triggers new UA generation
|
||||||
|
|
||||||
|
### Failure Handling
|
||||||
|
|
||||||
|
- If UA generation fails → Alert admin dashboard, **stop crawl immediately**
|
||||||
|
- No fallback to static UA list
|
||||||
|
- This forces investigation rather than silent degradation
|
||||||
|
|
||||||
|
### Session Logging
|
||||||
|
|
||||||
|
Each session logs:
|
||||||
|
- Device category (mobile/desktop/tablet)
|
||||||
|
- Full UA string
|
||||||
|
- Browser name (Chrome/Safari/Edge/Firefox)
|
||||||
|
- IP address (from proxy)
|
||||||
|
- Session start timestamp
|
||||||
|
|
||||||
|
Logs are rotated monthly.
|
||||||
|
|
||||||
|
### Implementation
|
||||||
|
|
||||||
|
Located in `backend/src/services/crawl-rotator.ts`:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
// Per workflow-12102025.md: Device category distribution
|
||||||
|
const DEVICE_WEIGHTS = { mobile: 62, desktop: 36, tablet: 2 };
|
||||||
|
|
||||||
|
// Per workflow-12102025.md: Browser whitelist
|
||||||
|
const ALLOWED_BROWSERS = ['Chrome', 'Safari', 'Edge', 'Firefox'];
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## HTTP Fingerprinting
|
||||||
|
|
||||||
|
### Goal
|
||||||
|
|
||||||
|
Make HTTP requests indistinguishable from real browser traffic, leaving no repeatable footprint.
|
||||||
|
|
||||||
|
### Components
|
||||||
|
|
||||||
|
1. **Full Header Set** - All headers a real browser sends
|
||||||
|
2. **Header Ordering** - Browser-specific order (Chrome vs Firefox vs Safari)
|
||||||
|
3. **TLS Fingerprint** - Use `curl-impersonate` to match browser TLS signature
|
||||||
|
4. **Dynamic Referer** - Set per dispensary being crawled
|
||||||
|
5. **Natural Randomization** - Vary optional headers like real users
|
||||||
|
|
||||||
|
### Required Headers
|
||||||
|
|
||||||
|
| Header | Chrome | Firefox | Safari | Notes |
|
||||||
|
|--------|--------|---------|--------|-------|
|
||||||
|
| `User-Agent` | ✅ | ✅ | ✅ | From UA generation |
|
||||||
|
| `Accept` | ✅ | ✅ | ✅ | Content types |
|
||||||
|
| `Accept-Language` | ✅ | ✅ | ✅ | Always `en-US,en;q=0.9` |
|
||||||
|
| `Accept-Encoding` | ✅ | ✅ | ✅ | `gzip, deflate, br` |
|
||||||
|
| `Connection` | ✅ | ✅ | ✅ | `keep-alive` |
|
||||||
|
| `Origin` | ✅ | ✅ | ✅ | `https://dutchie.com` (POST only) |
|
||||||
|
| `Referer` | ✅ | ✅ | ✅ | Dynamic per dispensary |
|
||||||
|
| `sec-ch-ua` | ✅ | ❌ | ❌ | Chromium only |
|
||||||
|
| `sec-ch-ua-mobile` | ✅ | ❌ | ❌ | Chromium only |
|
||||||
|
| `sec-ch-ua-platform` | ✅ | ❌ | ❌ | Chromium only |
|
||||||
|
| `sec-fetch-dest` | ✅ | ✅ | ❌ | `empty` for XHR |
|
||||||
|
| `sec-fetch-mode` | ✅ | ✅ | ❌ | `cors` for XHR |
|
||||||
|
| `sec-fetch-site` | ✅ | ✅ | ❌ | `same-origin` |
|
||||||
|
| `Upgrade-Insecure-Requests` | ✅ | ✅ | ✅ | `1` (page loads only) |
|
||||||
|
| `DNT` | ~30% | ~30% | ~30% | Randomized per session |
|
||||||
|
|
||||||
|
### Header Ordering
|
||||||
|
|
||||||
|
Each browser sends headers in a specific order. Fingerprinting services detect requests whose header order does not match the browser claimed in the `User-Agent`.
|
||||||
|
|
||||||
|
**Chrome order (GraphQL request):**
|
||||||
|
1. Host
|
||||||
|
2. Connection
|
||||||
|
3. Content-Length (POST)
|
||||||
|
4. sec-ch-ua
|
||||||
|
5. DNT (if enabled)
|
||||||
|
6. sec-ch-ua-mobile
|
||||||
|
7. User-Agent
|
||||||
|
8. sec-ch-ua-platform
|
||||||
|
9. Content-Type (POST)
|
||||||
|
10. Accept
|
||||||
|
11. Origin (POST)
|
||||||
|
12. sec-fetch-site
|
||||||
|
13. sec-fetch-mode
|
||||||
|
14. sec-fetch-dest
|
||||||
|
15. Referer
|
||||||
|
16. Accept-Encoding
|
||||||
|
17. Accept-Language
|
||||||
|
|
||||||
|
**Firefox order (GraphQL request):**
|
||||||
|
1. Host
|
||||||
|
2. User-Agent
|
||||||
|
3. Accept
|
||||||
|
4. Accept-Language
|
||||||
|
5. Accept-Encoding
|
||||||
|
6. Content-Type (POST)
|
||||||
|
7. Content-Length (POST)
|
||||||
|
8. Origin (POST)
|
||||||
|
9. DNT (if enabled)
|
||||||
|
10. Connection
|
||||||
|
11. Referer
|
||||||
|
12. sec-fetch-dest
|
||||||
|
13. sec-fetch-mode
|
||||||
|
14. sec-fetch-site
|
||||||
|
|
||||||
|
**Safari order (GraphQL request):**
|
||||||
|
1. Host
|
||||||
|
2. Connection
|
||||||
|
3. Content-Length (POST)
|
||||||
|
4. Accept
|
||||||
|
5. User-Agent
|
||||||
|
6. Content-Type (POST)
|
||||||
|
7. Origin (POST)
|
||||||
|
8. Referer
|
||||||
|
9. Accept-Encoding
|
||||||
|
10. Accept-Language
|
||||||
|
|
||||||
|
### TLS Fingerprinting
|
||||||
|
|
||||||
|
Use `curl-impersonate` instead of standard curl:
|
||||||
|
- `curl_chrome131` - Mimics Chrome 131 TLS handshake
|
||||||
|
- `curl_ff133` - Mimics Firefox 133 TLS handshake
|
||||||
|
- `curl_safari17` - Mimics Safari 17 TLS handshake
|
||||||
|
|
||||||
|
Use the binary that matches the browser in the session's UA, so that the TLS fingerprint and the `User-Agent` header agree.
|
||||||
|
|
||||||
|
### Dynamic Referer
|
||||||
|
|
||||||
|
Set Referer to the dispensary's actual page URL:
|
||||||
|
|
||||||
|
```
|
||||||
|
Crawling "harvest-of-tempe" → Referer: https://dutchie.com/dispensary/harvest-of-tempe
|
||||||
|
Crawling "zen-leaf-mesa" → Referer: https://dutchie.com/dispensary/zen-leaf-mesa
|
||||||
|
```
|
||||||
|
|
||||||
|
The Referer is derived from the dispensary's `menu_url` field.
|
||||||
|
|
||||||
|
### Natural Randomization
|
||||||
|
|
||||||
|
Per-session randomization (set once when session starts, consistent for session):
|
||||||
|
|
||||||
|
| Feature | Distribution | Implementation |
|
||||||
|
|---------|--------------|----------------|
|
||||||
|
| DNT header | 30% have it | `Math.random() < 0.30` |
|
||||||
|
| Accept quality values | Slight variation | `q=0.9` vs `q=0.8` |
|
||||||
|
|
||||||
|
### Implementation Files
|
||||||
|
|
||||||
|
| File | Purpose |
|
||||||
|
|------|---------|
|
||||||
|
| `src/services/crawl-rotator.ts` | `BrowserFingerprint` includes full header config |
|
||||||
|
| `src/platforms/dutchie/client.ts` | Build headers from fingerprint, use curl-impersonate |
|
||||||
|
| `src/services/http-fingerprint.ts` | Header ordering per browser (NEW) |
|
||||||
Reference in New Issue
Block a user